Message ID | 421d18942d6ad28625530a8b3247595dc05eb100.1703110747.git.dxu@dxuuu.xyz (mailing list archive) |
---|---|
State | Not Applicable |
Delegated to: | BPF |
Headers | show |
Series | [dwarves] pahole: Inject kfunc decl tags into BTF | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Not a local patch |
On Wed, Dec 20, 2023 at 03:19:52PM -0700, Daniel Xu wrote: > This commit teaches pahole to parse symbols in .BTF_ids section in > vmlinux and discover exported kfuncs. Pahole then takes the list of > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > This enables downstream users and tools to dynamically discover which > kfuncs are available on a system by parsing vmlinux or module BTF, both > available in /sys/kernel/btf. > > Example of encoding: > > $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l > 388 > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940 > [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static > [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1 > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > --- > btf_encoder.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 202 insertions(+) > Hmm, looking more, seems like this will pick up non-kfunc functions as well. For example, kernel/trace/bpf_trace.c: BTF_SET_START(btf_allowlist_d_path) #ifdef CONFIG_SECURITY BTF_ID(func, security_file_permission) BTF_ID(func, security_inode_getattr) BTF_ID(func, security_file_open) #endif #ifdef CONFIG_SECURITY_PATH BTF_ID(func, security_path_truncate) #endif BTF_ID(func, vfs_truncate) BTF_ID(func, vfs_fallocate) BTF_ID(func, dentry_open) BTF_ID(func, vfs_getattr) BTF_ID(func, filp_close) BTF_SET_END(btf_allowlist_d_path) Maybe we need a codemod from: BTF_ID(func, ... to: BTF_ID(kfunc, ... The change to resolve_btfids would be relatively small. Not sure what the drawbacks are yet. One way or another we'll need to annotate the kfuncs in source.
On Wed, Dec 20, 2023 at 03:19:52PM -0700, Daniel Xu wrote: > This commit teaches pahole to parse symbols in .BTF_ids section in > vmlinux and discover exported kfuncs. Pahole then takes the list of > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > This enables downstream users and tools to dynamically discover which > kfuncs are available on a system by parsing vmlinux or module BTF, both > available in /sys/kernel/btf. > > Example of encoding: > > $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l > 388 > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940 > [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static > [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1 > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> SNIP > + > +static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, const char *kfunc) > +{ > + int nr_types, type_id, err = -1; > + struct btf *btf = encoder->btf; could we store the kuncs in sorted array (by name) and iterate all IDs just once while doing the bsearch for the name over the kfuncs array > + > + nr_types = btf__type_cnt(btf); > + for (type_id = 1; type_id < nr_types; type_id++) { > + const struct btf_type *type; > + const char *name; > + > + type = btf__type_by_id(btf, type_id); > + if (!type) { > + fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n", > + __func__, type_id); > + goto out; > + } > + > + if (!btf_is_func(type)) > + continue; > + > + name = btf__name_by_offset(btf, type->name_off); > + if (!name) { > + fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n", > + __func__, type_id); > + goto out; > + } > + > + if (strcmp(name, kfunc)) > + continue; > + > + err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, type_id, -1); > + if (err < 0) { > + fprintf(stderr, "%s: failed to insert kfunc decl tag for '%s': %d\n", > + __func__, kfunc, err); > + goto out; > + } > + > + err = 0; > + break; > + } > + > +out: > + return err; > +} > + > +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) > +{ > + const char *filename = encoder->filename; > + GElf_Shdr shdr_mem, *shdr; > + int symbols_shndx = -1; > + int idlist_shndx = -1; > + Elf_Scn *scn = NULL; > + Elf_Data *symbols; > + int fd, err = -1; > + size_t strtabidx; > + Elf *elf = NULL; > + size_t strndx; > + char *secname; > + int nr_syms; > + int i = 0; > + > + fd = open(filename, O_RDONLY); > + if (fd < 0) { > + fprintf(stderr, "Cannot open %s\n", filename); > + goto out; > + } > + > + if (elf_version(EV_CURRENT) == EV_NONE) { > + elf_error("Cannot set libelf version"); > + goto out; > + } > + > + elf = elf_begin(fd, ELF_C_READ, NULL); > + if (elf == NULL) { > + elf_error("Cannot update ELF file"); > + goto out; > + } SNIP > + } > + free(kfunc); > + } > + > + err = 0; > +out: leaking fd and elf object (elf_end) jirka > + return err; > +} > + > int btf_encoder__encode(struct btf_encoder *encoder) > { > int err; > @@ -1366,6 +1563,11 @@ int btf_encoder__encode(struct btf_encoder *encoder) > if (btf__type_cnt(encoder->btf) == 1) > return 0; > > + if (btf_encoder__tag_kfuncs(encoder)) { > + fprintf(stderr, "%s: failed to tag kfuncs!\n", __func__); > + return -1; > + } > + > if (btf__dedup(encoder->btf, NULL)) { > fprintf(stderr, "%s: btf__dedup failed!\n", __func__); > return -1; > -- > 2.42.1 >
On Wed, Dec 20, 2023 at 11:37:01PM -0700, Daniel Xu wrote: > On Wed, Dec 20, 2023 at 03:19:52PM -0700, Daniel Xu wrote: > > This commit teaches pahole to parse symbols in .BTF_ids section in > > vmlinux and discover exported kfuncs. Pahole then takes the list of > > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > > > This enables downstream users and tools to dynamically discover which > > kfuncs are available on a system by parsing vmlinux or module BTF, both > > available in /sys/kernel/btf. > > > > Example of encoding: > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l > > 388 > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940 > > [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static > > [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1 > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > > --- > > btf_encoder.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 202 insertions(+) > > > > Hmm, looking more, seems like this will pick up non-kfunc functions as > well. For example, kernel/trace/bpf_trace.c: > > > BTF_SET_START(btf_allowlist_d_path) > #ifdef CONFIG_SECURITY > BTF_ID(func, security_file_permission) > BTF_ID(func, security_inode_getattr) > BTF_ID(func, security_file_open) > #endif > #ifdef CONFIG_SECURITY_PATH > BTF_ID(func, security_path_truncate) > #endif > BTF_ID(func, vfs_truncate) > BTF_ID(func, vfs_fallocate) > BTF_ID(func, dentry_open) > BTF_ID(func, vfs_getattr) > BTF_ID(func, filp_close) > BTF_SET_END(btf_allowlist_d_path) you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists, which are bounded by __BTF_ID__set8__<name> symbols, which also provide size __BTF_ID__func_* symbol that has address inside the SET8 list is kfunc > > Maybe we need a codemod from: > > BTF_ID(func, ... > > to: > > BTF_ID(kfunc, ... I think it's better to keep just 'func' and not to do anything special for kfuncs in resolve_btfids logic to keep it simple also it's going to be already in pahole so if we want to make a fix in future you need to change pahole, resolve_btfids and possibly also kernel jirka > > > The change to resolve_btfids would be relatively small. Not sure what > the drawbacks are yet. > > One way or another we'll need to annotate the kfuncs in source.
On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote: > you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists, > which are bounded by __BTF_ID__set8__<name> symbols, which also provide size +1 > > > > Maybe we need a codemod from: > > > > BTF_ID(func, ... > > > > to: > > > > BTF_ID(kfunc, ... > > I think it's better to keep just 'func' and not to do anything special for > kfuncs in resolve_btfids logic to keep it simple > > also it's going to be already in pahole so if we want to make a fix in future > you need to change pahole, resolve_btfids and possibly also kernel I still don't understand why you guys want to add it to vmlinux BTF. The kernel has no use in this additional data. It already knows about all kfuncs. This extra memory is a waste of space from kernel pov. Unless I am missing something. imo this logic belongs in bpftool only. It can dump vmlinux BTF and emit __ksym protos into vmlinux.h
On 21/12/2023 17:05, Alexei Starovoitov wrote: > On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote: >> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists, >> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size > > +1 > >>> >>> Maybe we need a codemod from: >>> >>> BTF_ID(func, ... >>> >>> to: >>> >>> BTF_ID(kfunc, ... >> >> I think it's better to keep just 'func' and not to do anything special for >> kfuncs in resolve_btfids logic to keep it simple >> >> also it's going to be already in pahole so if we want to make a fix in future >> you need to change pahole, resolve_btfids and possibly also kernel > > I still don't understand why you guys want to add it to vmlinux BTF. > The kernel has no use in this additional data. > It already knows about all kfuncs. > This extra memory is a waste of space from kernel pov. > Unless I am missing something. > > imo this logic belongs in bpftool only. > It can dump vmlinux BTF and emit __ksym protos into vmlinux.h > If the goal is to have bpftool detect all kfuncs, would having a BPF kfunc iterator that bpftool could use to iterate over registered kfuncs work perhaps?
On Thu, Dec 21, 2023 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote: > > On 21/12/2023 17:05, Alexei Starovoitov wrote: > > On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote: > >> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists, > >> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size > > > > +1 > > > >>> > >>> Maybe we need a codemod from: > >>> > >>> BTF_ID(func, ... > >>> > >>> to: > >>> > >>> BTF_ID(kfunc, ... > >> > >> I think it's better to keep just 'func' and not to do anything special for > >> kfuncs in resolve_btfids logic to keep it simple > >> > >> also it's going to be already in pahole so if we want to make a fix in future > >> you need to change pahole, resolve_btfids and possibly also kernel > > > > I still don't understand why you guys want to add it to vmlinux BTF. > > The kernel has no use in this additional data. > > It already knows about all kfuncs. > > This extra memory is a waste of space from kernel pov. > > Unless I am missing something. > > > > imo this logic belongs in bpftool only. > > It can dump vmlinux BTF and emit __ksym protos into vmlinux.h > > > > If the goal is to have bpftool detect all kfuncs, would having a BPF > kfunc iterator that bpftool could use to iterate over registered kfuncs > work perhaps? The kernel code ? Why ? bpftool can do the same thing as this patch. Iterate over set8 in vmlinux elf.
Hi Alexei, On Thu, Dec 21, 2023 at 10:07:33AM -0800, Alexei Starovoitov wrote: > On Thu, Dec 21, 2023 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote: > > > > On 21/12/2023 17:05, Alexei Starovoitov wrote: > > > On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote: > > >> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists, > > >> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size > > > > > > +1 > > > > > >>> > > >>> Maybe we need a codemod from: > > >>> > > >>> BTF_ID(func, ... > > >>> > > >>> to: > > >>> > > >>> BTF_ID(kfunc, ... > > >> > > >> I think it's better to keep just 'func' and not to do anything special for > > >> kfuncs in resolve_btfids logic to keep it simple > > >> > > >> also it's going to be already in pahole so if we want to make a fix in future > > >> you need to change pahole, resolve_btfids and possibly also kernel > > > > > > I still don't understand why you guys want to add it to vmlinux BTF. > > > The kernel has no use in this additional data. > > > It already knows about all kfuncs. > > > This extra memory is a waste of space from kernel pov. > > > Unless I am missing something. > > > > > > imo this logic belongs in bpftool only. > > > It can dump vmlinux BTF and emit __ksym protos into vmlinux.h > > > > > > > If the goal is to have bpftool detect all kfuncs, would having a BPF > > kfunc iterator that bpftool could use to iterate over registered kfuncs > > work perhaps? > > The kernel code ? Why ? > bpftool can do the same thing as this patch. Iterate over set8 in vmlinux elf. I think you're right for vmlinux -- bpftool can look at the elf file on a running system. But I'm not sure it works for modules. IIUC, the actual module ELF can go away while the kernel holds onto the memory (as with initramfs). And even if that wasn't the case, in containerized environments you may not be able to always see /lib/modules or similar. That's assuming there's not a way to get the module as with vmlinux /proc/kcore that I don't know about. Looking at the tree, there's about 20k export symbols: $ rg EXPORT_SYMBOL_GPL | wc -l 19471 Assuming kfuncs get there, that's about 312K of memory: >>> (20000 * (12 + 4)) >> 10 312 So maybe a worthwhile tradeoff? In general though, I kinda like having it in BTF cuz it's a structured way to export metadata. IMO the link between kfuncs and BTF_ID_set8 is kinda weak and might best be left as an implementation detail that kernel devs can keep an eye on. Thanks, Daniel
On 12/20/23 5:19 PM, Daniel Xu wrote: > This commit teaches pahole to parse symbols in .BTF_ids section in > vmlinux and discover exported kfuncs. Pahole then takes the list of > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > This enables downstream users and tools to dynamically discover which > kfuncs are available on a system by parsing vmlinux or module BTF, both > available in /sys/kernel/btf. > > Example of encoding: > > $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l > 388 > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940 > [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static > [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1 > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > --- > btf_encoder.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 202 insertions(+) > > diff --git a/btf_encoder.c b/btf_encoder.c > index fd04008..2697214 100644 > --- a/btf_encoder.c > +++ b/btf_encoder.c > @@ -34,6 +34,9 @@ > #include <pthread.h> > > #define BTF_ENCODER_MAX_PROTO 512 > +#define BTF_IDS_SECTION ".BTF_ids" > +#define BTF_ID_FUNC_PFX "__BTF_ID__func__" > +#define BTF_KFUNC_TYPE_TAG "kfunc" Can this be bpf_kfunc? Elaborated on elsewhere in this reply > > /* state used to do later encoding of saved functions */ > struct btf_encoder_state { > @@ -1352,6 +1355,200 @@ out: > return err; > } > > +/* > + * Parse BTF_ID symbol and return the kfunc name. > + * > + * Returns: > + * Callee-owned string containing kfunc name if successful. nit: Caller-owned, not callee-owned > + * NULL if !kfunc or on error. > + */ > +static char *get_kfunc_name(const char *sym) > +{ > + char *kfunc, *end; > + > + if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1)) > + return NULL; > + > + /* Strip prefix */ > + kfunc = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1); > + > + /* Strip suffix */ > + end = strrchr(kfunc, '_'); > + if (!end || *(end - 1) != '_') { > + free(kfunc); > + return NULL; > + } > + *(end - 1) = '\0'; > + > + return kfunc; > +} > + > +static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, const char *kfunc) > +{ > + int nr_types, type_id, err = -1; > + struct btf *btf = encoder->btf; > + > + nr_types = btf__type_cnt(btf); > + for (type_id = 1; type_id < nr_types; type_id++) { > + const struct btf_type *type; > + const char *name; > + > + type = btf__type_by_id(btf, type_id); > + if (!type) { > + fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n", > + __func__, type_id); > + goto out; > + } > + > + if (!btf_is_func(type)) > + continue; > + > + name = btf__name_by_offset(btf, type->name_off); > + if (!name) { > + fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n", > + __func__, type_id); > + goto out; > + } > + > + if (strcmp(name, kfunc)) > + continue; > + > + err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, type_id, -1); In an ideal world we'd just add this tag to __bpf_kfunc macro definition, right? Then bpftool can generate fwd decls from generated vmlinux w/o any pahole changes. But no gcc support for BTF tags, so need to do this workaround. With that in mind, instead of unconditionally adding BTF_KFUNC_TYPE_TAG to funcs in btf id sets, can this code only do so if there isn't an existing BTF_KFUNC_TYPE_TAG pointing to it? It'd require another loop over btf types to built set of already-tagged funcs, but would future-proof this work. Alternatively, if existing btf__dedup call after btf_encoder__tag_kfuncs will get rid of these extraneous "tagged types" in the scenario where one already exists, then a comment here to that effect would be appreciated. My nit about BTF_KFUNC_TYPE_TAG earlier was related: if we do want __bpf_kfunc macro to add such a tag, might as well make the tag 'bpf_kfunc' so it's easier for unfamiliar folks to understand.
On Thu, Dec 21, 2023 at 10:18 AM Daniel Xu <dxu@dxuuu.xyz> wrote: > > Hi Alexei, > > On Thu, Dec 21, 2023 at 10:07:33AM -0800, Alexei Starovoitov wrote: > > On Thu, Dec 21, 2023 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote: > > > > > > On 21/12/2023 17:05, Alexei Starovoitov wrote: > > > > On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote: > > > >> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists, > > > >> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size > > > > > > > > +1 > > > > > > > >>> > > > >>> Maybe we need a codemod from: > > > >>> > > > >>> BTF_ID(func, ... > > > >>> > > > >>> to: > > > >>> > > > >>> BTF_ID(kfunc, ... > > > >> > > > >> I think it's better to keep just 'func' and not to do anything special for > > > >> kfuncs in resolve_btfids logic to keep it simple > > > >> > > > >> also it's going to be already in pahole so if we want to make a fix in future > > > >> you need to change pahole, resolve_btfids and possibly also kernel > > > > > > > > I still don't understand why you guys want to add it to vmlinux BTF. > > > > The kernel has no use in this additional data. > > > > It already knows about all kfuncs. > > > > This extra memory is a waste of space from kernel pov. > > > > Unless I am missing something. > > > > > > > > imo this logic belongs in bpftool only. > > > > It can dump vmlinux BTF and emit __ksym protos into vmlinux.h > > > > > > > > > > If the goal is to have bpftool detect all kfuncs, would having a BPF > > > kfunc iterator that bpftool could use to iterate over registered kfuncs > > > work perhaps? > > > > The kernel code ? Why ? > > bpftool can do the same thing as this patch. Iterate over set8 in vmlinux elf. > > I think you're right for vmlinux -- bpftool can look at the elf file on > a running system. But I'm not sure it works for modules. > > IIUC, the actual module ELF can go away while the kernel holds onto the > memory (as with initramfs). And even if that wasn't the case, in > containerized environments you may not be able to always see > /lib/modules or similar. Indeed. Access to .ko files may be difficult even for full root without containers. What is vmlinux BTF before/after for our current set of kfuncs ?
On 21/12/2023 18:07, Alexei Starovoitov wrote: > On Thu, Dec 21, 2023 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote: >> >> On 21/12/2023 17:05, Alexei Starovoitov wrote: >>> On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote: >>>> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists, >>>> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size >>> >>> +1 >>> >>>>> >>>>> Maybe we need a codemod from: >>>>> >>>>> BTF_ID(func, ... >>>>> >>>>> to: >>>>> >>>>> BTF_ID(kfunc, ... >>>> >>>> I think it's better to keep just 'func' and not to do anything special for >>>> kfuncs in resolve_btfids logic to keep it simple >>>> >>>> also it's going to be already in pahole so if we want to make a fix in future >>>> you need to change pahole, resolve_btfids and possibly also kernel >>> >>> I still don't understand why you guys want to add it to vmlinux BTF. >>> The kernel has no use in this additional data. >>> It already knows about all kfuncs. >>> This extra memory is a waste of space from kernel pov. >>> Unless I am missing something. >>> >>> imo this logic belongs in bpftool only. >>> It can dump vmlinux BTF and emit __ksym protos into vmlinux.h >>> >> >> If the goal is to have bpftool detect all kfuncs, would having a BPF >> kfunc iterator that bpftool could use to iterate over registered kfuncs >> work perhaps? > > The kernel code ? Why ? > bpftool can do the same thing as this patch. Iterate over set8 in vmlinux elf. Most distros don't have the vmlinux binary easily available; it needs to be either downloaded as part of debuginfo packages or uncompressed from vmlinuz.
On Fri, Dec 22, 2023 at 09:55:09AM +0000, Alan Maguire wrote: > On 21/12/2023 18:07, Alexei Starovoitov wrote: > > On Thu, Dec 21, 2023 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote: > >> > >> On 21/12/2023 17:05, Alexei Starovoitov wrote: > >>> On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote: > >>>> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists, > >>>> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size > >>> > >>> +1 > >>> > >>>>> > >>>>> Maybe we need a codemod from: > >>>>> > >>>>> BTF_ID(func, ... > >>>>> > >>>>> to: > >>>>> > >>>>> BTF_ID(kfunc, ... > >>>> > >>>> I think it's better to keep just 'func' and not to do anything special for > >>>> kfuncs in resolve_btfids logic to keep it simple > >>>> > >>>> also it's going to be already in pahole so if we want to make a fix in future > >>>> you need to change pahole, resolve_btfids and possibly also kernel > >>> > >>> I still don't understand why you guys want to add it to vmlinux BTF. > >>> The kernel has no use in this additional data. > >>> It already knows about all kfuncs. > >>> This extra memory is a waste of space from kernel pov. > >>> Unless I am missing something. > >>> > >>> imo this logic belongs in bpftool only. > >>> It can dump vmlinux BTF and emit __ksym protos into vmlinux.h > >>> > >> > >> If the goal is to have bpftool detect all kfuncs, would having a BPF > >> kfunc iterator that bpftool could use to iterate over registered kfuncs > >> work perhaps? > > > > The kernel code ? Why ? > > bpftool can do the same thing as this patch. Iterate over set8 in vmlinux elf. > > Most distros don't have the vmlinux binary easily available; it needs to > be either downloaded as part of debuginfo packages or uncompressed from > vmlinuz. would reading the /proc/kcore be an option? I'm under impression it's default for distros but I might be wrong jirka
On 22/12/2023 12:46, Jiri Olsa wrote: > On Fri, Dec 22, 2023 at 09:55:09AM +0000, Alan Maguire wrote: >> On 21/12/2023 18:07, Alexei Starovoitov wrote: >>> On Thu, Dec 21, 2023 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote: >>>> >>>> On 21/12/2023 17:05, Alexei Starovoitov wrote: >>>>> On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote: >>>>>> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists, >>>>>> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size >>>>> >>>>> +1 >>>>> >>>>>>> >>>>>>> Maybe we need a codemod from: >>>>>>> >>>>>>> BTF_ID(func, ... >>>>>>> >>>>>>> to: >>>>>>> >>>>>>> BTF_ID(kfunc, ... >>>>>> >>>>>> I think it's better to keep just 'func' and not to do anything special for >>>>>> kfuncs in resolve_btfids logic to keep it simple >>>>>> >>>>>> also it's going to be already in pahole so if we want to make a fix in future >>>>>> you need to change pahole, resolve_btfids and possibly also kernel >>>>> >>>>> I still don't understand why you guys want to add it to vmlinux BTF. >>>>> The kernel has no use in this additional data. >>>>> It already knows about all kfuncs. >>>>> This extra memory is a waste of space from kernel pov. >>>>> Unless I am missing something. >>>>> >>>>> imo this logic belongs in bpftool only. >>>>> It can dump vmlinux BTF and emit __ksym protos into vmlinux.h >>>>> >>>> >>>> If the goal is to have bpftool detect all kfuncs, would having a BPF >>>> kfunc iterator that bpftool could use to iterate over registered kfuncs >>>> work perhaps? >>> >>> The kernel code ? Why ? >>> bpftool can do the same thing as this patch. Iterate over set8 in vmlinux elf. >> >> Most distros don't have the vmlinux binary easily available; it needs to >> be either downloaded as part of debuginfo packages or uncompressed from >> vmlinuz. > > would reading the /proc/kcore be an option? I'm under impression it's > default for distros but I might be wrong > Good idea, I think it would be an option alright. From a user perspective though can we always assume the BTF id sets of kfuncs always match the set of available kfuncs? If the goal of this feature is to see which kfuncs are available to be used, we'd need some form of active participation by the kernel in producing the registered list I think. But again, depends what the goal is here. Alan > jirka
On Thu, Dec 21, 2023 at 04:52:54PM -0800, Alexei Starovoitov wrote: > On Thu, Dec 21, 2023 at 10:18 AM Daniel Xu <dxu@dxuuu.xyz> wrote: > > > > Hi Alexei, > > > > On Thu, Dec 21, 2023 at 10:07:33AM -0800, Alexei Starovoitov wrote: > > > On Thu, Dec 21, 2023 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote: > > > > > > > > On 21/12/2023 17:05, Alexei Starovoitov wrote: > > > > > On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote: > > > > >> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists, > > > > >> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size > > > > > > > > > > +1 > > > > > > > > > >>> > > > > >>> Maybe we need a codemod from: > > > > >>> > > > > >>> BTF_ID(func, ... > > > > >>> > > > > >>> to: > > > > >>> > > > > >>> BTF_ID(kfunc, ... > > > > >> > > > > >> I think it's better to keep just 'func' and not to do anything special for > > > > >> kfuncs in resolve_btfids logic to keep it simple > > > > >> > > > > >> also it's going to be already in pahole so if we want to make a fix in future > > > > >> you need to change pahole, resolve_btfids and possibly also kernel > > > > > > > > > > I still don't understand why you guys want to add it to vmlinux BTF. > > > > > The kernel has no use in this additional data. > > > > > It already knows about all kfuncs. > > > > > This extra memory is a waste of space from kernel pov. > > > > > Unless I am missing something. > > > > > > > > > > imo this logic belongs in bpftool only. > > > > > It can dump vmlinux BTF and emit __ksym protos into vmlinux.h > > > > > > > > > > > > > If the goal is to have bpftool detect all kfuncs, would having a BPF > > > > kfunc iterator that bpftool could use to iterate over registered kfuncs > > > > work perhaps? > > > > > > The kernel code ? Why ? > > > bpftool can do the same thing as this patch. Iterate over set8 in vmlinux elf. > > > > I think you're right for vmlinux -- bpftool can look at the elf file on > > a running system. But I'm not sure it works for modules. > > > > IIUC, the actual module ELF can go away while the kernel holds onto the > > memory (as with initramfs). And even if that wasn't the case, in > > containerized environments you may not be able to always see > > /lib/modules or similar. > > Indeed. Access to .ko files may be difficult even for full root > without containers. > > What is vmlinux BTF before/after for our current set of kfuncs ? Before: $ pahole -J --btf_gen_floats -j --lang_exclude=rust --skip_encoding_btf_inconsistent_proto --btf_gen_optimized .tmp_vmlinux.btf $ ls -l .tmp_vmlinux.btf -rwxr-xr-x 1 dxu dxu 1159241688 Dec 22 13:44 .tmp_vmlinux.btf* After: $ /home/dxu/dev/pahole/build/pahole -J --btf_gen_floats -j --lang_exclude=rust --skip_encoding_btf_inconsistent_proto --btf_gen_optimized .tmp_vmlinux.btf $ ls -l .tmp_vmlinux.btf -rwxr-xr-x 1 dxu dxu 1159248104 Dec 22 13:47 .tmp_vmlinux.btf* 1159248104 - 1159241688 = 6416, so ~17B per kfunc (although we are currently overcounting kfuncs by a bit). Btw, for some reason the file size is not quite reproducible. I'm seeing ~500B deltas every time I rerun the same command. Maybe I'm doing something wrong? Not sure. Thanks, Daniel
On Fri, Dec 22, 2023 at 04:24:35PM +0000, Alan Maguire wrote: > On 22/12/2023 12:46, Jiri Olsa wrote: > > On Fri, Dec 22, 2023 at 09:55:09AM +0000, Alan Maguire wrote: > >> On 21/12/2023 18:07, Alexei Starovoitov wrote: > >>> On Thu, Dec 21, 2023 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote: > >>>> > >>>> On 21/12/2023 17:05, Alexei Starovoitov wrote: > >>>>> On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote: > >>>>>> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists, > >>>>>> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size > >>>>> > >>>>> +1 > >>>>> > >>>>>>> > >>>>>>> Maybe we need a codemod from: > >>>>>>> > >>>>>>> BTF_ID(func, ... > >>>>>>> > >>>>>>> to: > >>>>>>> > >>>>>>> BTF_ID(kfunc, ... > >>>>>> > >>>>>> I think it's better to keep just 'func' and not to do anything special for > >>>>>> kfuncs in resolve_btfids logic to keep it simple > >>>>>> > >>>>>> also it's going to be already in pahole so if we want to make a fix in future > >>>>>> you need to change pahole, resolve_btfids and possibly also kernel > >>>>> > >>>>> I still don't understand why you guys want to add it to vmlinux BTF. > >>>>> The kernel has no use in this additional data. > >>>>> It already knows about all kfuncs. > >>>>> This extra memory is a waste of space from kernel pov. > >>>>> Unless I am missing something. > >>>>> > >>>>> imo this logic belongs in bpftool only. > >>>>> It can dump vmlinux BTF and emit __ksym protos into vmlinux.h > >>>>> > >>>> > >>>> If the goal is to have bpftool detect all kfuncs, would having a BPF > >>>> kfunc iterator that bpftool could use to iterate over registered kfuncs > >>>> work perhaps? > >>> > >>> The kernel code ? Why ? > >>> bpftool can do the same thing as this patch. Iterate over set8 in vmlinux elf. > >> > >> Most distros don't have the vmlinux binary easily available; it needs to > >> be either downloaded as part of debuginfo packages or uncompressed from > >> vmlinuz. > > > > would reading the /proc/kcore be an option? I'm under impression it's > > default for distros but I might be wrong > > > > Good idea, I think it would be an option alright. Yeah, /proc/kcore would work, but only for vmlinux. I mentioned in the other thread about how it probably wouldn't work for kfuncs defined in modules. > From a user > perspective though can we always assume the BTF id sets of kfuncs always > match the set of available kfuncs? If the goal of this feature is to see > which kfuncs are available to be used, we'd need some form of active > participation by the kernel in producing the registered list I think. > But again, depends what the goal is here. The goal is to query for available kfuncs. I also mentioned in the other thread the link between BTF id sets and available kfuncs is not super obvious. It might be ok for now. But I'll defer to people more familiar with it. BPF iterator would probably work, but it feels a bit complex for what is mostly static data. And for data that doesn't really need to be introspected (just BTF IDs). So I'm not sure it's the most ergonomic approach for userspace to consume. Maybe some kind fo sysfs file would be better? Could be as simple as a space separated list of BTF IDs that reference kfuncs. Thanks, Daniel
On Fri, Dec 22, 2023 at 12:55 PM Daniel Xu <dxu@dxuuu.xyz> wrote: > > > > From a user > > perspective though can we always assume the BTF id sets of kfuncs always > > match the set of available kfuncs? If the goal of this feature is to see > > which kfuncs are available to be used, we'd need some form of active > > participation by the kernel in producing the registered list I think. > > But again, depends what the goal is here. > > The goal is to query for available kfuncs. The list of available kfuncs across all modules and prog types doesn't really help the users to know whether their program will be accepted if they use this kfunc. Today kfuncs are grouped by prog type, but soon it will be more fine grained. We need to allow different set of kfuncs depending on struct_ops attach point and potentially flags at hook attach point. So the list of kfuncs is similar to the list of helpers. Adding kfunc btf_decl_tag to vmlinux BTF and module BTF only helps with generation of vmlinux.h or module.h so that the users don't need to manually write __ksym protos in their programs. Analogous to generation of bpf_helper_defs.h. re: your other email. Extra ~6k for vmlinux BTF is fine. As Dave suggested in the other email let's make sure that dedup removes the duplicated decl_tags or pahole doesn't add another one if btf_decl_tag is already present in __bpf_kfunc macro. pahole will essentially be a workaround for lack of btf_decl_tag in GCC.
On Thu, Dec 21, 2023 at 09:35:20AM +0100, Jiri Olsa wrote: > On Wed, Dec 20, 2023 at 03:19:52PM -0700, Daniel Xu wrote: > > This commit teaches pahole to parse symbols in .BTF_ids section in > > vmlinux and discover exported kfuncs. Pahole then takes the list of > > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > > > This enables downstream users and tools to dynamically discover which > > kfuncs are available on a system by parsing vmlinux or module BTF, both > > available in /sys/kernel/btf. > > > > Example of encoding: > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l > > 388 > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940 > > [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static > > [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1 > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > > SNIP > > > + > > +static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, const char *kfunc) > > +{ > > + int nr_types, type_id, err = -1; > > + struct btf *btf = encoder->btf; > > could we store the kuncs in sorted array (by name) and iterate all IDs > just once while doing the bsearch for the name over the kfuncs array Ack, will take a look. > > > + > > + nr_types = btf__type_cnt(btf); > > + for (type_id = 1; type_id < nr_types; type_id++) { > > + const struct btf_type *type; > > + const char *name; > > + > > + type = btf__type_by_id(btf, type_id); > > + if (!type) { > > + fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n", > > + __func__, type_id); > > + goto out; > > + } > > + > > + if (!btf_is_func(type)) > > + continue; > > + > > + name = btf__name_by_offset(btf, type->name_off); > > + if (!name) { > > + fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n", > > + __func__, type_id); > > + goto out; > > + } > > + > > + if (strcmp(name, kfunc)) > > + continue; > > + > > + err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, type_id, -1); > > + if (err < 0) { > > + fprintf(stderr, "%s: failed to insert kfunc decl tag for '%s': %d\n", > > + __func__, kfunc, err); > > + goto out; > > + } > > + > > + err = 0; > > + break; > > + } > > + > > +out: > > + return err; > > +} > > + > > +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) > > +{ > > + const char *filename = encoder->filename; > > + GElf_Shdr shdr_mem, *shdr; > > + int symbols_shndx = -1; > > + int idlist_shndx = -1; > > + Elf_Scn *scn = NULL; > > + Elf_Data *symbols; > > + int fd, err = -1; > > + size_t strtabidx; > > + Elf *elf = NULL; > > + size_t strndx; > > + char *secname; > > + int nr_syms; > > + int i = 0; > > + > > + fd = open(filename, O_RDONLY); > > + if (fd < 0) { > > + fprintf(stderr, "Cannot open %s\n", filename); > > + goto out; > > + } > > + > > + if (elf_version(EV_CURRENT) == EV_NONE) { > > + elf_error("Cannot set libelf version"); > > + goto out; > > + } > > + > > + elf = elf_begin(fd, ELF_C_READ, NULL); > > + if (elf == NULL) { > > + elf_error("Cannot update ELF file"); > > + goto out; > > + } > > SNIP > > > + } > > + free(kfunc); > > + } > > + > > + err = 0; > > +out: > > leaking fd and elf object (elf_end) Good catch thanks. Been writing too much rust... Thanks, Daniel
Hi Dave, On Thu, Dec 21, 2023 at 05:57:18PM -0500, David Marchevsky wrote: > > > On 12/20/23 5:19 PM, Daniel Xu wrote: > > This commit teaches pahole to parse symbols in .BTF_ids section in > > vmlinux and discover exported kfuncs. Pahole then takes the list of > > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > > > This enables downstream users and tools to dynamically discover which > > kfuncs are available on a system by parsing vmlinux or module BTF, both > > available in /sys/kernel/btf. > > > > Example of encoding: > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l > > 388 > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940 > > [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static > > [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1 > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > > --- > > btf_encoder.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 202 insertions(+) > > > > diff --git a/btf_encoder.c b/btf_encoder.c > > index fd04008..2697214 100644 > > --- a/btf_encoder.c > > +++ b/btf_encoder.c > > @@ -34,6 +34,9 @@ > > #include <pthread.h> > > > > #define BTF_ENCODER_MAX_PROTO 512 > > +#define BTF_IDS_SECTION ".BTF_ids" > > +#define BTF_ID_FUNC_PFX "__BTF_ID__func__" > > +#define BTF_KFUNC_TYPE_TAG "kfunc" > > Can this be bpf_kfunc? Elaborated on elsewhere in this reply Yeah, that's better. Good idea. > > > > > /* state used to do later encoding of saved functions */ > > struct btf_encoder_state { > > @@ -1352,6 +1355,200 @@ out: > > return err; > > } > > > > +/* > > + * Parse BTF_ID symbol and return the kfunc name. > > + * > > + * Returns: > > + * Callee-owned string containing kfunc name if successful. > > nit: Caller-owned, not callee-owned Fixed, thanks. > > > + * NULL if !kfunc or on error. > > + */ > > +static char *get_kfunc_name(const char *sym) > > +{ > > + char *kfunc, *end; > > + > > + if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1)) > > + return NULL; > > + > > + /* Strip prefix */ > > + kfunc = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1); > > + > > + /* Strip suffix */ > > + end = strrchr(kfunc, '_'); > > + if (!end || *(end - 1) != '_') { > > + free(kfunc); > > + return NULL; > > + } > > + *(end - 1) = '\0'; > > + > > + return kfunc; > > +} > > + > > +static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, const char *kfunc) > > +{ > > + int nr_types, type_id, err = -1; > > + struct btf *btf = encoder->btf; > > + > > + nr_types = btf__type_cnt(btf); > > + for (type_id = 1; type_id < nr_types; type_id++) { > > + const struct btf_type *type; > > + const char *name; > > + > > + type = btf__type_by_id(btf, type_id); > > + if (!type) { > > + fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n", > > + __func__, type_id); > > + goto out; > > + } > > + > > + if (!btf_is_func(type)) > > + continue; > > + > > + name = btf__name_by_offset(btf, type->name_off); > > + if (!name) { > > + fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n", > > + __func__, type_id); > > + goto out; > > + } > > + > > + if (strcmp(name, kfunc)) > > + continue; > > + > > + err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, type_id, -1); > > In an ideal world we'd just add this tag to __bpf_kfunc macro > definition, right? Then bpftool can generate fwd decls from generated > vmlinux w/o any pahole changes. But no gcc support for BTF tags, so need > to do this workaround. > > With that in mind, instead of unconditionally adding BTF_KFUNC_TYPE_TAG > to funcs in btf id sets, can this code only do so if there isn't an > existing BTF_KFUNC_TYPE_TAG pointing to it? It'd require another loop > over btf types to built set of already-tagged funcs, but would > future-proof this work. Alternatively, if existing btf__dedup call after > btf_encoder__tag_kfuncs will get rid of these extraneous "tagged types" > in the scenario where one already exists, then a comment here to that > effect would be appreciated. Yeah, I placed the call to btf_encoder__tag_kfuncs() right before the call to btf__dedup() in btf_encoder__encode() cuz I was noticing duplicates. After moving the call to the current location, I noticed the duplicates went away. I'll leave a comment to that effect. Thanks, Daniel
On Thu, Dec 21, 2023 at 09:35:28AM +0100, Jiri Olsa wrote: > On Wed, Dec 20, 2023 at 11:37:01PM -0700, Daniel Xu wrote: > > On Wed, Dec 20, 2023 at 03:19:52PM -0700, Daniel Xu wrote: > > > This commit teaches pahole to parse symbols in .BTF_ids section in > > > vmlinux and discover exported kfuncs. Pahole then takes the list of > > > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > > > > > This enables downstream users and tools to dynamically discover which > > > kfuncs are available on a system by parsing vmlinux or module BTF, both > > > available in /sys/kernel/btf. > > > > > > Example of encoding: > > > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l > > > 388 > > > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940 > > > [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static > > > [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1 > > > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > > > --- > > > btf_encoder.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++ > > > 1 file changed, 202 insertions(+) > > > > > > > Hmm, looking more, seems like this will pick up non-kfunc functions as > > well. For example, kernel/trace/bpf_trace.c: > > > > > > BTF_SET_START(btf_allowlist_d_path) > > #ifdef CONFIG_SECURITY > > BTF_ID(func, security_file_permission) > > BTF_ID(func, security_inode_getattr) > > BTF_ID(func, security_file_open) > > #endif > > #ifdef CONFIG_SECURITY_PATH > > BTF_ID(func, security_path_truncate) > > #endif > > BTF_ID(func, vfs_truncate) > > BTF_ID(func, vfs_fallocate) > > BTF_ID(func, dentry_open) > > BTF_ID(func, vfs_getattr) > > BTF_ID(func, filp_close) > > BTF_SET_END(btf_allowlist_d_path) > > you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists, > which are bounded by __BTF_ID__set8__<name> symbols, which also provide size > > __BTF_ID__func_* symbol that has address inside the SET8 list is kfunc I managed to add that logic. But I did some spot checks and it looks like SET8 lists are not quite limited to kfuncs. For example, in net/mptcp/bpf.c: BTF_SET8_START(bpf_mptcp_fmodret_ids) BTF_ID_FLAGS(func, update_socket_protocol) BTF_SET8_END(bpf_mptcp_fmodret_ids) static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = { .owner = THIS_MODULE, .set = &bpf_mptcp_fmodret_ids, }; And in net/socket.c: __bpf_hook_start(); __weak noinline int update_socket_protocol(int family, int type, int protocol) { return protocol; } __bpf_hook_end(); IOW, update_socket_protocol() is a hook, not a kfunc. > > > > > Maybe we need a codemod from: > > > > BTF_ID(func, ... > > > > to: > > > > BTF_ID(kfunc, ... > > I think it's better to keep just 'func' and not to do anything special for > kfuncs in resolve_btfids logic to keep it simple > > also it's going to be already in pahole so if we want to make a fix in future > you need to change pahole, resolve_btfids and possibly also kernel So maybe special annotation is still needed. WDYT? [..] Thanks, Daniel
On Tue, Jan 02, 2024 at 05:56:02PM -0700, Daniel Xu wrote: > On Thu, Dec 21, 2023 at 09:35:28AM +0100, Jiri Olsa wrote: > > On Wed, Dec 20, 2023 at 11:37:01PM -0700, Daniel Xu wrote: > > > On Wed, Dec 20, 2023 at 03:19:52PM -0700, Daniel Xu wrote: > > > > This commit teaches pahole to parse symbols in .BTF_ids section in > > > > vmlinux and discover exported kfuncs. Pahole then takes the list of > > > > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > > > > > > > This enables downstream users and tools to dynamically discover which > > > > kfuncs are available on a system by parsing vmlinux or module BTF, both > > > > available in /sys/kernel/btf. > > > > > > > > Example of encoding: > > > > > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l > > > > 388 > > > > > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940 > > > > [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static > > > > [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1 > > > > > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > > > > --- > > > > btf_encoder.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++ > > > > 1 file changed, 202 insertions(+) > > > > > > > > > > Hmm, looking more, seems like this will pick up non-kfunc functions as > > > well. For example, kernel/trace/bpf_trace.c: > > > > > > > > > BTF_SET_START(btf_allowlist_d_path) > > > #ifdef CONFIG_SECURITY > > > BTF_ID(func, security_file_permission) > > > BTF_ID(func, security_inode_getattr) > > > BTF_ID(func, security_file_open) > > > #endif > > > #ifdef CONFIG_SECURITY_PATH > > > BTF_ID(func, security_path_truncate) > > > #endif > > > BTF_ID(func, vfs_truncate) > > > BTF_ID(func, vfs_fallocate) > > > BTF_ID(func, dentry_open) > > > BTF_ID(func, vfs_getattr) > > > BTF_ID(func, filp_close) > > > BTF_SET_END(btf_allowlist_d_path) > > > > you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists, > > which are bounded by __BTF_ID__set8__<name> symbols, which also provide size > > > > __BTF_ID__func_* symbol that has address inside the SET8 list is kfunc > > I managed to add that logic. But I did some spot checks and it looks > like SET8 lists are not quite limited to kfuncs. For example, in > net/mptcp/bpf.c: > > BTF_SET8_START(bpf_mptcp_fmodret_ids) > BTF_ID_FLAGS(func, update_socket_protocol) > BTF_SET8_END(bpf_mptcp_fmodret_ids) > > static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = { > .owner = THIS_MODULE, > .set = &bpf_mptcp_fmodret_ids, > }; > > And in net/socket.c: > > __bpf_hook_start(); > __weak noinline int update_socket_protocol(int family, int type, int protocol) > { > return protocol; > } > __bpf_hook_end(); > > IOW, update_socket_protocol() is a hook, not a kfunc. hum, right.. we use kfuncs set8 registration now also to mark attachable hooks for fmodret programs, see [1] there are similar hooks registered in HID code as well [1] 5b481acab4ce bpf: do not rely on ALLOW_ERROR_INJECTION for fmod_ret) > > > > > > > > > Maybe we need a codemod from: > > > > > > BTF_ID(func, ... > > > > > > to: > > > > > > BTF_ID(kfunc, ... > > > > I think it's better to keep just 'func' and not to do anything special for > > kfuncs in resolve_btfids logic to keep it simple > > > > also it's going to be already in pahole so if we want to make a fix in future > > you need to change pahole, resolve_btfids and possibly also kernel > > So maybe special annotation is still needed. WDYT? anyway, it looks like we actually do have flags field in set8 (thanks Kumar! ;-) ) struct btf_id_set8 { u32 cnt; u32 flags; struct { u32 id; u32 flags; } pairs[]; }; it's not mentioned in the commit changelog [2], but it looks like it was added just to keep data aligned, and AFAICS it's not used anywhere how about we add a flag saying this set8 has kfuncs in it jirka [2] ab21d6063c01 bpf: Introduce 8-byte BTF set
On Wed, Jan 03, 2024 at 09:48:09AM +0100, Jiri Olsa wrote: > On Tue, Jan 02, 2024 at 05:56:02PM -0700, Daniel Xu wrote: > > On Thu, Dec 21, 2023 at 09:35:28AM +0100, Jiri Olsa wrote: > > > On Wed, Dec 20, 2023 at 11:37:01PM -0700, Daniel Xu wrote: > > > > On Wed, Dec 20, 2023 at 03:19:52PM -0700, Daniel Xu wrote: > > > > > This commit teaches pahole to parse symbols in .BTF_ids section in > > > > > vmlinux and discover exported kfuncs. Pahole then takes the list of > > > > > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > > > > > > > > > This enables downstream users and tools to dynamically discover which > > > > > kfuncs are available on a system by parsing vmlinux or module BTF, both > > > > > available in /sys/kernel/btf. > > > > > > > > > > Example of encoding: > > > > > > > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l > > > > > 388 > > > > > > > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940 > > > > > [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static > > > > > [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1 > > > > > > > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> > > > > > --- > > > > > btf_encoder.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++ > > > > > 1 file changed, 202 insertions(+) > > > > > > > > > > > > > Hmm, looking more, seems like this will pick up non-kfunc functions as > > > > well. For example, kernel/trace/bpf_trace.c: > > > > > > > > > > > > BTF_SET_START(btf_allowlist_d_path) > > > > #ifdef CONFIG_SECURITY > > > > BTF_ID(func, security_file_permission) > > > > BTF_ID(func, security_inode_getattr) > > > > BTF_ID(func, security_file_open) > > > > #endif > > > > #ifdef CONFIG_SECURITY_PATH > > > > BTF_ID(func, security_path_truncate) > > > > #endif > > > > BTF_ID(func, vfs_truncate) > > > > BTF_ID(func, vfs_fallocate) > > > > BTF_ID(func, dentry_open) > > > > BTF_ID(func, vfs_getattr) > > > > BTF_ID(func, filp_close) > > > > BTF_SET_END(btf_allowlist_d_path) > > > > > > you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists, > > > which are bounded by __BTF_ID__set8__<name> symbols, which also provide size > > > > > > __BTF_ID__func_* symbol that has address inside the SET8 list is kfunc > > > > I managed to add that logic. But I did some spot checks and it looks > > like SET8 lists are not quite limited to kfuncs. For example, in > > net/mptcp/bpf.c: > > > > BTF_SET8_START(bpf_mptcp_fmodret_ids) > > BTF_ID_FLAGS(func, update_socket_protocol) > > BTF_SET8_END(bpf_mptcp_fmodret_ids) > > > > static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = { > > .owner = THIS_MODULE, > > .set = &bpf_mptcp_fmodret_ids, > > }; > > > > And in net/socket.c: > > > > __bpf_hook_start(); > > __weak noinline int update_socket_protocol(int family, int type, int protocol) > > { > > return protocol; > > } > > __bpf_hook_end(); > > > > IOW, update_socket_protocol() is a hook, not a kfunc. > > hum, right.. we use kfuncs set8 registration now also to mark attachable > hooks for fmodret programs, see [1] > > there are similar hooks registered in HID code as well > > [1] 5b481acab4ce bpf: do not rely on ALLOW_ERROR_INJECTION for fmod_ret) > > > > > > > > > > > > > > Maybe we need a codemod from: > > > > > > > > BTF_ID(func, ... > > > > > > > > to: > > > > > > > > BTF_ID(kfunc, ... > > > > > > I think it's better to keep just 'func' and not to do anything special for > > > kfuncs in resolve_btfids logic to keep it simple > > > > > > also it's going to be already in pahole so if we want to make a fix in future > > > you need to change pahole, resolve_btfids and possibly also kernel > > > > So maybe special annotation is still needed. WDYT? > > anyway, it looks like we actually do have flags field in set8 (thanks Kumar! ;-) ) > > struct btf_id_set8 { > u32 cnt; > u32 flags; > struct { > u32 id; > u32 flags; > } pairs[]; > }; > > it's not mentioned in the commit changelog [2], but it looks like it was > added just to keep data aligned, and AFAICS it's not used anywhere > > how about we add a flag saying this set8 has kfuncs in it Nice, yeah. This makes sense. We can tag it at the BTF_SET8_START level to reduce churn. At same time, we can WARN_ON() if the flag is missing in register_btf_kfunc_id_set(). Thanks, Daniel
diff --git a/btf_encoder.c b/btf_encoder.c index fd04008..2697214 100644 --- a/btf_encoder.c +++ b/btf_encoder.c @@ -34,6 +34,9 @@ #include <pthread.h> #define BTF_ENCODER_MAX_PROTO 512 +#define BTF_IDS_SECTION ".BTF_ids" +#define BTF_ID_FUNC_PFX "__BTF_ID__func__" +#define BTF_KFUNC_TYPE_TAG "kfunc" /* state used to do later encoding of saved functions */ struct btf_encoder_state { @@ -1352,6 +1355,200 @@ out: return err; } +/* + * Parse BTF_ID symbol and return the kfunc name. + * + * Returns: + * Callee-owned string containing kfunc name if successful. + * NULL if !kfunc or on error. + */ +static char *get_kfunc_name(const char *sym) +{ + char *kfunc, *end; + + if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1)) + return NULL; + + /* Strip prefix */ + kfunc = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1); + + /* Strip suffix */ + end = strrchr(kfunc, '_'); + if (!end || *(end - 1) != '_') { + free(kfunc); + return NULL; + } + *(end - 1) = '\0'; + + return kfunc; +} + +static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, const char *kfunc) +{ + int nr_types, type_id, err = -1; + struct btf *btf = encoder->btf; + + nr_types = btf__type_cnt(btf); + for (type_id = 1; type_id < nr_types; type_id++) { + const struct btf_type *type; + const char *name; + + type = btf__type_by_id(btf, type_id); + if (!type) { + fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n", + __func__, type_id); + goto out; + } + + if (!btf_is_func(type)) + continue; + + name = btf__name_by_offset(btf, type->name_off); + if (!name) { + fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n", + __func__, type_id); + goto out; + } + + if (strcmp(name, kfunc)) + continue; + + err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, type_id, -1); + if (err < 0) { + fprintf(stderr, "%s: failed to insert kfunc decl tag for '%s': %d\n", + __func__, kfunc, err); + goto out; + } + + err = 0; + break; + } + +out: + return err; +} + +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder) +{ + const char *filename = encoder->filename; + GElf_Shdr shdr_mem, *shdr; + int symbols_shndx = -1; + int idlist_shndx = -1; + Elf_Scn *scn = NULL; + Elf_Data *symbols; + int fd, err = -1; + size_t strtabidx; + Elf *elf = NULL; + size_t strndx; + char *secname; + int nr_syms; + int i = 0; + + fd = open(filename, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Cannot open %s\n", filename); + goto out; + } + + if (elf_version(EV_CURRENT) == EV_NONE) { + elf_error("Cannot set libelf version"); + goto out; + } + + elf = elf_begin(fd, ELF_C_READ, NULL); + if (elf == NULL) { + elf_error("Cannot update ELF file"); + goto out; + } + + /* Location symbol table and .BTF_ids sections */ + elf_getshdrstrndx(elf, &strndx); + while ((scn = elf_nextscn(elf, scn)) != NULL) { + Elf_Data *data; + + i++; + shdr = gelf_getshdr(scn, &shdr_mem); + if (shdr == NULL) { + elf_error("Failed to get ELF section(%d) hdr", i); + goto out; + } + + secname = elf_strptr(elf, strndx, shdr->sh_name); + if (!secname) { + elf_error("Failed to get ELF section(%d) hdr name", i); + goto out; + } + + data = elf_getdata(scn, 0); + if (!data) { + elf_error("Failed to get ELF section(%d) data", i); + goto out; + } + + if (shdr->sh_type == SHT_SYMTAB) { + symbols_shndx = i; + symbols = data; + strtabidx = shdr->sh_link; + } else if (!strcmp(secname, BTF_IDS_SECTION)) { + idlist_shndx = i; + } + } + + /* Cannot resolve symbol or .BTF_ids sections. Nothing to do. */ + if (symbols_shndx == -1 || idlist_shndx == -1) { + err = 0; + goto out; + } + + /* + * Look for __BTF_ID__func__ symbols in .BTF_ids section and + * inject BTF decl tags for each of them. + */ + scn = elf_getscn(elf, symbols_shndx); + if (!scn) { + elf_error("Failed to get ELF symbol table"); + goto out; + } + + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) { + elf_error("Failed to get ELF symbol table header"); + goto out; + } + + nr_syms = shdr->sh_size / shdr->sh_entsize; + for (i = 0; i < nr_syms; i++) { + char *kfunc, *name; + GElf_Sym sym; + int err; + + if (!gelf_getsym(symbols, i, &sym)) { + elf_error("Failed to get ELF symbol(%d)", i); + goto out; + } + + if (sym.st_shndx != idlist_shndx) + continue; + + name = elf_strptr(elf, strtabidx, sym.st_name); + kfunc = get_kfunc_name(name); + if (!kfunc) + continue; + + err = btf_encoder__tag_kfunc(encoder, kfunc); + if (err) { + fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, kfunc); + free(kfunc); + goto out; + } + free(kfunc); + } + + err = 0; +out: + return err; +} + int btf_encoder__encode(struct btf_encoder *encoder) { int err; @@ -1366,6 +1563,11 @@ int btf_encoder__encode(struct btf_encoder *encoder) if (btf__type_cnt(encoder->btf) == 1) return 0; + if (btf_encoder__tag_kfuncs(encoder)) { + fprintf(stderr, "%s: failed to tag kfuncs!\n", __func__); + return -1; + } + if (btf__dedup(encoder->btf, NULL)) { fprintf(stderr, "%s: btf__dedup failed!\n", __func__); return -1;
This commit teaches pahole to parse symbols in .BTF_ids section in vmlinux and discover exported kfuncs. Pahole then takes the list of kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. This enables downstream users and tools to dynamically discover which kfuncs are available on a system by parsing vmlinux or module BTF, both available in /sys/kernel/btf. Example of encoding: $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l 388 $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940 [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1 Signed-off-by: Daniel Xu <dxu@dxuuu.xyz> --- btf_encoder.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+)