diff mbox series

[dwarves] pahole: Inject kfunc decl tags into BTF

Message ID 421d18942d6ad28625530a8b3247595dc05eb100.1703110747.git.dxu@dxuuu.xyz (mailing list archive)
State Not Applicable
Delegated to: BPF
Headers show
Series [dwarves] pahole: Inject kfunc decl tags into BTF | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Daniel Xu Dec. 20, 2023, 10:19 p.m. UTC
This commit teaches pahole to parse symbols in .BTF_ids section in
vmlinux and discover exported kfuncs. Pahole then takes the list of
kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc.

This enables downstream users and tools to dynamically discover which
kfuncs are available on a system by parsing vmlinux or module BTF, both
available in /sys/kernel/btf.

Example of encoding:

        $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l
        388

        $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940
        [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static
        [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
---
 btf_encoder.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 202 insertions(+)

Comments

Daniel Xu Dec. 21, 2023, 6:37 a.m. UTC | #1
On Wed, Dec 20, 2023 at 03:19:52PM -0700, Daniel Xu wrote:
> This commit teaches pahole to parse symbols in .BTF_ids section in
> vmlinux and discover exported kfuncs. Pahole then takes the list of
> kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc.
>
> This enables downstream users and tools to dynamically discover which
> kfuncs are available on a system by parsing vmlinux or module BTF, both
> available in /sys/kernel/btf.
>
> Example of encoding:
>
>         $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l
>         388
>
>         $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940
>         [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static
>         [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1
>
> Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
> ---
>  btf_encoder.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 202 insertions(+)
>

Hmm, looking more, seems like this will pick up non-kfunc functions as
well. For example, kernel/trace/bpf_trace.c:


        BTF_SET_START(btf_allowlist_d_path)
        #ifdef CONFIG_SECURITY
        BTF_ID(func, security_file_permission)
        BTF_ID(func, security_inode_getattr)
        BTF_ID(func, security_file_open)
        #endif
        #ifdef CONFIG_SECURITY_PATH
        BTF_ID(func, security_path_truncate)
        #endif
        BTF_ID(func, vfs_truncate)
        BTF_ID(func, vfs_fallocate)
        BTF_ID(func, dentry_open)
        BTF_ID(func, vfs_getattr)
        BTF_ID(func, filp_close)
        BTF_SET_END(btf_allowlist_d_path)

Maybe we need a codemod from:

        BTF_ID(func, ...

to:

        BTF_ID(kfunc, ...


The change to resolve_btfids would be relatively small. Not sure what
the drawbacks are yet.

One way or another we'll need to annotate the kfuncs in source.
Jiri Olsa Dec. 21, 2023, 8:35 a.m. UTC | #2
On Wed, Dec 20, 2023 at 03:19:52PM -0700, Daniel Xu wrote:
> This commit teaches pahole to parse symbols in .BTF_ids section in
> vmlinux and discover exported kfuncs. Pahole then takes the list of
> kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc.
> 
> This enables downstream users and tools to dynamically discover which
> kfuncs are available on a system by parsing vmlinux or module BTF, both
> available in /sys/kernel/btf.
> 
> Example of encoding:
> 
>         $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l
>         388
> 
>         $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940
>         [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static
>         [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1
> 
> Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>

SNIP

> +
> +static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, const char *kfunc)
> +{
> +	int nr_types, type_id, err = -1;
> +	struct btf *btf = encoder->btf;

could we store the kuncs in sorted array (by name) and iterate all IDs
just once while doing the bsearch for the name over the kfuncs array

> +
> +	nr_types = btf__type_cnt(btf);
> +	for (type_id = 1; type_id < nr_types; type_id++) {
> +		const struct btf_type *type;
> +		const char *name;
> +
> +		type = btf__type_by_id(btf, type_id);
> +		if (!type) {
> +			fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n",
> +				__func__, type_id);
> +			goto out;
> +		}
> +
> +		if (!btf_is_func(type))
> +			continue;
> +
> +		name = btf__name_by_offset(btf, type->name_off);
> +		if (!name) {
> +			fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n",
> +				__func__, type_id);
> +			goto out;
> +		}
> +
> +		if (strcmp(name, kfunc))
> +		    continue;
> +
> +		err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, type_id, -1);
> +		if (err < 0) {
> +			fprintf(stderr, "%s: failed to insert kfunc decl tag for '%s': %d\n",
> +				__func__, kfunc, err);
> +			goto out;
> +		}
> +
> +		err = 0;
> +		break;
> +	}
> +
> +out:
> +	return err;
> +}
> +
> +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
> +{
> +	const char *filename = encoder->filename;
> +	GElf_Shdr shdr_mem, *shdr;
> +	int symbols_shndx = -1;
> +	int idlist_shndx = -1;
> +	Elf_Scn *scn = NULL;
> +	Elf_Data *symbols;
> +	int fd, err = -1;
> +	size_t strtabidx;
> +	Elf *elf = NULL;
> +	size_t strndx;
> +	char *secname;
> +	int nr_syms;
> +	int i = 0;
> +
> +	fd = open(filename, O_RDONLY);
> +	if (fd < 0) {
> +		fprintf(stderr, "Cannot open %s\n", filename);
> +		goto out;
> +	}
> +
> +	if (elf_version(EV_CURRENT) == EV_NONE) {
> +		elf_error("Cannot set libelf version");
> +		goto out;
> +	}
> +
> +	elf = elf_begin(fd, ELF_C_READ, NULL);
> +	if (elf == NULL) {
> +		elf_error("Cannot update ELF file");
> +		goto out;
> +	}

SNIP

> +		}
> +		free(kfunc);
> +	}
> +
> +	err = 0;
> +out:

leaking fd and elf object (elf_end)

jirka

> +	return err;
> +}
> +
>  int btf_encoder__encode(struct btf_encoder *encoder)
>  {
>  	int err;
> @@ -1366,6 +1563,11 @@ int btf_encoder__encode(struct btf_encoder *encoder)
>  	if (btf__type_cnt(encoder->btf) == 1)
>  		return 0;
>  
> +	if (btf_encoder__tag_kfuncs(encoder)) {
> +		fprintf(stderr, "%s: failed to tag kfuncs!\n", __func__);
> +		return -1;
> +	}
> +
>  	if (btf__dedup(encoder->btf, NULL)) {
>  		fprintf(stderr, "%s: btf__dedup failed!\n", __func__);
>  		return -1;
> -- 
> 2.42.1
>
Jiri Olsa Dec. 21, 2023, 8:35 a.m. UTC | #3
On Wed, Dec 20, 2023 at 11:37:01PM -0700, Daniel Xu wrote:
> On Wed, Dec 20, 2023 at 03:19:52PM -0700, Daniel Xu wrote:
> > This commit teaches pahole to parse symbols in .BTF_ids section in
> > vmlinux and discover exported kfuncs. Pahole then takes the list of
> > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc.
> >
> > This enables downstream users and tools to dynamically discover which
> > kfuncs are available on a system by parsing vmlinux or module BTF, both
> > available in /sys/kernel/btf.
> >
> > Example of encoding:
> >
> >         $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l
> >         388
> >
> >         $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940
> >         [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static
> >         [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1
> >
> > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
> > ---
> >  btf_encoder.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 202 insertions(+)
> >
> 
> Hmm, looking more, seems like this will pick up non-kfunc functions as
> well. For example, kernel/trace/bpf_trace.c:
> 
> 
>         BTF_SET_START(btf_allowlist_d_path)
>         #ifdef CONFIG_SECURITY
>         BTF_ID(func, security_file_permission)
>         BTF_ID(func, security_inode_getattr)
>         BTF_ID(func, security_file_open)
>         #endif
>         #ifdef CONFIG_SECURITY_PATH
>         BTF_ID(func, security_path_truncate)
>         #endif
>         BTF_ID(func, vfs_truncate)
>         BTF_ID(func, vfs_fallocate)
>         BTF_ID(func, dentry_open)
>         BTF_ID(func, vfs_getattr)
>         BTF_ID(func, filp_close)
>         BTF_SET_END(btf_allowlist_d_path)

you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists,
which are bounded by __BTF_ID__set8__<name> symbols, which also provide size

__BTF_ID__func_* symbol that has address inside the SET8 list is kfunc

> 
> Maybe we need a codemod from:
> 
>         BTF_ID(func, ...
> 
> to:
> 
>         BTF_ID(kfunc, ...

I think it's better to keep just 'func' and not to do anything special for
kfuncs in resolve_btfids logic to keep it simple

also it's going to be already in pahole so if we want to make a fix in future
you need to change pahole, resolve_btfids and possibly also kernel

jirka

> 
> 
> The change to resolve_btfids would be relatively small. Not sure what
> the drawbacks are yet.
> 
> One way or another we'll need to annotate the kfuncs in source.
Alexei Starovoitov Dec. 21, 2023, 5:05 p.m. UTC | #4
On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote:
> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists,
> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size

+1

> >
> > Maybe we need a codemod from:
> >
> >         BTF_ID(func, ...
> >
> > to:
> >
> >         BTF_ID(kfunc, ...
>
> I think it's better to keep just 'func' and not to do anything special for
> kfuncs in resolve_btfids logic to keep it simple
>
> also it's going to be already in pahole so if we want to make a fix in future
> you need to change pahole, resolve_btfids and possibly also kernel

I still don't understand why you guys want to add it to vmlinux BTF.
The kernel has no use in this additional data.
It already knows about all kfuncs.
This extra memory is a waste of space from kernel pov.
Unless I am missing something.

imo this logic belongs in bpftool only.
It can dump vmlinux BTF and emit __ksym protos into vmlinux.h
Alan Maguire Dec. 21, 2023, 5:42 p.m. UTC | #5
On 21/12/2023 17:05, Alexei Starovoitov wrote:
> On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists,
>> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size
> 
> +1
> 
>>>
>>> Maybe we need a codemod from:
>>>
>>>         BTF_ID(func, ...
>>>
>>> to:
>>>
>>>         BTF_ID(kfunc, ...
>>
>> I think it's better to keep just 'func' and not to do anything special for
>> kfuncs in resolve_btfids logic to keep it simple
>>
>> also it's going to be already in pahole so if we want to make a fix in future
>> you need to change pahole, resolve_btfids and possibly also kernel
> 
> I still don't understand why you guys want to add it to vmlinux BTF.
> The kernel has no use in this additional data.
> It already knows about all kfuncs.
> This extra memory is a waste of space from kernel pov.
> Unless I am missing something.
> 
> imo this logic belongs in bpftool only.
> It can dump vmlinux BTF and emit __ksym protos into vmlinux.h
> 

If the goal is to have bpftool detect all kfuncs, would having a BPF
kfunc iterator that bpftool could use to iterate over registered kfuncs
work perhaps?
Alexei Starovoitov Dec. 21, 2023, 6:07 p.m. UTC | #6
On Thu, Dec 21, 2023 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>
> On 21/12/2023 17:05, Alexei Starovoitov wrote:
> > On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote:
> >> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists,
> >> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size
> >
> > +1
> >
> >>>
> >>> Maybe we need a codemod from:
> >>>
> >>>         BTF_ID(func, ...
> >>>
> >>> to:
> >>>
> >>>         BTF_ID(kfunc, ...
> >>
> >> I think it's better to keep just 'func' and not to do anything special for
> >> kfuncs in resolve_btfids logic to keep it simple
> >>
> >> also it's going to be already in pahole so if we want to make a fix in future
> >> you need to change pahole, resolve_btfids and possibly also kernel
> >
> > I still don't understand why you guys want to add it to vmlinux BTF.
> > The kernel has no use in this additional data.
> > It already knows about all kfuncs.
> > This extra memory is a waste of space from kernel pov.
> > Unless I am missing something.
> >
> > imo this logic belongs in bpftool only.
> > It can dump vmlinux BTF and emit __ksym protos into vmlinux.h
> >
>
> If the goal is to have bpftool detect all kfuncs, would having a BPF
> kfunc iterator that bpftool could use to iterate over registered kfuncs
> work perhaps?

The kernel code ? Why ?
bpftool can do the same thing as this patch. Iterate over set8 in vmlinux elf.
Daniel Xu Dec. 21, 2023, 6:18 p.m. UTC | #7
Hi Alexei,

On Thu, Dec 21, 2023 at 10:07:33AM -0800, Alexei Starovoitov wrote:
> On Thu, Dec 21, 2023 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote:
> >
> > On 21/12/2023 17:05, Alexei Starovoitov wrote:
> > > On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote:
> > >> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists,
> > >> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size
> > >
> > > +1
> > >
> > >>>
> > >>> Maybe we need a codemod from:
> > >>>
> > >>>         BTF_ID(func, ...
> > >>>
> > >>> to:
> > >>>
> > >>>         BTF_ID(kfunc, ...
> > >>
> > >> I think it's better to keep just 'func' and not to do anything special for
> > >> kfuncs in resolve_btfids logic to keep it simple
> > >>
> > >> also it's going to be already in pahole so if we want to make a fix in future
> > >> you need to change pahole, resolve_btfids and possibly also kernel
> > >
> > > I still don't understand why you guys want to add it to vmlinux BTF.
> > > The kernel has no use in this additional data.
> > > It already knows about all kfuncs.
> > > This extra memory is a waste of space from kernel pov.
> > > Unless I am missing something.
> > >
> > > imo this logic belongs in bpftool only.
> > > It can dump vmlinux BTF and emit __ksym protos into vmlinux.h
> > >
> >
> > If the goal is to have bpftool detect all kfuncs, would having a BPF
> > kfunc iterator that bpftool could use to iterate over registered kfuncs
> > work perhaps?
> 
> The kernel code ? Why ?
> bpftool can do the same thing as this patch. Iterate over set8 in vmlinux elf.

I think you're right for vmlinux -- bpftool can look at the elf file on
a running system. But I'm not sure it works for modules. 

IIUC, the actual module ELF can go away while the kernel holds onto the
memory (as with initramfs). And even if that wasn't the case, in
containerized environments you may not be able to always see
/lib/modules or similar. That's assuming there's not a way to get the
module as with vmlinux /proc/kcore that I don't know about.

Looking at the tree, there's about 20k export symbols:

        $ rg EXPORT_SYMBOL_GPL | wc -l
        19471

Assuming kfuncs get there, that's about 312K of memory:

        >>> (20000 * (12 + 4)) >> 10
        312

So maybe a worthwhile tradeoff?

In general though, I kinda like having it in BTF cuz it's a structured
way to export metadata. IMO the link between kfuncs and BTF_ID_set8 is
kinda weak and might best be left as an implementation detail that
kernel devs can keep an eye on.

Thanks,
Daniel
David Marchevsky Dec. 21, 2023, 10:57 p.m. UTC | #8
On 12/20/23 5:19 PM, Daniel Xu wrote:
> This commit teaches pahole to parse symbols in .BTF_ids section in
> vmlinux and discover exported kfuncs. Pahole then takes the list of
> kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc.
> 
> This enables downstream users and tools to dynamically discover which
> kfuncs are available on a system by parsing vmlinux or module BTF, both
> available in /sys/kernel/btf.
> 
> Example of encoding:
> 
>         $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l
>         388
> 
>         $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940
>         [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static
>         [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1
> 
> Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
> ---
>  btf_encoder.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 202 insertions(+)
> 
> diff --git a/btf_encoder.c b/btf_encoder.c
> index fd04008..2697214 100644
> --- a/btf_encoder.c
> +++ b/btf_encoder.c
> @@ -34,6 +34,9 @@
>  #include <pthread.h>
>  
>  #define BTF_ENCODER_MAX_PROTO	512
> +#define BTF_IDS_SECTION		".BTF_ids"
> +#define BTF_ID_FUNC_PFX		"__BTF_ID__func__"
> +#define BTF_KFUNC_TYPE_TAG	"kfunc"

Can this be bpf_kfunc? Elaborated on elsewhere in this reply

>  
>  /* state used to do later encoding of saved functions */
>  struct btf_encoder_state {
> @@ -1352,6 +1355,200 @@ out:
>  	return err;
>  }
>  
> +/*
> + * Parse BTF_ID symbol and return the kfunc name.
> + *
> + * Returns:
> + *	Callee-owned string containing kfunc name if successful.

nit: Caller-owned, not callee-owned

> + *	NULL if !kfunc or on error.
> + */
> +static char *get_kfunc_name(const char *sym)
> +{
> +	char *kfunc, *end;
> +
> +	if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1))
> +		return NULL;
> +
> +	/* Strip prefix */
> +	kfunc = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1);
> +
> +	/* Strip suffix */
> +	end = strrchr(kfunc, '_');
> +	if (!end || *(end - 1) != '_') {
> +		free(kfunc);
> +		return NULL;
> +	}
> +	*(end - 1) = '\0';
> +
> +	return kfunc;
> +}
> +
> +static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, const char *kfunc)
> +{
> +	int nr_types, type_id, err = -1;
> +	struct btf *btf = encoder->btf;
> +
> +	nr_types = btf__type_cnt(btf);
> +	for (type_id = 1; type_id < nr_types; type_id++) {
> +		const struct btf_type *type;
> +		const char *name;
> +
> +		type = btf__type_by_id(btf, type_id);
> +		if (!type) {
> +			fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n",
> +				__func__, type_id);
> +			goto out;
> +		}
> +
> +		if (!btf_is_func(type))
> +			continue;
> +
> +		name = btf__name_by_offset(btf, type->name_off);
> +		if (!name) {
> +			fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n",
> +				__func__, type_id);
> +			goto out;
> +		}
> +
> +		if (strcmp(name, kfunc))
> +		    continue;
> +
> +		err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, type_id, -1);

In an ideal world we'd just add this tag to __bpf_kfunc macro
definition, right? Then bpftool can generate fwd decls from generated
vmlinux w/o any pahole changes. But no gcc support for BTF tags, so need
to do this workaround.

With that in mind, instead of unconditionally adding BTF_KFUNC_TYPE_TAG
to funcs in btf id sets, can this code only do so if there isn't an
existing BTF_KFUNC_TYPE_TAG pointing to it? It'd require another loop
over btf types to built set of already-tagged funcs, but would
future-proof this work. Alternatively, if existing btf__dedup call after
btf_encoder__tag_kfuncs will get rid of these extraneous "tagged types"
in the scenario where one already exists, then a comment here to that
effect would be appreciated.

My nit about BTF_KFUNC_TYPE_TAG earlier was related: if we do want
__bpf_kfunc macro to add such a tag, might as well make the tag
'bpf_kfunc' so it's easier for unfamiliar folks to understand.
Alexei Starovoitov Dec. 22, 2023, 12:52 a.m. UTC | #9
On Thu, Dec 21, 2023 at 10:18 AM Daniel Xu <dxu@dxuuu.xyz> wrote:
>
> Hi Alexei,
>
> On Thu, Dec 21, 2023 at 10:07:33AM -0800, Alexei Starovoitov wrote:
> > On Thu, Dec 21, 2023 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote:
> > >
> > > On 21/12/2023 17:05, Alexei Starovoitov wrote:
> > > > On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote:
> > > >> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists,
> > > >> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size
> > > >
> > > > +1
> > > >
> > > >>>
> > > >>> Maybe we need a codemod from:
> > > >>>
> > > >>>         BTF_ID(func, ...
> > > >>>
> > > >>> to:
> > > >>>
> > > >>>         BTF_ID(kfunc, ...
> > > >>
> > > >> I think it's better to keep just 'func' and not to do anything special for
> > > >> kfuncs in resolve_btfids logic to keep it simple
> > > >>
> > > >> also it's going to be already in pahole so if we want to make a fix in future
> > > >> you need to change pahole, resolve_btfids and possibly also kernel
> > > >
> > > > I still don't understand why you guys want to add it to vmlinux BTF.
> > > > The kernel has no use in this additional data.
> > > > It already knows about all kfuncs.
> > > > This extra memory is a waste of space from kernel pov.
> > > > Unless I am missing something.
> > > >
> > > > imo this logic belongs in bpftool only.
> > > > It can dump vmlinux BTF and emit __ksym protos into vmlinux.h
> > > >
> > >
> > > If the goal is to have bpftool detect all kfuncs, would having a BPF
> > > kfunc iterator that bpftool could use to iterate over registered kfuncs
> > > work perhaps?
> >
> > The kernel code ? Why ?
> > bpftool can do the same thing as this patch. Iterate over set8 in vmlinux elf.
>
> I think you're right for vmlinux -- bpftool can look at the elf file on
> a running system. But I'm not sure it works for modules.
>
> IIUC, the actual module ELF can go away while the kernel holds onto the
> memory (as with initramfs). And even if that wasn't the case, in
> containerized environments you may not be able to always see
> /lib/modules or similar.

Indeed. Access to .ko files may be difficult even for full root
without containers.

What is vmlinux BTF before/after for our current set of kfuncs ?
Alan Maguire Dec. 22, 2023, 9:55 a.m. UTC | #10
On 21/12/2023 18:07, Alexei Starovoitov wrote:
> On Thu, Dec 21, 2023 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>>
>> On 21/12/2023 17:05, Alexei Starovoitov wrote:
>>> On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>>>> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists,
>>>> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size
>>>
>>> +1
>>>
>>>>>
>>>>> Maybe we need a codemod from:
>>>>>
>>>>>         BTF_ID(func, ...
>>>>>
>>>>> to:
>>>>>
>>>>>         BTF_ID(kfunc, ...
>>>>
>>>> I think it's better to keep just 'func' and not to do anything special for
>>>> kfuncs in resolve_btfids logic to keep it simple
>>>>
>>>> also it's going to be already in pahole so if we want to make a fix in future
>>>> you need to change pahole, resolve_btfids and possibly also kernel
>>>
>>> I still don't understand why you guys want to add it to vmlinux BTF.
>>> The kernel has no use in this additional data.
>>> It already knows about all kfuncs.
>>> This extra memory is a waste of space from kernel pov.
>>> Unless I am missing something.
>>>
>>> imo this logic belongs in bpftool only.
>>> It can dump vmlinux BTF and emit __ksym protos into vmlinux.h
>>>
>>
>> If the goal is to have bpftool detect all kfuncs, would having a BPF
>> kfunc iterator that bpftool could use to iterate over registered kfuncs
>> work perhaps?
> 
> The kernel code ? Why ?
> bpftool can do the same thing as this patch. Iterate over set8 in vmlinux elf.

Most distros don't have the vmlinux binary easily available; it needs to
be either downloaded as part of debuginfo packages or uncompressed from
vmlinuz.
Jiri Olsa Dec. 22, 2023, 12:46 p.m. UTC | #11
On Fri, Dec 22, 2023 at 09:55:09AM +0000, Alan Maguire wrote:
> On 21/12/2023 18:07, Alexei Starovoitov wrote:
> > On Thu, Dec 21, 2023 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote:
> >>
> >> On 21/12/2023 17:05, Alexei Starovoitov wrote:
> >>> On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote:
> >>>> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists,
> >>>> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size
> >>>
> >>> +1
> >>>
> >>>>>
> >>>>> Maybe we need a codemod from:
> >>>>>
> >>>>>         BTF_ID(func, ...
> >>>>>
> >>>>> to:
> >>>>>
> >>>>>         BTF_ID(kfunc, ...
> >>>>
> >>>> I think it's better to keep just 'func' and not to do anything special for
> >>>> kfuncs in resolve_btfids logic to keep it simple
> >>>>
> >>>> also it's going to be already in pahole so if we want to make a fix in future
> >>>> you need to change pahole, resolve_btfids and possibly also kernel
> >>>
> >>> I still don't understand why you guys want to add it to vmlinux BTF.
> >>> The kernel has no use in this additional data.
> >>> It already knows about all kfuncs.
> >>> This extra memory is a waste of space from kernel pov.
> >>> Unless I am missing something.
> >>>
> >>> imo this logic belongs in bpftool only.
> >>> It can dump vmlinux BTF and emit __ksym protos into vmlinux.h
> >>>
> >>
> >> If the goal is to have bpftool detect all kfuncs, would having a BPF
> >> kfunc iterator that bpftool could use to iterate over registered kfuncs
> >> work perhaps?
> > 
> > The kernel code ? Why ?
> > bpftool can do the same thing as this patch. Iterate over set8 in vmlinux elf.
> 
> Most distros don't have the vmlinux binary easily available; it needs to
> be either downloaded as part of debuginfo packages or uncompressed from
> vmlinuz.

would reading the /proc/kcore be an option? I'm under impression it's
default for distros but I might be wrong

jirka
Alan Maguire Dec. 22, 2023, 4:24 p.m. UTC | #12
On 22/12/2023 12:46, Jiri Olsa wrote:
> On Fri, Dec 22, 2023 at 09:55:09AM +0000, Alan Maguire wrote:
>> On 21/12/2023 18:07, Alexei Starovoitov wrote:
>>> On Thu, Dec 21, 2023 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote:
>>>>
>>>> On 21/12/2023 17:05, Alexei Starovoitov wrote:
>>>>> On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>>>>>> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists,
>>>>>> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size
>>>>>
>>>>> +1
>>>>>
>>>>>>>
>>>>>>> Maybe we need a codemod from:
>>>>>>>
>>>>>>>         BTF_ID(func, ...
>>>>>>>
>>>>>>> to:
>>>>>>>
>>>>>>>         BTF_ID(kfunc, ...
>>>>>>
>>>>>> I think it's better to keep just 'func' and not to do anything special for
>>>>>> kfuncs in resolve_btfids logic to keep it simple
>>>>>>
>>>>>> also it's going to be already in pahole so if we want to make a fix in future
>>>>>> you need to change pahole, resolve_btfids and possibly also kernel
>>>>>
>>>>> I still don't understand why you guys want to add it to vmlinux BTF.
>>>>> The kernel has no use in this additional data.
>>>>> It already knows about all kfuncs.
>>>>> This extra memory is a waste of space from kernel pov.
>>>>> Unless I am missing something.
>>>>>
>>>>> imo this logic belongs in bpftool only.
>>>>> It can dump vmlinux BTF and emit __ksym protos into vmlinux.h
>>>>>
>>>>
>>>> If the goal is to have bpftool detect all kfuncs, would having a BPF
>>>> kfunc iterator that bpftool could use to iterate over registered kfuncs
>>>> work perhaps?
>>>
>>> The kernel code ? Why ?
>>> bpftool can do the same thing as this patch. Iterate over set8 in vmlinux elf.
>>
>> Most distros don't have the vmlinux binary easily available; it needs to
>> be either downloaded as part of debuginfo packages or uncompressed from
>> vmlinuz.
> 
> would reading the /proc/kcore be an option? I'm under impression it's
> default for distros but I might be wrong
>

Good idea, I think it would be an option alright. From a user
perspective though can we always assume the BTF id sets of kfuncs always
match the set of available kfuncs? If the goal of this feature is to see
which kfuncs are available to be used, we'd need some form of active
participation by the kernel in producing the registered list I think.
But again, depends what the goal is here.

Alan

> jirka
Daniel Xu Dec. 22, 2023, 8:50 p.m. UTC | #13
On Thu, Dec 21, 2023 at 04:52:54PM -0800, Alexei Starovoitov wrote:
> On Thu, Dec 21, 2023 at 10:18 AM Daniel Xu <dxu@dxuuu.xyz> wrote:
> >
> > Hi Alexei,
> >
> > On Thu, Dec 21, 2023 at 10:07:33AM -0800, Alexei Starovoitov wrote:
> > > On Thu, Dec 21, 2023 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote:
> > > >
> > > > On 21/12/2023 17:05, Alexei Starovoitov wrote:
> > > > > On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote:
> > > > >> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists,
> > > > >> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size
> > > > >
> > > > > +1
> > > > >
> > > > >>>
> > > > >>> Maybe we need a codemod from:
> > > > >>>
> > > > >>>         BTF_ID(func, ...
> > > > >>>
> > > > >>> to:
> > > > >>>
> > > > >>>         BTF_ID(kfunc, ...
> > > > >>
> > > > >> I think it's better to keep just 'func' and not to do anything special for
> > > > >> kfuncs in resolve_btfids logic to keep it simple
> > > > >>
> > > > >> also it's going to be already in pahole so if we want to make a fix in future
> > > > >> you need to change pahole, resolve_btfids and possibly also kernel
> > > > >
> > > > > I still don't understand why you guys want to add it to vmlinux BTF.
> > > > > The kernel has no use in this additional data.
> > > > > It already knows about all kfuncs.
> > > > > This extra memory is a waste of space from kernel pov.
> > > > > Unless I am missing something.
> > > > >
> > > > > imo this logic belongs in bpftool only.
> > > > > It can dump vmlinux BTF and emit __ksym protos into vmlinux.h
> > > > >
> > > >
> > > > If the goal is to have bpftool detect all kfuncs, would having a BPF
> > > > kfunc iterator that bpftool could use to iterate over registered kfuncs
> > > > work perhaps?
> > >
> > > The kernel code ? Why ?
> > > bpftool can do the same thing as this patch. Iterate over set8 in vmlinux elf.
> >
> > I think you're right for vmlinux -- bpftool can look at the elf file on
> > a running system. But I'm not sure it works for modules.
> >
> > IIUC, the actual module ELF can go away while the kernel holds onto the
> > memory (as with initramfs). And even if that wasn't the case, in
> > containerized environments you may not be able to always see
> > /lib/modules or similar.
> 
> Indeed. Access to .ko files may be difficult even for full root
> without containers.
> 
> What is vmlinux BTF before/after for our current set of kfuncs ?

Before:

        $ pahole -J --btf_gen_floats -j --lang_exclude=rust --skip_encoding_btf_inconsistent_proto --btf_gen_optimized .tmp_vmlinux.btf
        $ ls -l .tmp_vmlinux.btf
        -rwxr-xr-x 1 dxu dxu 1159241688 Dec 22 13:44 .tmp_vmlinux.btf*


After:

        $ /home/dxu/dev/pahole/build/pahole -J --btf_gen_floats -j --lang_exclude=rust --skip_encoding_btf_inconsistent_proto --btf_gen_optimized .tmp_vmlinux.btf
        $ ls -l .tmp_vmlinux.btf
        -rwxr-xr-x 1 dxu dxu 1159248104 Dec 22 13:47 .tmp_vmlinux.btf*

1159248104 - 1159241688 = 6416, so ~17B per kfunc (although we are
currently overcounting kfuncs by a bit).

Btw, for some reason the file size is not quite reproducible. I'm seeing
~500B deltas every time I rerun the same command. Maybe I'm doing
something wrong? Not sure. 

Thanks,
Daniel
Daniel Xu Dec. 22, 2023, 8:55 p.m. UTC | #14
On Fri, Dec 22, 2023 at 04:24:35PM +0000, Alan Maguire wrote:
> On 22/12/2023 12:46, Jiri Olsa wrote:
> > On Fri, Dec 22, 2023 at 09:55:09AM +0000, Alan Maguire wrote:
> >> On 21/12/2023 18:07, Alexei Starovoitov wrote:
> >>> On Thu, Dec 21, 2023 at 9:43 AM Alan Maguire <alan.maguire@oracle.com> wrote:
> >>>>
> >>>> On 21/12/2023 17:05, Alexei Starovoitov wrote:
> >>>>> On Thu, Dec 21, 2023 at 12:35 AM Jiri Olsa <olsajiri@gmail.com> wrote:
> >>>>>> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists,
> >>>>>> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size
> >>>>>
> >>>>> +1
> >>>>>
> >>>>>>>
> >>>>>>> Maybe we need a codemod from:
> >>>>>>>
> >>>>>>>         BTF_ID(func, ...
> >>>>>>>
> >>>>>>> to:
> >>>>>>>
> >>>>>>>         BTF_ID(kfunc, ...
> >>>>>>
> >>>>>> I think it's better to keep just 'func' and not to do anything special for
> >>>>>> kfuncs in resolve_btfids logic to keep it simple
> >>>>>>
> >>>>>> also it's going to be already in pahole so if we want to make a fix in future
> >>>>>> you need to change pahole, resolve_btfids and possibly also kernel
> >>>>>
> >>>>> I still don't understand why you guys want to add it to vmlinux BTF.
> >>>>> The kernel has no use in this additional data.
> >>>>> It already knows about all kfuncs.
> >>>>> This extra memory is a waste of space from kernel pov.
> >>>>> Unless I am missing something.
> >>>>>
> >>>>> imo this logic belongs in bpftool only.
> >>>>> It can dump vmlinux BTF and emit __ksym protos into vmlinux.h
> >>>>>
> >>>>
> >>>> If the goal is to have bpftool detect all kfuncs, would having a BPF
> >>>> kfunc iterator that bpftool could use to iterate over registered kfuncs
> >>>> work perhaps?
> >>>
> >>> The kernel code ? Why ?
> >>> bpftool can do the same thing as this patch. Iterate over set8 in vmlinux elf.
> >>
> >> Most distros don't have the vmlinux binary easily available; it needs to
> >> be either downloaded as part of debuginfo packages or uncompressed from
> >> vmlinuz.
> > 
> > would reading the /proc/kcore be an option? I'm under impression it's
> > default for distros but I might be wrong
> >
> 
> Good idea, I think it would be an option alright.

Yeah, /proc/kcore would work, but only for vmlinux. I mentioned in the
other thread about how it probably wouldn't work for kfuncs defined in
modules.


> From a user
> perspective though can we always assume the BTF id sets of kfuncs always
> match the set of available kfuncs? If the goal of this feature is to see
> which kfuncs are available to be used, we'd need some form of active
> participation by the kernel in producing the registered list I think.
> But again, depends what the goal is here.

The goal is to query for available kfuncs. I also mentioned in the other
thread the link between BTF id sets and available kfuncs is not super
obvious. It might be ok for now. But I'll defer to people more familiar
with it.

BPF iterator would probably work, but it feels a bit complex for what is
mostly static data. And for data that doesn't really need to be
introspected (just BTF IDs). So I'm not sure it's the most ergonomic
approach for userspace to consume. Maybe some kind fo sysfs file would
be better? Could be as simple as a space separated list of BTF IDs that
reference kfuncs.

Thanks,
Daniel
Alexei Starovoitov Dec. 22, 2023, 10:11 p.m. UTC | #15
On Fri, Dec 22, 2023 at 12:55 PM Daniel Xu <dxu@dxuuu.xyz> wrote:
>
>
> > From a user
> > perspective though can we always assume the BTF id sets of kfuncs always
> > match the set of available kfuncs? If the goal of this feature is to see
> > which kfuncs are available to be used, we'd need some form of active
> > participation by the kernel in producing the registered list I think.
> > But again, depends what the goal is here.
>
> The goal is to query for available kfuncs.

The list of available kfuncs across all modules and prog types
doesn't really help the users to know whether their program
will be accepted if they use this kfunc.
Today kfuncs are grouped by prog type, but soon it will be more
fine grained. We need to allow different set of kfuncs depending
on struct_ops attach point and potentially flags at hook attach point.
So the list of kfuncs is similar to the list of helpers.

Adding kfunc btf_decl_tag to vmlinux BTF and module BTF only
helps with generation of vmlinux.h or module.h so that the users
don't need to manually write __ksym protos in their programs.
Analogous to generation of bpf_helper_defs.h.

re: your other email. Extra ~6k for vmlinux BTF is fine.
As Dave suggested in the other email let's make sure that dedup
removes the duplicated decl_tags or pahole doesn't add another
one if btf_decl_tag is already present in __bpf_kfunc macro.
pahole will essentially be a workaround for lack of btf_decl_tag in GCC.
Daniel Xu Dec. 23, 2023, 7:35 p.m. UTC | #16
On Thu, Dec 21, 2023 at 09:35:20AM +0100, Jiri Olsa wrote:
> On Wed, Dec 20, 2023 at 03:19:52PM -0700, Daniel Xu wrote:
> > This commit teaches pahole to parse symbols in .BTF_ids section in
> > vmlinux and discover exported kfuncs. Pahole then takes the list of
> > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc.
> > 
> > This enables downstream users and tools to dynamically discover which
> > kfuncs are available on a system by parsing vmlinux or module BTF, both
> > available in /sys/kernel/btf.
> > 
> > Example of encoding:
> > 
> >         $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l
> >         388
> > 
> >         $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940
> >         [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static
> >         [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1
> > 
> > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
> 
> SNIP
> 
> > +
> > +static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, const char *kfunc)
> > +{
> > +	int nr_types, type_id, err = -1;
> > +	struct btf *btf = encoder->btf;
> 
> could we store the kuncs in sorted array (by name) and iterate all IDs
> just once while doing the bsearch for the name over the kfuncs array

Ack, will take a look.

> 
> > +
> > +	nr_types = btf__type_cnt(btf);
> > +	for (type_id = 1; type_id < nr_types; type_id++) {
> > +		const struct btf_type *type;
> > +		const char *name;
> > +
> > +		type = btf__type_by_id(btf, type_id);
> > +		if (!type) {
> > +			fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n",
> > +				__func__, type_id);
> > +			goto out;
> > +		}
> > +
> > +		if (!btf_is_func(type))
> > +			continue;
> > +
> > +		name = btf__name_by_offset(btf, type->name_off);
> > +		if (!name) {
> > +			fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n",
> > +				__func__, type_id);
> > +			goto out;
> > +		}
> > +
> > +		if (strcmp(name, kfunc))
> > +		    continue;
> > +
> > +		err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, type_id, -1);
> > +		if (err < 0) {
> > +			fprintf(stderr, "%s: failed to insert kfunc decl tag for '%s': %d\n",
> > +				__func__, kfunc, err);
> > +			goto out;
> > +		}
> > +
> > +		err = 0;
> > +		break;
> > +	}
> > +
> > +out:
> > +	return err;
> > +}
> > +
> > +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
> > +{
> > +	const char *filename = encoder->filename;
> > +	GElf_Shdr shdr_mem, *shdr;
> > +	int symbols_shndx = -1;
> > +	int idlist_shndx = -1;
> > +	Elf_Scn *scn = NULL;
> > +	Elf_Data *symbols;
> > +	int fd, err = -1;
> > +	size_t strtabidx;
> > +	Elf *elf = NULL;
> > +	size_t strndx;
> > +	char *secname;
> > +	int nr_syms;
> > +	int i = 0;
> > +
> > +	fd = open(filename, O_RDONLY);
> > +	if (fd < 0) {
> > +		fprintf(stderr, "Cannot open %s\n", filename);
> > +		goto out;
> > +	}
> > +
> > +	if (elf_version(EV_CURRENT) == EV_NONE) {
> > +		elf_error("Cannot set libelf version");
> > +		goto out;
> > +	}
> > +
> > +	elf = elf_begin(fd, ELF_C_READ, NULL);
> > +	if (elf == NULL) {
> > +		elf_error("Cannot update ELF file");
> > +		goto out;
> > +	}
> 
> SNIP
> 
> > +		}
> > +		free(kfunc);
> > +	}
> > +
> > +	err = 0;
> > +out:
> 
> leaking fd and elf object (elf_end)

Good catch thanks. Been writing too much rust...


Thanks,
Daniel
Daniel Xu Dec. 23, 2023, 7:40 p.m. UTC | #17
Hi Dave,

On Thu, Dec 21, 2023 at 05:57:18PM -0500, David Marchevsky wrote:
> 
> 
> On 12/20/23 5:19 PM, Daniel Xu wrote:
> > This commit teaches pahole to parse symbols in .BTF_ids section in
> > vmlinux and discover exported kfuncs. Pahole then takes the list of
> > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc.
> > 
> > This enables downstream users and tools to dynamically discover which
> > kfuncs are available on a system by parsing vmlinux or module BTF, both
> > available in /sys/kernel/btf.
> > 
> > Example of encoding:
> > 
> >         $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l
> >         388
> > 
> >         $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940
> >         [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static
> >         [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1
> > 
> > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
> > ---
> >  btf_encoder.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 202 insertions(+)
> > 
> > diff --git a/btf_encoder.c b/btf_encoder.c
> > index fd04008..2697214 100644
> > --- a/btf_encoder.c
> > +++ b/btf_encoder.c
> > @@ -34,6 +34,9 @@
> >  #include <pthread.h>
> >  
> >  #define BTF_ENCODER_MAX_PROTO	512
> > +#define BTF_IDS_SECTION		".BTF_ids"
> > +#define BTF_ID_FUNC_PFX		"__BTF_ID__func__"
> > +#define BTF_KFUNC_TYPE_TAG	"kfunc"
> 
> Can this be bpf_kfunc? Elaborated on elsewhere in this reply

Yeah, that's better. Good idea.

> 
> >  
> >  /* state used to do later encoding of saved functions */
> >  struct btf_encoder_state {
> > @@ -1352,6 +1355,200 @@ out:
> >  	return err;
> >  }
> >  
> > +/*
> > + * Parse BTF_ID symbol and return the kfunc name.
> > + *
> > + * Returns:
> > + *	Callee-owned string containing kfunc name if successful.
> 
> nit: Caller-owned, not callee-owned

Fixed, thanks.

> 
> > + *	NULL if !kfunc or on error.
> > + */
> > +static char *get_kfunc_name(const char *sym)
> > +{
> > +	char *kfunc, *end;
> > +
> > +	if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1))
> > +		return NULL;
> > +
> > +	/* Strip prefix */
> > +	kfunc = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1);
> > +
> > +	/* Strip suffix */
> > +	end = strrchr(kfunc, '_');
> > +	if (!end || *(end - 1) != '_') {
> > +		free(kfunc);
> > +		return NULL;
> > +	}
> > +	*(end - 1) = '\0';
> > +
> > +	return kfunc;
> > +}
> > +
> > +static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, const char *kfunc)
> > +{
> > +	int nr_types, type_id, err = -1;
> > +	struct btf *btf = encoder->btf;
> > +
> > +	nr_types = btf__type_cnt(btf);
> > +	for (type_id = 1; type_id < nr_types; type_id++) {
> > +		const struct btf_type *type;
> > +		const char *name;
> > +
> > +		type = btf__type_by_id(btf, type_id);
> > +		if (!type) {
> > +			fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n",
> > +				__func__, type_id);
> > +			goto out;
> > +		}
> > +
> > +		if (!btf_is_func(type))
> > +			continue;
> > +
> > +		name = btf__name_by_offset(btf, type->name_off);
> > +		if (!name) {
> > +			fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n",
> > +				__func__, type_id);
> > +			goto out;
> > +		}
> > +
> > +		if (strcmp(name, kfunc))
> > +		    continue;
> > +
> > +		err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, type_id, -1);
> 
> In an ideal world we'd just add this tag to __bpf_kfunc macro
> definition, right? Then bpftool can generate fwd decls from generated
> vmlinux w/o any pahole changes. But no gcc support for BTF tags, so need
> to do this workaround.
> 
> With that in mind, instead of unconditionally adding BTF_KFUNC_TYPE_TAG
> to funcs in btf id sets, can this code only do so if there isn't an
> existing BTF_KFUNC_TYPE_TAG pointing to it? It'd require another loop
> over btf types to built set of already-tagged funcs, but would
> future-proof this work. Alternatively, if existing btf__dedup call after
> btf_encoder__tag_kfuncs will get rid of these extraneous "tagged types"
> in the scenario where one already exists, then a comment here to that
> effect would be appreciated.

Yeah, I placed the call to btf_encoder__tag_kfuncs() right before the
call to btf__dedup() in btf_encoder__encode() cuz I was noticing
duplicates. After moving the call to the current location, I noticed the
duplicates went away.

I'll leave a comment to that effect.

Thanks,
Daniel
Daniel Xu Jan. 3, 2024, 12:56 a.m. UTC | #18
On Thu, Dec 21, 2023 at 09:35:28AM +0100, Jiri Olsa wrote:
> On Wed, Dec 20, 2023 at 11:37:01PM -0700, Daniel Xu wrote:
> > On Wed, Dec 20, 2023 at 03:19:52PM -0700, Daniel Xu wrote:
> > > This commit teaches pahole to parse symbols in .BTF_ids section in
> > > vmlinux and discover exported kfuncs. Pahole then takes the list of
> > > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc.
> > >
> > > This enables downstream users and tools to dynamically discover which
> > > kfuncs are available on a system by parsing vmlinux or module BTF, both
> > > available in /sys/kernel/btf.
> > >
> > > Example of encoding:
> > >
> > >         $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l
> > >         388
> > >
> > >         $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940
> > >         [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static
> > >         [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1
> > >
> > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
> > > ---
> > >  btf_encoder.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 202 insertions(+)
> > >
> > 
> > Hmm, looking more, seems like this will pick up non-kfunc functions as
> > well. For example, kernel/trace/bpf_trace.c:
> > 
> > 
> >         BTF_SET_START(btf_allowlist_d_path)
> >         #ifdef CONFIG_SECURITY
> >         BTF_ID(func, security_file_permission)
> >         BTF_ID(func, security_inode_getattr)
> >         BTF_ID(func, security_file_open)
> >         #endif
> >         #ifdef CONFIG_SECURITY_PATH
> >         BTF_ID(func, security_path_truncate)
> >         #endif
> >         BTF_ID(func, vfs_truncate)
> >         BTF_ID(func, vfs_fallocate)
> >         BTF_ID(func, dentry_open)
> >         BTF_ID(func, vfs_getattr)
> >         BTF_ID(func, filp_close)
> >         BTF_SET_END(btf_allowlist_d_path)
> 
> you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists,
> which are bounded by __BTF_ID__set8__<name> symbols, which also provide size
> 
> __BTF_ID__func_* symbol that has address inside the SET8 list is kfunc

I managed to add that logic. But I did some spot checks and it looks
like SET8 lists are not quite limited to kfuncs. For example, in
net/mptcp/bpf.c:

        BTF_SET8_START(bpf_mptcp_fmodret_ids)
        BTF_ID_FLAGS(func, update_socket_protocol)
        BTF_SET8_END(bpf_mptcp_fmodret_ids)

        static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
                .owner = THIS_MODULE,
                .set   = &bpf_mptcp_fmodret_ids,
        };

And in net/socket.c:

        __bpf_hook_start();
        __weak noinline int update_socket_protocol(int family, int type, int protocol)
        {
                return protocol;
        }
        __bpf_hook_end();

IOW, update_socket_protocol() is a hook, not a kfunc.

> 
> > 
> > Maybe we need a codemod from:
> > 
> >         BTF_ID(func, ...
> > 
> > to:
> > 
> >         BTF_ID(kfunc, ...
> 
> I think it's better to keep just 'func' and not to do anything special for
> kfuncs in resolve_btfids logic to keep it simple
> 
> also it's going to be already in pahole so if we want to make a fix in future
> you need to change pahole, resolve_btfids and possibly also kernel

So maybe special annotation is still needed. WDYT?

[..]

Thanks,
Daniel
Jiri Olsa Jan. 3, 2024, 8:48 a.m. UTC | #19
On Tue, Jan 02, 2024 at 05:56:02PM -0700, Daniel Xu wrote:
> On Thu, Dec 21, 2023 at 09:35:28AM +0100, Jiri Olsa wrote:
> > On Wed, Dec 20, 2023 at 11:37:01PM -0700, Daniel Xu wrote:
> > > On Wed, Dec 20, 2023 at 03:19:52PM -0700, Daniel Xu wrote:
> > > > This commit teaches pahole to parse symbols in .BTF_ids section in
> > > > vmlinux and discover exported kfuncs. Pahole then takes the list of
> > > > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc.
> > > >
> > > > This enables downstream users and tools to dynamically discover which
> > > > kfuncs are available on a system by parsing vmlinux or module BTF, both
> > > > available in /sys/kernel/btf.
> > > >
> > > > Example of encoding:
> > > >
> > > >         $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l
> > > >         388
> > > >
> > > >         $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940
> > > >         [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static
> > > >         [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1
> > > >
> > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
> > > > ---
> > > >  btf_encoder.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++
> > > >  1 file changed, 202 insertions(+)
> > > >
> > > 
> > > Hmm, looking more, seems like this will pick up non-kfunc functions as
> > > well. For example, kernel/trace/bpf_trace.c:
> > > 
> > > 
> > >         BTF_SET_START(btf_allowlist_d_path)
> > >         #ifdef CONFIG_SECURITY
> > >         BTF_ID(func, security_file_permission)
> > >         BTF_ID(func, security_inode_getattr)
> > >         BTF_ID(func, security_file_open)
> > >         #endif
> > >         #ifdef CONFIG_SECURITY_PATH
> > >         BTF_ID(func, security_path_truncate)
> > >         #endif
> > >         BTF_ID(func, vfs_truncate)
> > >         BTF_ID(func, vfs_fallocate)
> > >         BTF_ID(func, dentry_open)
> > >         BTF_ID(func, vfs_getattr)
> > >         BTF_ID(func, filp_close)
> > >         BTF_SET_END(btf_allowlist_d_path)
> > 
> > you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists,
> > which are bounded by __BTF_ID__set8__<name> symbols, which also provide size
> > 
> > __BTF_ID__func_* symbol that has address inside the SET8 list is kfunc
> 
> I managed to add that logic. But I did some spot checks and it looks
> like SET8 lists are not quite limited to kfuncs. For example, in
> net/mptcp/bpf.c:
> 
>         BTF_SET8_START(bpf_mptcp_fmodret_ids)
>         BTF_ID_FLAGS(func, update_socket_protocol)
>         BTF_SET8_END(bpf_mptcp_fmodret_ids)
> 
>         static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
>                 .owner = THIS_MODULE,
>                 .set   = &bpf_mptcp_fmodret_ids,
>         };
> 
> And in net/socket.c:
> 
>         __bpf_hook_start();
>         __weak noinline int update_socket_protocol(int family, int type, int protocol)
>         {
>                 return protocol;
>         }
>         __bpf_hook_end();
> 
> IOW, update_socket_protocol() is a hook, not a kfunc.

hum, right.. we use kfuncs set8 registration now also to mark attachable
hooks for fmodret programs, see [1]

there are similar hooks registered in HID code as well

[1] 5b481acab4ce bpf: do not rely on ALLOW_ERROR_INJECTION for fmod_ret)

> 
> > 
> > > 
> > > Maybe we need a codemod from:
> > > 
> > >         BTF_ID(func, ...
> > > 
> > > to:
> > > 
> > >         BTF_ID(kfunc, ...
> > 
> > I think it's better to keep just 'func' and not to do anything special for
> > kfuncs in resolve_btfids logic to keep it simple
> > 
> > also it's going to be already in pahole so if we want to make a fix in future
> > you need to change pahole, resolve_btfids and possibly also kernel
> 
> So maybe special annotation is still needed. WDYT?

anyway, it looks like we actually do have flags field in set8 (thanks Kumar! ;-) )

	struct btf_id_set8 {
		u32 cnt;
		u32 flags;
		struct {
			u32 id;
			u32 flags;
		} pairs[];
	};

it's not mentioned in the commit changelog [2], but it looks like it was
added just to keep data aligned, and AFAICS it's not used anywhere

how about we add a flag saying this set8 has kfuncs in it

jirka


[2] ab21d6063c01 bpf: Introduce 8-byte BTF set
Daniel Xu Jan. 3, 2024, 8:19 p.m. UTC | #20
On Wed, Jan 03, 2024 at 09:48:09AM +0100, Jiri Olsa wrote:
> On Tue, Jan 02, 2024 at 05:56:02PM -0700, Daniel Xu wrote:
> > On Thu, Dec 21, 2023 at 09:35:28AM +0100, Jiri Olsa wrote:
> > > On Wed, Dec 20, 2023 at 11:37:01PM -0700, Daniel Xu wrote:
> > > > On Wed, Dec 20, 2023 at 03:19:52PM -0700, Daniel Xu wrote:
> > > > > This commit teaches pahole to parse symbols in .BTF_ids section in
> > > > > vmlinux and discover exported kfuncs. Pahole then takes the list of
> > > > > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc.
> > > > >
> > > > > This enables downstream users and tools to dynamically discover which
> > > > > kfuncs are available on a system by parsing vmlinux or module BTF, both
> > > > > available in /sys/kernel/btf.
> > > > >
> > > > > Example of encoding:
> > > > >
> > > > >         $ bpftool btf dump file .tmp_vmlinux.btf | rg DECL_TAG | wc -l
> > > > >         388
> > > > >
> > > > >         $ bpftool btf dump file .tmp_vmlinux.btf | rg 68940
> > > > >         [68940] FUNC 'bpf_xdp_get_xfrm_state' type_id=68939 linkage=static
> > > > >         [128124] DECL_TAG 'kfunc' type_id=68940 component_idx=-1
> > > > >
> > > > > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
> > > > > ---
> > > > >  btf_encoder.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >  1 file changed, 202 insertions(+)
> > > > >
> > > > 
> > > > Hmm, looking more, seems like this will pick up non-kfunc functions as
> > > > well. For example, kernel/trace/bpf_trace.c:
> > > > 
> > > > 
> > > >         BTF_SET_START(btf_allowlist_d_path)
> > > >         #ifdef CONFIG_SECURITY
> > > >         BTF_ID(func, security_file_permission)
> > > >         BTF_ID(func, security_inode_getattr)
> > > >         BTF_ID(func, security_file_open)
> > > >         #endif
> > > >         #ifdef CONFIG_SECURITY_PATH
> > > >         BTF_ID(func, security_path_truncate)
> > > >         #endif
> > > >         BTF_ID(func, vfs_truncate)
> > > >         BTF_ID(func, vfs_fallocate)
> > > >         BTF_ID(func, dentry_open)
> > > >         BTF_ID(func, vfs_getattr)
> > > >         BTF_ID(func, filp_close)
> > > >         BTF_SET_END(btf_allowlist_d_path)
> > > 
> > > you need to pick up only 'BTF_ID(func, ...)' IDs that belongs to SET8 lists,
> > > which are bounded by __BTF_ID__set8__<name> symbols, which also provide size
> > > 
> > > __BTF_ID__func_* symbol that has address inside the SET8 list is kfunc
> > 
> > I managed to add that logic. But I did some spot checks and it looks
> > like SET8 lists are not quite limited to kfuncs. For example, in
> > net/mptcp/bpf.c:
> > 
> >         BTF_SET8_START(bpf_mptcp_fmodret_ids)
> >         BTF_ID_FLAGS(func, update_socket_protocol)
> >         BTF_SET8_END(bpf_mptcp_fmodret_ids)
> > 
> >         static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
> >                 .owner = THIS_MODULE,
> >                 .set   = &bpf_mptcp_fmodret_ids,
> >         };
> > 
> > And in net/socket.c:
> > 
> >         __bpf_hook_start();
> >         __weak noinline int update_socket_protocol(int family, int type, int protocol)
> >         {
> >                 return protocol;
> >         }
> >         __bpf_hook_end();
> > 
> > IOW, update_socket_protocol() is a hook, not a kfunc.
> 
> hum, right.. we use kfuncs set8 registration now also to mark attachable
> hooks for fmodret programs, see [1]
> 
> there are similar hooks registered in HID code as well
> 
> [1] 5b481acab4ce bpf: do not rely on ALLOW_ERROR_INJECTION for fmod_ret)
> 
> > 
> > > 
> > > > 
> > > > Maybe we need a codemod from:
> > > > 
> > > >         BTF_ID(func, ...
> > > > 
> > > > to:
> > > > 
> > > >         BTF_ID(kfunc, ...
> > > 
> > > I think it's better to keep just 'func' and not to do anything special for
> > > kfuncs in resolve_btfids logic to keep it simple
> > > 
> > > also it's going to be already in pahole so if we want to make a fix in future
> > > you need to change pahole, resolve_btfids and possibly also kernel
> > 
> > So maybe special annotation is still needed. WDYT?
> 
> anyway, it looks like we actually do have flags field in set8 (thanks Kumar! ;-) )
> 
> 	struct btf_id_set8 {
> 		u32 cnt;
> 		u32 flags;
> 		struct {
> 			u32 id;
> 			u32 flags;
> 		} pairs[];
> 	};
> 
> it's not mentioned in the commit changelog [2], but it looks like it was
> added just to keep data aligned, and AFAICS it's not used anywhere
> 
> how about we add a flag saying this set8 has kfuncs in it

Nice, yeah. This makes sense. We can tag it at the BTF_SET8_START level
to reduce churn. At same time, we can WARN_ON() if the flag is missing
in register_btf_kfunc_id_set().

Thanks,
Daniel
diff mbox series

Patch

diff --git a/btf_encoder.c b/btf_encoder.c
index fd04008..2697214 100644
--- a/btf_encoder.c
+++ b/btf_encoder.c
@@ -34,6 +34,9 @@ 
 #include <pthread.h>
 
 #define BTF_ENCODER_MAX_PROTO	512
+#define BTF_IDS_SECTION		".BTF_ids"
+#define BTF_ID_FUNC_PFX		"__BTF_ID__func__"
+#define BTF_KFUNC_TYPE_TAG	"kfunc"
 
 /* state used to do later encoding of saved functions */
 struct btf_encoder_state {
@@ -1352,6 +1355,200 @@  out:
 	return err;
 }
 
+/*
+ * Parse BTF_ID symbol and return the kfunc name.
+ *
+ * Returns:
+ *	Callee-owned string containing kfunc name if successful.
+ *	NULL if !kfunc or on error.
+ */
+static char *get_kfunc_name(const char *sym)
+{
+	char *kfunc, *end;
+
+	if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1))
+		return NULL;
+
+	/* Strip prefix */
+	kfunc = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1);
+
+	/* Strip suffix */
+	end = strrchr(kfunc, '_');
+	if (!end || *(end - 1) != '_') {
+		free(kfunc);
+		return NULL;
+	}
+	*(end - 1) = '\0';
+
+	return kfunc;
+}
+
+static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, const char *kfunc)
+{
+	int nr_types, type_id, err = -1;
+	struct btf *btf = encoder->btf;
+
+	nr_types = btf__type_cnt(btf);
+	for (type_id = 1; type_id < nr_types; type_id++) {
+		const struct btf_type *type;
+		const char *name;
+
+		type = btf__type_by_id(btf, type_id);
+		if (!type) {
+			fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n",
+				__func__, type_id);
+			goto out;
+		}
+
+		if (!btf_is_func(type))
+			continue;
+
+		name = btf__name_by_offset(btf, type->name_off);
+		if (!name) {
+			fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n",
+				__func__, type_id);
+			goto out;
+		}
+
+		if (strcmp(name, kfunc))
+		    continue;
+
+		err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, type_id, -1);
+		if (err < 0) {
+			fprintf(stderr, "%s: failed to insert kfunc decl tag for '%s': %d\n",
+				__func__, kfunc, err);
+			goto out;
+		}
+
+		err = 0;
+		break;
+	}
+
+out:
+	return err;
+}
+
+static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
+{
+	const char *filename = encoder->filename;
+	GElf_Shdr shdr_mem, *shdr;
+	int symbols_shndx = -1;
+	int idlist_shndx = -1;
+	Elf_Scn *scn = NULL;
+	Elf_Data *symbols;
+	int fd, err = -1;
+	size_t strtabidx;
+	Elf *elf = NULL;
+	size_t strndx;
+	char *secname;
+	int nr_syms;
+	int i = 0;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "Cannot open %s\n", filename);
+		goto out;
+	}
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		elf_error("Cannot set libelf version");
+		goto out;
+	}
+
+	elf = elf_begin(fd, ELF_C_READ, NULL);
+	if (elf == NULL) {
+		elf_error("Cannot update ELF file");
+		goto out;
+	}
+
+	/* Location symbol table and .BTF_ids sections */
+	elf_getshdrstrndx(elf, &strndx);
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		Elf_Data *data;
+
+		i++;
+		shdr = gelf_getshdr(scn, &shdr_mem);
+		if (shdr == NULL) {
+			elf_error("Failed to get ELF section(%d) hdr", i);
+			goto out;
+		}
+
+		secname = elf_strptr(elf, strndx, shdr->sh_name);
+		if (!secname) {
+			elf_error("Failed to get ELF section(%d) hdr name", i);
+			goto out;
+		}
+
+		data = elf_getdata(scn, 0);
+		if (!data) {
+			elf_error("Failed to get ELF section(%d) data", i);
+			goto out;
+		}
+
+		if (shdr->sh_type == SHT_SYMTAB) {
+			symbols_shndx = i;
+			symbols = data;
+			strtabidx = shdr->sh_link;
+		} else if (!strcmp(secname, BTF_IDS_SECTION)) {
+			idlist_shndx = i;
+		}
+	}
+
+	/* Cannot resolve symbol or .BTF_ids sections. Nothing to do. */
+	if (symbols_shndx == -1 || idlist_shndx == -1) {
+		err = 0;
+		goto out;
+	}
+
+	/*
+	 * Look for __BTF_ID__func__ symbols in .BTF_ids section and
+	 * inject BTF decl tags for each of them.
+	 */
+	scn = elf_getscn(elf, symbols_shndx);
+	if (!scn) {
+		elf_error("Failed to get ELF symbol table");
+		goto out;
+	}
+
+	shdr = gelf_getshdr(scn, &shdr_mem);
+	if (!shdr) {
+		elf_error("Failed to get ELF symbol table header");
+		goto out;
+	}
+
+	nr_syms = shdr->sh_size / shdr->sh_entsize;
+	for (i = 0; i < nr_syms; i++) {
+		char *kfunc, *name;
+		GElf_Sym sym;
+		int err;
+
+		if (!gelf_getsym(symbols, i, &sym)) {
+			elf_error("Failed to get ELF symbol(%d)", i);
+			goto out;
+		}
+
+		if (sym.st_shndx != idlist_shndx)
+			continue;
+
+		name = elf_strptr(elf, strtabidx, sym.st_name);
+		kfunc = get_kfunc_name(name);
+		if (!kfunc)
+			continue;
+
+		err = btf_encoder__tag_kfunc(encoder, kfunc);
+		if (err) {
+			fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, kfunc);
+			free(kfunc);
+			goto out;
+		}
+		free(kfunc);
+	}
+
+	err = 0;
+out:
+	return err;
+}
+
 int btf_encoder__encode(struct btf_encoder *encoder)
 {
 	int err;
@@ -1366,6 +1563,11 @@  int btf_encoder__encode(struct btf_encoder *encoder)
 	if (btf__type_cnt(encoder->btf) == 1)
 		return 0;
 
+	if (btf_encoder__tag_kfuncs(encoder)) {
+		fprintf(stderr, "%s: failed to tag kfuncs!\n", __func__);
+		return -1;
+	}
+
 	if (btf__dedup(encoder->btf, NULL)) {
 		fprintf(stderr, "%s: btf__dedup failed!\n", __func__);
 		return -1;