diff mbox series

[dwarves,v4,2/2] pahole: Inject kfunc decl tags into BTF

Message ID 28e81ccf28d6dd33f6db50af6526dc1770502b8d.1707071969.git.dxu@dxuuu.xyz (mailing list archive)
State Not Applicable
Delegated to: BPF
Headers show
Series pahole: Inject kfunc decl tags into BTF | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch
bpf/vmtest-bpf-PR fail merge-conflict
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-VM_Test-27 fail Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18

Commit Message

Daniel Xu Feb. 4, 2024, 6:40 p.m. UTC
This commit teaches pahole to parse symbols in .BTF_ids section in
vmlinux and discover exported kfuncs. Pahole then takes the list of
kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc.

Example of encoding:

        $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l
        121

        $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337
        [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static
        [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1

This enables downstream users and tools to dynamically discover which
kfuncs are available on a system by parsing vmlinux or module BTF, both
available in /sys/kernel/btf.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
---
 btf_encoder.c | 359 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 359 insertions(+)

Comments

Eduard Zingerman Feb. 5, 2024, 4:54 p.m. UTC | #1
On Sun, 2024-02-04 at 11:40 -0700, Daniel Xu wrote:
[...]
> +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
> +{
> +	const char *filename = encoder->filename;
> +	struct gobuffer btf_kfunc_ranges = {};
> +	struct gobuffer btf_funcs = {};
> +	Elf_Scn *symscn = NULL;
> +	int symbols_shndx = -1;
> +	int fd = -1, err = -1;
> +	int idlist_shndx = -1;
> +	Elf_Scn *scn = NULL;
> +	size_t idlist_addr;
> +	Elf_Data *symbols;
> +	Elf_Data *idlist;
> +	size_t strtabidx;
> +	Elf *elf = NULL;
> +	GElf_Shdr shdr;
> +	size_t strndx;
> +	char *secname;
> +	int nr_syms;
> +	int i = 0;

Note: when compiled in Release mode (e.g. using buildcmd.sh from the repo)
there is a number of false-positive warnings reported by GCC 13.2.1:

$ ./buildcmd.sh
...
In function ‘is_sym_kfunc_set’,
    inlined from ‘btf_encoder__tag_kfuncs’ at /home/eddy/work/dwarves-fork/btf_encoder.c:1639:8,
    inlined from ‘btf_encoder__encode’ at /home/eddy/work/dwarves-fork/btf_encoder.c:1724:29:
/home/eddy/work/dwarves-fork/btf_encoder.c:1395:29: warning: ‘idlist_addr’ may be used uninitialized [-Wmaybe-uninitialized]
 1395 |         off = sym->st_value - idlist_addr;
      |               ~~~~~~~~~~~~~~^~~~~~~~~~~~~
/home/eddy/work/dwarves-fork/btf_encoder.c: In function ‘btf_encoder__encode’:
/home/eddy/work/dwarves-fork/btf_encoder.c:1538:16: note: ‘idlist_addr’ was declared here
 1538 |         size_t idlist_addr;
      |                ^~~~~~~~~~~
In function ‘btf_encoder__tag_kfuncs’,
    inlined from ‘btf_encoder__encode’ at /home/eddy/work/dwarves-fork/btf_encoder.c:1724:29:

Same thing is reported for:
- btf_encoder.c:1630:22: warning: ‘symbols’ may be used uninitialized
- btf_encoder.c:1385:15: warning: ‘idlist’ may be used uninitialized
- btf_encoder.c:1638:24: warning: ‘strtabidx’ may be used uninitialized

GCC does not figure out that the variables above are guarded by -1 checks below.

[...]
> +	while ((scn = elf_nextscn(elf, scn)) != NULL) {
[...]
> +		if (shdr.sh_type == SHT_SYMTAB) {
> +			symbols_shndx = i;
> +			symscn = scn;
> +			symbols = data;
> +			strtabidx = shdr.sh_link;
> +		} else if (!strcmp(secname, BTF_IDS_SECTION)) {
> +			idlist_shndx = i;
> +			idlist_addr = shdr.sh_addr;
> +			idlist = data;
> +		}
> +	}
> +
> +	/* Cannot resolve symbol or .BTF_ids sections. Nothing to do. */
> +	if (symbols_shndx == -1 || idlist_shndx == -1) {
> +		err = 0;
> +		goto out;
> +	}
[...]
Eduard Zingerman Feb. 5, 2024, 11:31 p.m. UTC | #2
On Sun, 2024-02-04 at 11:40 -0700, Daniel Xu wrote:
> This commit teaches pahole to parse symbols in .BTF_ids section in
> vmlinux and discover exported kfuncs. Pahole then takes the list of
> kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc.
> 
> Example of encoding:
> 
>         $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l
>         121
> 
>         $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337
>         [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static
>         [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1
> 
> This enables downstream users and tools to dynamically discover which
> kfuncs are available on a system by parsing vmlinux or module BTF, both
> available in /sys/kernel/btf.
> 
> Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
> ---

I've tested this patch-set using kernel built both with clang and gcc,
on current bpf-next master (2d9a925d0fbf), both times get 124 kfunc definitions.

Tested-by: Eduard Zingerman <eddyz87@gmail.com>

Two nitpicks below.

[...]

> +static char *get_func_name(const char *sym)
> +{
> +	char *func, *end;
> +
> +	if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1))
> +		return NULL;
> +
> +	/* Strip prefix */
> +	func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1);
> +
> +	/* Strip suffix */
> +	end = strrchr(func, '_');
> +	if (!end || *(end - 1) != '_') {

Nit: this would do out of bounds access on malformed input
     "__BTF_ID__func___"

> +		free(func);
> +		return NULL;
> +	}
> +	*(end - 1) = '\0';
> +
> +	return func;
> +}

[...]

> +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
> +{

[...]

> +	elf = elf_begin(fd, ELF_C_READ, NULL);
> +	if (elf == NULL) {
> +		elf_error("Cannot update ELF file");
> +		goto out;
> +	}
> +
> +	/* Location symbol table and .BTF_ids sections */
> +	elf_getshdrstrndx(elf, &strndx);

Nit: in theory elf_getshdrstrndx() could fail and strndx would remain
     uninitialized.

[...]
Alan Maguire Feb. 8, 2024, 10 a.m. UTC | #3
On 04/02/2024 18:40, Daniel Xu wrote:
> This commit teaches pahole to parse symbols in .BTF_ids section in
> vmlinux and discover exported kfuncs. Pahole then takes the list of
> kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc.
> 
> Example of encoding:
> 
>         $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l
>         121
> 
>         $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337
>         [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static
>         [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1
> 
> This enables downstream users and tools to dynamically discover which
> kfuncs are available on a system by parsing vmlinux or module BTF, both
> available in /sys/kernel/btf.
> 
> Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>

Looks good! A few suggestions below..

> ---
>  btf_encoder.c | 359 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 359 insertions(+)
> 
> diff --git a/btf_encoder.c b/btf_encoder.c
> index e325f66..d6a561c 100644
> --- a/btf_encoder.c
> +++ b/btf_encoder.c
> @@ -34,6 +34,21 @@
>  #include <pthread.h>
>  
>  #define BTF_ENCODER_MAX_PROTO	512
> +#define BTF_IDS_SECTION		".BTF_ids"
> +#define BTF_ID_FUNC_PFX		"__BTF_ID__func__"
> +#define BTF_ID_SET8_PFX		"__BTF_ID__set8__"
> +#define BTF_SET8_KFUNCS		(1 << 0)
> +#define BTF_KFUNC_TYPE_TAG	"bpf_kfunc"
> +
> +/* Adapted from include/linux/btf_ids.h */
> +struct btf_id_set8 {
> +        uint32_t cnt;
> +        uint32_t flags;
> +        struct {
> +                uint32_t id;
> +                uint32_t flags;
> +        } pairs[];
> +};
>  
>  /* state used to do later encoding of saved functions */
>  struct btf_encoder_state {
> @@ -95,6 +110,17 @@ struct btf_encoder {
>  	} functions;
>  };
>  
> +struct btf_func {
> +	const char *name;
> +	int	    type_id;
> +};
> +
> +/* Half open interval representing range of addresses containing kfuncs */
> +struct btf_kfunc_set_range {
> +	size_t start;
> +	size_t end;
> +};
> +
>  static LIST_HEAD(encoders);
>  static pthread_mutex_t encoders__lock = PTHREAD_MUTEX_INITIALIZER;
>  
> @@ -1353,6 +1379,331 @@ out:
>  	return err;
>  }
>  
> +/* Returns if `sym` points to a kfunc set */
> +static int is_sym_kfunc_set(GElf_Sym *sym, const char *name, Elf_Data *idlist, size_t idlist_addr)
> +{
> +	void *ptr = idlist->d_buf;
> +	struct btf_id_set8 *set;
> +	bool is_set8;
> +	int off;
> +
> +	/* kfuncs are only found in BTF_SET8's */
> +	is_set8 = !strncmp(name, BTF_ID_SET8_PFX, sizeof(BTF_ID_SET8_PFX) - 1);
> +	if (!is_set8)
> +		return false;
> +
> +	off = sym->st_value - idlist_addr;
> +	if (off >= idlist->d_size) {
> +		fprintf(stderr, "%s: symbol '%s' out of bounds\n", __func__, name);
> +		return false;
> +	}
> +
> +	/* Check the set8 flags to see if it was marked as kfunc */
> +	set = ptr + off;
> +	return set->flags & BTF_SET8_KFUNCS;
> +}
> +
> +/*
> + * Parse BTF_ID symbol and return the func name.
> + *
> + * Returns:
> + *	Caller-owned string containing func name if successful.
> + *	NULL if !func or on error.
> + */
> +static char *get_func_name(const char *sym)
> +{
> +	char *func, *end;
> +
> +	if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1))
> +		return NULL;
> +
> +	/* Strip prefix */
> +	func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1);
> +
> +	/* Strip suffix */
> +	end = strrchr(func, '_');
> +	if (!end || *(end - 1) != '_') {
> +		free(func);
> +		return NULL;
> +	}
> +	*(end - 1) = '\0';
> +
> +	return func;
> +}
> +
> +static int btf_func_cmp(const void *_a, const void *_b)
> +{
> +	const struct btf_func *a = _a;
> +	const struct btf_func *b = _b;
> +
> +	return strcmp(a->name, b->name);
> +}
> +
> +/*
> + * Collects all functions described in BTF.
> + * Returns non-zero on error.
> + */
> +static int btf_encoder__collect_btf_funcs(struct btf_encoder *encoder, struct gobuffer *funcs)
> +{
> +	struct btf *btf = encoder->btf;
> +	int nr_types, type_id;
> +	int err = -1;
> +
> +	/* First collect all the func entries into an array */
> +	nr_types = btf__type_cnt(btf);
> +	for (type_id = 1; type_id < nr_types; type_id++) {
> +		const struct btf_type *type;
> +		struct btf_func func = {};
> +		const char *name;
> +
> +		type = btf__type_by_id(btf, type_id);
> +		if (!type) {
> +			fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n",
> +				__func__, type_id);
> +			err = -EINVAL;
> +			goto out;
> +		}
> +
> +		if (!btf_is_func(type))
> +			continue;
> +
> +		name = btf__name_by_offset(btf, type->name_off);
> +		if (!name) {
> +			fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n",
> +				__func__, type_id);
> +			err = -EINVAL;
> +			goto out;
> +		}
> +
> +		func.name = name;
> +		func.type_id = type_id;
> +		err = gobuffer__add(funcs, &func, sizeof(func));
> +		if (err < 0)
> +			goto out;
> +	}
> +
> +	/* Now that we've collected funcs, sort them by name */
> +	qsort((void *)gobuffer__entries(funcs), gobuffer__nr_entries(funcs),
> +	      sizeof(struct btf_func), btf_func_cmp);
> +
> +	err = 0;
> +out:
> +	return err;
> +}
> +
> +static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer *funcs, const char *kfunc)
> +{
> +	struct btf_func key = { .name = kfunc };
> +	struct btf *btf = encoder->btf;
> +	struct btf_func *target;
> +	const void *base;
> +	unsigned int cnt;
> +	int err = -1;
> +
> +	base = gobuffer__entries(funcs);
> +	cnt = gobuffer__nr_entries(funcs);
> +	target = bsearch(&key, base, cnt, sizeof(key), btf_func_cmp);
> +	if (!target) {
> +		fprintf(stderr, "%s: failed to find kfunc '%s' in BTF\n", __func__, kfunc);
> +		goto out;
> +	}
> +
> +	/* Note we are unconditionally adding the btf_decl_tag even
> +	 * though vmlinux may already contain btf_decl_tags for kfuncs.
> +	 * We are ok to do this b/c we will later btf__dedup() to remove
> +	 * any duplicates.
> +	 */
> +	err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, target->type_id, -1);
> +	if (err < 0) {
> +		fprintf(stderr, "%s: failed to insert kfunc decl tag for '%s': %d\n",
> +			__func__, kfunc, err);
> +		goto out;
> +	}
> +
> +	err = 0;
> +out:
> +	return err;
> +}
> +
> +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
> +{
> +	const char *filename = encoder->filename;
> +	struct gobuffer btf_kfunc_ranges = {};
> +	struct gobuffer btf_funcs = {};
> +	Elf_Scn *symscn = NULL;
> +	int symbols_shndx = -1;
> +	int fd = -1, err = -1;
> +	int idlist_shndx = -1;
> +	Elf_Scn *scn = NULL;
> +	size_t idlist_addr;
> +	Elf_Data *symbols;
> +	Elf_Data *idlist;
> +	size_t strtabidx;
> +	Elf *elf = NULL;
> +	GElf_Shdr shdr;
> +	size_t strndx;
> +	char *secname;
> +	int nr_syms;
> +	int i = 0;
> +
> +	fd = open(filename, O_RDONLY);
> +	if (fd < 0) {
> +		fprintf(stderr, "Cannot open %s\n", filename);
> +		goto out;
> +	}
> +
> +	if (elf_version(EV_CURRENT) == EV_NONE) {
> +		elf_error("Cannot set libelf version");
> +		goto out;
> +	}
> +
> +	elf = elf_begin(fd, ELF_C_READ, NULL);
> +	if (elf == NULL) {
> +		elf_error("Cannot update ELF file");
> +		goto out;
> +	}
> +
> +	/* Location symbol table and .BTF_ids sections */
> +	elf_getshdrstrndx(elf, &strndx);
> +	while ((scn = elf_nextscn(elf, scn)) != NULL) {
> +		Elf_Data *data;
> +
> +		i++;
> +		if (!gelf_getshdr(scn, &shdr)) {
> +			elf_error("Failed to get ELF section(%d) hdr", i);
> +			goto out;
> +		}
> +
> +		secname = elf_strptr(elf, strndx, shdr.sh_name);
> +		if (!secname) {
> +			elf_error("Failed to get ELF section(%d) hdr name", i);
> +			goto out;
> +		}
> +
> +		data = elf_getdata(scn, 0);
> +		if (!data) {
> +			elf_error("Failed to get ELF section(%d) data", i);
> +			goto out;
> +		}
> +
> +		if (shdr.sh_type == SHT_SYMTAB) {
> +			symbols_shndx = i;
> +			symscn = scn;
> +			symbols = data;
> +			strtabidx = shdr.sh_link;
> +		} else if (!strcmp(secname, BTF_IDS_SECTION)) {
> +			idlist_shndx = i;
> +			idlist_addr = shdr.sh_addr;
> +			idlist = data;
> +		}
> +	}
> +

Can we use the existing list of ELF functions collected via
btf_encoder__collect_symbols() for the above? We merge info across CUs
about ELF functions. It _seems_ like there might be a way to re-use this
info but I might be missing something; more below..


> +	/* Cannot resolve symbol or .BTF_ids sections. Nothing to do. */
> +	if (symbols_shndx == -1 || idlist_shndx == -1) {
> +		err = 0;
> +		goto out;
> +	}
> +
> +	if (!gelf_getshdr(symscn, &shdr)) {
> +		elf_error("Failed to get ELF symbol table header");
> +		goto out;
> +	}
> +	nr_syms = shdr.sh_size / shdr.sh_entsize;
> +
> +	err = btf_encoder__collect_btf_funcs(encoder, &btf_funcs);
> +	if (err) {
> +		fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__);
> +		goto out;
> +	}
> +
> +	/* First collect all kfunc set ranges.
> +	 *
> +	 * Note we choose not to sort these ranges and accept a linear
> +	 * search when doing lookups. Reasoning is that the number of
> +	 * sets is ~O(100) and not worth the additional code to optimize.
> +	 */
> +	for (i = 0; i < nr_syms; i++) {
> +		struct btf_kfunc_set_range range = {};
> +		const char *name;
> +		GElf_Sym sym;
> +
> +		if (!gelf_getsym(symbols, i, &sym)) {
> +			elf_error("Failed to get ELF symbol(%d)", i);
> +			goto out;
> +		}
> +
> +		if (sym.st_shndx != idlist_shndx)
> +			continue;
> +
> +		name = elf_strptr(elf, strtabidx, sym.st_name);
> +		if (!is_sym_kfunc_set(&sym, name, idlist, idlist_addr))
> +			continue;
> +
> +		range.start = sym.st_value;
> +		range.end = sym.st_value + sym.st_size;

we could potentially record this info when we collect symbols in
btf_encoder__collect_function(). The reason I suggest this is that it is
likely that to fully clarify which symbol a name refers to we will end
up needing the address.  So struct elf_function could record start and
size, and that could be used by you later without having to parse ELF
for symbols (you'd still need to for the BTF ids section).

Then all you'd need to do is iterate over BTF functions, using
btf_encoder__find_function() to get a function and associated ELF info
by name.


> +		gobuffer__add(&btf_kfunc_ranges, &range, sizeof(range));
> +	}
> +
> +	/* Now inject BTF with kfunc decl tag for detected kfuncs */
> +	for (i = 0; i < nr_syms; i++) {
> +		const struct btf_kfunc_set_range *ranges;
> +		unsigned int ranges_cnt;
> +		char *func, *name;
> +		GElf_Sym sym;
> +		bool found;
> +		int err;
> +		int j;
> +
> +		if (!gelf_getsym(symbols, i, &sym)) {
> +			elf_error("Failed to get ELF symbol(%d)", i);
> +			goto out;
> +		}
> +
> +		if (sym.st_shndx != idlist_shndx)
> +			continue;
> +
> +		name = elf_strptr(elf, strtabidx, sym.st_name);
> +		func = get_func_name(name);
> +		if (!func)
> +			continue;
> +
> +		/* Check if function belongs to a kfunc set */
> +		ranges = gobuffer__entries(&btf_kfunc_ranges);
> +		ranges_cnt = gobuffer__nr_entries(&btf_kfunc_ranges);
> +		found = false;
> +		for (j = 0; j < ranges_cnt; j++) {
> +			size_t addr = sym.st_value;
> +
> +			if (ranges[j].start <= addr && addr < ranges[j].end) {
> +				found = true;
> +				break;
> +			}
> +		}
> +		if (!found) {
> +			free(func);
> +			continue;
> +		}
> +
> +		err = btf_encoder__tag_kfunc(encoder, &btf_funcs, func);
> +		if (err) {
> +			fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, func);
> +			free(func);
> +			goto out;
> +		}
> +		free(func);
> +	}
> +
> +	err = 0;
> +out:
> +	__gobuffer__delete(&btf_funcs);
> +	__gobuffer__delete(&btf_kfunc_ranges);
> +	if (elf)
> +		elf_end(elf);
> +	if (fd != -1)
> +		close(fd);
> +	return err;
> +}
> +
>  int btf_encoder__encode(struct btf_encoder *encoder)
>  {
>  	int err;
> @@ -1367,6 +1718,14 @@ int btf_encoder__encode(struct btf_encoder *encoder)
>  	if (btf__type_cnt(encoder->btf) == 1)
>  		return 0;
>  
> +	/* Note vmlinux may already contain btf_decl_tag's for kfuncs. So
> +	 * take care to call this before btf_dedup().
> +	 */

sorry another thing I should have thought of here; if the user has asked
to skip encoding declaration tags, we should respect that for this case
also. So you'll probably need to set

	encoder->skip_encoding_btf_decl_tag =
conf_load->skip_encoding_btf_decl_tag;

..earlier on, and bail here if encoder->skip_encoding_btf_decl_tag is
true. We'd need to be consistent in that if the user asks not to encode
declaration tags we don't do it for this case either.

> +	if (encoder->tag_kfuncs && btf_encoder__tag_kfuncs(encoder)) {
> +		fprintf(stderr, "%s: failed to tag kfuncs!\n", __func__);
> +		return -1;
> +	}
> +
>  	if (btf__dedup(encoder->btf, NULL)) {
>  		fprintf(stderr, "%s: btf__dedup failed!\n", __func__);
>  		return -1;
Daniel Xu Feb. 28, 2024, 4:07 p.m. UTC | #4
Hi Eduard,

Apologies for long delay - life has been busy.

On Tue, Feb 06, 2024 at 01:31:20AM +0200, Eduard Zingerman wrote:
> On Sun, 2024-02-04 at 11:40 -0700, Daniel Xu wrote:
> > This commit teaches pahole to parse symbols in .BTF_ids section in
> > vmlinux and discover exported kfuncs. Pahole then takes the list of
> > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc.
> > 
> > Example of encoding:
> > 
> >         $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l
> >         121
> > 
> >         $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337
> >         [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static
> >         [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1
> > 
> > This enables downstream users and tools to dynamically discover which
> > kfuncs are available on a system by parsing vmlinux or module BTF, both
> > available in /sys/kernel/btf.
> > 
> > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
> > ---
> 
> I've tested this patch-set using kernel built both with clang and gcc,
> on current bpf-next master (2d9a925d0fbf), both times get 124 kfunc definitions.
> 
> Tested-by: Eduard Zingerman <eddyz87@gmail.com>
> 
> Two nitpicks below.
> 
> [...]
> 
> > +static char *get_func_name(const char *sym)
> > +{
> > +	char *func, *end;
> > +
> > +	if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1))
> > +		return NULL;
> > +
> > +	/* Strip prefix */
> > +	func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1);
> > +
> > +	/* Strip suffix */
> > +	end = strrchr(func, '_');
> > +	if (!end || *(end - 1) != '_') {
> 
> Nit: this would do out of bounds access on malformed input
>      "__BTF_ID__func___"

I think this is actually ok. Reason is we have the strncmp() above
so we know the prefix is there. Then the strdup() in the malformed cased
returns empty string. And strrchr() will then return NULL, so we don't
enter the branch.

I tested it with: https://pastes.dxuuu.xyz/c3j4kk

        $ gcc test.c
        dxu@kashmir~/scratch $ ./a.out
        name=(null)

> 
> > +		free(func);
> > +		return NULL;
> > +	}
> > +	*(end - 1) = '\0';
> > +
> > +	return func;
> > +}
> 
> [...]
> 
> > +static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
> > +{
> 
> [...]
> 
> > +	elf = elf_begin(fd, ELF_C_READ, NULL);
> > +	if (elf == NULL) {
> > +		elf_error("Cannot update ELF file");
> > +		goto out;
> > +	}
> > +
> > +	/* Location symbol table and .BTF_ids sections */
> > +	elf_getshdrstrndx(elf, &strndx);
> 
> Nit: in theory elf_getshdrstrndx() could fail and strndx would remain
>      uninitialized.

Sure, will fix. Also fixing typo (Location -> Locate).

Thanks,
Daniel
Eduard Zingerman Feb. 28, 2024, 9:33 p.m. UTC | #5
On Wed, 2024-02-28 at 09:07 -0700, Daniel Xu wrote:
> Hi Eduard,
>
> Apologies for long delay - life has been busy.

Hi Daniel,

No problem, thank you for reaching back.

[...]

> > > +static char *get_func_name(const char *sym)
> > > +{
> > > +	char *func, *end;
> > > +
> > > +	if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1))
> > > +		return NULL;
> > > +
> > > +	/* Strip prefix */
> > > +	func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1);
> > > +
> > > +	/* Strip suffix */
> > > +	end = strrchr(func, '_');
> > > +	if (!end || *(end - 1) != '_') {
> > 
> > Nit: this would do out of bounds access on malformed input
> >      "__BTF_ID__func___"
> 
> I think this is actually ok. Reason is we have the strncmp() above
> so we know the prefix is there. Then the strdup() in the malformed cased
> returns empty string. And strrchr() will then return NULL, so we don't
> enter the branch.
> 
> I tested it with: https://pastes.dxuuu.xyz/c3j4kk
> 
>         $ gcc test.c
>         dxu@kashmir~/scratch $ ./a.out
>         name=(null)
> 

The test is for "__BTF_ID__func__", but nitpick is for "__BTF_ID__func___"
(three underscores in the end).

E.g. here is a repro:

--- 8< ---------------------------------------------------------------

$ cat test.c

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#define BTF_ID_FUNC_PFX         "__BTF_ID__func__"

static char *get_func_name(const char *sym)
{
        char *func, *end;

        if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1))
                return NULL;

        /* Strip prefix */
        func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1);

        /* Strip suffix */
        end = strrchr(func, '_');
        if (!end || *(end - 1) != '_') {
                free(func);
                return NULL;
        }
        *(end - 1) = '\0';

        return func;
}

int main(int argc, char *argv[]) {
	if (argc < 2)
		return -1;

	printf("name='%s;\n", get_func_name(argv[1]));
	return 0;
}

$ gcc -g test.c

$ valgrind ./a.out __BTF_ID__func___

==16856== Memcheck, a memory error detector
==16856== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==16856== Using Valgrind-3.22.0 and LibVEX; rerun with -h for copyright info
==16856== Command: ./a.out __BTF_ID__func___
==16856== 
==16856== Invalid read of size 1
==16856==    at 0x4011EB: get_func_name (test.c:19)
==16856==    by 0x401244: main (test.c:32)
==16856==  Address 0x4a7e03f is 1 bytes before a block of size 2 alloc'd
==16856==    at 0x4845784: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==16856==    by 0x492176D: strdup (in /usr/lib64/libc.so.6)
==16856==    by 0x4011C2: get_func_name (test.c:15)
==16856==    by 0x401244: main (test.c:32)
==16856== 
name='(null);
==16856== 
==16856== HEAP SUMMARY:
==16856==     in use at exit: 0 bytes in 0 blocks
==16856==   total heap usage: 2 allocs, 2 frees, 1,026 bytes allocated
==16856== 
==16856== All heap blocks were freed -- no leaks are possible
==16856== 
==16856== For lists of detected and suppressed errors, rerun with: -s
==16856== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)

--------------------------------------------------------------- >8 ---

Thanks,
Eduard
Daniel Xu Feb. 29, 2024, 12:57 a.m. UTC | #6
Hi Alan,

On Thu, Feb 08, 2024 at 10:00:15AM +0000, Alan Maguire wrote:
> On 04/02/2024 18:40, Daniel Xu wrote:

[...]

> > +
> > +		if (shdr.sh_type == SHT_SYMTAB) {
> > +			symbols_shndx = i;
> > +			symscn = scn;
> > +			symbols = data;
> > +			strtabidx = shdr.sh_link;
> > +		} else if (!strcmp(secname, BTF_IDS_SECTION)) {
> > +			idlist_shndx = i;
> > +			idlist_addr = shdr.sh_addr;
> > +			idlist = data;
> > +		}
> > +	}
> > +
> 
> Can we use the existing list of ELF functions collected via
> btf_encoder__collect_symbols() for the above? We merge info across CUs
> about ELF functions. It _seems_ like there might be a way to re-use this
> info but I might be missing something; more below..

So the above two sections are only used to acquire information on the
set8's. For collecting functions, this patch uses BTF (see
btf_encoder__collect_btf_funcs()).

> 
> 
> > +	/* Cannot resolve symbol or .BTF_ids sections. Nothing to do. */
> > +	if (symbols_shndx == -1 || idlist_shndx == -1) {
> > +		err = 0;
> > +		goto out;
> > +	}
> > +
> > +	if (!gelf_getshdr(symscn, &shdr)) {
> > +		elf_error("Failed to get ELF symbol table header");
> > +		goto out;
> > +	}
> > +	nr_syms = shdr.sh_size / shdr.sh_entsize;
> > +
> > +	err = btf_encoder__collect_btf_funcs(encoder, &btf_funcs);
> > +	if (err) {
> > +		fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__);
> > +		goto out;
> > +	}
> > +
> > +	/* First collect all kfunc set ranges.
> > +	 *
> > +	 * Note we choose not to sort these ranges and accept a linear
> > +	 * search when doing lookups. Reasoning is that the number of
> > +	 * sets is ~O(100) and not worth the additional code to optimize.
> > +	 */
> > +	for (i = 0; i < nr_syms; i++) {
> > +		struct btf_kfunc_set_range range = {};
> > +		const char *name;
> > +		GElf_Sym sym;
> > +
> > +		if (!gelf_getsym(symbols, i, &sym)) {
> > +			elf_error("Failed to get ELF symbol(%d)", i);
> > +			goto out;
> > +		}
> > +
> > +		if (sym.st_shndx != idlist_shndx)
> > +			continue;
> > +
> > +		name = elf_strptr(elf, strtabidx, sym.st_name);
> > +		if (!is_sym_kfunc_set(&sym, name, idlist, idlist_addr))
> > +			continue;
> > +
> > +		range.start = sym.st_value;
> > +		range.end = sym.st_value + sym.st_size;
> 
> we could potentially record this info when we collect symbols in
> btf_encoder__collect_function(). The reason I suggest this is that it is
> likely that to fully clarify which symbol a name refers to we will end
> up needing the address.  So struct elf_function could record start and
> size, and that could be used by you later without having to parse ELF
> for symbols (you'd still need to for the BTF ids section).

This start/end pair is just for the set8's in .BTF_ids section. Which
btf_encoder__collect_function() rightfully does not look at. So I think
new code needs to be added for set8 collection anyways. I don't have any
strong feelings about whether we should hook into
btf_encoder__collect_symbols() or not -- IMHO it's a bit cleaner to have
all the set8 stuff on its own codepath.

What cases do you see needing the location + size for? Since kfuncs are
exported, I would think it's not possible for a single object file to
have multiple copies of the same kfunc. Nor that the compiler inline the
kfunc.

> 
> Then all you'd need to do is iterate over BTF functions, using
> btf_encoder__find_function() to get a function and associated ELF info
> by name.

Didn't know about this, thanks. I'll take a look at if the patch can use
the existing function metadata. That should get rid of
btf_encoder__collect_btf_funcs() if it works.

> 
> 
> > +		gobuffer__add(&btf_kfunc_ranges, &range, sizeof(range));
> > +	}
> > +
> > +	/* Now inject BTF with kfunc decl tag for detected kfuncs */
> > +	for (i = 0; i < nr_syms; i++) {
> > +		const struct btf_kfunc_set_range *ranges;
> > +		unsigned int ranges_cnt;
> > +		char *func, *name;
> > +		GElf_Sym sym;
> > +		bool found;
> > +		int err;
> > +		int j;
> > +
> > +		if (!gelf_getsym(symbols, i, &sym)) {
> > +			elf_error("Failed to get ELF symbol(%d)", i);
> > +			goto out;
> > +		}
> > +
> > +		if (sym.st_shndx != idlist_shndx)
> > +			continue;
> > +
> > +		name = elf_strptr(elf, strtabidx, sym.st_name);
> > +		func = get_func_name(name);
> > +		if (!func)
> > +			continue;
> > +
> > +		/* Check if function belongs to a kfunc set */
> > +		ranges = gobuffer__entries(&btf_kfunc_ranges);
> > +		ranges_cnt = gobuffer__nr_entries(&btf_kfunc_ranges);
> > +		found = false;
> > +		for (j = 0; j < ranges_cnt; j++) {
> > +			size_t addr = sym.st_value;
> > +
> > +			if (ranges[j].start <= addr && addr < ranges[j].end) {
> > +				found = true;
> > +				break;
> > +			}
> > +		}
> > +		if (!found) {
> > +			free(func);
> > +			continue;
> > +		}
> > +
> > +		err = btf_encoder__tag_kfunc(encoder, &btf_funcs, func);
> > +		if (err) {
> > +			fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, func);
> > +			free(func);
> > +			goto out;
> > +		}
> > +		free(func);
> > +	}
> > +
> > +	err = 0;
> > +out:
> > +	__gobuffer__delete(&btf_funcs);
> > +	__gobuffer__delete(&btf_kfunc_ranges);
> > +	if (elf)
> > +		elf_end(elf);
> > +	if (fd != -1)
> > +		close(fd);
> > +	return err;
> > +}
> > +
> >  int btf_encoder__encode(struct btf_encoder *encoder)
> >  {
> >  	int err;
> > @@ -1367,6 +1718,14 @@ int btf_encoder__encode(struct btf_encoder *encoder)
> >  	if (btf__type_cnt(encoder->btf) == 1)
> >  		return 0;
> >  
> > +	/* Note vmlinux may already contain btf_decl_tag's for kfuncs. So
> > +	 * take care to call this before btf_dedup().
> > +	 */
> 
> sorry another thing I should have thought of here; if the user has asked
> to skip encoding declaration tags, we should respect that for this case
> also. So you'll probably need to set
> 
> 	encoder->skip_encoding_btf_decl_tag =
> conf_load->skip_encoding_btf_decl_tag;
> 
> ..earlier on, and bail here if encoder->skip_encoding_btf_decl_tag is
> true. We'd need to be consistent in that if the user asks not to encode
> declaration tags we don't do it for this case either.

Yeah, good catch. Will do.

[...]

Thanks,
Daniel
Daniel Xu March 13, 2024, 3:17 a.m. UTC | #7
Hi Alan,

On Wed, Feb 28, 2024 at 05:57:01PM -0700, Daniel Xu wrote:
> Hi Alan,
> 
> On Thu, Feb 08, 2024 at 10:00:15AM +0000, Alan Maguire wrote:
> > On 04/02/2024 18:40, Daniel Xu wrote:

[...]

> > 
> > Then all you'd need to do is iterate over BTF functions, using
> > btf_encoder__find_function() to get a function and associated ELF info
> > by name.
> 
> Didn't know about this, thanks. I'll take a look at if the patch can use
> the existing function metadata. That should get rid of
> btf_encoder__collect_btf_funcs() if it works.

I experimented with this a bit and I'm not sure if it's a good approach.

Here are the two commits:

        https://pastes.dxuuu.xyz/xo9jwk
        https://pastes.dxuuu.xyz/xmzew5

Basically my approach was to fan-in all the function info collected by
the different threads. I don't think it'll work cuz some of the data
(the name in particular) in struct elf_function belongs to the thread's
struct btf_encoder instance. Which is all freed by btf_encoder__delete()
before btf_encoder__encode().

It could probably be fixed, but doesn't seem very clean to me. So I
think it'll be better to keep the code as-is. Unless you were thinking
something different.

Thanks,
Daniel
Alan Maguire March 13, 2024, 9:36 a.m. UTC | #8
On 13/03/2024 03:17, Daniel Xu wrote:
> Hi Alan,
> 
> On Wed, Feb 28, 2024 at 05:57:01PM -0700, Daniel Xu wrote:
>> Hi Alan,
>>
>> On Thu, Feb 08, 2024 at 10:00:15AM +0000, Alan Maguire wrote:
>>> On 04/02/2024 18:40, Daniel Xu wrote:
> 
> [...]
> 
>>>
>>> Then all you'd need to do is iterate over BTF functions, using
>>> btf_encoder__find_function() to get a function and associated ELF info
>>> by name.
>>
>> Didn't know about this, thanks. I'll take a look at if the patch can use
>> the existing function metadata. That should get rid of
>> btf_encoder__collect_btf_funcs() if it works.
> 
> I experimented with this a bit and I'm not sure if it's a good approach.
> 
> Here are the two commits:
> 
>         https://pastes.dxuuu.xyz/xo9jwk
>         https://pastes.dxuuu.xyz/xmzew5
> 
> Basically my approach was to fan-in all the function info collected by
> the different threads. I don't think it'll work cuz some of the data
> (the name in particular) in struct elf_function belongs to the thread's
> struct btf_encoder instance. Which is all freed by btf_encoder__delete()
> before btf_encoder__encode().
> 
> It could probably be fixed, but doesn't seem very clean to me. So I
> think it'll be better to keep the code as-is. Unless you were thinking
> something different.
> 

Thanks for trying! We may end up revisiting the freeing of ELF
function/variable info in the future if we add address information from
symbols into BTF, but since that's not there yet, it makes sense to do
your collection separately.

Alan
Daniel Xu March 15, 2024, 7:43 p.m. UTC | #9
Hi Eduard,

On Wed, Feb 28, 2024 at 11:33:28PM +0200, Eduard Zingerman wrote:
> On Wed, 2024-02-28 at 09:07 -0700, Daniel Xu wrote:
> > Hi Eduard,
> >
> > Apologies for long delay - life has been busy.
> 
> Hi Daniel,
> 
> No problem, thank you for reaching back.
> 
> [...]
> 
> > > > +static char *get_func_name(const char *sym)
> > > > +{
> > > > +	char *func, *end;
> > > > +
> > > > +	if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1))
> > > > +		return NULL;
> > > > +
> > > > +	/* Strip prefix */
> > > > +	func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1);
> > > > +
> > > > +	/* Strip suffix */
> > > > +	end = strrchr(func, '_');
> > > > +	if (!end || *(end - 1) != '_') {
> > > 
> > > Nit: this would do out of bounds access on malformed input
> > >      "__BTF_ID__func___"
> > 
> > I think this is actually ok. Reason is we have the strncmp() above
> > so we know the prefix is there. Then the strdup() in the malformed cased
> > returns empty string. And strrchr() will then return NULL, so we don't
> > enter the branch.
> > 
> > I tested it with: https://pastes.dxuuu.xyz/c3j4kk
> > 
> >         $ gcc test.c
> >         dxu@kashmir~/scratch $ ./a.out
> >         name=(null)
> > 
> 
> The test is for "__BTF_ID__func__", but nitpick is for "__BTF_ID__func___"
> (three underscores in the end).
> 

Ha, got it. Didn't see the 3rd one. Fixed in v5.

[...]

Thanks,
Daniel
diff mbox series

Patch

diff --git a/btf_encoder.c b/btf_encoder.c
index e325f66..d6a561c 100644
--- a/btf_encoder.c
+++ b/btf_encoder.c
@@ -34,6 +34,21 @@ 
 #include <pthread.h>
 
 #define BTF_ENCODER_MAX_PROTO	512
+#define BTF_IDS_SECTION		".BTF_ids"
+#define BTF_ID_FUNC_PFX		"__BTF_ID__func__"
+#define BTF_ID_SET8_PFX		"__BTF_ID__set8__"
+#define BTF_SET8_KFUNCS		(1 << 0)
+#define BTF_KFUNC_TYPE_TAG	"bpf_kfunc"
+
+/* Adapted from include/linux/btf_ids.h */
+struct btf_id_set8 {
+        uint32_t cnt;
+        uint32_t flags;
+        struct {
+                uint32_t id;
+                uint32_t flags;
+        } pairs[];
+};
 
 /* state used to do later encoding of saved functions */
 struct btf_encoder_state {
@@ -95,6 +110,17 @@  struct btf_encoder {
 	} functions;
 };
 
+struct btf_func {
+	const char *name;
+	int	    type_id;
+};
+
+/* Half open interval representing range of addresses containing kfuncs */
+struct btf_kfunc_set_range {
+	size_t start;
+	size_t end;
+};
+
 static LIST_HEAD(encoders);
 static pthread_mutex_t encoders__lock = PTHREAD_MUTEX_INITIALIZER;
 
@@ -1353,6 +1379,331 @@  out:
 	return err;
 }
 
+/* Returns if `sym` points to a kfunc set */
+static int is_sym_kfunc_set(GElf_Sym *sym, const char *name, Elf_Data *idlist, size_t idlist_addr)
+{
+	void *ptr = idlist->d_buf;
+	struct btf_id_set8 *set;
+	bool is_set8;
+	int off;
+
+	/* kfuncs are only found in BTF_SET8's */
+	is_set8 = !strncmp(name, BTF_ID_SET8_PFX, sizeof(BTF_ID_SET8_PFX) - 1);
+	if (!is_set8)
+		return false;
+
+	off = sym->st_value - idlist_addr;
+	if (off >= idlist->d_size) {
+		fprintf(stderr, "%s: symbol '%s' out of bounds\n", __func__, name);
+		return false;
+	}
+
+	/* Check the set8 flags to see if it was marked as kfunc */
+	set = ptr + off;
+	return set->flags & BTF_SET8_KFUNCS;
+}
+
+/*
+ * Parse BTF_ID symbol and return the func name.
+ *
+ * Returns:
+ *	Caller-owned string containing func name if successful.
+ *	NULL if !func or on error.
+ */
+static char *get_func_name(const char *sym)
+{
+	char *func, *end;
+
+	if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1))
+		return NULL;
+
+	/* Strip prefix */
+	func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1);
+
+	/* Strip suffix */
+	end = strrchr(func, '_');
+	if (!end || *(end - 1) != '_') {
+		free(func);
+		return NULL;
+	}
+	*(end - 1) = '\0';
+
+	return func;
+}
+
+static int btf_func_cmp(const void *_a, const void *_b)
+{
+	const struct btf_func *a = _a;
+	const struct btf_func *b = _b;
+
+	return strcmp(a->name, b->name);
+}
+
+/*
+ * Collects all functions described in BTF.
+ * Returns non-zero on error.
+ */
+static int btf_encoder__collect_btf_funcs(struct btf_encoder *encoder, struct gobuffer *funcs)
+{
+	struct btf *btf = encoder->btf;
+	int nr_types, type_id;
+	int err = -1;
+
+	/* First collect all the func entries into an array */
+	nr_types = btf__type_cnt(btf);
+	for (type_id = 1; type_id < nr_types; type_id++) {
+		const struct btf_type *type;
+		struct btf_func func = {};
+		const char *name;
+
+		type = btf__type_by_id(btf, type_id);
+		if (!type) {
+			fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n",
+				__func__, type_id);
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (!btf_is_func(type))
+			continue;
+
+		name = btf__name_by_offset(btf, type->name_off);
+		if (!name) {
+			fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n",
+				__func__, type_id);
+			err = -EINVAL;
+			goto out;
+		}
+
+		func.name = name;
+		func.type_id = type_id;
+		err = gobuffer__add(funcs, &func, sizeof(func));
+		if (err < 0)
+			goto out;
+	}
+
+	/* Now that we've collected funcs, sort them by name */
+	qsort((void *)gobuffer__entries(funcs), gobuffer__nr_entries(funcs),
+	      sizeof(struct btf_func), btf_func_cmp);
+
+	err = 0;
+out:
+	return err;
+}
+
+static int btf_encoder__tag_kfunc(struct btf_encoder *encoder, struct gobuffer *funcs, const char *kfunc)
+{
+	struct btf_func key = { .name = kfunc };
+	struct btf *btf = encoder->btf;
+	struct btf_func *target;
+	const void *base;
+	unsigned int cnt;
+	int err = -1;
+
+	base = gobuffer__entries(funcs);
+	cnt = gobuffer__nr_entries(funcs);
+	target = bsearch(&key, base, cnt, sizeof(key), btf_func_cmp);
+	if (!target) {
+		fprintf(stderr, "%s: failed to find kfunc '%s' in BTF\n", __func__, kfunc);
+		goto out;
+	}
+
+	/* Note we are unconditionally adding the btf_decl_tag even
+	 * though vmlinux may already contain btf_decl_tags for kfuncs.
+	 * We are ok to do this b/c we will later btf__dedup() to remove
+	 * any duplicates.
+	 */
+	err = btf__add_decl_tag(btf, BTF_KFUNC_TYPE_TAG, target->type_id, -1);
+	if (err < 0) {
+		fprintf(stderr, "%s: failed to insert kfunc decl tag for '%s': %d\n",
+			__func__, kfunc, err);
+		goto out;
+	}
+
+	err = 0;
+out:
+	return err;
+}
+
+static int btf_encoder__tag_kfuncs(struct btf_encoder *encoder)
+{
+	const char *filename = encoder->filename;
+	struct gobuffer btf_kfunc_ranges = {};
+	struct gobuffer btf_funcs = {};
+	Elf_Scn *symscn = NULL;
+	int symbols_shndx = -1;
+	int fd = -1, err = -1;
+	int idlist_shndx = -1;
+	Elf_Scn *scn = NULL;
+	size_t idlist_addr;
+	Elf_Data *symbols;
+	Elf_Data *idlist;
+	size_t strtabidx;
+	Elf *elf = NULL;
+	GElf_Shdr shdr;
+	size_t strndx;
+	char *secname;
+	int nr_syms;
+	int i = 0;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "Cannot open %s\n", filename);
+		goto out;
+	}
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		elf_error("Cannot set libelf version");
+		goto out;
+	}
+
+	elf = elf_begin(fd, ELF_C_READ, NULL);
+	if (elf == NULL) {
+		elf_error("Cannot update ELF file");
+		goto out;
+	}
+
+	/* Location symbol table and .BTF_ids sections */
+	elf_getshdrstrndx(elf, &strndx);
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		Elf_Data *data;
+
+		i++;
+		if (!gelf_getshdr(scn, &shdr)) {
+			elf_error("Failed to get ELF section(%d) hdr", i);
+			goto out;
+		}
+
+		secname = elf_strptr(elf, strndx, shdr.sh_name);
+		if (!secname) {
+			elf_error("Failed to get ELF section(%d) hdr name", i);
+			goto out;
+		}
+
+		data = elf_getdata(scn, 0);
+		if (!data) {
+			elf_error("Failed to get ELF section(%d) data", i);
+			goto out;
+		}
+
+		if (shdr.sh_type == SHT_SYMTAB) {
+			symbols_shndx = i;
+			symscn = scn;
+			symbols = data;
+			strtabidx = shdr.sh_link;
+		} else if (!strcmp(secname, BTF_IDS_SECTION)) {
+			idlist_shndx = i;
+			idlist_addr = shdr.sh_addr;
+			idlist = data;
+		}
+	}
+
+	/* Cannot resolve symbol or .BTF_ids sections. Nothing to do. */
+	if (symbols_shndx == -1 || idlist_shndx == -1) {
+		err = 0;
+		goto out;
+	}
+
+	if (!gelf_getshdr(symscn, &shdr)) {
+		elf_error("Failed to get ELF symbol table header");
+		goto out;
+	}
+	nr_syms = shdr.sh_size / shdr.sh_entsize;
+
+	err = btf_encoder__collect_btf_funcs(encoder, &btf_funcs);
+	if (err) {
+		fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__);
+		goto out;
+	}
+
+	/* First collect all kfunc set ranges.
+	 *
+	 * Note we choose not to sort these ranges and accept a linear
+	 * search when doing lookups. Reasoning is that the number of
+	 * sets is ~O(100) and not worth the additional code to optimize.
+	 */
+	for (i = 0; i < nr_syms; i++) {
+		struct btf_kfunc_set_range range = {};
+		const char *name;
+		GElf_Sym sym;
+
+		if (!gelf_getsym(symbols, i, &sym)) {
+			elf_error("Failed to get ELF symbol(%d)", i);
+			goto out;
+		}
+
+		if (sym.st_shndx != idlist_shndx)
+			continue;
+
+		name = elf_strptr(elf, strtabidx, sym.st_name);
+		if (!is_sym_kfunc_set(&sym, name, idlist, idlist_addr))
+			continue;
+
+		range.start = sym.st_value;
+		range.end = sym.st_value + sym.st_size;
+		gobuffer__add(&btf_kfunc_ranges, &range, sizeof(range));
+	}
+
+	/* Now inject BTF with kfunc decl tag for detected kfuncs */
+	for (i = 0; i < nr_syms; i++) {
+		const struct btf_kfunc_set_range *ranges;
+		unsigned int ranges_cnt;
+		char *func, *name;
+		GElf_Sym sym;
+		bool found;
+		int err;
+		int j;
+
+		if (!gelf_getsym(symbols, i, &sym)) {
+			elf_error("Failed to get ELF symbol(%d)", i);
+			goto out;
+		}
+
+		if (sym.st_shndx != idlist_shndx)
+			continue;
+
+		name = elf_strptr(elf, strtabidx, sym.st_name);
+		func = get_func_name(name);
+		if (!func)
+			continue;
+
+		/* Check if function belongs to a kfunc set */
+		ranges = gobuffer__entries(&btf_kfunc_ranges);
+		ranges_cnt = gobuffer__nr_entries(&btf_kfunc_ranges);
+		found = false;
+		for (j = 0; j < ranges_cnt; j++) {
+			size_t addr = sym.st_value;
+
+			if (ranges[j].start <= addr && addr < ranges[j].end) {
+				found = true;
+				break;
+			}
+		}
+		if (!found) {
+			free(func);
+			continue;
+		}
+
+		err = btf_encoder__tag_kfunc(encoder, &btf_funcs, func);
+		if (err) {
+			fprintf(stderr, "%s: failed to tag kfunc '%s'\n", __func__, func);
+			free(func);
+			goto out;
+		}
+		free(func);
+	}
+
+	err = 0;
+out:
+	__gobuffer__delete(&btf_funcs);
+	__gobuffer__delete(&btf_kfunc_ranges);
+	if (elf)
+		elf_end(elf);
+	if (fd != -1)
+		close(fd);
+	return err;
+}
+
 int btf_encoder__encode(struct btf_encoder *encoder)
 {
 	int err;
@@ -1367,6 +1718,14 @@  int btf_encoder__encode(struct btf_encoder *encoder)
 	if (btf__type_cnt(encoder->btf) == 1)
 		return 0;
 
+	/* Note vmlinux may already contain btf_decl_tag's for kfuncs. So
+	 * take care to call this before btf_dedup().
+	 */
+	if (encoder->tag_kfuncs && btf_encoder__tag_kfuncs(encoder)) {
+		fprintf(stderr, "%s: failed to tag kfuncs!\n", __func__);
+		return -1;
+	}
+
 	if (btf__dedup(encoder->btf, NULL)) {
 		fprintf(stderr, "%s: btf__dedup failed!\n", __func__);
 		return -1;