diff mbox series

[v4,bpf-next,4/5] bpf: load and verify kernel module BTFs

Message ID 20201110011932.3201430-5-andrii@kernel.org (mailing list archive)
State Accepted
Delegated to: BPF
Headers show
Series Integrate kernel module BTF support | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit fail Errors and warnings before: 28482 this patch: 28483
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch fail Link
netdev/build_allmodconfig_warn fail Errors and warnings before: 26530 this patch: 26531
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Andrii Nakryiko Nov. 10, 2020, 1:19 a.m. UTC
Add kernel module listener that will load/validate and unload module BTF.
Module BTFs gets ID generated for them, which makes it possible to iterate
them with existing BTF iteration API. They are given their respective module's
names, which will get reported through GET_OBJ_INFO API. They are also marked
as in-kernel BTFs for tooling to distinguish them from user-provided BTFs.

Also, similarly to vmlinux BTF, kernel module BTFs are exposed through
sysfs as /sys/kernel/btf/<module-name>. This is convenient for user-space
tools to inspect module BTF contents and dump their types with existing tools:

[vmuser@archvm bpf]$ ls -la /sys/kernel/btf
total 0
drwxr-xr-x  2 root root       0 Nov  4 19:46 .
drwxr-xr-x 13 root root       0 Nov  4 19:46 ..

...

-r--r--r--  1 root root     888 Nov  4 19:46 irqbypass
-r--r--r--  1 root root  100225 Nov  4 19:46 kvm
-r--r--r--  1 root root   35401 Nov  4 19:46 kvm_intel
-r--r--r--  1 root root     120 Nov  4 19:46 pcspkr
-r--r--r--  1 root root     399 Nov  4 19:46 serio_raw
-r--r--r--  1 root root 4094095 Nov  4 19:46 vmlinux

Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 Documentation/ABI/testing/sysfs-kernel-btf |   8 +
 include/linux/bpf.h                        |   2 +
 include/linux/module.h                     |   4 +
 kernel/bpf/btf.c                           | 194 +++++++++++++++++++++
 kernel/bpf/sysfs_btf.c                     |   2 +-
 kernel/module.c                            |  32 ++++
 6 files changed, 241 insertions(+), 1 deletion(-)

Comments

kernel test robot Nov. 11, 2020, 7:11 a.m. UTC | #1
Hi Andrii,

I love your patch! Perhaps something to improve:

[auto build test WARNING on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Andrii-Nakryiko/Integrate-kernel-module-BTF-support/20201110-095309
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: mips-randconfig-r002-20201110 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 4d81c8adb6ed9840257f6cb6b93f60856d422a15)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install mips cross compiling tool for clang build
        # apt-get install binutils-mips-linux-gnu
        # https://github.com/0day-ci/linux/commit/dcd763b7808fdc01ebf70bbe07ba92388df4d20d
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Andrii-Nakryiko/Integrate-kernel-module-BTF-support/20201110-095309
        git checkout dcd763b7808fdc01ebf70bbe07ba92388df4d20d
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=mips 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> kernel/bpf/btf.c:4481:20: warning: unused function 'btf_parse_module'
   static struct btf char const void unsigned int data_size)
   ^
   fatal error: error in backend: Nested variants found in inline asm string: ' .set push
   .set mips64r2
   .if ( 0x00 ) != -1)) 0x00 ) != -1)) : ($( static struct ftrace_branch_data __attribute__((__aligned__(4))) __attribute__((__section__("_ftrace_branch"))) __if_trace = $( .func = __func__, .file = "arch/mips/include/asm/atomic.h", .line = 154, $); 0x00 ) != -1)) : $))) ) && ( 0 ); .set push; .set mips64r2; .rept 1; sync 0x00; .endr; .set pop; .else; ; .endif
   1: ll $1, $2 # atomic_fetch_sub
   subu $0, $1, $3
   sc $0, $2
   beqz $0, 1b
   .set pop
   move $0, $1
   '
   clang-12: error: clang frontend command failed with exit code 70 (use -v to see invocation)
   clang version 12.0.0 (git://gitmirror/llvm_project 874b0a0b9db93f5d3350ffe6b5efda2d908415d0)
   Target: mipsel-unknown-linux-gnu
   Thread model: posix
   InstalledDir: /opt/cross/clang-874b0a0b9d/bin
   clang-12: note: diagnostic msg:
   Makefile arch drivers include kernel mm scripts source usr

vim +/btf_parse_module +4481 kernel/bpf/btf.c

  4480	
> 4481	static struct btf *btf_parse_module(const char *module_name, const void *data, unsigned int data_size)
  4482	{
  4483		struct btf_verifier_env *env = NULL;
  4484		struct bpf_verifier_log *log;
  4485		struct btf *btf = NULL, *base_btf;
  4486		int err;
  4487	
  4488		base_btf = bpf_get_btf_vmlinux();
  4489		if (IS_ERR(base_btf))
  4490			return base_btf;
  4491		if (!base_btf)
  4492			return ERR_PTR(-EINVAL);
  4493	
  4494		env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
  4495		if (!env)
  4496			return ERR_PTR(-ENOMEM);
  4497	
  4498		log = &env->log;
  4499		log->level = BPF_LOG_KERNEL;
  4500	
  4501		btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
  4502		if (!btf) {
  4503			err = -ENOMEM;
  4504			goto errout;
  4505		}
  4506		env->btf = btf;
  4507	
  4508		btf->base_btf = base_btf;
  4509		btf->start_id = base_btf->nr_types;
  4510		btf->start_str_off = base_btf->hdr.str_len;
  4511		btf->kernel_btf = true;
  4512		snprintf(btf->name, sizeof(btf->name), "%s", module_name);
  4513	
  4514		btf->data = kvmalloc(data_size, GFP_KERNEL | __GFP_NOWARN);
  4515		if (!btf->data) {
  4516			err = -ENOMEM;
  4517			goto errout;
  4518		}
  4519		memcpy(btf->data, data, data_size);
  4520		btf->data_size = data_size;
  4521	
  4522		err = btf_parse_hdr(env);
  4523		if (err)
  4524			goto errout;
  4525	
  4526		btf->nohdr_data = btf->data + btf->hdr.hdr_len;
  4527	
  4528		err = btf_parse_str_sec(env);
  4529		if (err)
  4530			goto errout;
  4531	
  4532		err = btf_check_all_metas(env);
  4533		if (err)
  4534			goto errout;
  4535	
  4536		btf_verifier_env_free(env);
  4537		refcount_set(&btf->refcnt, 1);
  4538		return btf;
  4539	
  4540	errout:
  4541		btf_verifier_env_free(env);
  4542		if (btf) {
  4543			kvfree(btf->data);
  4544			kvfree(btf->types);
  4545			kfree(btf);
  4546		}
  4547		return ERR_PTR(err);
  4548	}
  4549	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Jessica Yu Nov. 11, 2020, 10:13 a.m. UTC | #2
+++ Andrii Nakryiko [09/11/20 17:19 -0800]:
[snipped]
>diff --git a/kernel/module.c b/kernel/module.c
>index a4fa44a652a7..f2996b02ab2e 100644
>--- a/kernel/module.c
>+++ b/kernel/module.c
>@@ -380,6 +380,35 @@ static void *section_objs(const struct load_info *info,
> 	return (void *)info->sechdrs[sec].sh_addr;
> }
>
>+/* Find a module section: 0 means not found. Ignores SHF_ALLOC flag. */
>+static unsigned int find_any_sec(const struct load_info *info, const char *name)
>+{
>+	unsigned int i;
>+
>+	for (i = 1; i < info->hdr->e_shnum; i++) {
>+		Elf_Shdr *shdr = &info->sechdrs[i];
>+		if (strcmp(info->secstrings + shdr->sh_name, name) == 0)
>+			return i;
>+	}
>+	return 0;
>+}
>+
>+/*
>+ * Find a module section, or NULL. Fill in number of "objects" in section.
>+ * Ignores SHF_ALLOC flag.
>+ */
>+static __maybe_unused void *any_section_objs(const struct load_info *info,
>+					     const char *name,
>+					     size_t object_size,
>+					     unsigned int *num)
>+{
>+	unsigned int sec = find_any_sec(info, name);
>+
>+	/* Section 0 has sh_addr 0 and sh_size 0. */
>+	*num = info->sechdrs[sec].sh_size / object_size;
>+	return (void *)info->sechdrs[sec].sh_addr;
>+}
>+

Hm, I see this patchset has already been applied to bpf-next, but I
guess that doesn't preclude any follow-up patches :-)

I am not a huge fan of the code duplication here, and also the fact
that they're only called in one place. any_section_objs() and
find_any_sec() are pretty much identical to section_objs() and
find_sec(), other than the fact the former drops the SHF_ALLOC check.

Moreover, since it appears that the ".BTF" section is not marked
SHF_ALLOC, I think this will leave mod->btf_data as a dangling pointer
after the module is done loading and the module's load_info has been
deallocated, since SHF_ALLOC sections are not allocated nor copied to
the module's final location in memory.

Why not simply mark the ".BTF" section in the module SHF_ALLOC? We
already do some sh_flags rewriting in rewrite_section_headers(). Then
the module loader knows to keep the section in memory and you can use
section_objs(). And since the .BTF section stays in module memory,
that might save you the memcpy() to btf->data in btf_parse_module()
(unless that is still needed for some reason).

Thanks,

Jessica

> /* Provided by the linker */
> extern const struct kernel_symbol __start___ksymtab[];
> extern const struct kernel_symbol __stop___ksymtab[];
>@@ -3250,6 +3279,9 @@ static int find_module_sections(struct module *mod, struct load_info *info)
> 					   sizeof(*mod->bpf_raw_events),
> 					   &mod->num_bpf_raw_events);
> #endif
>+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
>+	mod->btf_data = any_section_objs(info, ".BTF", 1, &mod->btf_data_size);
>+#endif
> #ifdef CONFIG_JUMP_LABEL
> 	mod->jump_entries = section_objs(info, "__jump_table",
> 					sizeof(*mod->jump_entries),
>-- 
>2.24.1
>
Andrii Nakryiko Nov. 11, 2020, 8:11 p.m. UTC | #3
On Wed, Nov 11, 2020 at 2:13 AM Jessica Yu <jeyu@kernel.org> wrote:
>
> +++ Andrii Nakryiko [09/11/20 17:19 -0800]:
> [snipped]
> >diff --git a/kernel/module.c b/kernel/module.c
> >index a4fa44a652a7..f2996b02ab2e 100644
> >--- a/kernel/module.c
> >+++ b/kernel/module.c
> >@@ -380,6 +380,35 @@ static void *section_objs(const struct load_info *info,
> >       return (void *)info->sechdrs[sec].sh_addr;
> > }
> >
> >+/* Find a module section: 0 means not found. Ignores SHF_ALLOC flag. */
> >+static unsigned int find_any_sec(const struct load_info *info, const char *name)
> >+{
> >+      unsigned int i;
> >+
> >+      for (i = 1; i < info->hdr->e_shnum; i++) {
> >+              Elf_Shdr *shdr = &info->sechdrs[i];
> >+              if (strcmp(info->secstrings + shdr->sh_name, name) == 0)
> >+                      return i;
> >+      }
> >+      return 0;
> >+}
> >+
> >+/*
> >+ * Find a module section, or NULL. Fill in number of "objects" in section.
> >+ * Ignores SHF_ALLOC flag.
> >+ */
> >+static __maybe_unused void *any_section_objs(const struct load_info *info,
> >+                                           const char *name,
> >+                                           size_t object_size,
> >+                                           unsigned int *num)
> >+{
> >+      unsigned int sec = find_any_sec(info, name);
> >+
> >+      /* Section 0 has sh_addr 0 and sh_size 0. */
> >+      *num = info->sechdrs[sec].sh_size / object_size;
> >+      return (void *)info->sechdrs[sec].sh_addr;
> >+}
> >+
>
> Hm, I see this patchset has already been applied to bpf-next, but I
> guess that doesn't preclude any follow-up patches :-)

Of course!

>
> I am not a huge fan of the code duplication here, and also the fact
> that they're only called in one place. any_section_objs() and
> find_any_sec() are pretty much identical to section_objs() and
> find_sec(), other than the fact the former drops the SHF_ALLOC check.

Right, but the alternative was to add a new flag to existing
section_objs() and find_sec() functions, which would cause much more
code churn for no good reason (besides saving some trivial code
duplication). And those true/false flags are harder to read in code
anyways.

>
> Moreover, since it appears that the ".BTF" section is not marked
> SHF_ALLOC, I think this will leave mod->btf_data as a dangling pointer
> after the module is done loading and the module's load_info has been
> deallocated, since SHF_ALLOC sections are not allocated nor copied to
> the module's final location in memory.

I can make sure that we also reset the btf_data pointer back to NULL,
if that's a big concern.

>
> Why not simply mark the ".BTF" section in the module SHF_ALLOC? We
> already do some sh_flags rewriting in rewrite_section_headers(). Then
> the module loader knows to keep the section in memory and you can use
> section_objs(). And since the .BTF section stays in module memory,
> that might save you the memcpy() to btf->data in btf_parse_module()
> (unless that is still needed for some reason).

Wasn't aware about rewrite_section_headers() manipulations. Are you
suggesting to just add SHF_ALLOC there for the .BTF section from the
kernel side? I guess that would work, but won't avoid memory copy (so
actually would waste kernel memory, if I understand correctly). The
reason being that the module's BTF is registered as an independently
ref-counted BTF object, which could be held past the kernel module
being unloaded. So I can't directly reference module's .BTF data
anyways.

Also, marking .BTF with SHF_ALLOC with pahole or objcopy tool actually
might generate warnings because SHF_ALLOC sections need to be
allocated to data segments, which neither of those tools know how to
do, it requires a linker support. We do that for vmlinux with extra
linker script logic, but for kernel modules we don't have and probably
don't want to do that.

So in the end, the cleanest approach still seems like not doing
SHF_ALLOC but allowing "capturing" .BTF data with an extra helper.

>
> Thanks,
>
> Jessica
>
> > /* Provided by the linker */
> > extern const struct kernel_symbol __start___ksymtab[];
> > extern const struct kernel_symbol __stop___ksymtab[];
> >@@ -3250,6 +3279,9 @@ static int find_module_sections(struct module *mod, struct load_info *info)
> >                                          sizeof(*mod->bpf_raw_events),
> >                                          &mod->num_bpf_raw_events);
> > #endif
> >+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
> >+      mod->btf_data = any_section_objs(info, ".BTF", 1, &mod->btf_data_size);
> >+#endif
> > #ifdef CONFIG_JUMP_LABEL
> >       mod->jump_entries = section_objs(info, "__jump_table",
> >                                       sizeof(*mod->jump_entries),
> >--
> >2.24.1
> >
Jessica Yu Nov. 13, 2020, 10:31 a.m. UTC | #4
+++ Andrii Nakryiko [11/11/20 12:11 -0800]:
>On Wed, Nov 11, 2020 at 2:13 AM Jessica Yu <jeyu@kernel.org> wrote:
>>
>> +++ Andrii Nakryiko [09/11/20 17:19 -0800]:
>> [snipped]
>> >diff --git a/kernel/module.c b/kernel/module.c
>> >index a4fa44a652a7..f2996b02ab2e 100644
>> >--- a/kernel/module.c
>> >+++ b/kernel/module.c
>> >@@ -380,6 +380,35 @@ static void *section_objs(const struct load_info *info,
>> >       return (void *)info->sechdrs[sec].sh_addr;
>> > }
>> >
>> >+/* Find a module section: 0 means not found. Ignores SHF_ALLOC flag. */
>> >+static unsigned int find_any_sec(const struct load_info *info, const char *name)
>> >+{
>> >+      unsigned int i;
>> >+
>> >+      for (i = 1; i < info->hdr->e_shnum; i++) {
>> >+              Elf_Shdr *shdr = &info->sechdrs[i];
>> >+              if (strcmp(info->secstrings + shdr->sh_name, name) == 0)
>> >+                      return i;
>> >+      }
>> >+      return 0;
>> >+}
>> >+
>> >+/*
>> >+ * Find a module section, or NULL. Fill in number of "objects" in section.
>> >+ * Ignores SHF_ALLOC flag.
>> >+ */
>> >+static __maybe_unused void *any_section_objs(const struct load_info *info,
>> >+                                           const char *name,
>> >+                                           size_t object_size,
>> >+                                           unsigned int *num)
>> >+{
>> >+      unsigned int sec = find_any_sec(info, name);
>> >+
>> >+      /* Section 0 has sh_addr 0 and sh_size 0. */
>> >+      *num = info->sechdrs[sec].sh_size / object_size;
>> >+      return (void *)info->sechdrs[sec].sh_addr;
>> >+}
>> >+
>>
>> Hm, I see this patchset has already been applied to bpf-next, but I
>> guess that doesn't preclude any follow-up patches :-)
>
>Of course!
>
>>
>> I am not a huge fan of the code duplication here, and also the fact
>> that they're only called in one place. any_section_objs() and
>> find_any_sec() are pretty much identical to section_objs() and
>> find_sec(), other than the fact the former drops the SHF_ALLOC check.
>
>Right, but the alternative was to add a new flag to existing
>section_objs() and find_sec() functions, which would cause much more
>code churn for no good reason (besides saving some trivial code
>duplication). And those true/false flags are harder to read in code
>anyways.

That's true, all fair points. I thought there was the possibility to
avoid the code duplication if .BTF were also set to SHF_ALLOC, but I
see for reasons you explained below it is more trouble than it's worth.

>>
>> Moreover, since it appears that the ".BTF" section is not marked
>> SHF_ALLOC, I think this will leave mod->btf_data as a dangling pointer
>> after the module is done loading and the module's load_info has been
>> deallocated, since SHF_ALLOC sections are not allocated nor copied to
>> the module's final location in memory.
>
>I can make sure that we also reset the btf_data pointer back to NULL,
>if that's a big concern.

It's not a terribly huge concern, since mod->btf_data is only accessed
in the btf coming notifier at the moment, but it's probably best to at
least not advertise it as a valid pointer anymore after the module is
done loading. We do some pointer and section size cleanup at the end
of do_init_module() for sections that are deallocated at the end of
module load (starting where init_layout.base is reset to NULL),
we could just tack on mod->btf_data = NULL there as well.

>>
>> Why not simply mark the ".BTF" section in the module SHF_ALLOC? We
>> already do some sh_flags rewriting in rewrite_section_headers(). Then
>> the module loader knows to keep the section in memory and you can use
>> section_objs(). And since the .BTF section stays in module memory,
>> that might save you the memcpy() to btf->data in btf_parse_module()
>> (unless that is still needed for some reason).
>
>Wasn't aware about rewrite_section_headers() manipulations. Are you
>suggesting to just add SHF_ALLOC there for the .BTF section from the
>kernel side? I guess that would work, but won't avoid memory copy (so
>actually would waste kernel memory, if I understand correctly). The
>reason being that the module's BTF is registered as an independently
>ref-counted BTF object, which could be held past the kernel module
>being unloaded. So I can't directly reference module's .BTF data
>anyways.

Ah OK, I was not aware that the section could be held past the module
being unloaded. Then yeah, it would be a memory waste to keep them in
memory if they are being memcpy'd anyway. Thanks for clarifying!

Jessica
Andrii Nakryiko Nov. 13, 2020, 6:54 p.m. UTC | #5
On Fri, Nov 13, 2020 at 2:32 AM Jessica Yu <jeyu@kernel.org> wrote:
>
> +++ Andrii Nakryiko [11/11/20 12:11 -0800]:
> >On Wed, Nov 11, 2020 at 2:13 AM Jessica Yu <jeyu@kernel.org> wrote:
> >>
> >> +++ Andrii Nakryiko [09/11/20 17:19 -0800]:
> >> [snipped]
> >> >diff --git a/kernel/module.c b/kernel/module.c
> >> >index a4fa44a652a7..f2996b02ab2e 100644
> >> >--- a/kernel/module.c
> >> >+++ b/kernel/module.c
> >> >@@ -380,6 +380,35 @@ static void *section_objs(const struct load_info *info,
> >> >       return (void *)info->sechdrs[sec].sh_addr;
> >> > }
> >> >
> >> >+/* Find a module section: 0 means not found. Ignores SHF_ALLOC flag. */
> >> >+static unsigned int find_any_sec(const struct load_info *info, const char *name)
> >> >+{
> >> >+      unsigned int i;
> >> >+
> >> >+      for (i = 1; i < info->hdr->e_shnum; i++) {
> >> >+              Elf_Shdr *shdr = &info->sechdrs[i];
> >> >+              if (strcmp(info->secstrings + shdr->sh_name, name) == 0)
> >> >+                      return i;
> >> >+      }
> >> >+      return 0;
> >> >+}
> >> >+
> >> >+/*
> >> >+ * Find a module section, or NULL. Fill in number of "objects" in section.
> >> >+ * Ignores SHF_ALLOC flag.
> >> >+ */
> >> >+static __maybe_unused void *any_section_objs(const struct load_info *info,
> >> >+                                           const char *name,
> >> >+                                           size_t object_size,
> >> >+                                           unsigned int *num)
> >> >+{
> >> >+      unsigned int sec = find_any_sec(info, name);
> >> >+
> >> >+      /* Section 0 has sh_addr 0 and sh_size 0. */
> >> >+      *num = info->sechdrs[sec].sh_size / object_size;
> >> >+      return (void *)info->sechdrs[sec].sh_addr;
> >> >+}
> >> >+
> >>
> >> Hm, I see this patchset has already been applied to bpf-next, but I
> >> guess that doesn't preclude any follow-up patches :-)
> >
> >Of course!
> >
> >>
> >> I am not a huge fan of the code duplication here, and also the fact
> >> that they're only called in one place. any_section_objs() and
> >> find_any_sec() are pretty much identical to section_objs() and
> >> find_sec(), other than the fact the former drops the SHF_ALLOC check.
> >
> >Right, but the alternative was to add a new flag to existing
> >section_objs() and find_sec() functions, which would cause much more
> >code churn for no good reason (besides saving some trivial code
> >duplication). And those true/false flags are harder to read in code
> >anyways.
>
> That's true, all fair points. I thought there was the possibility to
> avoid the code duplication if .BTF were also set to SHF_ALLOC, but I
> see for reasons you explained below it is more trouble than it's worth.
>
> >>
> >> Moreover, since it appears that the ".BTF" section is not marked
> >> SHF_ALLOC, I think this will leave mod->btf_data as a dangling pointer
> >> after the module is done loading and the module's load_info has been
> >> deallocated, since SHF_ALLOC sections are not allocated nor copied to
> >> the module's final location in memory.
> >
> >I can make sure that we also reset the btf_data pointer back to NULL,
> >if that's a big concern.
>
> It's not a terribly huge concern, since mod->btf_data is only accessed
> in the btf coming notifier at the moment, but it's probably best to at
> least not advertise it as a valid pointer anymore after the module is
> done loading. We do some pointer and section size cleanup at the end
> of do_init_module() for sections that are deallocated at the end of
> module load (starting where init_layout.base is reset to NULL),
> we could just tack on mod->btf_data = NULL there as well.

Sounds good, I'll send a follow up patch. Thanks!

>
> >>
> >> Why not simply mark the ".BTF" section in the module SHF_ALLOC? We
> >> already do some sh_flags rewriting in rewrite_section_headers(). Then
> >> the module loader knows to keep the section in memory and you can use
> >> section_objs(). And since the .BTF section stays in module memory,
> >> that might save you the memcpy() to btf->data in btf_parse_module()
> >> (unless that is still needed for some reason).
> >
> >Wasn't aware about rewrite_section_headers() manipulations. Are you
> >suggesting to just add SHF_ALLOC there for the .BTF section from the
> >kernel side? I guess that would work, but won't avoid memory copy (so
> >actually would waste kernel memory, if I understand correctly). The
> >reason being that the module's BTF is registered as an independently
> >ref-counted BTF object, which could be held past the kernel module
> >being unloaded. So I can't directly reference module's .BTF data
> >anyways.
>
> Ah OK, I was not aware that the section could be held past the module
> being unloaded. Then yeah, it would be a memory waste to keep them in
> memory if they are being memcpy'd anyway. Thanks for clarifying!
>
> Jessica
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/sysfs-kernel-btf b/Documentation/ABI/testing/sysfs-kernel-btf
index 2c9744b2cd59..fe96efdc9b6c 100644
--- a/Documentation/ABI/testing/sysfs-kernel-btf
+++ b/Documentation/ABI/testing/sysfs-kernel-btf
@@ -15,3 +15,11 @@  Description:
 		information with description of all internal kernel types. See
 		Documentation/bpf/btf.rst for detailed description of format
 		itself.
+
+What:		/sys/kernel/btf/<module-name>
+Date:		Nov 2020
+KernelVersion:	5.11
+Contact:	bpf@vger.kernel.org
+Description:
+		Read-only binary attribute exposing kernel module's BTF type
+		information as an add-on to the kernel's BTF (/sys/kernel/btf/vmlinux).
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 73d5381a5d5c..581b2a2e78eb 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -36,9 +36,11 @@  struct seq_operations;
 struct bpf_iter_aux_info;
 struct bpf_local_storage;
 struct bpf_local_storage_map;
+struct kobject;
 
 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
+extern struct kobject *btf_kobj;
 
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
 					struct bpf_iter_aux_info *aux);
diff --git a/include/linux/module.h b/include/linux/module.h
index a29187f7c360..20fce258ffba 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -475,6 +475,10 @@  struct module {
 	unsigned int num_bpf_raw_events;
 	struct bpf_raw_event_map *bpf_raw_events;
 #endif
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+	unsigned int btf_data_size;
+	void *btf_data;
+#endif
 #ifdef CONFIG_JUMP_LABEL
 	struct jump_entry *jump_entries;
 	unsigned int num_jump_entries;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 856585db7aa7..0f1fd2669d69 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -22,6 +22,8 @@ 
 #include <linux/skmsg.h>
 #include <linux/perf_event.h>
 #include <linux/bsearch.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
 #include <net/sock.h>
 
 /* BTF (BPF Type Format) is the meta data format which describes
@@ -4476,6 +4478,75 @@  struct btf *btf_parse_vmlinux(void)
 	return ERR_PTR(err);
 }
 
+static struct btf *btf_parse_module(const char *module_name, const void *data, unsigned int data_size)
+{
+	struct btf_verifier_env *env = NULL;
+	struct bpf_verifier_log *log;
+	struct btf *btf = NULL, *base_btf;
+	int err;
+
+	base_btf = bpf_get_btf_vmlinux();
+	if (IS_ERR(base_btf))
+		return base_btf;
+	if (!base_btf)
+		return ERR_PTR(-EINVAL);
+
+	env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
+	if (!env)
+		return ERR_PTR(-ENOMEM);
+
+	log = &env->log;
+	log->level = BPF_LOG_KERNEL;
+
+	btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
+	if (!btf) {
+		err = -ENOMEM;
+		goto errout;
+	}
+	env->btf = btf;
+
+	btf->base_btf = base_btf;
+	btf->start_id = base_btf->nr_types;
+	btf->start_str_off = base_btf->hdr.str_len;
+	btf->kernel_btf = true;
+	snprintf(btf->name, sizeof(btf->name), "%s", module_name);
+
+	btf->data = kvmalloc(data_size, GFP_KERNEL | __GFP_NOWARN);
+	if (!btf->data) {
+		err = -ENOMEM;
+		goto errout;
+	}
+	memcpy(btf->data, data, data_size);
+	btf->data_size = data_size;
+
+	err = btf_parse_hdr(env);
+	if (err)
+		goto errout;
+
+	btf->nohdr_data = btf->data + btf->hdr.hdr_len;
+
+	err = btf_parse_str_sec(env);
+	if (err)
+		goto errout;
+
+	err = btf_check_all_metas(env);
+	if (err)
+		goto errout;
+
+	btf_verifier_env_free(env);
+	refcount_set(&btf->refcnt, 1);
+	return btf;
+
+errout:
+	btf_verifier_env_free(env);
+	if (btf) {
+		kvfree(btf->data);
+		kvfree(btf->types);
+		kfree(btf);
+	}
+	return ERR_PTR(err);
+}
+
 struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
 {
 	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
@@ -5651,3 +5722,126 @@  bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
 {
 	return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
 }
+
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+struct btf_module {
+	struct list_head list;
+	struct module *module;
+	struct btf *btf;
+	struct bin_attribute *sysfs_attr;
+};
+
+static LIST_HEAD(btf_modules);
+static DEFINE_MUTEX(btf_module_mutex);
+
+static ssize_t
+btf_module_read(struct file *file, struct kobject *kobj,
+		struct bin_attribute *bin_attr,
+		char *buf, loff_t off, size_t len)
+{
+	const struct btf *btf = bin_attr->private;
+
+	memcpy(buf, btf->data + off, len);
+	return len;
+}
+
+static int btf_module_notify(struct notifier_block *nb, unsigned long op,
+			     void *module)
+{
+	struct btf_module *btf_mod, *tmp;
+	struct module *mod = module;
+	struct btf *btf;
+	int err = 0;
+
+	if (mod->btf_data_size == 0 ||
+	    (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
+		goto out;
+
+	switch (op) {
+	case MODULE_STATE_COMING:
+		btf_mod = kzalloc(sizeof(*btf_mod), GFP_KERNEL);
+		if (!btf_mod) {
+			err = -ENOMEM;
+			goto out;
+		}
+		btf = btf_parse_module(mod->name, mod->btf_data, mod->btf_data_size);
+		if (IS_ERR(btf)) {
+			pr_warn("failed to validate module [%s] BTF: %ld\n",
+				mod->name, PTR_ERR(btf));
+			kfree(btf_mod);
+			err = PTR_ERR(btf);
+			goto out;
+		}
+		err = btf_alloc_id(btf);
+		if (err) {
+			btf_free(btf);
+			kfree(btf_mod);
+			goto out;
+		}
+
+		mutex_lock(&btf_module_mutex);
+		btf_mod->module = module;
+		btf_mod->btf = btf;
+		list_add(&btf_mod->list, &btf_modules);
+		mutex_unlock(&btf_module_mutex);
+
+		if (IS_ENABLED(CONFIG_SYSFS)) {
+			struct bin_attribute *attr;
+
+			attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+			if (!attr)
+				goto out;
+
+			sysfs_bin_attr_init(attr);
+			attr->attr.name = btf->name;
+			attr->attr.mode = 0444;
+			attr->size = btf->data_size;
+			attr->private = btf;
+			attr->read = btf_module_read;
+
+			err = sysfs_create_bin_file(btf_kobj, attr);
+			if (err) {
+				pr_warn("failed to register module [%s] BTF in sysfs: %d\n",
+					mod->name, err);
+				kfree(attr);
+				err = 0;
+				goto out;
+			}
+
+			btf_mod->sysfs_attr = attr;
+		}
+
+		break;
+	case MODULE_STATE_GOING:
+		mutex_lock(&btf_module_mutex);
+		list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
+			if (btf_mod->module != module)
+				continue;
+
+			list_del(&btf_mod->list);
+			if (btf_mod->sysfs_attr)
+				sysfs_remove_bin_file(btf_kobj, btf_mod->sysfs_attr);
+			btf_put(btf_mod->btf);
+			kfree(btf_mod->sysfs_attr);
+			kfree(btf_mod);
+			break;
+		}
+		mutex_unlock(&btf_module_mutex);
+		break;
+	}
+out:
+	return notifier_from_errno(err);
+}
+
+static struct notifier_block btf_module_nb = {
+	.notifier_call = btf_module_notify,
+};
+
+static int __init btf_module_init(void)
+{
+	register_module_notifier(&btf_module_nb);
+	return 0;
+}
+
+fs_initcall(btf_module_init);
+#endif /* CONFIG_DEBUG_INFO_BTF_MODULES */
diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c
index 11b3380887fa..ef6911aee3bb 100644
--- a/kernel/bpf/sysfs_btf.c
+++ b/kernel/bpf/sysfs_btf.c
@@ -26,7 +26,7 @@  static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = {
 	.read = btf_vmlinux_read,
 };
 
-static struct kobject *btf_kobj;
+struct kobject *btf_kobj;
 
 static int __init btf_vmlinux_init(void)
 {
diff --git a/kernel/module.c b/kernel/module.c
index a4fa44a652a7..f2996b02ab2e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -380,6 +380,35 @@  static void *section_objs(const struct load_info *info,
 	return (void *)info->sechdrs[sec].sh_addr;
 }
 
+/* Find a module section: 0 means not found. Ignores SHF_ALLOC flag. */
+static unsigned int find_any_sec(const struct load_info *info, const char *name)
+{
+	unsigned int i;
+
+	for (i = 1; i < info->hdr->e_shnum; i++) {
+		Elf_Shdr *shdr = &info->sechdrs[i];
+		if (strcmp(info->secstrings + shdr->sh_name, name) == 0)
+			return i;
+	}
+	return 0;
+}
+
+/*
+ * Find a module section, or NULL. Fill in number of "objects" in section.
+ * Ignores SHF_ALLOC flag.
+ */
+static __maybe_unused void *any_section_objs(const struct load_info *info,
+					     const char *name,
+					     size_t object_size,
+					     unsigned int *num)
+{
+	unsigned int sec = find_any_sec(info, name);
+
+	/* Section 0 has sh_addr 0 and sh_size 0. */
+	*num = info->sechdrs[sec].sh_size / object_size;
+	return (void *)info->sechdrs[sec].sh_addr;
+}
+
 /* Provided by the linker */
 extern const struct kernel_symbol __start___ksymtab[];
 extern const struct kernel_symbol __stop___ksymtab[];
@@ -3250,6 +3279,9 @@  static int find_module_sections(struct module *mod, struct load_info *info)
 					   sizeof(*mod->bpf_raw_events),
 					   &mod->num_bpf_raw_events);
 #endif
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+	mod->btf_data = any_section_objs(info, ".BTF", 1, &mod->btf_data_size);
+#endif
 #ifdef CONFIG_JUMP_LABEL
 	mod->jump_entries = section_objs(info, "__jump_table",
 					sizeof(*mod->jump_entries),