diff mbox series

[bpf-next,v2,4/5] bpftool: add support for subskeletons

Message ID d3ee2b3bb282e8aa0e6ab01ca4be522004a7cba0.1646957399.git.delyank@fb.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series Subskeleton support for BPF libraries | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 9 maintainers not CCed: rafaeldtinoco@gmail.com quentin@isovalent.com kpsingh@kernel.org mauricio@kinvolk.io john.fastabend@gmail.com kafai@fb.com songliubraving@fb.com yhs@fb.com netdev@vger.kernel.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning CHECK: Alignment should match open parenthesis WARNING: Avoid line continuations in quoted strings WARNING: Misplaced SPDX-License-Identifier tag - use line 1 instead WARNING: line length of 100 exceeds 80 columns WARNING: line length of 107 exceeds 80 columns WARNING: line length of 111 exceeds 80 columns WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns WARNING: unnecessary whitespace before a quoted newline
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline fail Was 0 now: 4
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next success VM_Test

Commit Message

Delyan Kratunov March 11, 2022, 12:11 a.m. UTC
Subskeletons are headers which require an already loaded program to
operate.

For example, when a BPF library is linked into a larger BPF object file,
the library userspace needs a way to access its own global variables
without requiring knowledge about the larger program at build time.

As a result, subskeletons require a loaded bpf_object to open().
Further, they find their own symbols in the larger program by
walking BTF type data at run time.

At this time, programs, maps, and globals are supported through
non-owning pointers.

Signed-off-by: Delyan Kratunov <delyank@fb.com>
---
 .../bpf/bpftool/Documentation/bpftool-gen.rst |  25 +
 tools/bpf/bpftool/bash-completion/bpftool     |  14 +-
 tools/bpf/bpftool/gen.c                       | 589 ++++++++++++++++--
 3 files changed, 564 insertions(+), 64 deletions(-)

Comments

Andrii Nakryiko March 11, 2022, 11:27 p.m. UTC | #1
On Thu, Mar 10, 2022 at 4:12 PM Delyan Kratunov <delyank@fb.com> wrote:
>
> Subskeletons are headers which require an already loaded program to
> operate.
>
> For example, when a BPF library is linked into a larger BPF object file,
> the library userspace needs a way to access its own global variables
> without requiring knowledge about the larger program at build time.
>
> As a result, subskeletons require a loaded bpf_object to open().
> Further, they find their own symbols in the larger program by
> walking BTF type data at run time.
>
> At this time, programs, maps, and globals are supported through
> non-owning pointers.
>
> Signed-off-by: Delyan Kratunov <delyank@fb.com>
> ---
>  .../bpf/bpftool/Documentation/bpftool-gen.rst |  25 +
>  tools/bpf/bpftool/bash-completion/bpftool     |  14 +-
>  tools/bpf/bpftool/gen.c                       | 589 ++++++++++++++++--
>  3 files changed, 564 insertions(+), 64 deletions(-)
>

Few comments below, but overall looks great!

[...]

> +static bool btf_is_ptr_to_func_proto(const struct btf *btf,
> +                                    const struct btf_type *v)
> +{
> +       return btf_is_ptr(v) && btf_is_func_proto(btf__type_by_id(btf, v->type));
> +}
> +
> +static int codegen_subskel_datasecs(struct bpf_object *obj, const char *obj_name)
> +{
> +       struct btf *btf = bpf_object__btf(obj);
> +       struct btf_dump *d;
> +       struct bpf_map *map;
> +       const struct btf_type *sec, *var;
> +       const struct btf_var_secinfo *sec_var;
> +       int i, err, vlen;
> +       char map_ident[256], var_ident[256], sec_ident[256];
> +       bool strip_mods = false;
> +       const char *sec_name, *var_name;
> +       __u32 var_type_id;
> +
> +       d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL);
> +       err = libbpf_get_error(d);

nit: no need to use libbpf_get_error() in libbpf 1.0 mode

> +       if (err)
> +               return err;
> +
> +       bpf_object__for_each_map(map, obj) {
> +               /* only generate definitions for memory-mapped internal maps */
> +               if (!bpf_map__is_internal(map))
> +                       continue;
> +               if (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
> +                       continue;
> +               if (!get_map_ident(map, map_ident, sizeof(map_ident)))
> +                       continue;

extract these three checks into helper and reuse from codegen_asserts
and codegen_datasecs?


> +
> +               sec = find_type_for_map(btf, map_ident);
> +               if (!sec)
> +                       continue;
> +
> +               sec_name = btf__name_by_offset(btf, sec->name_off);
> +               if (!get_datasec_ident(sec_name, sec_ident, sizeof(sec_ident)))
> +                       continue;
> +
> +               if (strcmp(sec_name, ".kconfig") != 0)
> +                       strip_mods = true;
> +
> +               printf("        struct %s__%s {\n", obj_name, sec_ident);
> +
> +               sec_var = btf_var_secinfos(sec);
> +               vlen = btf_vlen(sec);
> +               for (i = 0; i < vlen; i++, sec_var++) {
> +                       DECLARE_LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts,
> +                               .field_name = var_ident,
> +                               .indent_level = 2,
> +                               .strip_mods = strip_mods,
> +                       );
> +
> +                       var = btf__type_by_id(btf, sec_var->type);
> +                       var_name = btf__name_by_offset(btf, var->name_off);
> +                       var_type_id = var->type;
> +
> +                       /* static variables are not exposed through BPF skeleton */
> +                       if (btf_var(var)->linkage == BTF_VAR_STATIC)
> +                               continue;
> +
> +                       /* leave the first character for a * prefix, size - 2
> +                        * as a result as well
> +                        */
> +                       var_ident[0] = '\0';
> +                       var_ident[1] = '\0';
> +                       strncat(var_ident + 1, var_name, sizeof(var_ident) - 2);
> +
> +                       /* sanitize variable name, e.g., for static vars inside
> +                        * a function, it's name is '<function name>.<variable name>',
> +                        * which we'll turn into a '<function name>_<variable name>'.
> +                        */
> +                       sanitize_identifier(var_ident + 1);

btw, I think we don't need sanitization anymore. We needed it for
static variables (they would be of the form <func_name>.<var_name> for
static variables inside the functions), but now it's just unnecessary
complication

> +                       var_ident[0] = ' ';
> +
> +                       /* The datasec member has KIND_VAR but we want the
> +                        * underlying type of the variable (e.g. KIND_INT).
> +                        */
> +                       var = btf__type_by_id(btf, var->type);

you need to use skip_mods_and_typedefs() or equivalent to skip any
const/volatile/restrict modifiers before checking btf_is_array()

> +
> +                       /* Prepend * to the field name to make it a pointer. */
> +                       var_ident[0] = '*';
> +
> +                       printf("\t\t");
> +                       /* Func and array members require special handling.
> +                        * Instead of producing `typename *var`, they produce
> +                        * `typeof(typename) *var`. This allows us to keep a
> +                        * similar syntax where the identifier is just prefixed
> +                        * by *, allowing us to ignore C declaration minutae.
> +                        */
> +                       if (btf_is_array(var) ||
> +                           btf_is_ptr_to_func_proto(btf, var)) {
> +                               printf("typeof(");
> +                               /* print the type inside typeof() without a name */
> +                               opts.field_name = "";
> +                               err = btf_dump__emit_type_decl(d, var_type_id, &opts);
> +                               if (err)
> +                                       goto out;
> +                               printf(") %s", var_ident);
> +                       } else {
> +                               err = btf_dump__emit_type_decl(d, var_type_id, &opts);
> +                               if (err)
> +                                       goto out;
> +                       }
> +                       printf(";\n");

we can simplify this a bit around var_ident and two
btf_dump__emit_type_decl() invocations. We know that we are handling
"non-uniform" C syntax for array and func pointer, so we don't need to
use opts.field_name. Doing this (schematically) should work (taking
into account no need for sanitization as well):

if (btf_is_array() || btf_is_ptr_to_func_proto())
    printf("typeof(");
btf_dump__emit_type_decl(... /* opts.field_name stays NULL */);
printf(" *%s", var_name);

or did I miss some corner case?

> +               }
> +               printf("        } %s;\n", sec_ident);
> +       }
> +
> +out:
> +       btf_dump__free(d);
> +       return err;
> +}
> +
>  static void codegen(const char *template, ...)
>  {
>         const char *src, *end;
> @@ -727,10 +843,93 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
>         return err;
>  }
>
> +static void
> +codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped)
> +{
> +       struct bpf_map *map;
> +       char ident[256];
> +       size_t i;
> +
> +       if (map_cnt) {

invert if, return early, reduce nestedness?

> +               codegen("\
> +                       \n\
> +                                                                           \n\
> +                               /* maps */                                  \n\
> +                               s->map_cnt = %zu;                           \n\
> +                               s->map_skel_sz = sizeof(*s->maps);          \n\
> +                               s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);\n\
> +                               if (!s->maps)                               \n\
> +                                       goto err;                           \n\
> +                       ",
> +                       map_cnt
> +               );
> +               i = 0;
> +               bpf_object__for_each_map(map, obj) {
> +                       if (!get_map_ident(map, ident, sizeof(ident)))
> +                               continue;
> +
> +                       codegen("\
> +                               \n\
> +                                                                           \n\
> +                                       s->maps[%zu].name = \"%s\";         \n\
> +                                       s->maps[%zu].map = &obj->maps.%s;   \n\
> +                               ",
> +                               i, bpf_map__name(map), i, ident);
> +                       /* memory-mapped internal maps */
> +                       if (mmaped && bpf_map__is_internal(map) &&
> +                           (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) {
> +                               printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n",
> +                                      i, ident);
> +                       }
> +                       i++;
> +               }
> +       }
> +}
> +
> +static void
> +codegen_progs_skeleton(struct bpf_object *obj, size_t prog_cnt, bool populate_links)
> +{
> +       struct bpf_program *prog;
> +       int i;
> +
> +       if (prog_cnt) {

same as above, reduce nestedness?

> +               codegen("\
> +                       \n\
> +                                                                           \n\
> +                               /* programs */                              \n\
> +                               s->prog_cnt = %zu;                          \n\
> +                               s->prog_skel_sz = sizeof(*s->progs);        \n\
> +                               s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);\n\
> +                               if (!s->progs)                              \n\
> +                                       goto err;                           \n\
> +                       ",
> +                       prog_cnt
> +               );
> +               i = 0;
> +               bpf_object__for_each_program(prog, obj) {
> +                       codegen("\
> +                               \n\
> +                                                                           \n\
> +                                       s->progs[%1$zu].name = \"%2$s\";    \n\
> +                                       s->progs[%1$zu].prog = &obj->progs.%2$s;\n\
> +                               ",
> +                               i, bpf_program__name(prog));
> +
> +                       if (populate_links)

nit: {} ?

> +                               codegen("\
> +                                       \n\
> +                                               s->progs[%1$zu].link = &obj->links.%2$s;\n\
> +                                       ",
> +                                       i, bpf_program__name(prog));
> +                       i++;
> +               }
> +       }
> +}
> +

[...]

> +       if (!REQ_ARGS(1)) {
> +               usage();
> +               return -1;
> +       }
> +       file = GET_ARG();
> +
> +       while (argc) {
> +               if (!REQ_ARGS(2))
> +                       return -1;
> +
> +               if (is_prefix(*argv, "name")) {
> +                       NEXT_ARG();
> +
> +                       if (obj_name[0] != '\0') {
> +                               p_err("object name already specified");
> +                               return -1;
> +                       }
> +
> +                       strncpy(obj_name, *argv, MAX_OBJ_NAME_LEN - 1);
> +                       obj_name[MAX_OBJ_NAME_LEN - 1] = '\0';

we don't know the name of the final object, why would we allow to set
any object name at all?

> +               } else {
> +                       p_err("unknown arg %s", *argv);
> +                       return -1;
> +               }
> +
> +               NEXT_ARG();
> +       }
> +
> +       if (argc) {
> +               p_err("extra unknown arguments");
> +               return -1;
> +       }
> +
> +       if (use_loader) {
> +               p_err("cannot use loader for subskeletons");
> +               return -1;
> +       }
> +
> +       if (stat(file, &st)) {
> +               p_err("failed to stat() %s: %s", file, strerror(errno));
> +               return -1;
> +       }
> +       file_sz = st.st_size;
> +       mmap_sz = roundup(file_sz, sysconf(_SC_PAGE_SIZE));
> +       fd = open(file, O_RDONLY);
> +       if (fd < 0) {
> +               p_err("failed to open() %s: %s", file, strerror(errno));
> +               return -1;
> +       }
> +       obj_data = mmap(NULL, mmap_sz, PROT_READ, MAP_PRIVATE, fd, 0);
> +       if (obj_data == MAP_FAILED) {
> +               obj_data = NULL;
> +               p_err("failed to mmap() %s: %s", file, strerror(errno));
> +               goto out;
> +       }
> +       if (obj_name[0] == '\0')
> +               get_obj_name(obj_name, file);
> +
> +       /* The empty object name allows us to use bpf_map__name and produce
> +        * ELF section names out of it. (".data" instead of "obj.data")
> +        */
> +       opts.object_name = "";

yep, like this. So that "name" argument "support" above is bogus,
let's remove it

> +       obj = bpf_object__open_mem(obj_data, file_sz, &opts);
> +       if (!obj) {
> +               char err_buf[256];
> +
> +               libbpf_strerror(errno, err_buf, sizeof(err_buf));
> +               p_err("failed to open BPF object file: %s", err_buf);
> +               obj = NULL;
> +               goto out;
> +       }
> +

[...]

> +               for (i = 0; i < len; i++, var++) {
> +                       var_type = btf__type_by_id(btf, var->type);
> +                       var_name = btf__name_by_offset(btf, var_type->name_off);
> +
> +                       if (btf_var(var_type)->linkage == BTF_VAR_STATIC)
> +                               continue;
> +
> +                       var_ident[0] = '\0';
> +                       strncat(var_ident, var_name, sizeof(var_ident) - 1);
> +                       sanitize_identifier(var_ident);
> +
> +                       /* Note that we use the dot prefix in .data as the
> +                        * field access operator i.e. maps%s becomes maps.data
> +                        */
> +                       codegen("\
> +                       \n\
> +                               s->vars[%4$d].name = \"%1$s\";              \n\
> +                               s->vars[%4$d].map = &obj->maps%3$s;         \n\
> +                               s->vars[%4$d].addr = (void**) &obj->%2$s.%1$s;\n\
> +                       ", var_ident, ident, bpf_map__name(map), var_idx);

map reference should be using ident, not bpf_map__name(), as it refers
to a field. The way it is now it shouldn't work for custom
.data.my_section case (do you have a test for this?) You shouldn't
need bpf_map__name() here at all.

> +
> +                       var_idx++;
> +               }
> +       }
> +
> +       codegen_maps_skeleton(obj, map_cnt, false /*mmaped*/);
> +       codegen_progs_skeleton(obj, prog_cnt, false /*links*/);
> +
> +       codegen("\
> +               \n\
> +                                                                           \n\
> +                       err = bpf_object__open_subskeleton(s);              \n\
> +                       if (err) {                                          \n\
> +                               errno = err;                                \n\

see below, best setting errno after __destroy() call (and errno has to
be positive), see below

> +                               goto err;                                   \n\
> +                       }                                                   \n\
> +                                                                           \n\
> +                       return obj;                                         \n\
> +               err:                                                        \n\
> +                       %1$s__destroy(obj);                                 \n\

errno = -err here

> +                       return NULL;                                        \n\
> +               }                                                           \n\
> +                                                                           \n\
> +               #ifdef __cplusplus                                          \n\
> +               struct %1$s *%1$s::open(const struct bpf_object *src) { return %1$s__open(src); }\n\
> +               void %1$s::destroy(struct %1$s *skel) { %1$s__destroy(skel); }\n\
> +               #endif /* __cplusplus */                                    \n\
> +                                                                           \n\
> +               #endif /* %2$s */                                           \n\
> +               ",
> +               obj_name, header_guard);
> +       err = 0;
> +out:
> +       bpf_object__close(obj);
> +       if (obj_data)
> +               munmap(obj_data, mmap_sz);
> +       close(fd);
> +       return err;
> +}
> +
>  static int do_object(int argc, char **argv)
>  {
>         struct bpf_linker *linker;
> @@ -1192,6 +1653,7 @@ static int do_help(int argc, char **argv)
>         fprintf(stderr,
>                 "Usage: %1$s %2$s object OUTPUT_FILE INPUT_FILE [INPUT_FILE...]\n"
>                 "       %1$s %2$s skeleton FILE [name OBJECT_NAME]\n"
> +               "       %1$s %2$s subskeleton FILE [name OBJECT_NAME]\n"

[name OBJECT_NAME] should be supported

>                 "       %1$s %2$s min_core_btf INPUT OUTPUT OBJECT [OBJECT...]\n"
>                 "       %1$s %2$s help\n"
>                 "\n"
> @@ -1788,6 +2250,7 @@ static int do_min_core_btf(int argc, char **argv)
>  static const struct cmd cmds[] = {
>         { "object",             do_object },
>         { "skeleton",           do_skeleton },
> +       { "subskeleton",        do_subskeleton },
>         { "min_core_btf",       do_min_core_btf},
>         { "help",               do_help },
>         { 0 }
> --
> 2.34.1
Delyan Kratunov March 14, 2022, 11:18 p.m. UTC | #2
Thanks, Andrii! The nits/refactors are straightforward but have some questions
below:

On Fri, 2022-03-11 at 15:27 -0800, Andrii Nakryiko wrote:
> > +
> > +                       /* sanitize variable name, e.g., for static vars inside
> > +                        * a function, it's name is '<function name>.<variable name>',
> > +                        * which we'll turn into a '<function name>_<variable name>'.
> > +                        */
> > +                       sanitize_identifier(var_ident + 1);
> 
> btw, I think we don't need sanitization anymore. We needed it for
> static variables (they would be of the form <func_name>.<var_name> for
> static variables inside the functions), but now it's just unnecessary
> complication

How would we handle static variables inside functions in libraries then?

> 
> > +                       var_ident[0] = ' ';
> > +
> > +                       /* The datasec member has KIND_VAR but we want the
> > +                        * underlying type of the variable (e.g. KIND_INT).
> > +                        */
> > +                       var = btf__type_by_id(btf, var->type);
> 
> you need to use skip_mods_and_typedefs() or equivalent to skip any
> const/volatile/restrict modifiers before checking btf_is_array()

Good catch!

> 
> > +
> > +                       /* Prepend * to the field name to make it a pointer. */
> > +                       var_ident[0] = '*';
> > +
> > +                       printf("\t\t");
> > +                       /* Func and array members require special handling.
> > +                        * Instead of producing `typename *var`, they produce
> > +                        * `typeof(typename) *var`. This allows us to keep a
> > +                        * similar syntax where the identifier is just prefixed
> > +                        * by *, allowing us to ignore C declaration minutae.
> > +                        */
> > +                       if (btf_is_array(var) ||
> > +                           btf_is_ptr_to_func_proto(btf, var)) {
> > +                               printf("typeof(");
> > +                               /* print the type inside typeof() without a name */
> > +                               opts.field_name = "";
> > +                               err = btf_dump__emit_type_decl(d, var_type_id, &opts);
> > +                               if (err)
> > +                                       goto out;
> > +                               printf(") %s", var_ident);
> > +                       } else {
> > +                               err = btf_dump__emit_type_decl(d, var_type_id, &opts);
> > +                               if (err)
> > +                                       goto out;
> > +                       }
> > +                       printf(";\n");
> 
> we can simplify this a bit around var_ident and two
> btf_dump__emit_type_decl() invocations. We know that we are handling
> "non-uniform" C syntax for array and func pointer, so we don't need to
> use opts.field_name. Doing this (schematically) should work (taking
> into account no need for sanitization as well):
> 
> if (btf_is_array() || btf_is_ptr_to_func_proto())
>     printf("typeof(");
> btf_dump__emit_type_decl(... /* opts.field_name stays NULL */);
> printf(" *%s", var_name);
> 
> or did I miss some corner case?

You didn't close the "typeof" :) 

if (btf_is_array() || btf_is_ptr_to_func_proto())
     printf("typeof(");
btf_dump__emit_type_decl(... /* opts.field_name stays NULL */);
if (btf_is_array() || btf_is_ptr_to_func_proto())
     printf(")");
printf(" *%s", var_name);

If you feel that's easier to understand, sure. I don't love it but it's
understandable enough.

[...]


> we don't know the name of the final object, why would we allow to set
> any object name at all?

We don't really care about the final object name but we do need an object name
for the subskeleton. The subskeleton type name, header guard etc all use it.
We can say that it's always taken from the file name, but giving the user the
option to override it feels right, given the parallel with skeletons (and what
would we do if the file name is a pipe from a subshell invocation?).

> > 
> > +
> > +       /* The empty object name allows us to use bpf_map__name and produce
> > +        * ELF section names out of it. (".data" instead of "obj.data")
> > +        */
> > +       opts.object_name = "";
> 
> yep, like this. So that "name" argument "support" above is bogus,
> let's remove it

See above, it changes real things.

> 
> > +       obj = bpf_object__open_mem(obj_data, file_sz, &opts);
> > +       if (!obj) {
> > +               char err_buf[256];
> > +
> > +               libbpf_strerror(errno, err_buf, sizeof(err_buf));
> > +               p_err("failed to open BPF object file: %s", err_buf);
> > +               obj = NULL;
> > +               goto out;
> > +       }
> > +
> 
> [...]
> 
> > +               for (i = 0; i < len; i++, var++) {
> > +                       var_type = btf__type_by_id(btf, var->type);
> > +                       var_name = btf__name_by_offset(btf, var_type->name_off);
> > +
> > +                       if (btf_var(var_type)->linkage == BTF_VAR_STATIC)
> > +                               continue;
> > +
> > +                       var_ident[0] = '\0';
> > +                       strncat(var_ident, var_name, sizeof(var_ident) - 1);
> > +                       sanitize_identifier(var_ident);
> > +
> > +                       /* Note that we use the dot prefix in .data as the
> > +                        * field access operator i.e. maps%s becomes maps.data
> > +                        */
> > +                       codegen("\
> > +                       \n\
> > +                               s->vars[%4$d].name = \"%1$s\";              \n\
> > +                               s->vars[%4$d].map = &obj->maps%3$s;         \n\
> > +                               s->vars[%4$d].addr = (void**) &obj->%2$s.%1$s;\n\
> > +                       ", var_ident, ident, bpf_map__name(map), var_idx);
> 
> map reference should be using ident, not bpf_map__name(), as it refers
> to a field. The way it is now it shouldn't work for custom
> .data.my_section case (do you have a test for this?) You shouldn't
> need bpf_map__name() here at all.

Good catch, I'll add a .data.custom test.

[...]

> 
> > +               "       %1$s %2$s subskeleton FILE [name OBJECT_NAME]\n"
> 
> [name OBJECT_NAME] should be supported

Not sure what you mean by "supported" here.

> 
> >                 "       %1$s %2$s min_core_btf INPUT OUTPUT OBJECT [OBJECT...]\n"
> >                 "       %1$s %2$s help\n"
> >                 "\n"
> > @@ -1788,6 +2250,7 @@ static int do_min_core_btf(int argc, char **argv)
> >  static const struct cmd cmds[] = {
> >         { "object",             do_object },
> >         { "skeleton",           do_skeleton },
> > +       { "subskeleton",        do_subskeleton },
> >         { "min_core_btf",       do_min_core_btf},
> >         { "help",               do_help },
> >         { 0 }
> > --
> > 2.34.1

-- Delyan
Delyan Kratunov March 15, 2022, 5:28 p.m. UTC | #3
On Mon, 2022-03-14 at 16:13 -0700, Delyan Kratunov wrote:
> Thanks, Andrii! The nits/refactors are straightforward but have some questions
> below:
> 
> On Fri, 2022-03-11 at 15:27 -0800, Andrii Nakryiko wrote:
> > > +
> > > +                       /* sanitize variable name, e.g., for static vars inside
> > > +                        * a function, it's name is '<function name>.<variable name>',
> > > +                        * which we'll turn into a '<function name>_<variable name>'.
> > > +                        */
> > > +                       sanitize_identifier(var_ident + 1);
> > 
> > btw, I think we don't need sanitization anymore. We needed it for
> > static variables (they would be of the form <func_name>.<var_name> for
> > static variables inside the functions), but now it's just unnecessary
> > complication
> 
> How would we handle static variables inside functions in libraries then?

Ah, just realized 31332ccb7562 (bpftool: Stop emitting static variables in BPF
skeleton) stopped that. I'll remove the sanitization then.

[...]


> > we don't know the name of the final object, why would we allow to set
> > any object name at all?
> 
> We don't really care about the final object name but we do need an object name
> for the subskeleton. The subskeleton type name, header guard etc all use it.
> We can say that it's always taken from the file name, but giving the user the
> option to override it feels right, given the parallel with skeletons (and what
> would we do if the file name is a pipe from a subshell invocation?).

In particular, with the current test setup, if we don't use the name param, we'd
end up inferring names like test_subskeleton_lib_linked3 from the filename. The
tests case is a bit contrived and we can work around it but there may be other
similar situations out there.


-- Delyan
Andrii Nakryiko March 15, 2022, 6:07 p.m. UTC | #4
On Mon, Mar 14, 2022 at 4:18 PM Delyan Kratunov <delyank@fb.com> wrote:
>
> Thanks, Andrii! The nits/refactors are straightforward but have some questions
> below:
>
> On Fri, 2022-03-11 at 15:27 -0800, Andrii Nakryiko wrote:
> > > +
> > > +                       /* sanitize variable name, e.g., for static vars inside
> > > +                        * a function, it's name is '<function name>.<variable name>',
> > > +                        * which we'll turn into a '<function name>_<variable name>'.
> > > +                        */
> > > +                       sanitize_identifier(var_ident + 1);
> >
> > btw, I think we don't need sanitization anymore. We needed it for
> > static variables (they would be of the form <func_name>.<var_name> for
> > static variables inside the functions), but now it's just unnecessary
> > complication
>
> How would we handle static variables inside functions in libraries then?

That's the thing, we don't expose them (anymore) in skeletons. So we
skip anything static. Anything global should have a unique and valid C
identifier name.

>
> >
> > > +                       var_ident[0] = ' ';
> > > +
> > > +                       /* The datasec member has KIND_VAR but we want the
> > > +                        * underlying type of the variable (e.g. KIND_INT).
> > > +                        */
> > > +                       var = btf__type_by_id(btf, var->type);
> >
> > you need to use skip_mods_and_typedefs() or equivalent to skip any
> > const/volatile/restrict modifiers before checking btf_is_array()
>
> Good catch!
>
> >
> > > +
> > > +                       /* Prepend * to the field name to make it a pointer. */
> > > +                       var_ident[0] = '*';
> > > +
> > > +                       printf("\t\t");
> > > +                       /* Func and array members require special handling.
> > > +                        * Instead of producing `typename *var`, they produce
> > > +                        * `typeof(typename) *var`. This allows us to keep a
> > > +                        * similar syntax where the identifier is just prefixed
> > > +                        * by *, allowing us to ignore C declaration minutae.
> > > +                        */
> > > +                       if (btf_is_array(var) ||
> > > +                           btf_is_ptr_to_func_proto(btf, var)) {
> > > +                               printf("typeof(");
> > > +                               /* print the type inside typeof() without a name */
> > > +                               opts.field_name = "";
> > > +                               err = btf_dump__emit_type_decl(d, var_type_id, &opts);
> > > +                               if (err)
> > > +                                       goto out;
> > > +                               printf(") %s", var_ident);
> > > +                       } else {
> > > +                               err = btf_dump__emit_type_decl(d, var_type_id, &opts);
> > > +                               if (err)
> > > +                                       goto out;
> > > +                       }
> > > +                       printf(";\n");
> >
> > we can simplify this a bit around var_ident and two
> > btf_dump__emit_type_decl() invocations. We know that we are handling
> > "non-uniform" C syntax for array and func pointer, so we don't need to
> > use opts.field_name. Doing this (schematically) should work (taking
> > into account no need for sanitization as well):
> >
> > if (btf_is_array() || btf_is_ptr_to_func_proto())
> >     printf("typeof(");
> > btf_dump__emit_type_decl(... /* opts.field_name stays NULL */);
> > printf(" *%s", var_name);
> >
> > or did I miss some corner case?
>
> You didn't close the "typeof" :)

Eagle eye :)

>
> if (btf_is_array() || btf_is_ptr_to_func_proto())
>      printf("typeof(");
> btf_dump__emit_type_decl(... /* opts.field_name stays NULL */);
> if (btf_is_array() || btf_is_ptr_to_func_proto())
>      printf(")");
> printf(" *%s", var_name);
>
> If you feel that's easier to understand, sure. I don't love it but it's
> understandable enough.

all the string buffer manipulations seem worse (you can also have a
variable to record the decision whether to use typeof or not, so you
don't have to repeat verbose is_array || is_ptr_to_func_proto check)

>
> [...]
>
>
> > we don't know the name of the final object, why would we allow to set
> > any object name at all?
>
> We don't really care about the final object name but we do need an object name
> for the subskeleton. The subskeleton type name, header guard etc all use it.
> We can say that it's always taken from the file name, but giving the user the
> option to override it feels right, given the parallel with skeletons (and what
> would we do if the file name is a pipe from a subshell invocation?).

Ah, I see, it's sort of like "library name" in this case. Yeah, makes
sense, missed that part. I was too much focused on not letting it get
into map names :)

>
> > >
> > > +
> > > +       /* The empty object name allows us to use bpf_map__name and produce
> > > +        * ELF section names out of it. (".data" instead of "obj.data")
> > > +        */
> > > +       opts.object_name = "";
> >
> > yep, like this. So that "name" argument "support" above is bogus,
> > let's remove it
>
> See above, it changes real things.

yep, my bad

>
> >
> > > +       obj = bpf_object__open_mem(obj_data, file_sz, &opts);
> > > +       if (!obj) {
> > > +               char err_buf[256];
> > > +
> > > +               libbpf_strerror(errno, err_buf, sizeof(err_buf));
> > > +               p_err("failed to open BPF object file: %s", err_buf);
> > > +               obj = NULL;
> > > +               goto out;
> > > +       }
> > > +
> >
> > [...]
> >
> > > +               for (i = 0; i < len; i++, var++) {
> > > +                       var_type = btf__type_by_id(btf, var->type);
> > > +                       var_name = btf__name_by_offset(btf, var_type->name_off);
> > > +
> > > +                       if (btf_var(var_type)->linkage == BTF_VAR_STATIC)
> > > +                               continue;
> > > +
> > > +                       var_ident[0] = '\0';
> > > +                       strncat(var_ident, var_name, sizeof(var_ident) - 1);
> > > +                       sanitize_identifier(var_ident);
> > > +
> > > +                       /* Note that we use the dot prefix in .data as the
> > > +                        * field access operator i.e. maps%s becomes maps.data
> > > +                        */
> > > +                       codegen("\
> > > +                       \n\
> > > +                               s->vars[%4$d].name = \"%1$s\";              \n\
> > > +                               s->vars[%4$d].map = &obj->maps%3$s;         \n\
> > > +                               s->vars[%4$d].addr = (void**) &obj->%2$s.%1$s;\n\
> > > +                       ", var_ident, ident, bpf_map__name(map), var_idx);
> >
> > map reference should be using ident, not bpf_map__name(), as it refers
> > to a field. The way it is now it shouldn't work for custom
> > .data.my_section case (do you have a test for this?) You shouldn't
> > need bpf_map__name() here at all.
>
> Good catch, I'll add a .data.custom test.
>
> [...]
>
> >
> > > +               "       %1$s %2$s subskeleton FILE [name OBJECT_NAME]\n"
> >
> > [name OBJECT_NAME] should be supported
>
> Not sure what you mean by "supported" here.

"not" was missing, but we just concluded that it is indeed needed

>
> >
> > >                 "       %1$s %2$s min_core_btf INPUT OUTPUT OBJECT [OBJECT...]\n"
> > >                 "       %1$s %2$s help\n"
> > >                 "\n"
> > > @@ -1788,6 +2250,7 @@ static int do_min_core_btf(int argc, char **argv)
> > >  static const struct cmd cmds[] = {
> > >         { "object",             do_object },
> > >         { "skeleton",           do_skeleton },
> > > +       { "subskeleton",        do_subskeleton },
> > >         { "min_core_btf",       do_min_core_btf},
> > >         { "help",               do_help },
> > >         { 0 }
> > > --
> > > 2.34.1
>
> -- Delyan
>
Andrii Nakryiko March 15, 2022, 6:07 p.m. UTC | #5
On Tue, Mar 15, 2022 at 10:28 AM Delyan Kratunov <delyank@fb.com> wrote:
>
> On Mon, 2022-03-14 at 16:13 -0700, Delyan Kratunov wrote:
> > Thanks, Andrii! The nits/refactors are straightforward but have some questions
> > below:
> >
> > On Fri, 2022-03-11 at 15:27 -0800, Andrii Nakryiko wrote:
> > > > +
> > > > +                       /* sanitize variable name, e.g., for static vars inside
> > > > +                        * a function, it's name is '<function name>.<variable name>',
> > > > +                        * which we'll turn into a '<function name>_<variable name>'.
> > > > +                        */
> > > > +                       sanitize_identifier(var_ident + 1);
> > >
> > > btw, I think we don't need sanitization anymore. We needed it for
> > > static variables (they would be of the form <func_name>.<var_name> for
> > > static variables inside the functions), but now it's just unnecessary
> > > complication
> >
> > How would we handle static variables inside functions in libraries then?
>
> Ah, just realized 31332ccb7562 (bpftool: Stop emitting static variables in BPF
> skeleton) stopped that. I'll remove the sanitization then.
>

yep

> [...]
>
>
> > > we don't know the name of the final object, why would we allow to set
> > > any object name at all?
> >
> > We don't really care about the final object name but we do need an object name
> > for the subskeleton. The subskeleton type name, header guard etc all use it.
> > We can say that it's always taken from the file name, but giving the user the
> > option to override it feels right, given the parallel with skeletons (and what
> > would we do if the file name is a pipe from a subshell invocation?).
>
> In particular, with the current test setup, if we don't use the name param, we'd
> end up inferring names like test_subskeleton_lib_linked3 from the filename. The
> tests case is a bit contrived and we can work around it but there may be other
> similar situations out there.
>

yep


>
> -- Delyan
diff mbox series

Patch

diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index 18d646b571ec..68454ef28f58 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -25,6 +25,7 @@  GEN COMMANDS
 
 |	**bpftool** **gen object** *OUTPUT_FILE* *INPUT_FILE* [*INPUT_FILE*...]
 |	**bpftool** **gen skeleton** *FILE* [**name** *OBJECT_NAME*]
+|	**bpftool** **gen subskeleton** *FILE* [**name** *OBJECT_NAME*]
 |	**bpftool** **gen min_core_btf** *INPUT* *OUTPUT* *OBJECT* [*OBJECT*...]
 |	**bpftool** **gen help**
 
@@ -150,6 +151,30 @@  DESCRIPTION
 		  (non-read-only) data from userspace, with same simplicity
 		  as for BPF side.
 
+	**bpftool gen subskeleton** *FILE*
+		  Generate BPF subskeleton C header file for a given *FILE*.
+
+		  Subskeletons are similar to skeletons, except they do not own
+		  the corresponding maps, programs, or global variables. They
+		  require that the object file used to generate them is already
+		  loaded into a *bpf_object* by some other means.
+
+		  This functionality is useful when a library is included into a
+		  larger BPF program. A subskeleton for the library would have
+		  access to all objects and globals defined in it, without
+		  having to know about the larger program.
+
+		  Consequently, there are only two functions defined
+		  for subskeletons:
+
+		  - **example__open(bpf_object\*)**
+		    Instantiates a subskeleton from an already opened (but not
+		    necessarily loaded) **bpf_object**.
+
+		  - **example__destroy()**
+		    Frees the storage for the subskeleton but *does not* unload
+		    any BPF programs or maps.
+
 	**bpftool** **gen min_core_btf** *INPUT* *OUTPUT* *OBJECT* [*OBJECT*...]
 		  Generate a minimum BTF file as *OUTPUT*, derived from a given
 		  *INPUT* BTF file, containing all needed BTF types so one, or
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 958e1fd71b5c..5df8d72c5179 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -1003,13 +1003,25 @@  _bpftool()
                             ;;
                     esac
                     ;;
+                subskeleton)
+                    case $prev in
+                        $command)
+                            _filedir
+                            return 0
+                            ;;
+                        *)
+                            _bpftool_once_attr 'name'
+                            return 0
+                            ;;
+                    esac
+                    ;;
                 min_core_btf)
                     _filedir
                     return 0
                     ;;
                 *)
                     [[ $prev == $object ]] && \
-                        COMPREPLY=( $( compgen -W 'object skeleton help min_core_btf' -- "$cur" ) )
+                        COMPREPLY=( $( compgen -W 'object skeleton subskeleton help min_core_btf' -- "$cur" ) )
                     ;;
             esac
             ;;
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 145734b4fe41..3f05e3df94cf 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -64,11 +64,11 @@  static void get_obj_name(char *name, const char *file)
 	sanitize_identifier(name);
 }
 
-static void get_header_guard(char *guard, const char *obj_name)
+static void get_header_guard(char *guard, const char *obj_name, const char *suffix)
 {
 	int i;
 
-	sprintf(guard, "__%s_SKEL_H__", obj_name);
+	sprintf(guard, "__%s_%s__", obj_name, suffix);
 	for (i = 0; guard[i]; i++)
 		guard[i] = toupper(guard[i]);
 }
@@ -280,6 +280,122 @@  static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
 	return err;
 }
 
+static bool btf_is_ptr_to_func_proto(const struct btf *btf,
+				     const struct btf_type *v)
+{
+	return btf_is_ptr(v) && btf_is_func_proto(btf__type_by_id(btf, v->type));
+}
+
+static int codegen_subskel_datasecs(struct bpf_object *obj, const char *obj_name)
+{
+	struct btf *btf = bpf_object__btf(obj);
+	struct btf_dump *d;
+	struct bpf_map *map;
+	const struct btf_type *sec, *var;
+	const struct btf_var_secinfo *sec_var;
+	int i, err, vlen;
+	char map_ident[256], var_ident[256], sec_ident[256];
+	bool strip_mods = false;
+	const char *sec_name, *var_name;
+	__u32 var_type_id;
+
+	d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL);
+	err = libbpf_get_error(d);
+	if (err)
+		return err;
+
+	bpf_object__for_each_map(map, obj) {
+		/* only generate definitions for memory-mapped internal maps */
+		if (!bpf_map__is_internal(map))
+			continue;
+		if (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
+			continue;
+		if (!get_map_ident(map, map_ident, sizeof(map_ident)))
+			continue;
+
+		sec = find_type_for_map(btf, map_ident);
+		if (!sec)
+			continue;
+
+		sec_name = btf__name_by_offset(btf, sec->name_off);
+		if (!get_datasec_ident(sec_name, sec_ident, sizeof(sec_ident)))
+			continue;
+
+		if (strcmp(sec_name, ".kconfig") != 0)
+			strip_mods = true;
+
+		printf("	struct %s__%s {\n", obj_name, sec_ident);
+
+		sec_var = btf_var_secinfos(sec);
+		vlen = btf_vlen(sec);
+		for (i = 0; i < vlen; i++, sec_var++) {
+			DECLARE_LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts,
+				.field_name = var_ident,
+				.indent_level = 2,
+				.strip_mods = strip_mods,
+			);
+
+			var = btf__type_by_id(btf, sec_var->type);
+			var_name = btf__name_by_offset(btf, var->name_off);
+			var_type_id = var->type;
+
+			/* static variables are not exposed through BPF skeleton */
+			if (btf_var(var)->linkage == BTF_VAR_STATIC)
+				continue;
+
+			/* leave the first character for a * prefix, size - 2
+			 * as a result as well
+			 */
+			var_ident[0] = '\0';
+			var_ident[1] = '\0';
+			strncat(var_ident + 1, var_name, sizeof(var_ident) - 2);
+
+			/* sanitize variable name, e.g., for static vars inside
+			 * a function, it's name is '<function name>.<variable name>',
+			 * which we'll turn into a '<function name>_<variable name>'.
+			 */
+			sanitize_identifier(var_ident + 1);
+			var_ident[0] = ' ';
+
+			/* The datasec member has KIND_VAR but we want the
+			 * underlying type of the variable (e.g. KIND_INT).
+			 */
+			var = btf__type_by_id(btf, var->type);
+
+			/* Prepend * to the field name to make it a pointer. */
+			var_ident[0] = '*';
+
+			printf("\t\t");
+			/* Func and array members require special handling.
+			 * Instead of producing `typename *var`, they produce
+			 * `typeof(typename) *var`. This allows us to keep a
+			 * similar syntax where the identifier is just prefixed
+			 * by *, allowing us to ignore C declaration minutae.
+			 */
+			if (btf_is_array(var) ||
+			    btf_is_ptr_to_func_proto(btf, var)) {
+				printf("typeof(");
+				/* print the type inside typeof() without a name */
+				opts.field_name = "";
+				err = btf_dump__emit_type_decl(d, var_type_id, &opts);
+				if (err)
+					goto out;
+				printf(") %s", var_ident);
+			} else {
+				err = btf_dump__emit_type_decl(d, var_type_id, &opts);
+				if (err)
+					goto out;
+			}
+			printf(";\n");
+		}
+		printf("	} %s;\n", sec_ident);
+	}
+
+out:
+	btf_dump__free(d);
+	return err;
+}
+
 static void codegen(const char *template, ...)
 {
 	const char *src, *end;
@@ -727,10 +843,93 @@  static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
 	return err;
 }
 
+static void
+codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped)
+{
+	struct bpf_map *map;
+	char ident[256];
+	size_t i;
+
+	if (map_cnt) {
+		codegen("\
+			\n\
+									    \n\
+				/* maps */				    \n\
+				s->map_cnt = %zu;			    \n\
+				s->map_skel_sz = sizeof(*s->maps);	    \n\
+				s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);\n\
+				if (!s->maps)				    \n\
+					goto err;			    \n\
+			",
+			map_cnt
+		);
+		i = 0;
+		bpf_object__for_each_map(map, obj) {
+			if (!get_map_ident(map, ident, sizeof(ident)))
+				continue;
+
+			codegen("\
+				\n\
+									    \n\
+					s->maps[%zu].name = \"%s\";	    \n\
+					s->maps[%zu].map = &obj->maps.%s;   \n\
+				",
+				i, bpf_map__name(map), i, ident);
+			/* memory-mapped internal maps */
+			if (mmaped && bpf_map__is_internal(map) &&
+			    (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) {
+				printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n",
+				       i, ident);
+			}
+			i++;
+		}
+	}
+}
+
+static void
+codegen_progs_skeleton(struct bpf_object *obj, size_t prog_cnt, bool populate_links)
+{
+	struct bpf_program *prog;
+	int i;
+
+	if (prog_cnt) {
+		codegen("\
+			\n\
+									    \n\
+				/* programs */				    \n\
+				s->prog_cnt = %zu;			    \n\
+				s->prog_skel_sz = sizeof(*s->progs);	    \n\
+				s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);\n\
+				if (!s->progs)				    \n\
+					goto err;			    \n\
+			",
+			prog_cnt
+		);
+		i = 0;
+		bpf_object__for_each_program(prog, obj) {
+			codegen("\
+				\n\
+									    \n\
+					s->progs[%1$zu].name = \"%2$s\";    \n\
+					s->progs[%1$zu].prog = &obj->progs.%2$s;\n\
+				",
+				i, bpf_program__name(prog));
+
+			if (populate_links)
+				codegen("\
+					\n\
+						s->progs[%1$zu].link = &obj->links.%2$s;\n\
+					",
+					i, bpf_program__name(prog));
+			i++;
+		}
+	}
+}
+
 static int do_skeleton(int argc, char **argv)
 {
 	char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")];
-	size_t i, map_cnt = 0, prog_cnt = 0, file_sz, mmap_sz;
+	size_t map_cnt = 0, prog_cnt = 0, file_sz, mmap_sz;
 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
 	char obj_name[MAX_OBJ_NAME_LEN] = "", *obj_data;
 	struct bpf_object *obj = NULL;
@@ -821,7 +1020,7 @@  static int do_skeleton(int argc, char **argv)
 		prog_cnt++;
 	}
 
-	get_header_guard(header_guard, obj_name);
+	get_header_guard(header_guard, obj_name, "SKEL_H");
 	if (use_loader) {
 		codegen("\
 		\n\
@@ -1024,66 +1223,10 @@  static int do_skeleton(int argc, char **argv)
 		",
 		obj_name
 	);
-	if (map_cnt) {
-		codegen("\
-			\n\
-									    \n\
-				/* maps */				    \n\
-				s->map_cnt = %zu;			    \n\
-				s->map_skel_sz = sizeof(*s->maps);	    \n\
-				s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);\n\
-				if (!s->maps)				    \n\
-					goto err;			    \n\
-			",
-			map_cnt
-		);
-		i = 0;
-		bpf_object__for_each_map(map, obj) {
-			if (!get_map_ident(map, ident, sizeof(ident)))
-				continue;
 
-			codegen("\
-				\n\
-									    \n\
-					s->maps[%zu].name = \"%s\";	    \n\
-					s->maps[%zu].map = &obj->maps.%s;   \n\
-				",
-				i, bpf_map__name(map), i, ident);
-			/* memory-mapped internal maps */
-			if (bpf_map__is_internal(map) &&
-			    (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) {
-				printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n",
-				       i, ident);
-			}
-			i++;
-		}
-	}
-	if (prog_cnt) {
-		codegen("\
-			\n\
-									    \n\
-				/* programs */				    \n\
-				s->prog_cnt = %zu;			    \n\
-				s->prog_skel_sz = sizeof(*s->progs);	    \n\
-				s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);\n\
-				if (!s->progs)				    \n\
-					goto err;			    \n\
-			",
-			prog_cnt
-		);
-		i = 0;
-		bpf_object__for_each_program(prog, obj) {
-			codegen("\
-				\n\
-									    \n\
-					s->progs[%1$zu].name = \"%2$s\";    \n\
-					s->progs[%1$zu].prog = &obj->progs.%2$s;\n\
-					s->progs[%1$zu].link = &obj->links.%2$s;\n\
-				",
-				i, bpf_program__name(prog));
-			i++;
-		}
-	}
+	codegen_maps_skeleton(obj, map_cnt, true /*mmaped*/);
+	codegen_progs_skeleton(obj, prog_cnt, true /*populate_links*/);
+
 	codegen("\
 		\n\
 									    \n\
@@ -1141,6 +1284,324 @@  static int do_skeleton(int argc, char **argv)
 	return err;
 }
 
+/* Subskeletons are like skeletons, except they don't own the bpf_object,
+ * associated maps, links, etc. Instead, they know about the existence of
+ * variables, maps, programs and are able to find their locations
+ * _at runtime_ from an already loaded bpf_object.
+ *
+ * This allows for library-like BPF objects to have userspace counterparts
+ * with access to their own items without having to know anything about the
+ * final BPF object that the library was linked into.
+ */
+static int do_subskeleton(int argc, char **argv)
+{
+	char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SUBSKEL_H__")];
+	size_t i, len, file_sz, map_cnt = 0, prog_cnt = 0, mmap_sz, var_cnt = 0, var_idx = 0;
+	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+	char obj_name[MAX_OBJ_NAME_LEN] = "", *obj_data;
+	struct bpf_object *obj = NULL;
+	const char *file, *var_name;
+	char ident[256], var_ident[256];
+	int fd, err = -1, map_type_id;
+	const struct bpf_map *map;
+	struct bpf_program *prog;
+	struct btf *btf;
+	const struct btf_type *map_type, *var_type;
+	const struct btf_var_secinfo *var;
+	struct stat st;
+
+	if (!REQ_ARGS(1)) {
+		usage();
+		return -1;
+	}
+	file = GET_ARG();
+
+	while (argc) {
+		if (!REQ_ARGS(2))
+			return -1;
+
+		if (is_prefix(*argv, "name")) {
+			NEXT_ARG();
+
+			if (obj_name[0] != '\0') {
+				p_err("object name already specified");
+				return -1;
+			}
+
+			strncpy(obj_name, *argv, MAX_OBJ_NAME_LEN - 1);
+			obj_name[MAX_OBJ_NAME_LEN - 1] = '\0';
+		} else {
+			p_err("unknown arg %s", *argv);
+			return -1;
+		}
+
+		NEXT_ARG();
+	}
+
+	if (argc) {
+		p_err("extra unknown arguments");
+		return -1;
+	}
+
+	if (use_loader) {
+		p_err("cannot use loader for subskeletons");
+		return -1;
+	}
+
+	if (stat(file, &st)) {
+		p_err("failed to stat() %s: %s", file, strerror(errno));
+		return -1;
+	}
+	file_sz = st.st_size;
+	mmap_sz = roundup(file_sz, sysconf(_SC_PAGE_SIZE));
+	fd = open(file, O_RDONLY);
+	if (fd < 0) {
+		p_err("failed to open() %s: %s", file, strerror(errno));
+		return -1;
+	}
+	obj_data = mmap(NULL, mmap_sz, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (obj_data == MAP_FAILED) {
+		obj_data = NULL;
+		p_err("failed to mmap() %s: %s", file, strerror(errno));
+		goto out;
+	}
+	if (obj_name[0] == '\0')
+		get_obj_name(obj_name, file);
+
+	/* The empty object name allows us to use bpf_map__name and produce
+	 * ELF section names out of it. (".data" instead of "obj.data")
+	 */
+	opts.object_name = "";
+	obj = bpf_object__open_mem(obj_data, file_sz, &opts);
+	if (!obj) {
+		char err_buf[256];
+
+		libbpf_strerror(errno, err_buf, sizeof(err_buf));
+		p_err("failed to open BPF object file: %s", err_buf);
+		obj = NULL;
+		goto out;
+	}
+
+	btf = bpf_object__btf(obj);
+	if (!btf) {
+		err = -1;
+		p_err("need btf type information for %s", obj_name);
+		goto out;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		prog_cnt++;
+	}
+
+	/* First, count how many variables we have to find.
+	 * We need this in advance so the subskel can allocate the right
+	 * amount of storage.
+	 */
+	bpf_object__for_each_map(map, obj) {
+		if (!get_map_ident(map, ident, sizeof(ident)))
+			continue;
+
+		/* Also count all maps that have a name */
+		map_cnt++;
+
+		if (!bpf_map__is_internal(map))
+			continue;
+		if (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
+			continue;
+
+		map_type_id = bpf_map__btf_value_type_id(map);
+		if (map_type_id < 0) {
+			err = map_type_id;
+			goto out;
+		}
+		map_type = btf__type_by_id(btf, map_type_id);
+
+		var = btf_var_secinfos(map_type);
+		len = btf_vlen(map_type);
+		for (i = 0; i < len; i++, var++) {
+			var_type = btf__type_by_id(btf, var->type);
+
+			if (btf_var(var_type)->linkage == BTF_VAR_STATIC)
+				continue;
+
+			var_cnt++;
+		}
+	}
+
+	get_header_guard(header_guard, obj_name, "SUBSKEL_H");
+	codegen("\
+	\n\
+	/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */   \n\
+								    \n\
+	/* THIS FILE IS AUTOGENERATED! */			    \n\
+	#ifndef %2$s						    \n\
+	#define %2$s						    \n\
+								    \n\
+	#include <errno.h>					    \n\
+	#include <stdlib.h>					    \n\
+	#include <bpf/libbpf.h>					    \n\
+								    \n\
+	struct %1$s {						    \n\
+		struct bpf_object *obj;				    \n\
+		struct bpf_object_subskeleton *subskel;		    \n\
+	", obj_name, header_guard);
+
+	if (map_cnt) {
+		printf("\tstruct {\n");
+		bpf_object__for_each_map(map, obj) {
+			if (!get_map_ident(map, ident, sizeof(ident)))
+				continue;
+			printf("\t\tstruct bpf_map *%s;\n", ident);
+		}
+		printf("\t} maps;\n");
+	}
+
+	if (prog_cnt) {
+		printf("\tstruct {\n");
+		bpf_object__for_each_program(prog, obj) {
+			printf("\t\tstruct bpf_program *%s;\n",
+				bpf_program__name(prog));
+		}
+		printf("\t} progs;\n");
+	}
+
+	err = codegen_subskel_datasecs(obj, obj_name);
+	if (err)
+		goto out;
+
+	/* emit code that will allocate enough storage for all symbols */
+	codegen("\
+		\n\
+									    \n\
+		#ifdef __cplusplus					    \n\
+			static inline struct %1$s *open(const struct bpf_object *src);\n\
+			static inline void destroy(struct %1$s *skel);\n\
+		#endif /* __cplusplus */				    \n\
+		};							    \n\
+									    \n\
+		static inline void					    \n\
+		%1$s__destroy(struct %1$s *skel)			    \n\
+		{							    \n\
+			if (!skel)					    \n\
+				return;					    \n\
+			if (skel->subskel)				    \n\
+				bpf_object__destroy_subskeleton(skel->subskel);\n\
+			free(skel);					    \n\
+		}							    \n\
+									    \n\
+		static inline struct %1$s *				    \n\
+		%1$s__open(const struct bpf_object *src)		    \n\
+		{							    \n\
+			struct %1$s *obj;				    \n\
+			struct bpf_object_subskeleton *s;		    \n\
+			int err;					    \n\
+									    \n\
+			obj = (struct %1$s *)calloc(1, sizeof(*obj));	    \n\
+			if (!obj) {					    \n\
+				errno = ENOMEM;				    \n\
+				goto err;				    \n\
+			}						    \n\
+			s = (struct bpf_object_subskeleton *)calloc(1, sizeof(*s));\n\
+			if (!s) {					    \n\
+				errno = ENOMEM;				    \n\
+				goto err;				    \n\
+			}						    \n\
+			s->sz = sizeof(*s);				    \n\
+			s->obj = src;					    \n\
+			s->var_skel_sz = sizeof(*s->vars);		    \n\
+			obj->subskel = s;				    \n\
+									    \n\
+			/* vars */					    \n\
+			s->var_cnt = %2$d;				    \n\
+			s->vars = (struct bpf_var_skeleton *)calloc(%2$d, sizeof(*s->vars));\n\
+			if (!s->vars) {					    \n\
+				errno = ENOMEM;				    \n\
+				goto err;				    \n\
+			}						    \n\
+									    \n\
+		",
+		obj_name, var_cnt
+	);
+
+	/* walk through each symbol and emit the runtime representation */
+	bpf_object__for_each_map(map, obj) {
+		if (!bpf_map__is_internal(map))
+			continue;
+
+		if (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
+			continue;
+
+		if (!get_map_ident(map, ident, sizeof(ident)))
+			continue;
+
+		map_type_id = bpf_map__btf_value_type_id(map);
+		if (map_type_id < 0)
+			/* skip over internal maps with no type*/
+			continue;
+
+		map_type = btf__type_by_id(btf, map_type_id);
+		var = btf_var_secinfos(map_type);
+		len = btf_vlen(map_type);
+		for (i = 0; i < len; i++, var++) {
+			var_type = btf__type_by_id(btf, var->type);
+			var_name = btf__name_by_offset(btf, var_type->name_off);
+
+			if (btf_var(var_type)->linkage == BTF_VAR_STATIC)
+				continue;
+
+			var_ident[0] = '\0';
+			strncat(var_ident, var_name, sizeof(var_ident) - 1);
+			sanitize_identifier(var_ident);
+
+			/* Note that we use the dot prefix in .data as the
+			 * field access operator i.e. maps%s becomes maps.data
+			 */
+			codegen("\
+			\n\
+				s->vars[%4$d].name = \"%1$s\";		    \n\
+				s->vars[%4$d].map = &obj->maps%3$s;	    \n\
+				s->vars[%4$d].addr = (void**) &obj->%2$s.%1$s;\n\
+			", var_ident, ident, bpf_map__name(map), var_idx);
+
+			var_idx++;
+		}
+	}
+
+	codegen_maps_skeleton(obj, map_cnt, false /*mmaped*/);
+	codegen_progs_skeleton(obj, prog_cnt, false /*links*/);
+
+	codegen("\
+		\n\
+									    \n\
+			err = bpf_object__open_subskeleton(s);		    \n\
+			if (err) {					    \n\
+				errno = err;				    \n\
+				goto err;				    \n\
+			}						    \n\
+									    \n\
+			return obj;					    \n\
+		err:							    \n\
+			%1$s__destroy(obj);				    \n\
+			return NULL;					    \n\
+		}							    \n\
+									    \n\
+		#ifdef __cplusplus					    \n\
+		struct %1$s *%1$s::open(const struct bpf_object *src) { return %1$s__open(src); }\n\
+		void %1$s::destroy(struct %1$s *skel) { %1$s__destroy(skel); }\n\
+		#endif /* __cplusplus */				    \n\
+									    \n\
+		#endif /* %2$s */					    \n\
+		",
+		obj_name, header_guard);
+	err = 0;
+out:
+	bpf_object__close(obj);
+	if (obj_data)
+		munmap(obj_data, mmap_sz);
+	close(fd);
+	return err;
+}
+
 static int do_object(int argc, char **argv)
 {
 	struct bpf_linker *linker;
@@ -1192,6 +1653,7 @@  static int do_help(int argc, char **argv)
 	fprintf(stderr,
 		"Usage: %1$s %2$s object OUTPUT_FILE INPUT_FILE [INPUT_FILE...]\n"
 		"       %1$s %2$s skeleton FILE [name OBJECT_NAME]\n"
+		"       %1$s %2$s subskeleton FILE [name OBJECT_NAME]\n"
 		"       %1$s %2$s min_core_btf INPUT OUTPUT OBJECT [OBJECT...]\n"
 		"       %1$s %2$s help\n"
 		"\n"
@@ -1788,6 +2250,7 @@  static int do_min_core_btf(int argc, char **argv)
 static const struct cmd cmds[] = {
 	{ "object",		do_object },
 	{ "skeleton",		do_skeleton },
+	{ "subskeleton",	do_subskeleton },
 	{ "min_core_btf",	do_min_core_btf},
 	{ "help",		do_help },
 	{ 0 }