diff mbox series

[v4,bpf-next,18/22] bpftool: Use syscall/loader program in "prog load" and "gen skeleton" command.

Message ID 20210508034837.64585-19-alexei.starovoitov@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: syscall program, FD array, loader program, light skeleton. | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count fail Series longer than 15 patches
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 15 maintainers not CCed: brouer@redhat.com netdev@vger.kernel.org yhs@fb.com kpsingh@kernel.org irogers@google.com kafai@fb.com jean-philippe@linaro.org iii@linux.ibm.com yuehaibing@huawei.com ast@kernel.org tklauser@distanz.ch cong.wang@bytedance.com songliubraving@fb.com quentin@isovalent.com zhuyifei@google.com
netdev/source_inline fail Was 0 now: 6
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch fail CHECK: No space is necessary after a cast CHECK: Please don't use multiple blank lines ERROR: "foo * bar" should be "foo *bar" ERROR: Avoid using diff content in the commit message - patch(1) might not work WARNING: Avoid line continuations in quoted strings WARNING: Misplaced SPDX-License-Identifier tag - use line 1 instead WARNING: line length of 100 exceeds 80 columns WARNING: line length of 104 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns WARNING: unnecessary whitespace before a quoted newline
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/header_inline success Link

Commit Message

Alexei Starovoitov May 8, 2021, 3:48 a.m. UTC
From: Alexei Starovoitov <ast@kernel.org>

Add -L flag to bpftool to use libbpf gen_trace facility and syscall/loader program
for skeleton generation and program loading.

"bpftool gen skeleton -L" command will generate a "light skeleton" or "loader skeleton"
that is similar to existing skeleton, but has one major difference:
$ bpftool gen skeleton lsm.o > lsm.skel.h
$ bpftool gen skeleton -L lsm.o > lsm.lskel.h
$ diff lsm.skel.h lsm.lskel.h
@@ -5,34 +4,34 @@
 #define __LSM_SKEL_H__

 #include <stdlib.h>
-#include <bpf/libbpf.h>
+#include <bpf/bpf.h>

The light skeleton does not use the majority of the libbpf infrastructure.
It doesn't need libelf. It doesn't parse the .o file.
It only needs a few sys_bpf wrappers. All of them are in the bpf/bpf.h file.
In the future libbpf/bpf.c can be inlined into bpf.h, so not even libbpf.a would be
needed to work with a light skeleton.

"bpftool prog load -L file.o" command is introduced for debugging of syscall/loader
program generation. Just like the same command without -L it will try to load
the programs from file.o into the kernel. It won't even try to pin them.

"bpftool prog load -L -d file.o" command will provide additional debug messages
on how syscall/loader program was generated.
Also the execution of syscall/loader program will use bpf_trace_printk() for
each step of loading BTF, creating maps, and loading programs.
The user can do "cat /.../trace_pipe" for further debug.

An example of fexit_sleep.lskel.h generated from progs/fexit_sleep.c:
struct fexit_sleep {
	struct bpf_loader_ctx ctx;
	struct {
		struct bpf_map_desc bss;
	} maps;
	struct {
		struct bpf_prog_desc nanosleep_fentry;
		struct bpf_prog_desc nanosleep_fexit;
	} progs;
	struct {
		int nanosleep_fentry_fd;
		int nanosleep_fexit_fd;
	} links;
	struct fexit_sleep__bss {
		int pid;
		int fentry_cnt;
		int fexit_cnt;
	} *bss;
};

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/Makefile        |   2 +-
 tools/bpf/bpftool/gen.c           | 362 ++++++++++++++++++++++++++++--
 tools/bpf/bpftool/main.c          |   7 +-
 tools/bpf/bpftool/main.h          |   1 +
 tools/bpf/bpftool/prog.c          | 104 +++++++++
 tools/bpf/bpftool/xlated_dumper.c |   3 +
 6 files changed, 456 insertions(+), 23 deletions(-)

Comments

Andrii Nakryiko May 12, 2021, 4:17 a.m. UTC | #1
On Fri, May 7, 2021 at 8:49 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> From: Alexei Starovoitov <ast@kernel.org>
>
> Add -L flag to bpftool to use libbpf gen_trace facility and syscall/loader program
> for skeleton generation and program loading.
>
> "bpftool gen skeleton -L" command will generate a "light skeleton" or "loader skeleton"
> that is similar to existing skeleton, but has one major difference:
> $ bpftool gen skeleton lsm.o > lsm.skel.h
> $ bpftool gen skeleton -L lsm.o > lsm.lskel.h
> $ diff lsm.skel.h lsm.lskel.h
> @@ -5,34 +4,34 @@
>  #define __LSM_SKEL_H__
>
>  #include <stdlib.h>
> -#include <bpf/libbpf.h>
> +#include <bpf/bpf.h>
>
> The light skeleton does not use majority of libbpf infrastructure.
> It doesn't need libelf. It doesn't parse .o file.
> It only needs few sys_bpf wrappers. All of them are in bpf/bpf.h file.
> In future libbpf/bpf.c can be inlined into bpf.h, so not even libbpf.a would be
> needed to work with light skeleton.
>
> "bpftool prog load -L file.o" command is introduced for debugging of syscall/loader
> program generation. Just like the same command without -L it will try to load
> the programs from file.o into the kernel. It won't even try to pin them.
>
> "bpftool prog load -L -d file.o" command will provide additional debug messages
> on how syscall/loader program was generated.
> Also the execution of syscall/loader program will use bpf_trace_printk() for
> each step of loading BTF, creating maps, and loading programs.
> The user can do "cat /.../trace_pipe" for further debug.
>
> An example of fexit_sleep.lskel.h generated from progs/fexit_sleep.c:
> struct fexit_sleep {
>         struct bpf_loader_ctx ctx;
>         struct {
>                 struct bpf_map_desc bss;
>         } maps;
>         struct {
>                 struct bpf_prog_desc nanosleep_fentry;
>                 struct bpf_prog_desc nanosleep_fexit;
>         } progs;
>         struct {
>                 int nanosleep_fentry_fd;
>                 int nanosleep_fexit_fd;
>         } links;
>         struct fexit_sleep__bss {
>                 int pid;
>                 int fentry_cnt;
>                 int fexit_cnt;
>         } *bss;
> };
>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---

After you applied my patchset removing static variables from BPF
skeleton, the trace_printk selftest doesn't compile anymore; you'll need
to move fmt outside of the function and make it non-static. With
that change everything compiles locally.

But CI still reports different errors; I'm not sure what's going on there, see [0].

  [0] https://travis-ci.com/github/kernel-patches/bpf/builds/225675119

My main complaint for this patch is that the generated .lskel.h header
file looks quite sloppy and doesn't follow kernel code style. It would
be good to try to clean this up a bit.

E.g., we don't write

        if (skel->maps.ringbuf.map_fd > 0) close(skel->maps.ringbuf.map_fd);

but instead

        if (skel->maps.ringbuf.map_fd > 0)
                close(skel->maps.ringbuf.map_fd);

And instead of

        int ret = 0;
        ret = ret < 0 ? ret : test_ringbuf__test_ringbuf__attach(skel);

we'd have an empty line

        int ret = 0;

        ret = ret < 0 ? ret : test_ringbuf__test_ringbuf__attach(skel);

It's auto-generated code, of course, but people might want/need to
read it, so would be good to have it look clean.

>  tools/bpf/bpftool/Makefile        |   2 +-
>  tools/bpf/bpftool/gen.c           | 362 ++++++++++++++++++++++++++++--
>  tools/bpf/bpftool/main.c          |   7 +-
>  tools/bpf/bpftool/main.h          |   1 +
>  tools/bpf/bpftool/prog.c          | 104 +++++++++
>  tools/bpf/bpftool/xlated_dumper.c |   3 +
>  6 files changed, 456 insertions(+), 23 deletions(-)
>
> diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
> index b3073ae84018..d16d289ade7a 100644
> --- a/tools/bpf/bpftool/Makefile
> +++ b/tools/bpf/bpftool/Makefile
> @@ -136,7 +136,7 @@ endif
>
>  BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
>
> -BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o)
> +BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o) $(OUTPUT)disasm.o
>  OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
>
>  VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)                           \
> diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
> index 31ade77f5ef8..7a3e343f31db 100644
> --- a/tools/bpf/bpftool/gen.c
> +++ b/tools/bpf/bpftool/gen.c
> @@ -18,6 +18,7 @@
>  #include <sys/stat.h>
>  #include <sys/mman.h>
>  #include <bpf/btf.h>
> +#include <bpf/bpf_gen_internal.h>
>
>  #include "json_writer.h"
>  #include "main.h"
> @@ -268,6 +269,303 @@ static void codegen(const char *template, ...)
>         free(s);
>  }
>
> +static void print_hex(const char *obj_data, int file_sz)

nit: obj_data -> data, file_sz -> data_sz (it's multi-purpose now)

> +{
> +       int i, len;
> +
> +       for (i = 0, len = 0; i < file_sz; i++) {
> +               int w = obj_data[i] ? 4 : 2;
> +
> +               len += w;
> +               if (len > 78) {
> +                       printf("\\\n");
> +                       len = w;
> +               }
> +               if (!obj_data[i])
> +                       printf("\\0");
> +               else
> +                       printf("\\x%02x", (unsigned char)obj_data[i]);
> +       }
> +}
> +
> +static size_t bpf_map_mmap_sz(const struct bpf_map *map)
> +{
> +       long page_sz = sysconf(_SC_PAGE_SIZE);
> +       size_t map_sz;
> +
> +       map_sz = (size_t)roundup(bpf_map__value_size(map), 8) * bpf_map__max_entries(map);
> +       map_sz = roundup(map_sz, page_sz);
> +       return map_sz;
> +}
> +
> +static void codegen_attach_detach(struct bpf_object *obj, const char *obj_name)
> +{
> +       struct bpf_program *prog;
> +
> +       bpf_object__for_each_program(prog, obj) {
> +               codegen("\
> +                       \n\
> +                       \n\
> +                       static inline int                                           \n\
> +                       %1$s__%2$s__attach(struct %1$s *skel)                       \n\
> +                       {                                                           \n\
> +                               int fd = bpf_raw_tracepoint_open(                   \
> +                       ", obj_name, bpf_program__name(prog));
> +
> +               switch (bpf_program__get_type(prog)) {
> +               case BPF_PROG_TYPE_RAW_TRACEPOINT:
> +                       putchar('"');
> +                       fputs(strchr(bpf_program__section_name(prog), '/') + 1, stdout);
> +                       putchar('"');

we use codegen() and printf(), let's not add fputs() to the mix; it
doesn't add much and in this case, I think printf is even a bit easier
to follow:

tp_name = strchr(bpf_program__section_name(prog), '/') + 1;
printf("\"%s\"", tp_name);

But also it seems like this code assumes that every program type can
be attached with bpf_raw_tracepoint_open() which is definitely not the
case for a lot of programs. When in the future you support, say, BPF
iterator, you'll do that with bpf_link_create(), so not sure why you
chose this code pattern instead of something like:

printf("\tint prog_fd = skel->progs.%s.prog_fd;\n", bpf_program__name(prog));

switch (bpf_program__get_type(prog)) {
case BPF_PROG_TYPE_RAW_TRACEPOINT:
    tp_name = ...;
    printf("\tint fd = bpf_raw_tracepoint_open(\"%s\", prog_fd);\n", tp_name);
    break;
case BPF_PROG_TYPE_TRACING:
    printf("\tint fd = bpf_raw_tracepoint_open(NULL, prog_fd);\n");
    break;
default:
    printf("\tint fd = 0; /* auto-attach not supported */\n");
    break;
}

Then you have a common if (fd > 0) /* set fd */; return fd; piece of code.

This is much clearer to follow, it's more easily extensible, and it
doesn't pretend that every program is a fentry/fexit or raw_tp only to
fail at auto-attach; it just skips auto-attaching instead.

> +                       break;
> +               default:
> +                       fputs("NULL", stdout);
> +                       break;
> +               }
> +               codegen("\
> +                       \n\
> +                       , skel->progs.%1$s.prog_fd);                                \n\
> +                               if (fd > 0) skel->links.%1$s_fd = fd;               \n\
> +                               return fd;                                          \n\
> +                       }                                                           \n\
> +                       ", bpf_program__name(prog));
> +       }
> +
> +       codegen("\
> +               \n\
> +                                                                           \n\
> +               static inline int                                           \n\
> +               %1$s__attach(struct %1$s *skel)                             \n\
> +               {                                                           \n\
> +                       int ret = 0;                                        \n\

codegen empty line here, as one example of what I've talked about above

> +               ", obj_name);
> +
> +       bpf_object__for_each_program(prog, obj) {
> +               codegen("\
> +                       \n\
> +                               ret = ret < 0 ? ret : %1$s__%2$s__attach(skel);   \n\
> +                       ", obj_name, bpf_program__name(prog));
> +       }
> +
> +       codegen("\
> +               \n\
> +                       return ret < 0 ? ret : 0;                           \n\
> +               }                                                           \n\
> +                                                                           \n\
> +               static inline void                                          \n\
> +               %1$s__detach(struct %1$s *skel)                             \n\
> +               {                                                           \n\
> +               ", obj_name);
> +       bpf_object__for_each_program(prog, obj) {
> +               printf("\tif (skel->links.%1$s_fd > 0) close(skel->links.%1$s_fd);\n",
> +                      bpf_program__name(prog));

you use bpf_program__name(prog) in so many places that it will be much
simpler if you have a dedicated variable for it

> +       }
> +       codegen("\
> +               \n\
> +               }                                                           \n\
> +               ");
> +}
> +
> +static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
> +{
> +       struct bpf_program *prog;
> +       struct bpf_map *map;
> +
> +       codegen("\
> +               \n\
> +               static void                                                 \n\
> +               %1$s__destroy(struct %1$s *skel)                            \n\
> +               {                                                           \n\
> +                       if (!skel)                                          \n\
> +                               return;                                     \n\
> +                       %1$s__detach(skel);                                 \n\
> +               ",
> +               obj_name);

please use some separator empty lines between logical blocks/steps
(here and in many other places), it's quite hard to follow these dense
blocks of code

> +       bpf_object__for_each_program(prog, obj) {
> +               printf("\tif (skel->progs.%1$s.prog_fd > 0) close(skel->progs.%1$s.prog_fd);\n",
> +                      bpf_program__name(prog));
> +       }

[...]

> +               if (!bpf_map__is_internal(map) ||
> +                   !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
> +                       continue;
> +
> +               printf("\tskel->%1$s =\n"
> +                      "\t\tmmap(NULL, %2$zd, PROT_READ | PROT_WRITE,\n"
> +                      "\t\t\tMAP_SHARED | MAP_ANONYMOUS, -1, 0);\n"
> +                      "\tmemcpy(skel->%1$s, (void *)\"",

add \\ after (void *)" so that long hex dump starts on a new line?

> +                      ident, bpf_map_mmap_sz(map));

this printf is also very unreadable. If you insist on doing this as
multi-line code, I think it deserves codegen, but I'd probably
generate mmap() invocation on a single line

But also, mmap() can fail and return MAP_FAILED, i.e. (void *)-1; it would
be good to handle this instead of happily storing that value and getting a
SIGSEGV in memcpy().

> +               bpf_map__get_initial_value(map, &mmap_data, &mmap_size);
> +               print_hex(mmap_data, mmap_size);
> +               printf("\", %2$zd);\n"
> +                      "\tskel->maps.%1$s.initial_value = (__u64)(long)skel->%1$s;\n",
> +                      ident, mmap_size);
> +       }

[...]

> +
> +static int try_loader(struct gen_loader_opts *gen)
> +{
> +       struct bpf_load_and_run_opts opts = {};
> +       struct bpf_loader_ctx *ctx;
> +       int ctx_sz = sizeof(*ctx) + 64 * max(sizeof(struct bpf_map_desc), sizeof(struct bpf_prog_desc));

this is quite a long line...

> +       int log_buf_sz = (1u << 24) - 1;
> +       int err, fds_before, fd_delta;
> +       char *log_buf;
> +

[...]

> +static int do_loader(int argc, char **argv)
> +{
> +       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
> +       DECLARE_LIBBPF_OPTS(gen_loader_opts, gen);
> +       struct bpf_object_load_attr load_attr = {};
> +       struct bpf_object *obj;
> +       const char *file;
> +       int err = 0;
> +
> +       if (!REQ_ARGS(1))
> +               return -1;
> +       file = GET_ARG();
> +
> +       obj = bpf_object__open_file(file, &open_opts);
> +       if (IS_ERR_OR_NULL(obj)) {

please use libbpf_get_error() instead of IS_ERR_OR_NULL()


> +               p_err("failed to open object file");
> +               goto err_close_obj;
> +       }
> +

[...]
Alexei Starovoitov May 12, 2021, 6:43 p.m. UTC | #2
On 5/11/21 9:17 PM, Andrii Nakryiko wrote:
>> +       bpf_object__for_each_program(prog, obj) {
>> +               printf("\tif (skel->links.%1$s_fd > 0) close(skel->links.%1$s_fd);\n",
>> +                      bpf_program__name(prog));
> 
> you use bpf_program__name(prog) in so many place that it will be much
> simpler if you have a dedicated variable for it

Every time it's in a different loop over all progs.

>> +       obj = bpf_object__open_file(file, &open_opts);
>> +       if (IS_ERR_OR_NULL(obj)) {
> 
> please use libbpf_get_error() instead of IS_ERR_OR_NULL()

That was copy-pasted from another place in the same file.
Fixed both and the rest of comments.
Andrii Nakryiko May 12, 2021, 6:55 p.m. UTC | #3
On Wed, May 12, 2021 at 11:44 AM Alexei Starovoitov <ast@fb.com> wrote:
>
> On 5/11/21 9:17 PM, Andrii Nakryiko wrote:
> >> +       bpf_object__for_each_program(prog, obj) {
> >> +               printf("\tif (skel->links.%1$s_fd > 0) close(skel->links.%1$s_fd);\n",
> >> +                      bpf_program__name(prog));
> >
> > you use bpf_program__name(prog) in so many place that it will be much
> > simpler if you have a dedicated variable for it
>
> Every time it's in the different loop over all progs.

ok, it's no big deal, using variable is always an option to shorten
printf if necessary

>
> >> +       obj = bpf_object__open_file(file, &open_opts);
> >> +       if (IS_ERR_OR_NULL(obj)) {
> >
> > please use libbpf_get_error() instead of IS_ERR_OR_NULL()
>
> That was copy-pasted from another place in the same file.
> Fixed both and the rest of comments.

The reason to use libbpf_get_error() is that we'll be changing how
errors are reported for APIs like bpf_object__open_file(), and
libbpf_get_error() will handle that transition automatically, so not
having IS_ERR or IS_ERR_OR_NULL reduces the amount of cleanup we'll need
to do.
diff mbox series

Patch

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index b3073ae84018..d16d289ade7a 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -136,7 +136,7 @@  endif
 
 BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
 
-BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o)
+BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o) $(OUTPUT)disasm.o
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
 
 VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 31ade77f5ef8..7a3e343f31db 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -18,6 +18,7 @@ 
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <bpf/btf.h>
+#include <bpf/bpf_gen_internal.h>
 
 #include "json_writer.h"
 #include "main.h"
@@ -268,6 +269,303 @@  static void codegen(const char *template, ...)
 	free(s);
 }
 
+static void print_hex(const char *obj_data, int file_sz)
+{
+	int i, len;
+
+	for (i = 0, len = 0; i < file_sz; i++) {
+		int w = obj_data[i] ? 4 : 2;
+
+		len += w;
+		if (len > 78) {
+			printf("\\\n");
+			len = w;
+		}
+		if (!obj_data[i])
+			printf("\\0");
+		else
+			printf("\\x%02x", (unsigned char)obj_data[i]);
+	}
+}
+
+static size_t bpf_map_mmap_sz(const struct bpf_map *map)
+{
+	long page_sz = sysconf(_SC_PAGE_SIZE);
+	size_t map_sz;
+
+	map_sz = (size_t)roundup(bpf_map__value_size(map), 8) * bpf_map__max_entries(map);
+	map_sz = roundup(map_sz, page_sz);
+	return map_sz;
+}
+
+static void codegen_attach_detach(struct bpf_object *obj, const char *obj_name)
+{
+	struct bpf_program *prog;
+
+	bpf_object__for_each_program(prog, obj) {
+		codegen("\
+			\n\
+			\n\
+			static inline int					    \n\
+			%1$s__%2$s__attach(struct %1$s *skel)			    \n\
+			{							    \n\
+				int fd = bpf_raw_tracepoint_open(		    \
+			", obj_name, bpf_program__name(prog));
+
+		switch (bpf_program__get_type(prog)) {
+		case BPF_PROG_TYPE_RAW_TRACEPOINT:
+			putchar('"');
+			fputs(strchr(bpf_program__section_name(prog), '/') + 1, stdout);
+			putchar('"');
+			break;
+		default:
+			fputs("NULL", stdout);
+			break;
+		}
+		codegen("\
+			\n\
+			, skel->progs.%1$s.prog_fd);				    \n\
+				if (fd > 0) skel->links.%1$s_fd = fd;		    \n\
+				return fd;					    \n\
+			}							    \n\
+			", bpf_program__name(prog));
+	}
+
+	codegen("\
+		\n\
+									    \n\
+		static inline int					    \n\
+		%1$s__attach(struct %1$s *skel)				    \n\
+		{							    \n\
+			int ret = 0;					    \n\
+		", obj_name);
+
+	bpf_object__for_each_program(prog, obj) {
+		codegen("\
+			\n\
+				ret = ret < 0 ? ret : %1$s__%2$s__attach(skel);   \n\
+			", obj_name, bpf_program__name(prog));
+	}
+
+	codegen("\
+		\n\
+			return ret < 0 ? ret : 0;			    \n\
+		}							    \n\
+									    \n\
+		static inline void					    \n\
+		%1$s__detach(struct %1$s *skel)				    \n\
+		{							    \n\
+		", obj_name);
+	bpf_object__for_each_program(prog, obj) {
+		printf("\tif (skel->links.%1$s_fd > 0) close(skel->links.%1$s_fd);\n",
+		       bpf_program__name(prog));
+	}
+	codegen("\
+		\n\
+		}							    \n\
+		");
+}
+
+static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
+{
+	struct bpf_program *prog;
+	struct bpf_map *map;
+
+	codegen("\
+		\n\
+		static void						    \n\
+		%1$s__destroy(struct %1$s *skel)			    \n\
+		{							    \n\
+			if (!skel)					    \n\
+				return;					    \n\
+			%1$s__detach(skel);				    \n\
+		",
+		obj_name);
+	bpf_object__for_each_program(prog, obj) {
+		printf("\tif (skel->progs.%1$s.prog_fd > 0) close(skel->progs.%1$s.prog_fd);\n",
+		       bpf_program__name(prog));
+	}
+	bpf_object__for_each_map(map, obj) {
+		const char * ident;
+
+		ident = get_map_ident(map);
+		if (!ident)
+			continue;
+		if (bpf_map__is_internal(map) &&
+		    (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+			printf("\tmunmap(skel->%1$s, %2$zd);\n",
+			       ident, bpf_map_mmap_sz(map));
+		printf("\tif (skel->maps.%1$s.map_fd > 0) close(skel->maps.%1$s.map_fd);\n", ident);
+	}
+	codegen("\
+		\n\
+			free(skel);					    \n\
+		}							    \n\
+		",
+		obj_name);
+}
+
+static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *header_guard)
+{
+	struct bpf_object_load_attr load_attr = {};
+	DECLARE_LIBBPF_OPTS(gen_loader_opts, opts);
+	struct bpf_map *map;
+	int err = 0;
+
+	err = bpf_object__gen_loader(obj, &opts);
+	if (err)
+		return err;
+
+	load_attr.obj = obj;
+	if (verifier_logs)
+		/* log_level1 + log_level2 + stats, but not stable UAPI */
+		load_attr.log_level = 1 + 2 + 4;
+
+	err = bpf_object__load_xattr(&load_attr);
+	if (err) {
+		p_err("failed to load object file");
+		goto out;
+	}
+	/* If there was no error during load then gen_loader_opts
+	 * are populated with the loader program.
+	 */
+
+	/* finish generating 'struct skel' */
+	codegen("\
+		\n\
+		};							    \n\
+		", obj_name);
+
+
+	codegen_attach_detach(obj, obj_name);
+
+	codegen_destroy(obj, obj_name);
+
+	codegen("\
+		\n\
+		static inline struct %1$s *				    \n\
+		%1$s__open(void)					    \n\
+		{							    \n\
+			struct %1$s *skel;				    \n\
+									    \n\
+			skel = calloc(sizeof(*skel), 1);		    \n\
+			if (!skel)					    \n\
+				return NULL;				    \n\
+			skel->ctx.sz = (void *)&skel->links - (void *)skel; \n\
+		",
+		obj_name, opts.data_sz);
+	bpf_object__for_each_map(map, obj) {
+		const char *ident;
+		const void *mmap_data = NULL;
+		size_t mmap_size = 0;
+
+		ident = get_map_ident(map);
+		if (!ident)
+			continue;
+
+		if (!bpf_map__is_internal(map) ||
+		    !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+			continue;
+
+		printf("\tskel->%1$s =\n"
+		       "\t\tmmap(NULL, %2$zd, PROT_READ | PROT_WRITE,\n"
+		       "\t\t\tMAP_SHARED | MAP_ANONYMOUS, -1, 0);\n"
+		       "\tmemcpy(skel->%1$s, (void *)\"",
+		       ident, bpf_map_mmap_sz(map));
+		bpf_map__get_initial_value(map, &mmap_data, &mmap_size);
+		print_hex(mmap_data, mmap_size);
+		printf("\", %2$zd);\n"
+		       "\tskel->maps.%1$s.initial_value = (__u64)(long)skel->%1$s;\n",
+		       ident, mmap_size);
+	}
+	codegen("\
+		\n\
+			return skel;					    \n\
+		}							    \n\
+									    \n\
+		static inline int					    \n\
+		%1$s__load(struct %1$s *skel)				    \n\
+		{							    \n\
+			struct bpf_load_and_run_opts opts = {};		    \n\
+			int err;					    \n\
+									    \n\
+			opts.ctx = (struct bpf_loader_ctx *)skel;	    \n\
+			opts.data_sz = %2$d;				    \n\
+			opts.data = (void *)\"\\			    \n\
+		",
+		obj_name, opts.data_sz);
+	print_hex(opts.data, opts.data_sz);
+	codegen("\
+		\n\
+		\";							    \n\
+		");
+
+	codegen("\
+		\n\
+			opts.insns_sz = %d;				    \n\
+			opts.insns = (void *)\"\\			    \n\
+		",
+		opts.insns_sz);
+	print_hex(opts.insns, opts.insns_sz);
+	codegen("\
+		\n\
+		\";							    \n\
+			err = bpf_load_and_run(&opts);			    \n\
+			if (err < 0)					    \n\
+				return err;				    \n\
+		", obj_name);
+	bpf_object__for_each_map(map, obj) {
+		const char *ident, *mmap_flags;
+
+		ident = get_map_ident(map);
+		if (!ident)
+			continue;
+
+		if (!bpf_map__is_internal(map) ||
+		    !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE))
+			continue;
+		if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG)
+			mmap_flags = "PROT_READ";
+		else
+			mmap_flags = "PROT_READ | PROT_WRITE";
+
+		printf("\tskel->%1$s =\n"
+		       "\t\tmmap(skel->%1$s, %2$zd, %3$s, MAP_SHARED | MAP_FIXED,\n"
+		       "\t\t\tskel->maps.%1$s.map_fd, 0);\n",
+		       ident, bpf_map_mmap_sz(map), mmap_flags);
+	}
+	codegen("\
+		\n\
+			return 0;					    \n\
+		}							    \n\
+									    \n\
+		static inline struct %1$s *				    \n\
+		%1$s__open_and_load(void)				    \n\
+		{							    \n\
+			struct %1$s *skel;				    \n\
+									    \n\
+			skel = %1$s__open();				    \n\
+			if (!skel)					    \n\
+				return NULL;				    \n\
+			if (%1$s__load(skel)) {				    \n\
+				%1$s__destroy(skel);			    \n\
+				return NULL;				    \n\
+			}						    \n\
+			return skel;					    \n\
+		}							    \n\
+		", obj_name);
+
+	codegen("\
+		\n\
+									    \n\
+		#endif /* %s */						    \n\
+		",
+		header_guard);
+	err = 0;
+out:
+	return err;
+}
+
 static int do_skeleton(int argc, char **argv)
 {
 	char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")];
@@ -277,7 +575,7 @@  static int do_skeleton(int argc, char **argv)
 	struct bpf_object *obj = NULL;
 	const char *file, *ident;
 	struct bpf_program *prog;
-	int fd, len, err = -1;
+	int fd, err = -1;
 	struct bpf_map *map;
 	struct btf *btf;
 	struct stat st;
@@ -359,7 +657,25 @@  static int do_skeleton(int argc, char **argv)
 	}
 
 	get_header_guard(header_guard, obj_name);
-	codegen("\
+	if (use_loader) {
+		codegen("\
+		\n\
+		/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */   \n\
+		/* THIS FILE IS AUTOGENERATED! */			    \n\
+		#ifndef %2$s						    \n\
+		#define %2$s						    \n\
+									    \n\
+		#include <stdlib.h>					    \n\
+		#include <bpf/bpf.h>					    \n\
+		#include <bpf/skel_internal.h>				    \n\
+									    \n\
+		struct %1$s {						    \n\
+			struct bpf_loader_ctx ctx;			    \n\
+		",
+		obj_name, header_guard
+		);
+	} else {
+		codegen("\
 		\n\
 		/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */   \n\
 									    \n\
@@ -375,7 +691,8 @@  static int do_skeleton(int argc, char **argv)
 			struct bpf_object *obj;				    \n\
 		",
 		obj_name, header_guard
-	);
+		);
+	}
 
 	if (map_cnt) {
 		printf("\tstruct {\n");
@@ -383,7 +700,10 @@  static int do_skeleton(int argc, char **argv)
 			ident = get_map_ident(map);
 			if (!ident)
 				continue;
-			printf("\t\tstruct bpf_map *%s;\n", ident);
+			if (use_loader)
+				printf("\t\tstruct bpf_map_desc %s;\n", ident);
+			else
+				printf("\t\tstruct bpf_map *%s;\n", ident);
 		}
 		printf("\t} maps;\n");
 	}
@@ -391,14 +711,22 @@  static int do_skeleton(int argc, char **argv)
 	if (prog_cnt) {
 		printf("\tstruct {\n");
 		bpf_object__for_each_program(prog, obj) {
-			printf("\t\tstruct bpf_program *%s;\n",
-			       bpf_program__name(prog));
+			if (use_loader)
+				printf("\t\tstruct bpf_prog_desc %s;\n",
+				       bpf_program__name(prog));
+			else
+				printf("\t\tstruct bpf_program *%s;\n",
+				       bpf_program__name(prog));
 		}
 		printf("\t} progs;\n");
 		printf("\tstruct {\n");
 		bpf_object__for_each_program(prog, obj) {
-			printf("\t\tstruct bpf_link *%s;\n",
-			       bpf_program__name(prog));
+			if (use_loader)
+				printf("\t\tint %s_fd;\n",
+				       bpf_program__name(prog));
+			else
+				printf("\t\tstruct bpf_link *%s;\n",
+				       bpf_program__name(prog));
 		}
 		printf("\t} links;\n");
 	}
@@ -409,6 +737,10 @@  static int do_skeleton(int argc, char **argv)
 		if (err)
 			goto out;
 	}
+	if (use_loader) {
+		err = gen_trace(obj, obj_name, header_guard);
+		goto out;
+	}
 
 	codegen("\
 		\n\
@@ -578,19 +910,7 @@  static int do_skeleton(int argc, char **argv)
 		file_sz);
 
 	/* embed contents of BPF object file */
-	for (i = 0, len = 0; i < file_sz; i++) {
-		int w = obj_data[i] ? 4 : 2;
-
-		len += w;
-		if (len > 78) {
-			printf("\\\n");
-			len = w;
-		}
-		if (!obj_data[i])
-			printf("\\0");
-		else
-			printf("\\x%02x", (unsigned char)obj_data[i]);
-	}
+	print_hex(obj_data, file_sz);
 
 	codegen("\
 		\n\
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index d9afb730136a..7f2817d97079 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -29,6 +29,7 @@  bool show_pinned;
 bool block_mount;
 bool verifier_logs;
 bool relaxed_maps;
+bool use_loader;
 struct btf *base_btf;
 struct pinned_obj_table prog_table;
 struct pinned_obj_table map_table;
@@ -392,6 +393,7 @@  int main(int argc, char **argv)
 		{ "mapcompat",	no_argument,	NULL,	'm' },
 		{ "nomount",	no_argument,	NULL,	'n' },
 		{ "debug",	no_argument,	NULL,	'd' },
+		{ "use-loader",	no_argument,	NULL,	'L' },
 		{ "base-btf",	required_argument, NULL, 'B' },
 		{ 0 }
 	};
@@ -409,7 +411,7 @@  int main(int argc, char **argv)
 	hash_init(link_table.table);
 
 	opterr = 0;
-	while ((opt = getopt_long(argc, argv, "VhpjfmndB:",
+	while ((opt = getopt_long(argc, argv, "VhpjfLmndB:",
 				  options, NULL)) >= 0) {
 		switch (opt) {
 		case 'V':
@@ -452,6 +454,9 @@  int main(int argc, char **argv)
 				return -1;
 			}
 			break;
+		case 'L':
+			use_loader = true;
+			break;
 		default:
 			p_err("unrecognized option '%s'", argv[optind - 1]);
 			if (json_output)
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 76e91641262b..c1cf29798b99 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -90,6 +90,7 @@  extern bool show_pids;
 extern bool block_mount;
 extern bool verifier_logs;
 extern bool relaxed_maps;
+extern bool use_loader;
 extern struct btf *base_btf;
 extern struct pinned_obj_table prog_table;
 extern struct pinned_obj_table map_table;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 3f067d2d7584..55401b65815a 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -16,6 +16,7 @@ 
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
+#include <dirent.h>
 
 #include <linux/err.h>
 #include <linux/perf_event.h>
@@ -24,6 +25,8 @@ 
 #include <bpf/bpf.h>
 #include <bpf/btf.h>
 #include <bpf/libbpf.h>
+#include <bpf/bpf_gen_internal.h>
+#include <bpf/skel_internal.h>
 
 #include "cfg.h"
 #include "main.h"
@@ -1645,8 +1648,109 @@  static int load_with_options(int argc, char **argv, bool first_prog_only)
 	return -1;
 }
 
+/*
+ * Count the entries currently listed under /proc/self/fd.
+ *
+ * The tally starts at -3 so that three directory entries are discounted
+ * up front (presumably ".", ".." and the descriptor that backs the
+ * directory stream itself -- confirm against procfs behaviour).
+ *
+ * Returns the adjusted entry count, or -1 if the directory cannot be
+ * opened.
+ */
+static int count_open_fds(void)
+{
+	struct dirent *entry;
+	int nr_fds = -3;
+	DIR *dir;
+
+	dir = opendir("/proc/self/fd");
+	if (!dir)
+		return -1;
+
+	for (entry = readdir(dir); entry; entry = readdir(dir))
+		nr_fds++;
+
+	closedir(dir);
+	return nr_fds;
+}
+
+/*
+ * Execute a generated loader program once and report failures on stderr.
+ *
+ * @gen: loader description; its data/data_sz and insns/insns_sz fields
+ *       are handed to bpf_load_and_run() unchanged.
+ *
+ * A verifier log buffer is allocated for the duration of the call and
+ * printed only when the run fails. The number of open file descriptors is
+ * sampled before and after the run so that a failing run which changed
+ * the FD count can be flagged.
+ *
+ * Returns the value of bpf_load_and_run(), or -ENOMEM if the log buffer
+ * cannot be allocated.
+ */
+static int try_loader(struct gen_loader_opts *gen)
+{
+	struct bpf_load_and_run_opts opts = {};
+	struct bpf_loader_ctx *ctx;
+	/* loader context plus room for 64 map or program descriptors */
+	int ctx_sz = sizeof(*ctx) + 64 * max(sizeof(struct bpf_map_desc),
+					     sizeof(struct bpf_prog_desc));
+	int log_buf_sz = (1u << 24) - 1;
+	int err, fds_before, fd_delta;
+	char *log_buf;
+
+	log_buf = malloc(log_buf_sz);
+	if (!log_buf)
+		return -ENOMEM;
+	/*
+	 * The buffer is printed with %s on failure. Terminate it up front so
+	 * that a run which fails before any log text is written does not
+	 * make us print uninitialized heap memory.
+	 */
+	log_buf[0] = '\0';
+
+	ctx = alloca(ctx_sz);
+	memset(ctx, 0, ctx_sz);
+	ctx->sz = ctx_sz;
+	ctx->log_level = 1;
+	ctx->log_size = log_buf_sz;
+	ctx->log_buf = (long) log_buf;
+
+	opts.ctx = ctx;
+	opts.data = gen->data;
+	opts.data_sz = gen->data_sz;
+	opts.insns = gen->insns;
+	opts.insns_sz = gen->insns_sz;
+
+	fds_before = count_open_fds();
+	err = bpf_load_and_run(&opts);
+	fd_delta = count_open_fds() - fds_before;
+	if (err < 0) {
+		/* errstr is guarded: passing NULL to %s is undefined */
+		fprintf(stderr, "err %d\n%s\n%s", err,
+			opts.errstr ? opts.errstr : "", log_buf);
+		if (fd_delta)
+			fprintf(stderr, "loader prog leaked %d FDs\n",
+				fd_delta);
+	}
+	free(log_buf);
+	return err;
+}
+
+/*
+ * Load a BPF object file by way of a generated loader program.
+ *
+ * Consumes one positional argument (the object file path), opens the
+ * object, asks libbpf to generate a loader for it via
+ * bpf_object__gen_loader(), runs bpf_object__load_xattr() and finally
+ * executes the generated loader with try_loader(). When verifier_logs is
+ * set, the generated loader instructions are dumped before being run.
+ *
+ * Returns 0 on success; -1 if the argument is missing or the object file
+ * cannot be opened; otherwise the error reported by the failing step.
+ */
+static int do_loader(int argc, char **argv)
+{
+	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
+	DECLARE_LIBBPF_OPTS(gen_loader_opts, gen);
+	struct bpf_object_load_attr load_attr = {};
+	struct bpf_object *obj;
+	const char *file;
+	int err = 0;
+
+	if (!REQ_ARGS(1))
+		return -1;
+	file = GET_ARG();
+
+	obj = bpf_object__open_file(file, &open_opts);
+	if (IS_ERR_OR_NULL(obj)) {
+		p_err("failed to open object file");
+		/*
+		 * There is no valid object to close, and err is still 0 at
+		 * this point: return a failure code directly instead of
+		 * falling through the cleanup label and reporting success.
+		 */
+		return -1;
+	}
+
+	err = bpf_object__gen_loader(obj, &gen);
+	if (err) {
+		p_err("failed to generate loader program");
+		goto err_close_obj;
+	}
+
+	load_attr.obj = obj;
+	if (verifier_logs)
+		/* log_level1 + log_level2 + stats, but not stable UAPI */
+		load_attr.log_level = 1 + 2 + 4;
+
+	err = bpf_object__load_xattr(&load_attr);
+	if (err) {
+		p_err("failed to load object file");
+		goto err_close_obj;
+	}
+
+	if (verifier_logs) {
+		struct dump_data dd = {};
+
+		/* show the generated loader instructions before running them */
+		kernel_syms_load(&dd);
+		dump_xlated_plain(&dd, (void *)gen.insns, gen.insns_sz, false, false);
+		kernel_syms_destroy(&dd);
+	}
+	err = try_loader(&gen);
+err_close_obj:
+	bpf_object__close(obj);
+	return err;
+}
+
 static int do_load(int argc, char **argv)
 {
+	if (use_loader)	/* flag set by the -L / --use-loader option */
+		return do_loader(argc, argv);
 	return load_with_options(argc, argv, true);
 }
 
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 6fc3e6f7f40c..f1f32e21d5cd 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -196,6 +196,9 @@  static const char *print_imm(void *private_data,
 	else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
 		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
 			 "map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
+	else if (insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE)
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "map[idx:%u]+%u", insn->imm, (insn + 1)->imm);
 	else if (insn->src_reg == BPF_PSEUDO_FUNC)
 		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
 			 "subprog[%+d]", insn->imm);