diff mbox series

[v1,bpf-next,3/9] bpf: expose how xlated insns map to jitted insns

Message ID 20240202162813.4184616-4-aspsk@isovalent.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series BPF static branches | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 6609 this patch: 6769
netdev/build_tools success Errors and warnings before: 1 this patch: 1
netdev/cc_maintainers success CCed 0 of 0 maintainers
netdev/build_clang fail Errors and warnings before: 1323 this patch: 1323
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 7246 this patch: 7246
netdev/checkpatch warning WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 6 this patch: 6
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 fail Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-10 success Logs for s390x-gcc / test
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-13 fail Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-15 success Logs for x86_64-gcc / test
bpf/vmtest-bpf-next-VM_Test-16 success Logs for x86_64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-17 fail Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-llvm-17 / test
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-21 fail Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-8 fail Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-llvm-18 / test
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-18 fail Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization

Commit Message

Anton Protopopov Feb. 2, 2024, 4:28 p.m. UTC
Allow users to get the exact xlated -> jitted instruction mapping.
This is done via a new field, xlated_to_jit, in bpf_prog_info, which
can return up to prog->len

    struct bpf_xlated_to_jit {
            u32 off;
            u32 len;
    };

elements. The entry xlated_to_jit[insn_off] contains the offset of the
corresponding jitted code within its function and the length of the
jitted code generated for that xlated instruction.

Example:

       Original:            Xlated:                              Jitted:

                                                                 0:   nopl    (%rax,%rax)
                                                                 5:   nop
                                                                 7:   pushq   %rbp
                                                                 8:   movq    %rsp, %rbp
       0:  call 0x76        0:  r0 = 0xfffffbeef                 b:   movabsq $-1923847220, %rax
                            2:  r0 = *(u64 *)(r0 +0)            15:   movq    (%rax), %rax
       1:  r1 = 0x9 ll      3:  r1 = map[id:666][0]+9           19:   movabsq $-102223334445559, %rdi
       3:  r2 = 0x6         5:  r2 = 6                          23:   movl    $6, %esi
       4:  r3 = r0          6:  r3 = r0                         28:   movq    %rax, %rdx
       5:  call 0x6         7:  call bpf_trace_printk           2b:   callq   0xffffffffcdead4dc
       6:  call pc+2        8:  call pc+2                       30:   callq   0x7c
       7:  r0 = 0x0         9:  r0 = 0                          35:   xorl    %eax, %eax
       8:  exit            10:  exit                            37:   leave
                                                                38:   jmp     0xffffffffcbeeffbc
       ---                 ---                                  ---
                                                                 0:   nopl    (%rax,%rax)
                                                                 5:   nop
                                                                 7:   pushq   %rbp
                                                                 8:   movq    %rsp, %rbp
       9:  goto +0x1       11:  goto pc+1                        b:   jmp     0xf
      10:  goto +0x1       12:  goto pc+1                        d:   jmp     0x11
      11:  goto -0x2       13:  goto pc-2                        f:   jmp     0xd
      12:  r0 = 0x0        14:  r0 = 0                          11:   xorl    %eax, %eax
      13:  exit            15:  exit                            13:   leave
                                                                14:   jmp     0xffffffffcbffbeef

Here the xlated_to_jit array will be of length 16 (11 + 5) and equal to

     0: (0xb, 10)
     1: (0,0) /* undefined, as the previous instruction is 16 bytes */
     2: (0x15, 4)
     3: (0x19, 10)
     4: (0,0) /* undefined, as the previous instruction is 16 bytes */
     5: (0x23, 5)
     6: (0x28, 3)
     7: (0x2b, 5)
     8: (0x30, 5)
     9: (0x35, 2)
    10: (0x37, 6)
    11: (0xb, 2)
    12: (0xd, 2)
    13: (0xf, 2)
    14: (0x11, 2)
    15: (0x13, 6)

The prologues are "unmapped": no xlated instruction maps into the jitted range [0, 0xb)
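
A minimal sketch of how user space could dump this mapping, assuming
the uapi added by this patch: struct bpf_xlated_to_jit and the
xlated_to_jit pointer come from the patched <linux/bpf.h>, while the
nr_xlated_to_jit counter name is an assumption used here only for
illustration:

    #include <stdio.h>
    #include <stdlib.h>
    #include <bpf/bpf.h>
    #include <linux/bpf.h>  /* patched headers with struct bpf_xlated_to_jit */

    static int dump_xlated_to_jit(int prog_fd, __u32 xlated_insn_cnt)
    {
            struct bpf_xlated_to_jit *xl2jit;
            struct bpf_prog_info info = {};
            __u32 info_len = sizeof(info);
            __u32 i;

            xl2jit = calloc(xlated_insn_cnt, sizeof(*xl2jit));
            if (!xl2jit)
                    return -1;

            /* field names below are taken from / assumed for the new uapi */
            info.xlated_to_jit = (__u64)(unsigned long)xl2jit;
            info.nr_xlated_to_jit = xlated_insn_cnt;

            if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
                    free(xl2jit);
                    return -1;
            }

            for (i = 0; i < info.nr_xlated_to_jit; i++)
                    printf("%2u: (0x%x, %u)\n", i, xl2jit[i].off, xl2jit[i].len);

            free(xl2jit);
            return 0;
    }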

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
---
 arch/x86/net/bpf_jit_comp.c    | 14 ++++++++++++++
 include/linux/bpf.h            |  7 +++++++
 include/uapi/linux/bpf.h       |  7 +++++++
 kernel/bpf/core.c              | 24 ++++++++++++++++++++++++
 kernel/bpf/syscall.c           | 25 +++++++++++++++++++++++++
 kernel/bpf/verifier.c          |  9 +++++++++
 tools/include/uapi/linux/bpf.h |  7 +++++++
 7 files changed, 93 insertions(+)

Comments

Alexei Starovoitov Feb. 6, 2024, 1:09 a.m. UTC | #1
On Fri, Feb 2, 2024 at 8:34 AM Anton Protopopov <aspsk@isovalent.com> wrote:
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 4def3dde35f6..bdd6be718e82 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1524,6 +1524,13 @@ struct bpf_prog_aux {
>         };
>         /* an array of original indexes for all xlated instructions */
>         u32 *orig_idx;
> +       /* for every xlated instruction point to all generated jited
> +        * instructions, if allocated
> +        */
> +       struct {
> +               u32 off;        /* local offset in the jitted code */
> +               u32 len;        /* the total len of generated jit code */
> +       } *xlated_to_jit;

Simply put Nack to this approach.

Patches 2 and 3 add an extreme amount of memory overhead.

As we discussed during office hours we need a "pointer to insn" concept
aka "index on insn".
The verifier would need to track that such things exist and adjust
indices of insns when patching affects those indices.

For every static branch there will be one such "pointer to insn".
Different algorithms can be used to keep them correct.
The simplest 'lets iterate over all such pointers and update them'
during patch_insn() may even be ok to start.

Such "pointer to insn" won't add any memory overhead.
When patch+jit is done all such "pointer to insn" are fixed value.
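
A minimal sketch of the "iterate over all such pointers and update
them" approach described above, as it might look on the verifier side.
The insn_ptrs bookkeeping structure and the function name are
hypothetical, not existing kernel code:

    #include <linux/types.h>

    /* hypothetical bookkeeping: xlated insn indices the verifier
     * promises to keep valid across instruction patching
     */
    struct insn_ptrs {
            u32 *idx;
            u32 cnt;
    };

    /* when patching inserts "delta" extra insns at index "off", shift
     * every tracked index that points past the patched area
     */
    static void adjust_insn_ptrs(struct insn_ptrs *ptrs, u32 off, u32 delta)
    {
            u32 i;

            for (i = 0; i < ptrs->cnt; i++)
                    if (ptrs->idx[i] > off)
                            ptrs->idx[i] += delta;
    }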
Anton Protopopov Feb. 6, 2024, 10:02 a.m. UTC | #2
On Mon, Feb 05, 2024 at 05:09:51PM -0800, Alexei Starovoitov wrote:
> On Fri, Feb 2, 2024 at 8:34 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index 4def3dde35f6..bdd6be718e82 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -1524,6 +1524,13 @@ struct bpf_prog_aux {
> >         };
> >         /* an array of original indexes for all xlated instructions */
> >         u32 *orig_idx;
> > +       /* for every xlated instruction point to all generated jited
> > +        * instructions, if allocated
> > +        */
> > +       struct {
> > +               u32 off;        /* local offset in the jitted code */
> > +               u32 len;        /* the total len of generated jit code */
> > +       } *xlated_to_jit;
> 
> Simply put Nack to this approach.
> 
> Patches 2 and 3 add an extreme amount of memory overhead.
> 
> As we discussed during office hours we need a "pointer to insn" concept
> aka "index on insn".
> The verifier would need to track that such things exist and adjust
> indices of insns when patching affects those indices.
> 
> For every static branch there will be one such "pointer to insn".
> Different algorithms can be used to keep them correct.
> The simplest 'lets iterate over all such pointers and update them'
> during patch_insn() may even be ok to start.
>
> Such "pointer to insn" won't add any memory overhead.
> When patch+jit is done all such "pointer to insn" are fixed value.

Ok, thanks for looking, this makes sense.
Alexei Starovoitov Feb. 7, 2024, 2:26 a.m. UTC | #3
On Tue, Feb 6, 2024 at 2:08 AM Anton Protopopov <aspsk@isovalent.com> wrote:
>
> On Mon, Feb 05, 2024 at 05:09:51PM -0800, Alexei Starovoitov wrote:
> > On Fri, Feb 2, 2024 at 8:34 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > >
> > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > index 4def3dde35f6..bdd6be718e82 100644
> > > --- a/include/linux/bpf.h
> > > +++ b/include/linux/bpf.h
> > > @@ -1524,6 +1524,13 @@ struct bpf_prog_aux {
> > >         };
> > >         /* an array of original indexes for all xlated instructions */
> > >         u32 *orig_idx;
> > > +       /* for every xlated instruction point to all generated jited
> > > +        * instructions, if allocated
> > > +        */
> > > +       struct {
> > > +               u32 off;        /* local offset in the jitted code */
> > > +               u32 len;        /* the total len of generated jit code */
> > > +       } *xlated_to_jit;
> >
> > Simply put Nack to this approach.
> >
> > Patches 2 and 3 add an extreme amount of memory overhead.
> >
> > As we discussed during office hours we need a "pointer to insn" concept
> > aka "index on insn".
> > The verifier would need to track that such things exist and adjust
> > indices of insns when patching affects those indices.
> >
> > For every static branch there will be one such "pointer to insn".
> > Different algorithms can be used to keep them correct.
> > The simplest 'lets iterate over all such pointers and update them'
> > during patch_insn() may even be ok to start.
> >
> > Such "pointer to insn" won't add any memory overhead.
> > When patch+jit is done all such "pointer to insn" are fixed value.
>
> Ok, thanks for looking, this makes sense.

Before jumping into coding I think it would be good to discuss
the design first.
I'm thinking such "address of insn" will be similar to
existing "address of subprog",
which is encoded in ld_imm64 as BPF_PSEUDO_FUNC.
"address of insn" would be a bit more involved to track
during JIT and likely trivial during insn patching,
since we're already doing imm adjustment for pseudo_func.
So that part of design is straightforward.
Implementation in the kernel and libbpf can copy paste from pseudo_func too.

The question is whether such "address of insn" should be allowed
in the data section. If so, we need to brainstorm how to
do it cleanly.
We had various hacks for similar things in the past. Like prog_array.
Let's not repeat such mistakes.
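
For context, the existing "address of subprog" mentioned above is what
taking the address of a BPF callback produces today: clang emits an
ld_imm64 that libbpf relocates into src_reg == BPF_PSEUDO_FUNC, and the
verifier/JIT keep it pointing at the right instruction. A small sketch
with illustrative map and function names:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_ARRAY);
            __uint(max_entries, 16);
            __type(key, __u32);
            __type(value, __u64);
    } some_map SEC(".maps");

    static __u64 count_elem(void *map, __u32 *key, __u64 *val, void *ctx)
    {
            (*(__u64 *)ctx)++;
            return 0;
    }

    SEC("tc")
    int count_entries(struct __sk_buff *skb)
    {
            __u64 n = 0;

            /* &count_elem becomes an ld_imm64 marked BPF_PSEUDO_FUNC */
            bpf_for_each_map_elem(&some_map, count_elem, &n, 0);
            return 0;
    }

    char LICENSE[] SEC("license") = "GPL";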
Anton Protopopov Feb. 8, 2024, 11:05 a.m. UTC | #4
On Tue, Feb 06, 2024 at 06:26:12PM -0800, Alexei Starovoitov wrote:
> On Tue, Feb 6, 2024 at 2:08 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> >
> > On Mon, Feb 05, 2024 at 05:09:51PM -0800, Alexei Starovoitov wrote:
> > > On Fri, Feb 2, 2024 at 8:34 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > >
> > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > index 4def3dde35f6..bdd6be718e82 100644
> > > > --- a/include/linux/bpf.h
> > > > +++ b/include/linux/bpf.h
> > > > @@ -1524,6 +1524,13 @@ struct bpf_prog_aux {
> > > >         };
> > > >         /* an array of original indexes for all xlated instructions */
> > > >         u32 *orig_idx;
> > > > +       /* for every xlated instruction point to all generated jited
> > > > +        * instructions, if allocated
> > > > +        */
> > > > +       struct {
> > > > +               u32 off;        /* local offset in the jitted code */
> > > > +               u32 len;        /* the total len of generated jit code */
> > > > +       } *xlated_to_jit;
> > >
> > > Simply put Nack to this approach.
> > >
> > > Patches 2 and 3 add an extreme amount of memory overhead.
> > >
> > > As we discussed during office hours we need a "pointer to insn" concept
> > > aka "index on insn".
> > > The verifier would need to track that such things exist and adjust
> > > indices of insns when patching affects those indices.
> > >
> > > For every static branch there will be one such "pointer to insn".
> > > Different algorithms can be used to keep them correct.
> > > The simplest 'lets iterate over all such pointers and update them'
> > > during patch_insn() may even be ok to start.
> > >
> > > Such "pointer to insn" won't add any memory overhead.
> > > When patch+jit is done all such "pointer to insn" are fixed value.
> >
> > Ok, thanks for looking, this makes sense.
> 
> Before jumping into coding I think it would be good to discuss
> the design first.
> I'm thinking such "address of insn" will be similar to
> existing "address of subprog",
> which is encoded in ld_imm64 as BPF_PSEUDO_FUNC.
> "address of insn" would be a bit more involved to track
> during JIT and likely trivial during insn patching,
> since we're already doing imm adjustment for pseudo_func.
> So that part of design is straightforward.
> Implementation in the kernel and libbpf can copy paste from pseudo_func too.

To implement the "primitive version" of static branches, where the
only API is `static_branch_update(xlated off, on/off)` the only
requirement is to build `xlated -> jitted` mapping (which is done
in JIT, after the verification). This can be done in a simplified
version of this patch, without xlated->orig mapping and with
xlated->jit mapping only done to gotol_or_nop instructions.

The "address of insn" appears when we want to provide a more
higher-level API when some object (in user-space or in kernel) keeps
track of one or more gotol_or_nop instructions so that after the
program load this controlling object has a list of xlated offsets.
But this would be a follow-up to the initial static branches patch.

> The question is whether such "address of insn" should be allowed
> in the data section. If so, we need to brainstorm how to
> do it cleanly.
> We had various hacks for similar things in the past. Like prog_array.
> Let's not repeat such mistakes.

So, data section is required for implementing jump tables? Like,
to add a new PTR_TO_LABEL or PTR_TO_INSN data type, and a
corresponding "ptr to insn" object for every occurence of &&label,
which will be adjusted during verification.
Looks to me like this one doesn't require any more API than specifying
a list of &&label occurrences on program load.

For "static keys" though (a feature on top of this patch series) we
need to have access to the corresponding set of adjusted pointers.

Isn't this enough to add something like an array of

  struct insn_ptr {
      u32 type; /* LABEL, STATIC_BRANCH,... */
      u32 insn_off; /* original offset on load */
      union {
          struct label {...};
          struct st_branch { u32 key_id, ..};
      };
  };

to load attrs and then get the xlated list after the program is
loaded? Then no new maps/APIs are needed and this can be extended.
Alexei Starovoitov Feb. 15, 2024, 6:48 a.m. UTC | #5
On Thu, Feb 8, 2024 at 3:11 AM Anton Protopopov <aspsk@isovalent.com> wrote:
>
> On Tue, Feb 06, 2024 at 06:26:12PM -0800, Alexei Starovoitov wrote:
> > On Tue, Feb 6, 2024 at 2:08 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > >
> > > On Mon, Feb 05, 2024 at 05:09:51PM -0800, Alexei Starovoitov wrote:
> > > > On Fri, Feb 2, 2024 at 8:34 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > > >
> > > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > > index 4def3dde35f6..bdd6be718e82 100644
> > > > > --- a/include/linux/bpf.h
> > > > > +++ b/include/linux/bpf.h
> > > > > @@ -1524,6 +1524,13 @@ struct bpf_prog_aux {
> > > > >         };
> > > > >         /* an array of original indexes for all xlated instructions */
> > > > >         u32 *orig_idx;
> > > > > +       /* for every xlated instruction point to all generated jited
> > > > > +        * instructions, if allocated
> > > > > +        */
> > > > > +       struct {
> > > > > +               u32 off;        /* local offset in the jitted code */
> > > > > +               u32 len;        /* the total len of generated jit code */
> > > > > +       } *xlated_to_jit;
> > > >
> > > > Simply put Nack to this approach.
> > > >
> > > > Patches 2 and 3 add an extreme amount of memory overhead.
> > > >
> > > > As we discussed during office hours we need a "pointer to insn" concept
> > > > aka "index on insn".
> > > > The verifier would need to track that such things exist and adjust
> > > > indices of insns when patching affects those indices.
> > > >
> > > > For every static branch there will be one such "pointer to insn".
> > > > Different algorithms can be used to keep them correct.
> > > > The simplest 'lets iterate over all such pointers and update them'
> > > > during patch_insn() may even be ok to start.
> > > >
> > > > Such "pointer to insn" won't add any memory overhead.
> > > > When patch+jit is done all such "pointer to insn" are fixed value.
> > >
> > > Ok, thanks for looking, this makes sense.
> >
> > Before jumping into coding I think it would be good to discuss
> > the design first.
> > I'm thinking such "address of insn" will be similar to
> > existing "address of subprog",
> > which is encoded in ld_imm64 as BPF_PSEUDO_FUNC.
> > "address of insn" would be a bit more involved to track
> > during JIT and likely trivial during insn patching,
> > since we're already doing imm adjustment for pseudo_func.
> > So that part of design is straightforward.
> > Implementation in the kernel and libbpf can copy paste from pseudo_func too.
>
> To implement the "primitive version" of static branches, where the
> only API is `static_branch_update(xlated off, on/off)` the only
> requirement is to build `xlated -> jitted` mapping (which is done
> in JIT, after the verification). This can be done in a simplified
> version of this patch, without xlated->orig mapping and with
> xlated->jit mapping only done to gotol_or_nop instructions.

yes. The array of insn->jit_addr entries, sized with as many
goto_or_nop-s as the prog has, will work for user space to flip them, but...

> The "address of insn" appears when we want to provide a more
> higher-level API when some object (in user-space or in kernel) keeps
> track of one or more gotol_or_nop instructions so that after the
> program load this controlling object has a list of xlated offsets.
> But this would be a follow-up to the initial static branches patch.

this won't work as a follow up,
since such an array won't work for bpf prog that wants to flip branches.
There is nothing that associates static_branch name/id with
particular goto_or_nop.
There could be a kfunc that bpf prog calls, but it can only
flip all of such insns in the prog.
Unless we start encoding a special id inside goto_or_nop or other hacks.

> > The question is whether such "address of insn" should be allowed
> > in the data section. If so, we need to brainstorm how to
> > do it cleanly.
> > We had various hacks for similar things in the past. Like prog_array.
> > Let's not repeat such mistakes.
>
> So, data section is required for implementing jump tables? Like,
> to add a new PTR_TO_LABEL or PTR_TO_INSN data type, and a
> corresponding "ptr to insn" object for every occurence of &&label,
> which will be adjusted during verification.
> Looks to me like this one doesn't require any more API than specifying
> a list of &&label occurencies on program load.
>
> For "static keys" though (a feature on top of this patch series) we
> need to have access to the corresponding set of adjusted pointers.
>
> Isn't this enough to add something like an array of
>
>   struct insn_ptr {
>       u32 type; /* LABEL, STATIC_BRANCH,... */
>       u32 insn_off; /* original offset on load */
>       union {
>           struct label {...};
>           struct st_branch { u32 key_id, ..};
>       };
>   };

which I don't like because it hard codes static_branch needs into
insn->jit_addr association.
"address of insn" should be an individual building block without
bolted on parts.

A data section with a set of such "address of insn"
can be a description of one static_branch.
There will be different ways to combine such building blocks.
For example:
static_branch(foo) can emit goto_or_nop into bpf code
and add "address of insn" into a section '.insn_addrs.foo".
This section is what libbpf and bpf prog will recognize as a set
of "address of insn" that can be passed into static_branch_update kfunc
or static_branch_update sys_bpf command.
The question is whether we need a new map type (array derivative)
to hold a set of "address of insn" or it can be a part of an existing
global data array.
A new map type is easier to reason about.
Notice how such a new map type is not a map type of static branches.
It's not a map type of goto_or_nop instructions either.

At load time libbpf can populate this array with indices of insns
that the verifier and JIT need to track. Once JITed the array is readonly
for bpf prog and for user space.

With that mechanism compilers can generate a proper switch() jmp table.
llvm work can be a follow up, of course, but the whole design needs
to be thought through to cover all use cases.

To summarize, here's what I'm proposing:
- PTR_TO_INSN verifier regtype that can be passed to static_branch_update kfunc
- new map type (array) that holds objects that are PTR_TO_INSN for the verifier
libbpf populates this array with indices of insn it wants to track.
bpf prog needs to "use" this array, so prog/map association is built.
- verifier/JIT update each PTR_TO_INSN during transformations.
- static_branch(foo) macro emits goto_or_nop insn and adds 8 bytes
into ".insn_addrs.foo" section with an ELF relocation that
libbpf will convert into index.

When compilers implement jmptables for switch(key) they will generate
".insn_addrs.uniq_suffix" sections and emit
rX = ld_imm64 that_section
rX += switch_key
rY = *(u64 *)rX
jmpx rY

The verifier would need to do push_stack() for this indirect jmp insn
as many times as there are elements in ".insn_addrs.uniq_suffix" array.

And similar for indirect calls.
That section becomes an array of pointers to functions.
We can make it more flexible for indirect callx by
storing BTF func proto and allowing global subprogs with same proto
to match as safe call targets.
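
A hedged sketch of the push_stack() handling mentioned above, written
as if it lived in kernel/bpf/verifier.c: one pushed state per possible
target recorded in the ".insn_addrs.*" map. push_stack() and
bpf_map->max_entries exist today; insn_array_xlated_idx() is a made-up
accessor for the hypothetical new map type:

    static int check_indirect_jmp(struct bpf_verifier_env *env,
                                  struct bpf_map *insn_array,
                                  int insn_idx)
    {
            struct bpf_verifier_state *branch;
            u32 i;

            /* explore every recorded target of the jmpx instruction */
            for (i = 0; i < insn_array->max_entries; i++) {
                    u32 target = insn_array_xlated_idx(insn_array, i);

                    branch = push_stack(env, target, insn_idx, false);
                    if (!branch)
                            return -EFAULT;
            }
            return 0;
    }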
Anton Protopopov Feb. 16, 2024, 1:57 p.m. UTC | #6
On Wed, Feb 14, 2024 at 10:48:26PM -0800, Alexei Starovoitov wrote:
> On Thu, Feb 8, 2024 at 3:11 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> >
> > On Tue, Feb 06, 2024 at 06:26:12PM -0800, Alexei Starovoitov wrote:
> > > On Tue, Feb 6, 2024 at 2:08 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > >
> > > > On Mon, Feb 05, 2024 at 05:09:51PM -0800, Alexei Starovoitov wrote:
> > > > > On Fri, Feb 2, 2024 at 8:34 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > > > >
> > > > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > > > index 4def3dde35f6..bdd6be718e82 100644
> > > > > > --- a/include/linux/bpf.h
> > > > > > +++ b/include/linux/bpf.h
> > > > > > @@ -1524,6 +1524,13 @@ struct bpf_prog_aux {
> > > > > >         };
> > > > > >         /* an array of original indexes for all xlated instructions */
> > > > > >         u32 *orig_idx;
> > > > > > +       /* for every xlated instruction point to all generated jited
> > > > > > +        * instructions, if allocated
> > > > > > +        */
> > > > > > +       struct {
> > > > > > +               u32 off;        /* local offset in the jitted code */
> > > > > > +               u32 len;        /* the total len of generated jit code */
> > > > > > +       } *xlated_to_jit;
> > > > >
> > > > > Simply put Nack to this approach.
> > > > >
> > > > > Patches 2 and 3 add an extreme amount of memory overhead.
> > > > >
> > > > > As we discussed during office hours we need a "pointer to insn" concept
> > > > > aka "index on insn".
> > > > > The verifier would need to track that such things exist and adjust
> > > > > indices of insns when patching affects those indices.
> > > > >
> > > > > For every static branch there will be one such "pointer to insn".
> > > > > Different algorithms can be used to keep them correct.
> > > > > The simplest 'lets iterate over all such pointers and update them'
> > > > > during patch_insn() may even be ok to start.
> > > > >
> > > > > Such "pointer to insn" won't add any memory overhead.
> > > > > When patch+jit is done all such "pointer to insn" are fixed value.
> > > >
> > > > Ok, thanks for looking, this makes sense.
> > >
> > > Before jumping into coding I think it would be good to discuss
> > > the design first.
> > > I'm thinking such "address of insn" will be similar to
> > > existing "address of subprog",
> > > which is encoded in ld_imm64 as BPF_PSEUDO_FUNC.
> > > "address of insn" would be a bit more involved to track
> > > during JIT and likely trivial during insn patching,
> > > since we're already doing imm adjustment for pseudo_func.
> > > So that part of design is straightforward.
> > > Implementation in the kernel and libbpf can copy paste from pseudo_func too.
> >
> > To implement the "primitive version" of static branches, where the
> > only API is `static_branch_update(xlated off, on/off)` the only
> > requirement is to build `xlated -> jitted` mapping (which is done
> > in JIT, after the verification). This can be done in a simplified
> > version of this patch, without xlated->orig mapping and with
> > xlated->jit mapping only done to gotol_or_nop instructions.
> 
> yes. The array of insn->jit_addr sized with as many goto_or_nop-s
> the prog will work for user space to flip them, but...
> 
> > The "address of insn" appears when we want to provide a more
> > higher-level API when some object (in user-space or in kernel) keeps
> > track of one or more gotol_or_nop instructions so that after the
> > program load this controlling object has a list of xlated offsets.
> > But this would be a follow-up to the initial static branches patch.
> 
> this won't work as a follow up,
> since such an array won't work for bpf prog that wants to flip branches.
> There is nothing that associates static_branch name/id with
> particular goto_or_nop.
> There could be a kfunc that bpf prog calls, but it can only
> flip all of such insns in the prog.
> Unless we start encoding a special id inside goto_or_nop or other hacks.
> 
> > > The question is whether such "address of insn" should be allowed
> > > in the data section. If so, we need to brainstorm how to
> > > do it cleanly.
> > > We had various hacks for similar things in the past. Like prog_array.
> > > Let's not repeat such mistakes.
> >
> > So, data section is required for implementing jump tables? Like,
> > to add a new PTR_TO_LABEL or PTR_TO_INSN data type, and a
> > corresponding "ptr to insn" object for every occurence of &&label,
> > which will be adjusted during verification.
> > Looks to me like this one doesn't require any more API than specifying
> > a list of &&label occurencies on program load.
> >
> > For "static keys" though (a feature on top of this patch series) we
> > need to have access to the corresponding set of adjusted pointers.
> >
> > Isn't this enough to add something like an array of
> >
> >   struct insn_ptr {
> >       u32 type; /* LABEL, STATIC_BRANCH,... */
> >       u32 insn_off; /* original offset on load */
> >       union {
> >           struct label {...};
> >           struct st_branch { u32 key_id, ..};
> >       };
> >   };
> 
> which I don't like because it hard codes static_branch needs into
> insn->jit_addr association.
> "address of insn" should be an individual building block without
> bolted on parts.
> 
> A data section with a set of such "address of insn"
> can be a description of one static_branch.
> There will be different ways to combine such building blocks.
> For example:
> static_branch(foo) can emit goto_or_nop into bpf code
> and add "address of insn" into a section '.insn_addrs.foo".
> This section is what libbpf and bpf prog will recognize as a set
> of "address of insn" that can be passed into static_branch_update kfunc
> or static_branch_update sys_bpf command.
> The question is whether we need a new map type (array derivative)
> to hold a set of "address of insn" or it can be a part of an existing
> global data array.
> A new map type is easier to reason about.
> Notice how such a new map type is not a map type of static branches.
> It's not a map type of goto_or_nop instructions either.
> 
> At load time libbpf can populate this array with indices of insns
> that the verifier and JIT need to track. Once JITed the array is readonly
> for bpf prog and for user space.

So this will be a map per .insn_addrs.X section (where X is key or
a pre-defined suffix for jump tables or indirect calls). And to tell
the verifier about these maps we will need to pass an array of

    struct {
            u32 map_fd;
            u32 type; /* static key, jump table, etc. */
    }

on program load. Is this correct?

> With that mechanism compilers can generate a proper switch() jmp table.
> llvm work can be a follow up, of course, but the whole design needs
> to be thought through to cover all use cases.
> 
> To summarize, here's what I'm proposing:
> - PTR_TO_INSN verifier regtype that can be passed to static_branch_update kfunc

If we have a set of pointers to jump instructions, generated from
static_branch(foo) for the same foo, then it makes more sense to
provide a

    static_branch_update(foo)

(where foo is substituted by libbpf with a map fd of .insn_addrs.foo
on load). The same for userspace:

    bpf(STATIC_BRANCH_UPDATE, .attrs={.map_fd=foo})

> - new map type (array) that holds objects that are PTR_TO_INSN for the verifier
> libbpf populates this array with indices of insn it wants to track.
> bpf prog needs to "use" this array, so prog/map association is built.
> - verifier/JIT update each PTR_TO_INSN during transformations.
> - static_branch(foo) macro emits goto_or_nop insn and adds 8 bytes
> into ".insn_addrs.foo" section with an ELF relocation that
> libbpf will convert into index.
> 
> When compilers implement jmptables for switch(key) they will generate
> ".insn_addrs.uniq_suffix" sections and emit
> rX = ld_imm64 that_section
> rX += switch_key
> rY = *(u64 *)rX
> jmpx rY

What are the types for rX and rY? I thought that we will need to do
smth like

  rX = .insn_addrs.uniq_suffix[switch_key] /* rX has type PTR_TO_INSN */
  ...
  jmpx rX

this can be done if for switch cases (or any other goto *label alike) we generate

  rX = map_lookup_elem(.insn_addrs.uniq_suffix, index)
  jmpx rX

> The verifier would need to do push_stack() for this indirect jmp insn
> as many times as there are elements in ".insn_addrs.uniq_suffix" array.
> 
> And similar for indirect calls.
> That section becomes an array of pointers to functions.
> We can make it more flexible for indirect callx by
> storing BTF func proto and allowing global subprogs with same proto
> to match as safe call targets.
Alexei Starovoitov Feb. 21, 2024, 1:09 a.m. UTC | #7
On Fri, Feb 16, 2024 at 6:04 AM Anton Protopopov <aspsk@isovalent.com> wrote:
>
> On Wed, Feb 14, 2024 at 10:48:26PM -0800, Alexei Starovoitov wrote:
> > On Thu, Feb 8, 2024 at 3:11 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > >
> > > On Tue, Feb 06, 2024 at 06:26:12PM -0800, Alexei Starovoitov wrote:
> > > > On Tue, Feb 6, 2024 at 2:08 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > > >
> > > > > On Mon, Feb 05, 2024 at 05:09:51PM -0800, Alexei Starovoitov wrote:
> > > > > > On Fri, Feb 2, 2024 at 8:34 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > > > > >
> > > > > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > > > > index 4def3dde35f6..bdd6be718e82 100644
> > > > > > > --- a/include/linux/bpf.h
> > > > > > > +++ b/include/linux/bpf.h
> > > > > > > @@ -1524,6 +1524,13 @@ struct bpf_prog_aux {
> > > > > > >         };
> > > > > > >         /* an array of original indexes for all xlated instructions */
> > > > > > >         u32 *orig_idx;
> > > > > > > +       /* for every xlated instruction point to all generated jited
> > > > > > > +        * instructions, if allocated
> > > > > > > +        */
> > > > > > > +       struct {
> > > > > > > +               u32 off;        /* local offset in the jitted code */
> > > > > > > +               u32 len;        /* the total len of generated jit code */
> > > > > > > +       } *xlated_to_jit;
> > > > > >
> > > > > > Simply put Nack to this approach.
> > > > > >
> > > > > > Patches 2 and 3 add an extreme amount of memory overhead.
> > > > > >
> > > > > > As we discussed during office hours we need a "pointer to insn" concept
> > > > > > aka "index on insn".
> > > > > > The verifier would need to track that such things exist and adjust
> > > > > > indices of insns when patching affects those indices.
> > > > > >
> > > > > > For every static branch there will be one such "pointer to insn".
> > > > > > Different algorithms can be used to keep them correct.
> > > > > > The simplest 'lets iterate over all such pointers and update them'
> > > > > > during patch_insn() may even be ok to start.
> > > > > >
> > > > > > Such "pointer to insn" won't add any memory overhead.
> > > > > > When patch+jit is done all such "pointer to insn" are fixed value.
> > > > >
> > > > > Ok, thanks for looking, this makes sense.
> > > >
> > > > Before jumping into coding I think it would be good to discuss
> > > > the design first.
> > > > I'm thinking such "address of insn" will be similar to
> > > > existing "address of subprog",
> > > > which is encoded in ld_imm64 as BPF_PSEUDO_FUNC.
> > > > "address of insn" would be a bit more involved to track
> > > > during JIT and likely trivial during insn patching,
> > > > since we're already doing imm adjustment for pseudo_func.
> > > > So that part of design is straightforward.
> > > > Implementation in the kernel and libbpf can copy paste from pseudo_func too.
> > >
> > > To implement the "primitive version" of static branches, where the
> > > only API is `static_branch_update(xlated off, on/off)` the only
> > > requirement is to build `xlated -> jitted` mapping (which is done
> > > in JIT, after the verification). This can be done in a simplified
> > > version of this patch, without xlated->orig mapping and with
> > > xlated->jit mapping only done to gotol_or_nop instructions.
> >
> > yes. The array of insn->jit_addr sized with as many goto_or_nop-s
> > the prog will work for user space to flip them, but...
> >
> > > The "address of insn" appears when we want to provide a more
> > > higher-level API when some object (in user-space or in kernel) keeps
> > > track of one or more gotol_or_nop instructions so that after the
> > > program load this controlling object has a list of xlated offsets.
> > > But this would be a follow-up to the initial static branches patch.
> >
> > this won't work as a follow up,
> > since such an array won't work for bpf prog that wants to flip branches.
> > There is nothing that associates static_branch name/id with
> > particular goto_or_nop.
> > There could be a kfunc that bpf prog calls, but it can only
> > flip all of such insns in the prog.
> > Unless we start encoding a special id inside goto_or_nop or other hacks.
> >
> > > > The question is whether such "address of insn" should be allowed
> > > > in the data section. If so, we need to brainstorm how to
> > > > do it cleanly.
> > > > We had various hacks for similar things in the past. Like prog_array.
> > > > Let's not repeat such mistakes.
> > >
> > > So, data section is required for implementing jump tables? Like,
> > > to add a new PTR_TO_LABEL or PTR_TO_INSN data type, and a
> > > corresponding "ptr to insn" object for every occurence of &&label,
> > > which will be adjusted during verification.
> > > Looks to me like this one doesn't require any more API than specifying
> > > a list of &&label occurencies on program load.
> > >
> > > For "static keys" though (a feature on top of this patch series) we
> > > need to have access to the corresponding set of adjusted pointers.
> > >
> > > Isn't this enough to add something like an array of
> > >
> > >   struct insn_ptr {
> > >       u32 type; /* LABEL, STATIC_BRANCH,... */
> > >       u32 insn_off; /* original offset on load */
> > >       union {
> > >           struct label {...};
> > >           struct st_branch { u32 key_id, ..};
> > >       };
> > >   };
> >
> > which I don't like because it hard codes static_branch needs into
> > insn->jit_addr association.
> > "address of insn" should be an individual building block without
> > bolted on parts.
> >
> > A data section with a set of such "address of insn"
> > can be a description of one static_branch.
> > There will be different ways to combine such building blocks.
> > For example:
> > static_branch(foo) can emit goto_or_nop into bpf code
> > and add "address of insn" into a section '.insn_addrs.foo".
> > This section is what libbpf and bpf prog will recognize as a set
> > of "address of insn" that can be passed into static_branch_update kfunc
> > or static_branch_update sys_bpf command.
> > The question is whether we need a new map type (array derivative)
> > to hold a set of "address of insn" or it can be a part of an existing
> > global data array.
> > A new map type is easier to reason about.
> > Notice how such a new map type is not a map type of static branches.
> > It's not a map type of goto_or_nop instructions either.
> >
> > At load time libbpf can populate this array with indices of insns
> > that the verifier and JIT need to track. Once JITed the array is readonly
> > for bpf prog and for user space.
>
> So this will be a map per .insn_addrs.X section (where X is key or
> a pre-defined suffix for jump tables or indirect calls). And to tell
> the verifier about these maps we will need to pass an array of
>
>     struct {
>             u32 map_fd;
>             u32 type; /* static key, jump table, etc. */
>     }
>
> on program load. Is this correct?

Probably not.
Since we're going with a new map type (at least for the sake of this
discussion) it shouldn't need a new way to tell the verifier about it.
If .insn_addrs.jmp_table_A was a section generated for switch() statement
by llvm it will be created as a map by libbpf,
and there will be an ld_imm64 insn generated by llvm that points
to that map.
libbpf will populate ld_imm64 insn with map_fd, just like it does
for global data.

> > With that mechanism compilers can generate a proper switch() jmp table.
> > llvm work can be a follow up, of course, but the whole design needs
> > to be thought through to cover all use cases.
> >
> > To summarize, here's what I'm proposing:
> > - PTR_TO_INSN verifier regtype that can be passed to static_branch_update kfunc
>
> If we have a set of pointers to jump instructions, generated from
> static_branch(foo) for same foo, then this makes more sense to
> provide a
>
>     static_branch_update(foo)

For bpf_static_branch_update(&foo) kfunc there will be another
ld_imm64 insn that points to that map.
No need for new interface here either.

> (where foo is substituted by libbpf with a map fd of .insn_addrs.foo
> on load). The same for userspace:
>
>     bpf(STATIC_BRANCH_UPDATE, .attrs={.map_fd=foo})

but for libbpf it would be nice to have a helper that knows
this .insn_addrs section details.

> > - new map type (array) that holds objects that are PTR_TO_INSN for the verifier
> > libbpf populates this array with indices of insn it wants to track.
> > bpf prog needs to "use" this array, so prog/map association is built.
> > - verifier/JIT update each PTR_TO_INSN during transformations.
> > - static_branch(foo) macro emits goto_or_nop insn and adds 8 bytes
> > into ".insn_addrs.foo" section with an ELF relocation that
> > libbpf will convert into index.
> >
> > When compilers implement jmptables for switch(key) they will generate
> > ".insn_addrs.uniq_suffix" sections and emit
> > rX = ld_imm64 that_section
> > rX += switch_key
> > rY = *(u64 *)rX
> > jmpx rY
>
> What are the types for rX and rY? I thought that we will need to do
> smth like
>
>   rX = .insn_addrs.uniq_suffix[switch_key] /* rX has type PTR_TO_INSN */
>   ...
>   jmpx rX

right. That ".insn_addrs.uniq_suffix[switch_key]" C syntax is exactly:
  rX = ld_imm64 that_section
  rX += switch_key
in assembly.

>
> this can be done if for switch cases (or any other goto *label alike) we generate
>
>   rX = map_lookup_elem(.insn_addrs.uniq_suffix, index)
>   jmpx rX

No need for function calls.
  rX = ld_imm64 that_section
  rX += switch_key

should work.

It works for global variables already, like:
  rX = ld_imm64 global_data_array_map
  rX += 8 // address of 2nd u64 in global data
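
For reference, a small BPF C sketch of the global-data pattern referred
to above: the access below is compiled into an ld_imm64 referencing the
program's global data map plus an 8-byte offset (whether the offset
ends up as a separate add or folded into the memory access is a codegen
detail). Variable names are illustrative:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    __u64 counters[4];      /* placed in the program's global data map */

    SEC("tc")
    int count_packets(struct __sk_buff *skb)
    {
            counters[1]++;  /* global data map + offset 8 */
            return 0;
    }

    char LICENSE[] SEC("license") = "GPL";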
Anton Protopopov March 6, 2024, 10:44 a.m. UTC | #8
On Tue, Feb 20, 2024 at 05:09:36PM -0800, Alexei Starovoitov wrote:
> On Fri, Feb 16, 2024 at 6:04 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> >
> > On Wed, Feb 14, 2024 at 10:48:26PM -0800, Alexei Starovoitov wrote:
> > > On Thu, Feb 8, 2024 at 3:11 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > >
> > > > On Tue, Feb 06, 2024 at 06:26:12PM -0800, Alexei Starovoitov wrote:
> > > > > On Tue, Feb 6, 2024 at 2:08 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > > > >
> > > > > > On Mon, Feb 05, 2024 at 05:09:51PM -0800, Alexei Starovoitov wrote:
> > > > > > > On Fri, Feb 2, 2024 at 8:34 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > > > > > >
> > > > > > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > > > > > index 4def3dde35f6..bdd6be718e82 100644
> > > > > > > > --- a/include/linux/bpf.h
> > > > > > > > +++ b/include/linux/bpf.h
> > > > > > > > @@ -1524,6 +1524,13 @@ struct bpf_prog_aux {
> > > > > > > >         };
> > > > > > > >         /* an array of original indexes for all xlated instructions */
> > > > > > > >         u32 *orig_idx;
> > > > > > > > +       /* for every xlated instruction point to all generated jited
> > > > > > > > +        * instructions, if allocated
> > > > > > > > +        */
> > > > > > > > +       struct {
> > > > > > > > +               u32 off;        /* local offset in the jitted code */
> > > > > > > > +               u32 len;        /* the total len of generated jit code */
> > > > > > > > +       } *xlated_to_jit;
> > > > > > >
> > > > > > > Simply put Nack to this approach.
> > > > > > >
> > > > > > > Patches 2 and 3 add an extreme amount of memory overhead.
> > > > > > >
> > > > > > > As we discussed during office hours we need a "pointer to insn" concept
> > > > > > > aka "index on insn".
> > > > > > > The verifier would need to track that such things exist and adjust
> > > > > > > indices of insns when patching affects those indices.
> > > > > > >
> > > > > > > For every static branch there will be one such "pointer to insn".
> > > > > > > Different algorithms can be used to keep them correct.
> > > > > > > The simplest 'lets iterate over all such pointers and update them'
> > > > > > > during patch_insn() may even be ok to start.
> > > > > > >
> > > > > > > Such "pointer to insn" won't add any memory overhead.
> > > > > > > When patch+jit is done all such "pointer to insn" are fixed value.
> > > > > >
> > > > > > Ok, thanks for looking, this makes sense.
> > > > >
> > > > > Before jumping into coding I think it would be good to discuss
> > > > > the design first.
> > > > > I'm thinking such "address of insn" will be similar to
> > > > > existing "address of subprog",
> > > > > which is encoded in ld_imm64 as BPF_PSEUDO_FUNC.
> > > > > "address of insn" would be a bit more involved to track
> > > > > during JIT and likely trivial during insn patching,
> > > > > since we're already doing imm adjustment for pseudo_func.
> > > > > So that part of design is straightforward.
> > > > > Implementation in the kernel and libbpf can copy paste from pseudo_func too.
> > > >
> > > > To implement the "primitive version" of static branches, where the
> > > > only API is `static_branch_update(xlated off, on/off)` the only
> > > > requirement is to build `xlated -> jitted` mapping (which is done
> > > > in JIT, after the verification). This can be done in a simplified
> > > > version of this patch, without xlated->orig mapping and with
> > > > xlated->jit mapping only done to gotol_or_nop instructions.
> > >
> > > yes. The array of insn->jit_addr sized with as many goto_or_nop-s
> > > the prog will work for user space to flip them, but...
> > >
> > > > The "address of insn" appears when we want to provide a more
> > > > higher-level API when some object (in user-space or in kernel) keeps
> > > > track of one or more gotol_or_nop instructions so that after the
> > > > program load this controlling object has a list of xlated offsets.
> > > > But this would be a follow-up to the initial static branches patch.
> > >
> > > this won't work as a follow up,
> > > since such an array won't work for bpf prog that wants to flip branches.
> > > There is nothing that associates static_branch name/id with
> > > particular goto_or_nop.
> > > There could be a kfunc that bpf prog calls, but it can only
> > > flip all of such insns in the prog.
> > > Unless we start encoding a special id inside goto_or_nop or other hacks.
> > >
> > > > > The question is whether such "address of insn" should be allowed
> > > > > in the data section. If so, we need to brainstorm how to
> > > > > do it cleanly.
> > > > > We had various hacks for similar things in the past. Like prog_array.
> > > > > Let's not repeat such mistakes.
> > > >
> > > > So, data section is required for implementing jump tables? Like,
> > > > to add a new PTR_TO_LABEL or PTR_TO_INSN data type, and a
> > > > corresponding "ptr to insn" object for every occurence of &&label,
> > > > which will be adjusted during verification.
> > > > Looks to me like this one doesn't require any more API than specifying
> > > > a list of &&label occurencies on program load.
> > > >
> > > > For "static keys" though (a feature on top of this patch series) we
> > > > need to have access to the corresponding set of adjusted pointers.
> > > >
> > > > Isn't this enough to add something like an array of
> > > >
> > > >   struct insn_ptr {
> > > >       u32 type; /* LABEL, STATIC_BRANCH,... */
> > > >       u32 insn_off; /* original offset on load */
> > > >       union {
> > > >           struct label {...};
> > > >           struct st_branch { u32 key_id, ..};
> > > >       };
> > > >   };
> > >
> > > which I don't like because it hard codes static_branch needs into
> > > insn->jit_addr association.
> > > "address of insn" should be an individual building block without
> > > bolted on parts.
> > >
> > > A data section with a set of such "address of insn"
> > > can be a description of one static_branch.
> > > There will be different ways to combine such building blocks.
> > > For example:
> > > static_branch(foo) can emit goto_or_nop into bpf code
> > > and add "address of insn" into a section '.insn_addrs.foo".
> > > This section is what libbpf and bpf prog will recognize as a set
> > > of "address of insn" that can be passed into static_branch_update kfunc
> > > or static_branch_update sys_bpf command.
> > > The question is whether we need a new map type (array derivative)
> > > to hold a set of "address of insn" or it can be a part of an existing
> > > global data array.
> > > A new map type is easier to reason about.
> > > Notice how such a new map type is not a map type of static branches.
> > > It's not a map type of goto_or_nop instructions either.
> > >
> > > At load time libbpf can populate this array with indices of insns
> > > that the verifier and JIT need to track. Once JITed the array is readonly
> > > for bpf prog and for user space.
> >
> > So this will be a map per .insn_addrs.X section (where X is key or
> > a pre-defined suffix for jump tables or indirect calls). And to tell
> > the verifier about these maps we will need to pass an array of
> >
> >     struct {
> >             u32 map_fd;
> >             u32 type; /* static key, jump table, etc. */
> >     }
> >
> > on program load. Is this correct?
>
>
> Probably not.
> Since we're going with a new map type (at least for the sake of this
> discussion) it shouldn't need a new way to tell the verifier about it.
> If .insn_addrs.jmp_table_A was a section generated for switch() statement
> by llvm it will be created as a map by libbpf,
> and there will be an ld_imm64 insn generated by llvm that points
> to that map.
> libbpf will populate ld_imm64 insn with map_fd, just like it does
> for global data.

I understand how this works for indirect jumps (and for the
bpf_static_branch_update(&foo) kfunc) where we have a ld_imm64 with a
map. However, I am still not sure how this will work for static
branches, where we just have an 8-byte JA insn + an index in the
corresponding ".insn_addrs.foo" section. How will the kernel know that
the program is using the map we create from ".insn_addrs.foo" without
specifying this on program load?

(Sorry for replying late, catching up after [simultaneous] pto &
covid.)

> > > With that mechanism compilers can generate a proper switch() jmp table.
> > > llvm work can be a follow up, of course, but the whole design needs
> > > to be thought through to cover all use cases.
> > >
> > > To summarize, here's what I'm proposing:
> > > - PTR_TO_INSN verifier regtype that can be passed to static_branch_update kfunc
> >
> > If we have a set of pointers to jump instructions, generated from
> > static_branch(foo) for same foo, then this makes more sense to
> > provide a
> >
> >     static_branch_update(foo)
> 
> For bpf_static_branch_update(&foo) kfunc there will be another
> ld_imm64 insn that points to that map.
> No need for new interface here either.
> 
> > (where foo is substituted by libbpf with a map fd of .insn_addrs.foo
> > on load). The same for userspace:
> >
> >     bpf(STATIC_BRANCH_UPDATE, .attrs={.map_fd=foo})
> 
> but for libbpf it would be nice to have a helper that knows
> this .insn_addrs section details.
> 
> > > - new map type (array) that holds objects that are PTR_TO_INSN for the verifier
> > > libbpf populates this array with indices of insn it wants to track.
> > > bpf prog needs to "use" this array, so prog/map association is built.
> > > - verifier/JIT update each PTR_TO_INSN during transformations.
> > > - static_branch(foo) macro emits goto_or_nop insn and adds 8 bytes
> > > into ".insn_addrs.foo" section with an ELF relocation that
> > > libbpf will convert into index.
> > >
> > > When compilers implement jmptables for switch(key) they will generate
> > > ".insn_addrs.uniq_suffix" sections and emit
> > > rX = ld_imm64 that_section
> > > rX += switch_key
> > > rY = *(u64 *)rX
> > > jmpx rY
> >
> > What are the types for rX and rY? I thought that we will need to do
> > smth like
> >
> >   rX = .insn_addrs.uniq_suffix[switch_key] /* rX has type PTR_TO_INSN */
> >   ...
> >   jmpx rX
> 
> right. That ".insn_addrs.uniq_suffix[switch_key]" C syntax is exactly:
>   rX = ld_imm64 that_section
>   rX += switch_key
> in assembly.
> 
> >
> > this can be done if for switch cases (or any other goto *label alike) we generate
> >
> >   rX = map_lookup_elem(.insn_addrs.uniq_suffix, index)
> >   jmpx rX
> 
> No need for function calls.
>   rX = ld_imm64 that_section
>   rX += switch_key
> 
> should work.
> 
> It works for global variables already, like:
>   rX = ld_imm64 global_data_array_map
>   rX += 8 // address of 2nd u64 in global data
Alexei Starovoitov March 14, 2024, 1:56 a.m. UTC | #9
On Wed, Mar 6, 2024 at 2:51 AM Anton Protopopov <aspsk@isovalent.com> wrote:
>
> On Tue, Feb 20, 2024 at 05:09:36PM -0800, Alexei Starovoitov wrote:
> > On Fri, Feb 16, 2024 at 6:04 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > >
> > > On Wed, Feb 14, 2024 at 10:48:26PM -0800, Alexei Starovoitov wrote:
> > > > On Thu, Feb 8, 2024 at 3:11 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > > >
> > > > > On Tue, Feb 06, 2024 at 06:26:12PM -0800, Alexei Starovoitov wrote:
> > > > > > On Tue, Feb 6, 2024 at 2:08 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > > > > >
> > > > > > > On Mon, Feb 05, 2024 at 05:09:51PM -0800, Alexei Starovoitov wrote:
> > > > > > > > On Fri, Feb 2, 2024 at 8:34 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > > > > > > >
> > > > > > > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > > > > > > index 4def3dde35f6..bdd6be718e82 100644
> > > > > > > > > --- a/include/linux/bpf.h
> > > > > > > > > +++ b/include/linux/bpf.h
> > > > > > > > > @@ -1524,6 +1524,13 @@ struct bpf_prog_aux {
> > > > > > > > >         };
> > > > > > > > >         /* an array of original indexes for all xlated instructions */
> > > > > > > > >         u32 *orig_idx;
> > > > > > > > > +       /* for every xlated instruction point to all generated jited
> > > > > > > > > +        * instructions, if allocated
> > > > > > > > > +        */
> > > > > > > > > +       struct {
> > > > > > > > > +               u32 off;        /* local offset in the jitted code */
> > > > > > > > > +               u32 len;        /* the total len of generated jit code */
> > > > > > > > > +       } *xlated_to_jit;
> > > > > > > >
> > > > > > > > Simply put Nack to this approach.
> > > > > > > >
> > > > > > > > Patches 2 and 3 add an extreme amount of memory overhead.
> > > > > > > >
> > > > > > > > As we discussed during office hours we need a "pointer to insn" concept
> > > > > > > > aka "index on insn".
> > > > > > > > The verifier would need to track that such things exist and adjust
> > > > > > > > indices of insns when patching affects those indices.
> > > > > > > >
> > > > > > > > For every static branch there will be one such "pointer to insn".
> > > > > > > > Different algorithms can be used to keep them correct.
> > > > > > > > The simplest 'lets iterate over all such pointers and update them'
> > > > > > > > during patch_insn() may even be ok to start.
> > > > > > > >
> > > > > > > > Such "pointer to insn" won't add any memory overhead.
> > > > > > > > When patch+jit is done all such "pointer to insn" are fixed value.
> > > > > > >
> > > > > > > Ok, thanks for looking, this makes sense.
> > > > > >
> > > > > > Before jumping into coding I think it would be good to discuss
> > > > > > the design first.
> > > > > > I'm thinking such "address of insn" will be similar to
> > > > > > existing "address of subprog",
> > > > > > which is encoded in ld_imm64 as BPF_PSEUDO_FUNC.
> > > > > > "address of insn" would be a bit more involved to track
> > > > > > during JIT and likely trivial during insn patching,
> > > > > > since we're already doing imm adjustment for pseudo_func.
> > > > > > So that part of design is straightforward.
> > > > > > Implementation in the kernel and libbpf can copy paste from pseudo_func too.
> > > > >
> > > > > To implement the "primitive version" of static branches, where the
> > > > > only API is `static_branch_update(xlated off, on/off)` the only
> > > > > requirement is to build `xlated -> jitted` mapping (which is done
> > > > > in JIT, after the verification). This can be done in a simplified
> > > > > version of this patch, without xlated->orig mapping and with
> > > > > xlated->jit mapping only done to gotol_or_nop instructions.
> > > >
> > > > yes. The array of insn->jit_addr sized with as many goto_or_nop-s as
> > > > the prog has will work for user space to flip them, but...
> > > >
> > > > > The "address of insn" appears when we want to provide a
> > > > > higher-level API where some object (in user-space or in kernel) keeps
> > > > > track of one or more gotol_or_nop instructions so that after the
> > > > > program load this controlling object has a list of xlated offsets.
> > > > > But this would be a follow-up to the initial static branches patch.
> > > >
> > > > this won't work as a follow-up,
> > > > since such an array won't work for a bpf prog that wants to flip branches.
> > > > There is nothing that associates static_branch name/id with
> > > > particular goto_or_nop.
> > > > There could be a kfunc that bpf prog calls, but it can only
> > > > flip all of such insns in the prog.
> > > > Unless we start encoding a special id inside goto_or_nop or other hacks.
> > > >
> > > > > > The question is whether such "address of insn" should be allowed
> > > > > > in the data section. If so, we need to brainstorm how to
> > > > > > do it cleanly.
> > > > > > We had various hacks for similar things in the past. Like prog_array.
> > > > > > Let's not repeat such mistakes.
> > > > >
> > > > > So, a data section is required for implementing jump tables? Like,
> > > > > to add a new PTR_TO_LABEL or PTR_TO_INSN data type, and a
> > > > > corresponding "ptr to insn" object for every occurrence of &&label,
> > > > > which will be adjusted during verification.
> > > > > Looks to me like this one doesn't require any more API than specifying
> > > > > a list of &&label occurrences on program load.
> > > > >
> > > > > For "static keys" though (a feature on top of this patch series) we
> > > > > need to have access to the corresponding set of adjusted pointers.
> > > > >
> > > > > Isn't this enough to add something like an array of
> > > > >
> > > > >   struct insn_ptr {
> > > > >       u32 type; /* LABEL, STATIC_BRANCH,... */
> > > > >       u32 insn_off; /* original offset on load */
> > > > >       union {
> > > > >           struct label {...};
> > > > >           struct st_branch { u32 key_id, ..};
> > > > >       };
> > > > >   };
> > > >
> > > > which I don't like because it hard codes static_branch needs into
> > > > insn->jit_addr association.
> > > > "address of insn" should be an individual building block without
> > > > bolted on parts.
> > > >
> > > > A data section with a set of such "address of insn"
> > > > can be a description of one static_branch.
> > > > There will be different ways to combine such building blocks.
> > > > For example:
> > > > static_branch(foo) can emit goto_or_nop into bpf code
> > > > and add "address of insn" into a section ".insn_addrs.foo".
> > > > This section is what libbpf and bpf prog will recognize as a set
> > > > of "address of insn" that can be passed into static_branch_update kfunc
> > > > or static_branch_update sys_bpf command.
> > > > The question is whether we need a new map type (array derivative)
> > > > to hold a set of "address of insn" or it can be a part of an existing
> > > > global data array.
> > > > A new map type is easier to reason about.
> > > > Notice how such a new map type is not a map type of static branches.
> > > > It's not a map type of goto_or_nop instructions either.
> > > >
> > > > At load time libbpf can populate this array with indices of insns
> > > > that the verifier and JIT need to track. Once JITed the array is readonly
> > > > for bpf prog and for user space.
> > >
> > > So this will be a map per .insn_addrs.X section (where X is a key or
> > > a pre-defined suffix for jump tables or indirect calls). And to tell
> > > the verifier about these maps we will need to pass an array of
> > >
> > >     struct {
> > >             u32 map_fd;
> > >             u32 type; /* static key, jump table, etc. */
> > >     }
> > >
> > > on program load. Is this correct?
> >
> >
> > Probably not.
> > Since we're going with a new map type (at least for the sake of this
> > discussion) it shouldn't need a new way to tell the verifier about it.
> > If .insn_addrs.jmp_table_A was a section generated for switch() statement
> > by llvm it will be created as a map by libbpf,
> > and there will be an ld_imm64 insn generated by llvm that points
> > to that map.
> > libbpf will populate ld_imm64 insn with map_fd, just like it does
> > for global data.
>
> I understand how this works for indirect jumps (and for the
> bpf_static_branch_update(&foo) kfunc), where we have a ld_imm64 with a
> map; however, I am still not sure how this will work for static
> branches, where we just have an 8-byte JA insn + an index in the
> corresponding ".insn_addrs.foo" section. How will the kernel know that the
> program is using the corresponding map which we create from
> ".insn_addrs.foo" without specifying this on program load?
>
> (Sorry for replying late, catching up after [simultaneous] pto &
> covid.)

sorry. ctx switch takes time.
libbpf can just bpf_prog_bind_map to associate this new map type
of ptr_to_insn with a program.
Or I misunderstood the question?
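
For context, a minimal user-space sketch of the existing libbpf call referred
to here (the fds are placeholders): binding makes the program hold a reference
on the map even though no instruction of the program refers to it directly.

  #include <bpf/bpf.h>

  /* prog_fd: an already loaded program, map_fd: the ptr_to_insn-style map */
  static int bind_insn_map(int prog_fd, int map_fd)
  {
          /* wraps the BPF_PROG_BIND_MAP command; returns 0 on success, <0 on error */
          return bpf_prog_bind_map(prog_fd, map_fd, NULL);
  }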
Anton Protopopov March 14, 2024, 9:03 a.m. UTC | #10
On Wed, Mar 13, 2024 at 06:56:34PM -0700, Alexei Starovoitov wrote:
> [...]
> 
> sorry. ctx switch takes time.
> libbpf can just bpf_prog_bind_map to associate this new map type
> of ptr_to_insn with a program.
> Or I misunderstood the question?

All ptr_to_insn maps are required during the verification. So
bpf_prog_bind_map can't be used as it requires an existing program.

What could work and what I am proposing is to pass a list of bound
maps in prog_load attributes. Then such maps can be used during the
verification. For normal maps

  prog = prog_load(attr={.bound_maps=maps})

will be semantically the same as

  prog = prog_load()
  bpf_prog_bind_map(prog, maps)
Alexei Starovoitov March 14, 2024, 5:07 p.m. UTC | #11
On Thu, Mar 14, 2024 at 2:11 AM Anton Protopopov <aspsk@isovalent.com> wrote:
>
> [...]
> >
> > sorry. ctx switch takes time.
> > libbpf can just bpf_prog_bind_map to associate this new map type
> > of ptr_to_insn with a program.
> > Or I misunderstood the question?
>
> All ptr_to_insn maps are required during the verification. So
> bpf_prog_bind_map can't be used as it requires an existing program.

I see.

> What could work and what I am proposing is to pass a list of bound
> maps in prog_load attributes. Then such maps can be used during the
> verification. For normal maps
>
>   prog = prog_load(attr={.bound_maps=maps})
>
> will be semantically the same as
>
>   prog = prog_load()
>   bpf_prog_bind_map(prog, maps)

Instead of a whole new api, let's make libbpf insert
ld_imm64 r0, map
as the first insn for this case for now.
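
For context, a sketch of what such a dummy instruction looks like at the
struct bpf_insn level (the helper name is made up): a ld_imm64 takes two
instruction slots, and BPF_PSEUDO_MAP_FD tells the kernel that imm holds a
map fd to be resolved at load time.

  #include <linux/bpf.h>

  /* "r0 = ld_imm64 map" - the insn pair libbpf would prepend to the program */
  static void emit_ld_map_fd(struct bpf_insn insn[2], int map_fd)
  {
          insn[0] = (struct bpf_insn) {
                  .code    = BPF_LD | BPF_DW | BPF_IMM,
                  .dst_reg = BPF_REG_0,
                  .src_reg = BPF_PSEUDO_MAP_FD,
                  .imm     = map_fd,
          };
          insn[1] = (struct bpf_insn) { .imm = 0 };   /* upper 32 bits of the imm64 */
  }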
Andrii Nakryiko March 14, 2024, 8:06 p.m. UTC | #12
On Thu, Mar 14, 2024 at 10:07 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> [...]
> >
> > All ptr_to_insn maps are required during the verification. So
> > bpf_prog_bind_map can't be used as it requires an existing program.
>
> I see.
>
> > What could work and what I am proposing is to pass a list of bound
> > maps in prog_load attributes. Then such maps can be used during the
> > verification. For normal maps
> >
> >   prog = prog_load(attr={.bound_maps=maps})
> >
> > will be semantically the same as
> >
> >   prog = prog_load()
> >   bpf_prog_bind_map(prog, maps)
>
> Instead of a whole new api, let's make libbpf insert
> ld_imm64 r0, map
> as the first insn for this case for now.

This sounds like a big hack and unnecessary complication, tbh. I'd
like to avoid having to do this in libbpf.

But I think we almost have this already supported. In BPF_PROG_LOAD
UAPI we have fd_array property, right? I think right now we lazily
refcnt referenced maps. But I think it makes total sense to just
define that bpf_prog will keep references to all BPF objects passed in
through fd_array, WDYT? Verifier will just iterate all provided FDs,
determine kind of BPF object it is (and reject unknown ones), and then
just take refcnts on each of them once. On prog free we'll just do the
same in reverse and be done with it.

It also can be used as a batch and single-step (in the sense it will
be done as part of program load instead of a separate command)
alternative for bpf_prog_bind_map(), I suppose.

Thoughts?
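
For reference, a sketch of how fd_array is passed today at the raw syscall
level (setup of the other attr fields is omitted). The eager
take-a-reference-on-everything semantics is only being proposed above; as
noted, current kernels resolve fd_array entries lazily as instructions
reference them.

  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/bpf.h>

  static int prog_load_with_fd_array(union bpf_attr *attr, const int *fds)
  {
          /* fds: map (and btf) fds addressed by index from the instructions */
          attr->fd_array = (__u64)(unsigned long)fds;
          return syscall(__NR_bpf, BPF_PROG_LOAD, attr, sizeof(*attr));
  }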
Alexei Starovoitov March 14, 2024, 9:41 p.m. UTC | #13
On Thu, Mar 14, 2024 at 1:06 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
> >
> > > What could work and what I am proposing is to pass a list of bound
> > > maps in prog_load attributes. Then such maps can be used during the
> > > verification. For normal maps
> > >
> > >   prog = prog_load(attr={.bound_maps=maps})
> > >
> > > will be semantically the same as
> > >
> > >   prog = prog_load()
> > >   bpf_prog_bind_map(prog, maps)
> >
> > Instead of a whole new api, let's make libbpf insert
> > ld_imm64 r0, map
> > as the first insn for this case for now.
>
> This sounds like a big hack and unnecessary complication, tbh. I'd
> like to avoid having to do this in libbpf.
>
> But I think we almost have this already supported. In BPF_PROG_LOAD
> UAPI we have fd_array property, right? I think right now we lazily
> refcnt referenced maps. But I think it makes total sense to just
> define that bpf_prog will keep references to all BPF objects passed in
> through fd_array, WDYT? Verifier will just iterate all provided FDs,
> determine kind of BPF object it is (and reject unknown ones), and then
> just take refcnts on each of them once. On prog free we'll just do the
> same in reverse and be done with it.
>
> It also can be used as a batch and single-step (in the sense it will
> be done as part of program load instead of a separate command)
> alternative for bpf_prog_bind_map(), I suppose.

fd_array approach also works. There can be map and btf fds in there.
I would only bind maps this way.
Anton Protopopov March 15, 2024, 1:11 p.m. UTC | #14
On Thu, Mar 14, 2024 at 02:41:36PM -0700, Alexei Starovoitov wrote:
> On Thu, Mar 14, 2024 at 1:06 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> > >
> > > > What could work and what I am proposing is to pass a list of bound
> > > > maps in prog_load attributes. Then such maps can be used during the
> > > > verification. For normal maps
> > > >
> > > >   prog = prog_load(attr={.bound_maps=maps})
> > > >
> > > > will be semantically the same as
> > > >
> > > >   prog = prog_load()
> > > >   bpf_prog_bind_map(prog, maps)
> > >
> > > Instead of a whole new api, let's make libbpf insert
> > > ld_imm64 r0, map
> > > as the first insn for this case for now.
> >
> > This sounds like a big hack and unnecessary complication, tbh. I'd
> > like to avoid having to do this in libbpf.
> >
> > But I think we almost have this already supported. In BPF_PROG_LOAD
> > UAPI we have fd_array property, right? I think right now we lazily
> > refcnt referenced maps. But I think it makes total sense to just
> > define that bpf_prog will keep references to all BPF objects passed in
> > through fd_array, WDYT? Verifier will just iterate all provided FDs,
> > determine kind of BPF object it is (and reject unknown ones), and then
> > just take refcnts on each of them once. On prog free we'll just do the
> > same in reverse and be done with it.
> >
> > It also can be used as a batch and single-step (in the sense it will
> > be done as part of program load instead of a separate command)
> > alternative for bpf_prog_bind_map(), I suppose.
> 
> fd_array approach also works. There can be map and btf fds in there.
> I would only bind maps this way.

Ok, thanks, this approach looks good
Andrii Nakryiko March 15, 2024, 4:32 p.m. UTC | #15
On Thu, Mar 14, 2024 at 2:41 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Thu, Mar 14, 2024 at 1:06 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> > >
> > > > What could work and what I am proposing is to pass a list of bound
> > > > maps in prog_load attributes. Then such maps can be used during the
> > > > verification. For normal maps
> > > >
> > > >   prog = prog_load(attr={.bound_maps=maps})
> > > >
> > > > will be semantically the same as
> > > >
> > > >   prog = prog_load()
> > > >   bpf_prog_bind_map(prog, maps)
> > >
> > > Instead of a whole new api, let's make libbpf insert
> > > ld_imm64 r0, map
> > > as the first insn for this case for now.
> >
> > This sounds like a big hack and unnecessary complication, tbh. I'd
> > like to avoid having to do this in libbpf.
> >
> > But I think we almost have this already supported. In BPF_PROG_LOAD
> > UAPI we have fd_array property, right? I think right now we lazily
> > refcnt referenced maps. But I think it makes total sense to just
> > define that bpf_prog will keep references to all BPF objects passed in
> > through fd_array, WDYT? Verifier will just iterate all provided FDs,
> > determine kind of BPF object it is (and reject unknown ones), and then
> > just take refcnts on each of them once. On prog free we'll just do the
> > same in reverse and be done with it.
> >
> > It also can be used as a batch and single-step (in the sense it will
> > be done as part of program load instead of a separate command)
> > alternative for bpf_prog_bind_map(), I suppose.
>
> fd_array approach also works. There can be map and btf fds in there.
> I would only bind maps this way.

Any reason why we should have non-uniform behavior between maps and
BTFs? Seems more error-prone to have a difference here, tbh.
Alexei Starovoitov March 15, 2024, 5:22 p.m. UTC | #16
On Fri, Mar 15, 2024 at 9:33 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Thu, Mar 14, 2024 at 2:41 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Thu, Mar 14, 2024 at 1:06 PM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > > >
> > > > > What could work and what I am proposing is to pass a list of bound
> > > > > maps in prog_load attributes. Then such maps can be used during the
> > > > > verification. For normal maps
> > > > >
> > > > >   prog = prog_load(attr={.bound_maps=maps})
> > > > >
> > > > > will be semantically the same as
> > > > >
> > > > >   prog = prog_load()
> > > > >   bpf_prog_bind_map(prog, maps)
> > > >
> > > > Instead of a whole new api, let's make libbpf insert
> > > > ld_imm64 r0, map
> > > > as the first insn for this case for now.
> > >
> > > This sounds like a big hack and unnecessary complication, tbh. I'd
> > > like to avoid having to do this in libbpf.
> > >
> > > But I think we almost have this already supported. In BPF_PROG_LOAD
> > > UAPI we have fd_array property, right? I think right now we lazily
> > > refcnt referenced maps. But I think it makes total sense to just
> > > define that bpf_prog will keep references to all BPF objects passed in
> > > through fd_array, WDYT? Verifier will just iterate all provided FDs,
> > > determine kind of BPF object it is (and reject unknown ones), and then
> > > just take refcnts on each of them once. On prog free we'll just do the
> > > same in reverse and be done with it.
> > >
> > > It also can be used as a batch and single-step (in the sense it will
> > > be done as part of program load instead of a separate command)
> > > alternative for bpf_prog_bind_map(), I suppose.
> >
> > fd_array approach also works. There can be map and btf fds in there.
> > I would only bind maps this way.
>
> Any reason why we should have non-uniform behavior between maps and
> BTFs? Seems more error-prone to have a difference here, tbh.

because maps are only held in used_maps while btfs are held
in used_btfs and in kfunc_btf_tab.
And looking at btf_fd it's not clear whether it will be in ld_imm64
and hence used_btf or it's kfunc and will be in kfunc_btf_tab.
All btfs can be stored unconditionally in used_btf,
but that's unnecessary refcnt inc and module_get too.
Doesn't hurt, but makes it harder to reason about everything.
At least to me.
I guess if the whole refcnt of maps and btfs is factored out
and cleaned up into uniform used_maps/used_btf then it's ok,
but fd_array is optional. So it feels messy.
Andrii Nakryiko March 15, 2024, 5:29 p.m. UTC | #17
On Fri, Mar 15, 2024 at 10:23 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Fri, Mar 15, 2024 at 9:33 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Thu, Mar 14, 2024 at 2:41 PM Alexei Starovoitov
> > <alexei.starovoitov@gmail.com> wrote:
> > >
> > > On Thu, Mar 14, 2024 at 1:06 PM Andrii Nakryiko
> > > <andrii.nakryiko@gmail.com> wrote:
> > > > >
> > > > > > What could work and what I am proposing is to pass a list of bound
> > > > > > maps in prog_load attributes. Then such maps can be used during the
> > > > > > verification. For normal maps
> > > > > >
> > > > > >   prog = prog_load(attr={.bound_maps=maps})
> > > > > >
> > > > > > will be semantically the same as
> > > > > >
> > > > > >   prog = prog_load()
> > > > > >   bpf_prog_bind_map(prog, maps)
> > > > >
> > > > > Instead of a whole new api, let's make libbpf insert
> > > > > ld_imm64 r0, map
> > > > > as the first insn for this case for now.
> > > >
> > > > This sounds like a big hack and unnecessary complication, tbh. I'd
> > > > like to avoid having to do this in libbpf.
> > > >
> > > > But I think we almost have this already supported. In BPF_PROG_LOAD
> > > > UAPI we have fd_array property, right? I think right now we lazily
> > > > refcnt referenced maps. But I think it makes total sense to just
> > > > define that bpf_prog will keep references to all BPF objects passed in
> > > > through fd_array, WDYT? Verifier will just iterate all provided FDs,
> > > > determine kind of BPF object it is (and reject unknown ones), and then
> > > > just take refcnts on each of them once. On prog free we'll just do the
> > > > same in reverse and be done with it.
> > > >
> > > > It also can be used as a batch and single-step (in the sense it will
> > > > be done as part of program load instead of a separate command)
> > > > alternative for bpf_prog_bind_map(), I suppose.
> > >
> > > fd_array approach also works. There can be map and btf fds in there.
> > > I would only bind maps this way.
> >
> > Any reason why we should have non-uniform behavior between maps and
> > BTFs? Seems more error-prone to have a difference here, tbh.
>
> because maps are only held in used_maps while btfs are held
> in used_btfs and in kfunc_btf_tab.
> And looking at btf_fd it's not clear whether it will be in ld_imm64
> and hence used_btf or it's kfunc and will be in kfunc_btf_tab.
> All btfs can be stored unconditionally in used_btf,
> but that's unnecessary refcnt inc and module_get too.
> Doesn't hurt, but makes it harder to reason about everything.
> At least to me.
> I guess if the whole refcnt of maps and btfs is factored out
> and cleaned up into uniform used_maps/used_btf then it's ok,
> but fd_array is optional. So it feels messy.

Yeah, I was imagining that we'd iterate fd_array (if it's provided)
and add any map/btf into used_{map,btf}, refcnt. Then during
verification we'll just know that any referenced map or btf from
fd_array is already refcounted, so we wouldn't do it there. But I
haven't looked at kfunc_btf_tab, if that's causing some troubles with
this approach, then it's fine by me.

The assumption was that a uniform approach will be less messy and
simplify code and reasoning about the behavior, not the other way. If
that's not the case we can do it just for maps for now.
Anton Protopopov March 28, 2024, 4:37 p.m. UTC | #18
On 24/03/15 10:29, Andrii Nakryiko wrote:
> [...]
> > > > fd_array approach also works. There can be map and btf fds in there.
> > > > I would only bind maps this way.
> > >
> > > Any reason why we should have non-uniform behavior between maps and
> > > BTFs? Seems more error-prone to have a difference here, tbh.
> >
> > because maps are only held in used_maps while btfs are held
> > in used_btfs and in kfunc_btf_tab.
> > And looking at btf_fd it's not clear whether it will be in ld_imm64
> > and hence used_btf or it's kfunc and will be in kfunc_btf_tab.
> > All btfs can be stored unconditionally in used_btf,
> > but that's unnecessary refcnt inc and module_get too.
> > Doesn't hurt, but makes it harder to reason about everything.
> > At least to me.
> > I guess if the whole refcnt of maps and btfs is factored out
> > and cleaned up into uniform used_maps/used_btf then it's ok,
> > but fd_array is optional. So it feels messy.
> 
> Yeah, I was imagining that we'd iterate fd_array (if it's provided)
> and add any map/btf into used_{map,btf}, refcnt. Then during
> verification we'll just know that any referenced map or btf from
> fd_array is already refcounted, so we wouldn't do it there. But I
> haven't looked at kfunc_btf_tab, if that's causing some troubles with
> this approach, then it's fine by me.
> 
> The assumption was that a uniform approach will be less messy and
> simplify code and reasoning about the behavior, not the other way. If
> that's not the case we can do it just for maps for now.

fd_array is sent in attrs without specifying its size, so individual
fds are copied to the kernel as needed. Therefore, it is not possible
to pass extra fds (not used directly by the program) without changing
the API. So either a pair of new fields, say (fd_extra, fd_extra_len),
or just another field, fd_array_len, should be added. Which sounds better?
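
To make the single-new-field option concrete, a sketch of how the
BPF_PROG_LOAD part of union bpf_attr could be extended (this is the proposal
under discussion, not existing UAPI; only the relevant fields are shown):

  struct {                        /* anonymous struct used by BPF_PROG_LOAD */
          /* ... existing fields ... */
          __aligned_u64   fd_array;       /* existing: array of fds, size not given */
          __u32           fd_array_cnt;   /* proposed: number of entries in fd_array */
  };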
Andrii Nakryiko March 29, 2024, 10:44 p.m. UTC | #19
On Thu, Mar 28, 2024 at 9:37 AM Anton Protopopov <aspsk@isovalent.com> wrote:
>
> [...]
> >
> > Yeah, I was imagining that we'd iterate fd_array (if it's provided)
> > and add any map/btf into used_{map,btf}, refcnt. Then during
> > verification we'll just know that any referenced map or btf from
> > fd_array is already refcounted, so we wouldn't do it there. But I
> > haven't looked at kfunc_btf_tab, if that's causing some troubles with
> > this approach, then it's fine by me.
> >
> > The assumption was that a uniform approach will be less messy and
> > simplify code and reasoning about the behavior, not the other way. If
> > that's not the case we can do it just for maps for now.
>
> fd_array is sent in attrs without specifying its size, so individual
> fds are copied to the kernel as needed. Therefore, it is not possible
> to pass extra fds (not used directly by the program) without changing
> the API. So either a pair of new fields, say (fd_extra, fd_extra_len),
> or just another field, fd_array_len, should be added. Which sounds better?

I'd say we should extend fd_array with (optional) fd_array_cnt and
have the following logic:

  - if fd_array != NULL and fd_array_cnt == 0, then only maps/btfs
referenced from BPF program instructions should be refcnt/fetched;
  - if fd_array != NULL and fd_array_cnt > 0, we can eagerly fetch all
FDs and refcnt, as discussed. If any instruction uses the fd index
which is >= fd_array_cnt, that's an error (the user didn't provide a
big enough FD array and we can now detect this).

WDYT?
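
To make the proposed semantics concrete, here is a rough userspace sketch.
It is illustrative only: fd_array_cnt is still just a proposal at this point
and does not exist in the UAPI, so it is shown commented out, and
prog_load_with_bound_maps is a made-up helper name. Only fd_array and the
other fields used below exist in the current bpf() syscall API.

/* Illustrative sketch: fd_array_cnt is a proposed, not yet existing field. */
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int prog_load_with_bound_maps(const struct bpf_insn *insns,
				     __u32 insn_cnt,
				     const int *fds, __u32 fd_cnt)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_XDP;
	attr.insns = (__u64)(unsigned long)insns;
	attr.insn_cnt = insn_cnt;
	attr.license = (__u64)(unsigned long)"GPL";

	/* Existing field: instructions may refer to these fds by index
	 * (e.g. ld_imm64 with BPF_PSEUDO_MAP_IDX), and today only the
	 * objects actually referenced get refcounted.
	 */
	attr.fd_array = (__u64)(unsigned long)fds;

	/* Proposed field: with a non-zero count the kernel would eagerly
	 * take a reference on every fd in the array, so objects not used
	 * by any instruction stay bound to the program, and an instruction
	 * index >= fd_array_cnt becomes a verification error.
	 *
	 * attr.fd_array_cnt = fd_cnt;
	 */
	(void)fd_cnt;

	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
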
Anton Protopopov April 1, 2024, 9:47 a.m. UTC | #20
On Fri, Mar 29, 2024 at 03:44:04PM -0700, Andrii Nakryiko wrote:
> [...]
> 
> I'd say we should extend fd_array with (optional) fd_array_cnt and
> have the following logic:
> 
>   - if fd_array != NULL and fd_array_cnt == 0, then only maps/btfs
> referenced from BPF program instructions should be refcnt/fetched;
>   - if fd_array != NULL and fd_array_cnt > 0, we can eagerly fetch all
> FDs and refcnt, as discussed. If any instruction uses the fd index
> which is >= fd_array_cnt, that's an error (the user didn't provide a
> big enough FD array and we can now detect this).
> 
> WDYT?

Yes, thanks, I was thinking along the same lines. I will use this API.
diff mbox series

Patch

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e1390d1e331b..a80b8c1e7afe 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1186,6 +1186,7 @@  static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 		const s32 imm32 = insn->imm;
 		u32 dst_reg = insn->dst_reg;
 		u32 src_reg = insn->src_reg;
+		int adjust_off = 0;
 		u8 b2 = 0, b3 = 0;
 		u8 *start_of_ldx;
 		s64 jmp_offset;
@@ -1290,6 +1291,7 @@  static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 			emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
 			insn++;
 			i++;
+			adjust_off = 1;
 			break;
 
 			/* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */
@@ -2073,6 +2075,18 @@  st:			if (is_imm8(insn->off))
 				return -EFAULT;
 			}
 			memcpy(rw_image + proglen, temp, ilen);
+
+			if (bpf_prog->aux->xlated_to_jit) {
+				u32 func_idx = bpf_prog->aux->func_idx;
+				int off;
+
+				off = i - 1 - adjust_off;
+				if (func_idx)
+					off += bpf_prog->aux->func_info[func_idx].insn_off;
+
+				bpf_prog->aux->xlated_to_jit[off].off = proglen;
+				bpf_prog->aux->xlated_to_jit[off].len = ilen;
+			}
 		}
 		proglen += ilen;
 		addrs[i] = proglen;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4def3dde35f6..bdd6be718e82 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1524,6 +1524,13 @@  struct bpf_prog_aux {
 	};
 	/* an array of original indexes for all xlated instructions */
 	u32 *orig_idx;
+	/* for every xlated instruction point to all generated jited
+	 * instructions, if allocated
+	 */
+	struct {
+		u32 off;	/* local offset in the jitted code */
+		u32 len;	/* the total len of generated jit code */
+	} *xlated_to_jit;
 };
 
 struct bpf_prog {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b929523444b0..c874f354c290 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6493,6 +6493,11 @@  struct sk_reuseport_md {
 
 #define BPF_TAG_SIZE	8
 
+struct bpf_xlated_to_jit {
+	__u32 off;
+	__u32 len;
+};
+
 struct bpf_prog_info {
 	__u32 type;
 	__u32 id;
@@ -6535,6 +6540,8 @@  struct bpf_prog_info {
 	__u32 attach_btf_id;
 	__u32 orig_idx_len;
 	__aligned_u64 orig_idx;
+	__u32 xlated_to_jit_len;
+	__aligned_u64 xlated_to_jit;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f0086925b810..8e99c1563a7f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -493,6 +493,26 @@  static int bpf_prog_realloc_orig_idx(struct bpf_prog *prog, u32 off, u32 patch_l
 	return 0;
 }
 
+static void adjust_func_info(struct bpf_prog *prog, u32 off, u32 insn_delta)
+{
+	int i;
+
+	if (insn_delta == 0)
+		return;
+
+	for (i = 0; i < prog->aux->func_info_cnt; i++) {
+		if (prog->aux->func_info[i].insn_off <= off)
+			continue;
+		prog->aux->func_info[i].insn_off += insn_delta;
+	}
+}
+
+static void bpf_prog_adj_orig_idx_after_remove(struct bpf_prog *prog, u32 off, u32 len)
+{
+	memmove(prog->aux->orig_idx + off, prog->aux->orig_idx + off + len,
+		sizeof(*prog->aux->orig_idx) * (prog->len - off));
+}
+
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len)
 {
@@ -554,6 +574,7 @@  struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 	BUG_ON(bpf_adj_branches(prog_adj, off, off + 1, off + len, false));
 
 	bpf_adj_linfo(prog_adj, off, insn_delta);
+	adjust_func_info(prog_adj, off, insn_delta);
 
 	return prog_adj;
 }
@@ -574,6 +595,8 @@  int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
 	if (err)
 		return err;
 
+	bpf_prog_adj_orig_idx_after_remove(prog, off, cnt);
+
 	return 0;
 }
 
@@ -2807,6 +2830,7 @@  static void bpf_prog_free_deferred(struct work_struct *work)
 		bpf_jit_free(aux->prog);
 	}
 	kfree(aux->orig_idx);
+	kfree(aux->xlated_to_jit);
 }
 
 void bpf_prog_free(struct bpf_prog *fp)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 172bf8d3aef2..36b8fdcfba75 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4571,6 +4571,31 @@  static int bpf_prog_get_info_by_fd(struct file *file,
 			return -EFAULT;
 	}
 
+	ulen = info.xlated_to_jit_len;
+	if (prog->aux->xlated_to_jit)
+		info.xlated_to_jit_len = prog->len * sizeof(struct bpf_xlated_to_jit);
+	else
+		info.xlated_to_jit_len = 0;
+	if (info.xlated_to_jit_len && ulen) {
+		struct bpf_xlated_to_jit *xlated_to_jit;
+		int i;
+
+		xlated_to_jit = kzalloc(info.xlated_to_jit_len, GFP_KERNEL);
+		if (!xlated_to_jit)
+			return -ENOMEM;
+		for (i = 0; i < prog->len; i++) {
+			xlated_to_jit[i].off = prog->aux->xlated_to_jit[i].off;
+			xlated_to_jit[i].len = prog->aux->xlated_to_jit[i].len;
+		}
+		if (copy_to_user(u64_to_user_ptr(info.xlated_to_jit),
+				 xlated_to_jit,
+				 min_t(u32, info.xlated_to_jit_len, ulen))) {
+			kfree(xlated_to_jit);
+			return -EFAULT;
+		}
+		kfree(xlated_to_jit);
+	}
+
 	if (bpf_prog_is_offloaded(prog->aux)) {
 		err = bpf_prog_offload_info_fill(&info, prog);
 		if (err)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2dc48f88f43c..270dc0a26d03 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -18974,6 +18974,7 @@  static int jit_subprogs(struct bpf_verifier_env *env)
 		func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
 		if (!i)
 			func[i]->aux->exception_boundary = env->seen_exception;
+		func[i]->aux->xlated_to_jit = prog->aux->xlated_to_jit;
 		func[i] = bpf_int_jit_compile(func[i]);
 		if (!func[i]->jited) {
 			err = -ENOTSUPP;
@@ -20832,6 +20833,7 @@  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	int len, ret = -EINVAL, err;
 	u32 log_true_size;
 	bool is_priv;
+	u32 size;
 
 	/* no program is valid */
 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
@@ -20981,6 +20983,13 @@  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 								     : false;
 	}
 
+	if (ret == 0) {
+		size = array_size(sizeof(*env->prog->aux->xlated_to_jit), env->prog->len);
+		env->prog->aux->xlated_to_jit = kzalloc(size, GFP_KERNEL);
+		if (!env->prog->aux->xlated_to_jit)
+			ret = -ENOMEM;
+	}
+
 	if (ret == 0)
 		ret = fixup_call_args(env);
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b929523444b0..c874f354c290 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6493,6 +6493,11 @@  struct sk_reuseport_md {
 
 #define BPF_TAG_SIZE	8
 
+struct bpf_xlated_to_jit {
+	__u32 off;
+	__u32 len;
+};
+
 struct bpf_prog_info {
 	__u32 type;
 	__u32 id;
@@ -6535,6 +6540,8 @@  struct bpf_prog_info {
 	__u32 attach_btf_id;
 	__u32 orig_idx_len;
 	__aligned_u64 orig_idx;
+	__u32 xlated_to_jit_len;
+	__aligned_u64 xlated_to_jit;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
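
For reference, here is a rough consumer sketch (not part of the patch above)
showing how the new bpf_prog_info fields could be read back through
BPF_OBJ_GET_INFO_BY_FD on a kernel with this patch applied. It assumes the
patched UAPI headers; get_prog_info and dump_xlated_to_jit are made-up local
names, and error handling is trimmed for brevity. The two-call pattern
(query the size, then pass a buffer) mirrors how other variable-length
prog_info arrays are queried.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int get_prog_info(int prog_fd, struct bpf_prog_info *info, __u32 len)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.info.bpf_fd = prog_fd;
	attr.info.info_len = len;
	attr.info.info = (__u64)(unsigned long)info;

	return syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
}

int dump_xlated_to_jit(int prog_fd)
{
	struct bpf_prog_info info;
	struct bpf_xlated_to_jit *xl;
	__u32 i, n;

	/* First call: xlated_to_jit_len == 0, so the kernel only reports
	 * how many bytes the xlated->jit mapping takes.
	 */
	memset(&info, 0, sizeof(info));
	if (get_prog_info(prog_fd, &info, sizeof(info)))
		return -1;
	if (!info.xlated_to_jit_len)
		return 0;	/* mapping was not collected for this prog */

	n = info.xlated_to_jit_len / sizeof(*xl);
	xl = calloc(n, sizeof(*xl));
	if (!xl)
		return -1;

	/* Second call: pass a buffer big enough for the reported size. */
	memset(&info, 0, sizeof(info));
	info.xlated_to_jit_len = n * sizeof(*xl);
	info.xlated_to_jit = (__u64)(unsigned long)xl;
	if (get_prog_info(prog_fd, &info, sizeof(info))) {
		free(xl);
		return -1;
	}

	for (i = 0; i < n; i++)
		printf("xlated insn %u -> jit offset %u, len %u\n",
		       i, xl[i].off, xl[i].len);

	free(xl);
	return 0;
}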