diff mbox series

[RESEND,bpf,2/2] x86/bpf: Fix IP for relocating call depth accounting

Message ID 20240329094906.18147-3-ubizjak@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series x86/bpf: Fixes for the BPF JIT with retbleed=stuff | expand

Checks

Context Check Description
bpf/vmtest-bpf-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 15338 this patch: 15338
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 19 maintainers not CCed: john.fastabend@gmail.com andrii@kernel.org kpsingh@kernel.org mingo@redhat.com jgross@suse.com martin.lau@linux.dev tglx@linutronix.de dsahern@kernel.org eddyz87@gmail.com jpoimboe@kernel.org sdf@google.com bp@alien8.de yonghong.song@linux.dev dave.hansen@linux.intel.com peterz@infradead.org hpa@zytor.com haoluo@google.com jolsa@kernel.org song@kernel.org
netdev/build_clang success Errors and warnings before: 2129 this patch: 2129
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 16502 this patch: 16502
netdev/checkpatch warning CHECK: extern prototypes should be avoided in .h files
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization

Commit Message

Uros Bizjak March 29, 2024, 9:46 a.m. UTC
From: Joan Bruguera Micó <joanbrugueram@gmail.com>

The recently introduced support for %rip-relative relocations in the
call thunk template assumes that the code is being patched in-place,
so the destination of the relocation matches the address of the code.
This is not true for the call depth accounting emitted by the BPF JIT,
so the calculated address is wrong and usually causes a page fault.

Pass the destination IP when the BPF JIT emits call depth accounting.

Fixes: 17bce3b2ae2d ("x86/callthunks: Handle %rip-relative relocations in call thunk template")
Signed-off-by: Joan Bruguera Micó <joanbrugueram@gmail.com>
Reviewed-by: Uros Bizjak <ubizjak@gmail.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
---
 arch/x86/include/asm/alternative.h |  4 ++--
 arch/x86/kernel/callthunks.c       |  4 ++--
 arch/x86/net/bpf_jit_comp.c        | 19 ++++++++-----------
 3 files changed, 12 insertions(+), 15 deletions(-)

Comments

Alexei Starovoitov March 29, 2024, 9:53 p.m. UTC | #1
On Fri, Mar 29, 2024 at 2:49 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> From: Joan Bruguera Micó <joanbrugueram@gmail.com>
>
> The recently introduced support for %rip-relative relocations in the
> call thunk template assumes that the code is being patched in-place,
> so the destination of the relocation matches the address of the code.
> This is not true for the call depth accounting emitted by the BPF JIT,
> so the calculated address is wrong and usually causes a page fault.

Could you share the link to what this 'rip-relative' relocation is ?

> Pass the destination IP when the BPF JIT emits call depth accounting.
>
> Fixes: 17bce3b2ae2d ("x86/callthunks: Handle %rip-relative relocations in call thunk template")

Ohh. It's buried inside that patch.
Pls make commit log a bit more clear that that commit 17bce3b2ae2d
broke x86_call_depth_emit_accounting logic.

> Signed-off-by: Joan Bruguera Micó <joanbrugueram@gmail.com>
> Reviewed-by: Uros Bizjak <ubizjak@gmail.com>
> Acked-by: Ingo Molnar <mingo@kernel.org>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> ---
>  arch/x86/include/asm/alternative.h |  4 ++--
>  arch/x86/kernel/callthunks.c       |  4 ++--
>  arch/x86/net/bpf_jit_comp.c        | 19 ++++++++-----------
>  3 files changed, 12 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
> index fcd20c6dc7f9..67b68d0d17d1 100644
> --- a/arch/x86/include/asm/alternative.h
> +++ b/arch/x86/include/asm/alternative.h
> @@ -117,7 +117,7 @@ extern void callthunks_patch_builtin_calls(void);
>  extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
>                                           struct module *mod);
>  extern void *callthunks_translate_call_dest(void *dest);
> -extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
> +extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip);
>  #else
>  static __always_inline void callthunks_patch_builtin_calls(void) {}
>  static __always_inline void
> @@ -128,7 +128,7 @@ static __always_inline void *callthunks_translate_call_dest(void *dest)
>         return dest;
>  }
>  static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
> -                                                         void *func)
> +                                                         void *func, void *ip)
>  {
>         return 0;
>  }
> diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
> index 30335182b6b0..e92ff0c11db8 100644
> --- a/arch/x86/kernel/callthunks.c
> +++ b/arch/x86/kernel/callthunks.c
> @@ -314,7 +314,7 @@ static bool is_callthunk(void *addr)
>         return !bcmp(pad, insn_buff, tmpl_size);
>  }
>
> -int x86_call_depth_emit_accounting(u8 **pprog, void *func)
> +int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
>  {
>         unsigned int tmpl_size = SKL_TMPL_SIZE;
>         u8 insn_buff[MAX_PATCH_LEN];
> @@ -327,7 +327,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
>                 return 0;
>
>         memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
> -       apply_relocation(insn_buff, tmpl_size, *pprog,
> +       apply_relocation(insn_buff, tmpl_size, ip,

Did the logic inside apply_relocation() change to have
a new meaning for 'dest' and 'src'?
Answering to myself... yes. in that commit.
Better commit log would have made the code review so much easier.

>                          skl_call_thunk_template, tmpl_size);
>
>         memcpy(*pprog, insn_buff, tmpl_size);
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 09f7dc9d4d65..f2e8769f5eee 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -481,7 +481,7 @@ static int emit_rsb_call(u8 **pprog, void *func, void *ip)
>  {
>         void *adjusted_ip;
>         OPTIMIZER_HIDE_VAR(func);
> -       adjusted_ip = ip + x86_call_depth_emit_accounting(pprog, func);
> +       adjusted_ip = ip + x86_call_depth_emit_accounting(pprog, func, ip);

Now I see why you added extra var in the previous patch.
Should have mentioned it in the commit log.

>         return emit_patch(pprog, func, adjusted_ip, 0xE8);
>  }
>
> @@ -1973,20 +1973,17 @@ st:                     if (is_imm8(insn->off))
>
>                         /* call */
>                 case BPF_JMP | BPF_CALL: {
> -                       int offs;
> +                       u8 *ip = image + addrs[i - 1];
>
>                         func = (u8 *) __bpf_call_base + imm32;
>                         if (tail_call_reachable) {
>                                 RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
> -                               if (!imm32)
> -                                       return -EINVAL;
> -                               offs = 7 + x86_call_depth_emit_accounting(&prog, func);
> -                       } else {
> -                               if (!imm32)
> -                                       return -EINVAL;
> -                               offs = x86_call_depth_emit_accounting(&prog, func);
> +                               ip += 7;
>                         }
> -                       if (emit_call(&prog, func, image + addrs[i - 1] + offs))
> +                       if (!imm32)
> +                               return -EINVAL;
> +                       ip += x86_call_depth_emit_accounting(&prog, func, ip);
> +                       if (emit_call(&prog, func, ip))
>                                 return -EINVAL;
>                         break;
>                 }
> @@ -2836,7 +2833,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>                  * Direct-call fentry stub, as such it needs accounting for the
>                  * __fentry__ call.
>                  */
> -               x86_call_depth_emit_accounting(&prog, NULL);
> +               x86_call_depth_emit_accounting(&prog, NULL, image);

Overall it all makes sense.
Pls respin with more precise commit logs.
Uros Bizjak March 30, 2024, 9:01 a.m. UTC | #2
On Fri, Mar 29, 2024 at 10:53 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Fri, Mar 29, 2024 at 2:49 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > From: Joan Bruguera Micó <joanbrugueram@gmail.com>
> >
> > The recently introduced support for %rip-relative relocations in the
> > call thunk template assumes that the code is being patched in-place,
> > so the destination of the relocation matches the address of the code.
> > This is not true for the call depth accounting emitted by the BPF JIT,
> > so the calculated address is wrong and usually causes a page fault.
>
> Could you share the link to what this 'rip-relative' relocation is ?

Please see the "RIP relative addressing" section in [1].

[1] https://compas.cs.stonybrook.edu/~nhonarmand/courses/sp17/cse506/ref/assembly.html

In our case:

The callthunks patching creates a call thunk template in the .rodata
section (please see arch/x86/kernel/callthunks.c)  that is later
copied to the .text section at the correct place. The template uses
X86_call_depth in the pcpu_hot structure. Previously, the template
used absolute location for X86_call_depth and the linker resolved the
address in the template to this absolute location. There is no issue
when this template is copied to the various places in the .text
section.

When we want to use PC relative relocations (to reduce the code size),
then the linker calculates the address of the variable in the template
according to the PC in the .rodata section. If we want to copy the
template to its final location, then the address of X86_call_depth,
relative to the PC, has to be adjusted, as explained in
arch/x86/kernel/alternative.c, in the comment above apply_reloc_n
macro.

Uros.

> > Pass the destination IP when the BPF JIT emits call depth accounting.
> >
> > Fixes: 17bce3b2ae2d ("x86/callthunks: Handle %rip-relative relocations in call thunk template")
>
> Ohh. It's buried inside that patch.
> Pls make commit log a bit more clear that that commit 17bce3b2ae2d
> broke x86_call_depth_emit_accounting logic.
>
> > Signed-off-by: Joan Bruguera Micó <joanbrugueram@gmail.com>
> > Reviewed-by: Uros Bizjak <ubizjak@gmail.com>
> > Acked-by: Ingo Molnar <mingo@kernel.org>
> > Cc: Alexei Starovoitov <ast@kernel.org>
> > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > ---
> >  arch/x86/include/asm/alternative.h |  4 ++--
> >  arch/x86/kernel/callthunks.c       |  4 ++--
> >  arch/x86/net/bpf_jit_comp.c        | 19 ++++++++-----------
> >  3 files changed, 12 insertions(+), 15 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
> > index fcd20c6dc7f9..67b68d0d17d1 100644
> > --- a/arch/x86/include/asm/alternative.h
> > +++ b/arch/x86/include/asm/alternative.h
> > @@ -117,7 +117,7 @@ extern void callthunks_patch_builtin_calls(void);
> >  extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
> >                                           struct module *mod);
> >  extern void *callthunks_translate_call_dest(void *dest);
> > -extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
> > +extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip);
> >  #else
> >  static __always_inline void callthunks_patch_builtin_calls(void) {}
> >  static __always_inline void
> > @@ -128,7 +128,7 @@ static __always_inline void *callthunks_translate_call_dest(void *dest)
> >         return dest;
> >  }
> >  static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
> > -                                                         void *func)
> > +                                                         void *func, void *ip)
> >  {
> >         return 0;
> >  }
> > diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
> > index 30335182b6b0..e92ff0c11db8 100644
> > --- a/arch/x86/kernel/callthunks.c
> > +++ b/arch/x86/kernel/callthunks.c
> > @@ -314,7 +314,7 @@ static bool is_callthunk(void *addr)
> >         return !bcmp(pad, insn_buff, tmpl_size);
> >  }
> >
> > -int x86_call_depth_emit_accounting(u8 **pprog, void *func)
> > +int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
> >  {
> >         unsigned int tmpl_size = SKL_TMPL_SIZE;
> >         u8 insn_buff[MAX_PATCH_LEN];
> > @@ -327,7 +327,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
> >                 return 0;
> >
> >         memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
> > -       apply_relocation(insn_buff, tmpl_size, *pprog,
> > +       apply_relocation(insn_buff, tmpl_size, ip,
>
> Did the logic inside apply_relocation() change to have
> a new meaning for 'dest' and 'src'?
> Answering to myself... yes. in that commit.
> Better commit log would have made the code review so much easier.
>
> >                          skl_call_thunk_template, tmpl_size);
> >
> >         memcpy(*pprog, insn_buff, tmpl_size);
> > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> > index 09f7dc9d4d65..f2e8769f5eee 100644
> > --- a/arch/x86/net/bpf_jit_comp.c
> > +++ b/arch/x86/net/bpf_jit_comp.c
> > @@ -481,7 +481,7 @@ static int emit_rsb_call(u8 **pprog, void *func, void *ip)
> >  {
> >         void *adjusted_ip;
> >         OPTIMIZER_HIDE_VAR(func);
> > -       adjusted_ip = ip + x86_call_depth_emit_accounting(pprog, func);
> > +       adjusted_ip = ip + x86_call_depth_emit_accounting(pprog, func, ip);
>
> Now I see why you added extra var in the previous patch.
> Should have mentioned it in the commit log.
>
> >         return emit_patch(pprog, func, adjusted_ip, 0xE8);
> >  }
> >
> > @@ -1973,20 +1973,17 @@ st:                     if (is_imm8(insn->off))
> >
> >                         /* call */
> >                 case BPF_JMP | BPF_CALL: {
> > -                       int offs;
> > +                       u8 *ip = image + addrs[i - 1];
> >
> >                         func = (u8 *) __bpf_call_base + imm32;
> >                         if (tail_call_reachable) {
> >                                 RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
> > -                               if (!imm32)
> > -                                       return -EINVAL;
> > -                               offs = 7 + x86_call_depth_emit_accounting(&prog, func);
> > -                       } else {
> > -                               if (!imm32)
> > -                                       return -EINVAL;
> > -                               offs = x86_call_depth_emit_accounting(&prog, func);
> > +                               ip += 7;
> >                         }
> > -                       if (emit_call(&prog, func, image + addrs[i - 1] + offs))
> > +                       if (!imm32)
> > +                               return -EINVAL;
> > +                       ip += x86_call_depth_emit_accounting(&prog, func, ip);
> > +                       if (emit_call(&prog, func, ip))
> >                                 return -EINVAL;
> >                         break;
> >                 }
> > @@ -2836,7 +2833,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> >                  * Direct-call fentry stub, as such it needs accounting for the
> >                  * __fentry__ call.
> >                  */
> > -               x86_call_depth_emit_accounting(&prog, NULL);
> > +               x86_call_depth_emit_accounting(&prog, NULL, image);
>
> Overall it all makes sense.
> Pls respin with more precise commit logs.
Alexei Starovoitov April 1, 2024, 6:02 p.m. UTC | #3
On Sat, Mar 30, 2024 at 2:01 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Fri, Mar 29, 2024 at 10:53 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Fri, Mar 29, 2024 at 2:49 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> > >
> > > From: Joan Bruguera Micó <joanbrugueram@gmail.com>
> > >
> > > The recently introduced support for %rip-relative relocations in the
> > > call thunk template assumes that the code is being patched in-place,
> > > so the destination of the relocation matches the address of the code.
> > > This is not true for the call depth accounting emitted by the BPF JIT,
> > > so the calculated address is wrong and usually causes a page fault.
> >
> > Could you share the link to what this 'rip-relative' relocation is ?
>
> Please see the "RIP relative addressing" section in [1].
>
> [1] https://compas.cs.stonybrook.edu/~nhonarmand/courses/sp17/cse506/ref/assembly.html
>
> In our case:
>
> The callthunks patching creates a call thunk template in the .rodata
> section (please see arch/x86/kernel/callthunks.c)  that is later
> copied to the .text section at the correct place. The template uses
> X86_call_depth in the pcpu_hot structure. Previously, the template
> used absolute location for X86_call_depth and the linker resolved the
> address in the template to this absolute location. There is no issue
> when this template is copied to the various places in the .text
> section.
>
> When we want to use PC relative relocations (to reduce the code size),
> then the linker calculates the address of the variable in the template
> according to the PC in the .rodata section. If we want to copy the
> template to its final location, then the address of X86_call_depth,
> relative to the PC, has to be adjusted, as explained in
> arch/x86/kernel/alternative.c, in the comment above apply_reloc_n
> macro.

I didn't mean to ask for info about the definition of rip-relative,
but how it's used in this case and what you've been trying
to achieve with commit 17bce3b2ae2d that broke call depth accounting.
And the whole sequence of events that caused this breakage.
Something like:
commit 59bec00ace28 ("x86/percpu: Introduce %rip-relative addressing
to PER_CPU_VAR()")
made PER_CPU_VAR() to use rip-relative addressing,
hence INCREMENT_CALL_DEPTH macro and skl_call_thunk_template
got rip-relative asm code inside of it.
Hence x86_call_depth_emit_accounting() was changed
in commit 17bce3b2ae2d ("x86/callthunks: Handle %rip-relative
relocations in call thunk template") to use apply_relocation(),
but it was mistakenly made to use *pprog as dest ip,
so jit-ed bpf progs on kernels with call depth tracking got broken.
Such details should be in the commit log.
Uros Bizjak April 1, 2024, 6:38 p.m. UTC | #4
On Mon, Apr 1, 2024 at 8:03 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Sat, Mar 30, 2024 at 2:01 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Fri, Mar 29, 2024 at 10:53 PM Alexei Starovoitov
> > <alexei.starovoitov@gmail.com> wrote:
> > >
> > > On Fri, Mar 29, 2024 at 2:49 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> > > >
> > > > From: Joan Bruguera Micó <joanbrugueram@gmail.com>
> > > >
> > > > The recently introduced support for %rip-relative relocations in the
> > > > call thunk template assumes that the code is being patched in-place,
> > > > so the destination of the relocation matches the address of the code.
> > > > This is not true for the call depth accounting emitted by the BPF JIT,
> > > > so the calculated address is wrong and usually causes a page fault.
> > >
> > > Could you share the link to what this 'rip-relative' relocation is ?
> >
> > Please see the "RIP relative addressing" section in [1].
> >
> > [1] https://compas.cs.stonybrook.edu/~nhonarmand/courses/sp17/cse506/ref/assembly.html
> >
> > In our case:
> >
> > The callthunks patching creates a call thunk template in the .rodata
> > section (please see arch/x86/kernel/callthunks.c)  that is later
> > copied to the .text section at the correct place. The template uses
> > X86_call_depth in the pcpu_hot structure. Previously, the template
> > used absolute location for X86_call_depth and the linker resolved the
> > address in the template to this absolute location. There is no issue
> > when this template is copied to the various places in the .text
> > section.
> >
> > When we want to use PC relative relocations (to reduce the code size),
> > then the linker calculates the address of the variable in the template
> > according to the PC in the .rodata section. If we want to copy the
> > template to its final location, then the address of X86_call_depth,
> > relative to the PC, has to be adjusted, as explained in
> > arch/x86/kernel/alternative.c, in the comment above apply_reloc_n
> > macro.
>
> I didn't mean to ask for info about the definition of rip-relative,
> but how it's used in this case and what you've been trying
> to achieve with commit 17bce3b2ae2d that broke call depth accounting.
> And the whole sequence of events that caused this breakage.
> Something like:
> commit 59bec00ace28 ("x86/percpu: Introduce %rip-relative addressing
> to PER_CPU_VAR()")
> made PER_CPU_VAR() to use rip-relative addressing,
> hence INCREMENT_CALL_DEPTH macro and skl_call_thunk_template
> got rip-relative asm code inside of it.
> Hence x86_call_depth_emit_accounting() was changed
> in commit 17bce3b2ae2d ("x86/callthunks: Handle %rip-relative
> relocations in call thunk template") to use apply_relocation(),
> but it was mistakenly made to use *pprog as dest ip,
> so jit-ed bpf progs on kernels with call depth tracking got broken.
> Such details should be in the commit log.

Oh, I was not sure that all those x86 specific details should be in
the commit log, since x86 maintainers already acked the patch. Sure,
I'll add your description of the fix to the patch commit message, it
really describes the problem in a way, understandable also to non-x86
people.

Thanks,
Uros.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index fcd20c6dc7f9..67b68d0d17d1 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -117,7 +117,7 @@  extern void callthunks_patch_builtin_calls(void);
 extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
 					  struct module *mod);
 extern void *callthunks_translate_call_dest(void *dest);
-extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
+extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip);
 #else
 static __always_inline void callthunks_patch_builtin_calls(void) {}
 static __always_inline void
@@ -128,7 +128,7 @@  static __always_inline void *callthunks_translate_call_dest(void *dest)
 	return dest;
 }
 static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
-							  void *func)
+							  void *func, void *ip)
 {
 	return 0;
 }
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index 30335182b6b0..e92ff0c11db8 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -314,7 +314,7 @@  static bool is_callthunk(void *addr)
 	return !bcmp(pad, insn_buff, tmpl_size);
 }
 
-int x86_call_depth_emit_accounting(u8 **pprog, void *func)
+int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
 {
 	unsigned int tmpl_size = SKL_TMPL_SIZE;
 	u8 insn_buff[MAX_PATCH_LEN];
@@ -327,7 +327,7 @@  int x86_call_depth_emit_accounting(u8 **pprog, void *func)
 		return 0;
 
 	memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
-	apply_relocation(insn_buff, tmpl_size, *pprog,
+	apply_relocation(insn_buff, tmpl_size, ip,
 			 skl_call_thunk_template, tmpl_size);
 
 	memcpy(*pprog, insn_buff, tmpl_size);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 09f7dc9d4d65..f2e8769f5eee 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -481,7 +481,7 @@  static int emit_rsb_call(u8 **pprog, void *func, void *ip)
 {
 	void *adjusted_ip;
 	OPTIMIZER_HIDE_VAR(func);
-	adjusted_ip = ip + x86_call_depth_emit_accounting(pprog, func);
+	adjusted_ip = ip + x86_call_depth_emit_accounting(pprog, func, ip);
 	return emit_patch(pprog, func, adjusted_ip, 0xE8);
 }
 
@@ -1973,20 +1973,17 @@  st:			if (is_imm8(insn->off))
 
 			/* call */
 		case BPF_JMP | BPF_CALL: {
-			int offs;
+			u8 *ip = image + addrs[i - 1];
 
 			func = (u8 *) __bpf_call_base + imm32;
 			if (tail_call_reachable) {
 				RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
-				if (!imm32)
-					return -EINVAL;
-				offs = 7 + x86_call_depth_emit_accounting(&prog, func);
-			} else {
-				if (!imm32)
-					return -EINVAL;
-				offs = x86_call_depth_emit_accounting(&prog, func);
+				ip += 7;
 			}
-			if (emit_call(&prog, func, image + addrs[i - 1] + offs))
+			if (!imm32)
+				return -EINVAL;
+			ip += x86_call_depth_emit_accounting(&prog, func, ip);
+			if (emit_call(&prog, func, ip))
 				return -EINVAL;
 			break;
 		}
@@ -2836,7 +2833,7 @@  static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		 * Direct-call fentry stub, as such it needs accounting for the
 		 * __fentry__ call.
 		 */
-		x86_call_depth_emit_accounting(&prog, NULL);
+		x86_call_depth_emit_accounting(&prog, NULL, image);
 	}
 	EMIT1(0x55);		 /* push rbp */
 	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */