
[v2,4/4] powerpc/bpf: use bpf_jit_binary_pack_[alloc|finalize|free]

Message ID 20230309180213.180263-5-hbathini@linux.ibm.com (mailing list archive)
State Not Applicable
Delegated to: BPF
Series: enable bpf_prog_pack allocator for powerpc

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32 on x86_64 with llvm-17
netdev/tree_selection success Not a local patch, async
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_progs_parallel on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_verifier on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-36 success Logs for test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix

Commit Message

Hari Bathini March 9, 2023, 6:02 p.m. UTC
Use bpf_jit_binary_pack_alloc in powerpc jit. The jit engine first
writes the program to the rw buffer. When the jit is done, the program
is copied to the final location with bpf_jit_binary_pack_finalize.
With multiple jit_subprogs, bpf_jit_free is called on some subprograms
that haven't got bpf_jit_binary_pack_finalize() yet. Implement custom
bpf_jit_free() like in commit 1d5f82d9dd47 ("bpf, x86: fix freeing of
not-finalized bpf_prog_pack") to call bpf_jit_binary_pack_finalize(),
if necessary. While here, correct the misnomer powerpc64_jit_data to
powerpc_jit_data as it is meant for both ppc32 and ppc64.

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
 arch/powerpc/net/bpf_jit.h        |   7 +-
 arch/powerpc/net/bpf_jit_comp.c   | 104 +++++++++++++++++++++---------
 arch/powerpc/net/bpf_jit_comp32.c |   4 +-
 arch/powerpc/net/bpf_jit_comp64.c |   6 +-
 4 files changed, 83 insertions(+), 38 deletions(-)
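
For readers new to bpf_prog_pack, the calls this patch switches to follow the
pattern sketched below. This is only an illustration assembled from the hunks
in this patch (error handling and the actual code-emission passes are elided),
not code from the series or from the core BPF implementation.

	struct bpf_binary_header *fhdr, *hdr;	/* final (RO) and scratch (RW) headers */
	u8 *fimage, *image;			/* final (RO) and scratch (RW) images */

	/* 1. Allocate a slot in the shared read-only pack plus a matching
	 *    RW scratch buffer; both describe the same layout.
	 */
	fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
					 bpf_jit_fill_ill_insns);

	/* 2. Emit instructions into 'image' (writable), using 'fimage' only
	 *    where an absolute final address is needed, e.g. fp->bpf_func
	 *    and the exception table.
	 */

	/* 3. Copy the RW buffer into the read-only pack and release the RW
	 *    buffer.
	 */
	bpf_jit_binary_pack_finalize(fp, fhdr, hdr);

	/* 4. On teardown, locate the pack header from the program and return
	 *    the slot to the pack (the RW side is already gone by then).
	 */
	bpf_jit_binary_pack_free(bpf_jit_binary_pack_hdr(fp), NULL);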

Comments

Song Liu March 10, 2023, 10:35 p.m. UTC | #1
On Thu, Mar 9, 2023 at 10:03 AM Hari Bathini <hbathini@linux.ibm.com> wrote:
>
> Use bpf_jit_binary_pack_alloc in powerpc jit. The jit engine first
> writes the program to the rw buffer. When the jit is done, the program
> is copied to the final location with bpf_jit_binary_pack_finalize.
> With multiple jit_subprogs, bpf_jit_free is called on some subprograms
> that haven't got bpf_jit_binary_pack_finalize() yet. Implement custom
> bpf_jit_free() like in commit 1d5f82d9dd47 ("bpf, x86: fix freeing of
> not-finalized bpf_prog_pack") to call bpf_jit_binary_pack_finalize(),
> if necessary. While here, correct the misnomer powerpc64_jit_data to
> powerpc_jit_data as it is meant for both ppc32 and ppc64.
>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>  arch/powerpc/net/bpf_jit.h        |   7 +-
>  arch/powerpc/net/bpf_jit_comp.c   | 104 +++++++++++++++++++++---------
>  arch/powerpc/net/bpf_jit_comp32.c |   4 +-
>  arch/powerpc/net/bpf_jit_comp64.c |   6 +-
>  4 files changed, 83 insertions(+), 38 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index d767e39d5645..a8b7480c4d43 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -168,15 +168,16 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
>
>  void bpf_jit_init_reg_mapping(struct codegen_context *ctx);
>  int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
> -int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
> +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
>                        u32 *addrs, int pass, bool extra_pass);
>  void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
>  void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
>  void bpf_jit_realloc_regs(struct codegen_context *ctx);
>  int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
>
> -int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
> -                         int insn_idx, int jmp_off, int dst_reg);
> +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
> +                         struct codegen_context *ctx, int insn_idx,
> +                         int jmp_off, int dst_reg);
>
>  #endif
>
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index d1794d9f0154..ece75c829499 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -42,10 +42,11 @@ int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg,
>         return 0;
>  }
>
> -struct powerpc64_jit_data {
> -       struct bpf_binary_header *header;
> +struct powerpc_jit_data {
> +       struct bpf_binary_header *hdr;
> +       struct bpf_binary_header *fhdr;
>         u32 *addrs;
> -       u8 *image;
> +       u8 *fimage;
>         u32 proglen;
>         struct codegen_context ctx;
>  };

Some comments about the f- prefix will be helpful. (Yes, I should have done a
better job adding comments for the x86 counterpart.)
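
To make that concrete, field comments along these lines would do; this is only
a suggestion using the names from this patch, where the f- prefix marks the
final, read-only copies that bpf_jit_binary_pack_finalize() populates:

	/*
	 * The JIT emits into a temporary RW buffer (hdr/image) and the result
	 * is later copied into the read-only bpf_prog_pack area; fhdr/fimage
	 * describe that final location, which fp->bpf_func and the exception
	 * table point into.
	 */
	struct powerpc_jit_data {
		struct bpf_binary_header *hdr;	/* RW scratch buffer */
		struct bpf_binary_header *fhdr;	/* final read-only header */
		u32 *addrs;
		u8 *fimage;			/* final read-only image */
		u32 proglen;
		struct codegen_context ctx;
	};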

> @@ -62,15 +63,18 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>         u8 *image = NULL;
>         u32 *code_base;
>         u32 *addrs;
> -       struct powerpc64_jit_data *jit_data;
> +       struct powerpc_jit_data *jit_data;
>         struct codegen_context cgctx;
>         int pass;
>         int flen;
> -       struct bpf_binary_header *bpf_hdr;
> +       struct bpf_binary_header *fhdr = NULL;
> +       struct bpf_binary_header *hdr = NULL;
>         struct bpf_prog *org_fp = fp;
>         struct bpf_prog *tmp_fp;
>         bool bpf_blinded = false;
>         bool extra_pass = false;
> +       u8 *fimage = NULL;
> +       u32 *fcode_base;
>         u32 extable_len;
>         u32 fixup_len;
>
> @@ -100,9 +104,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>         addrs = jit_data->addrs;
>         if (addrs) {
>                 cgctx = jit_data->ctx;
> -               image = jit_data->image;
> -               bpf_hdr = jit_data->header;
> +               fimage = jit_data->fimage;
> +               fhdr = jit_data->fhdr;
>                 proglen = jit_data->proglen;
> +               hdr = jit_data->hdr;
> +               image = (void *)hdr + ((void *)fimage - (void *)fhdr);
>                 extra_pass = true;
>                 goto skip_init_ctx;
>         }
> @@ -120,7 +126,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>         cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
>
>         /* Scouting faux-generate pass 0 */
> -       if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
> +       if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
>                 /* We hit something illegal or unsupported. */
>                 fp = org_fp;
>                 goto out_addrs;
> @@ -135,7 +141,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>          */
>         if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
>                 cgctx.idx = 0;
> -               if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
> +               if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
>                         fp = org_fp;
>                         goto out_addrs;
>                 }
> @@ -157,17 +163,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>         proglen = cgctx.idx * 4;
>         alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
>
> -       bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
> -       if (!bpf_hdr) {
> +       fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
> +                                             bpf_jit_fill_ill_insns);
> +       if (!fhdr) {
>                 fp = org_fp;
>                 goto out_addrs;
>         }
>
>         if (extable_len)
> -               fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len;
> +               fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;
>
>  skip_init_ctx:
>         code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
> +       fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);
>
>         /* Code generation passes 1-2 */
>         for (pass = 1; pass < 3; pass++) {
> @@ -175,8 +183,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>                 cgctx.idx = 0;
>                 cgctx.alt_exit_addr = 0;
>                 bpf_jit_build_prologue(code_base, &cgctx);
> -               if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass, extra_pass)) {
> -                       bpf_jit_binary_free(bpf_hdr);
> +               if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass, extra_pass)) {
> +                       bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
> +                       bpf_jit_binary_pack_free(fhdr, hdr);
>                         fp = org_fp;
>                         goto out_addrs;
>                 }
> @@ -192,21 +201,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>                  * Note that we output the base address of the code_base
>                  * rather than image, since opcodes are in code_base.
>                  */
Maybe update the comment above with fcode_base to avoid
confusion.

> -               bpf_jit_dump(flen, proglen, pass, code_base);
> +               bpf_jit_dump(flen, proglen, pass, fcode_base);
>
>  #ifdef CONFIG_PPC64_ELF_ABI_V1
>         /* Function descriptor nastiness: Address + TOC */
> -       ((u64 *)image)[0] = (u64)code_base;
> +       ((u64 *)image)[0] = (u64)fcode_base;
>         ((u64 *)image)[1] = local_paca->kernel_toc;
>  #endif
>
> -       fp->bpf_func = (void *)image;
> +       fp->bpf_func = (void *)fimage;
>         fp->jited = 1;
>         fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
>
> -       bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + bpf_hdr->size);
>         if (!fp->is_func || extra_pass) {
> -               bpf_jit_binary_lock_ro(bpf_hdr);
> +               if (bpf_jit_binary_pack_finalize(fp, fhdr, hdr)) {
> +                       fp = org_fp;
> +                       goto out_addrs;
> +               }
>                 bpf_prog_fill_jited_linfo(fp, addrs);
>  out_addrs:
>                 kfree(addrs);
> @@ -216,8 +227,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>                 jit_data->addrs = addrs;
>                 jit_data->ctx = cgctx;
>                 jit_data->proglen = proglen;
> -               jit_data->image = image;
> -               jit_data->header = bpf_hdr;
> +               jit_data->fimage = fimage;
> +               jit_data->fhdr = fhdr;
> +               jit_data->hdr = hdr;
>         }
>
>  out:
> @@ -231,12 +243,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
>   * this function, as this only applies to BPF_PROBE_MEM, for now.
>   */
> -int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
> -                         int insn_idx, int jmp_off, int dst_reg)
> +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
> +                         struct codegen_context *ctx, int insn_idx, int jmp_off,
> +                         int dst_reg)
>  {
>         off_t offset;
>         unsigned long pc;
> -       struct exception_table_entry *ex;
> +       struct exception_table_entry *ex, *ex_entry;
>         u32 *fixup;
>
>         /* Populate extable entries only in the last pass */
> @@ -247,9 +260,16 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>             WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
>                 return -EINVAL;
>
> +       /*
> +        * Program is firt written to image before copying to the
s/firt/first/

> +        * final location (fimage). Accordingly, update in the image first.
> +        * As all offsets used are relative, copying as is to the
> +        * final location should be alright.
> +        */
>         pc = (unsigned long)&image[insn_idx];
> +       ex = (void *)fp->aux->extable - (void *)fimage + (void *)image;
>
> -       fixup = (void *)fp->aux->extable -
> +       fixup = (void *)ex -
>                 (fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
>                 (ctx->exentry_idx * BPF_FIXUP_LEN * 4);
>
> @@ -260,17 +280,17 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>         fixup[BPF_FIXUP_LEN - 1] =
>                 PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);
>
> -       ex = &fp->aux->extable[ctx->exentry_idx];
> +       ex_entry = &ex[ctx->exentry_idx];
>
> -       offset = pc - (long)&ex->insn;
> +       offset = pc - (long)&ex_entry->insn;
>         if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
>                 return -ERANGE;
> -       ex->insn = offset;
> +       ex_entry->insn = offset;
>
> -       offset = (long)fixup - (long)&ex->fixup;
> +       offset = (long)fixup - (long)&ex_entry->fixup;
>         if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
>                 return -ERANGE;
> -       ex->fixup = offset;
> +       ex_entry->fixup = offset;
>
>         ctx->exentry_idx++;
>         return 0;
> @@ -308,3 +328,27 @@ int bpf_arch_text_invalidate(void *dst, size_t len)
>
>         return ret;
>  }
> +
> +void bpf_jit_free(struct bpf_prog *fp)
> +{
> +       if (fp->jited) {
> +               struct powerpc_jit_data *jit_data = fp->aux->jit_data;
> +               struct bpf_binary_header *hdr;
> +
> +               /*
> +                * If we fail the final pass of JIT (from jit_subprogs),
> +                * the program may not be finalized yet. Call finalize here
> +                * before freeing it.
> +                */
> +               if (jit_data) {
> +                       bpf_jit_binary_pack_finalize(fp, jit_data->fhdr, jit_data->hdr);

I just realized x86 is the same. But I think we only need the following
here?

bpf_arch_text_copy(&jit_data->fhdr->size, &jit_data->hdr->size,
sizeof(jit_data->hdr->size));

Right?

Thanks,
Song
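
For reference, the two cleanup variants being weighed here for the
not-yet-finalized case in bpf_jit_free() are roughly the following; this only
restates the alternatives from the thread, not the code that was eventually
merged:

	/* (a) As posted in this patch (and as x86 does): run the full
	 *     finalize step, copying the whole RW image into the read-only
	 *     area even though the program is about to be freed.
	 */
	bpf_jit_binary_pack_finalize(fp, jit_data->fhdr, jit_data->hdr);

	/* (b) Song's suggestion: since the program is being torn down anyway,
	 *     only the header size needs to reach the read-only copy so that
	 *     the later bpf_jit_binary_pack_hdr()/bpf_jit_binary_pack_free()
	 *     can still find its extent.
	 */
	bpf_arch_text_copy(&jit_data->fhdr->size, &jit_data->hdr->size,
			   sizeof(jit_data->hdr->size));
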
Christophe Leroy March 11, 2023, 10:16 a.m. UTC | #2
On 09/03/2023 at 19:02, Hari Bathini wrote:
> Use bpf_jit_binary_pack_alloc in powerpc jit. The jit engine first
> writes the program to the rw buffer. When the jit is done, the program
> is copied to the final location with bpf_jit_binary_pack_finalize.
> With multiple jit_subprogs, bpf_jit_free is called on some subprograms
> that haven't got bpf_jit_binary_pack_finalize() yet. Implement custom
> bpf_jit_free() like in commit 1d5f82d9dd47 ("bpf, x86: fix freeing of
> not-finalized bpf_prog_pack") to call bpf_jit_binary_pack_finalize(),
> if necessary. While here, correct the misnomer powerpc64_jit_data to
> powerpc_jit_data as it is meant for both ppc32 and ppc64.

root@vgoip:~# echo 1 > /proc/sys/net/core/bpf_jit_enable
root@vgoip:~# insmod test_bpf.ko
[  570.270983] kernel tried to execute exec-protected page (bd42c198) - 
exploit attempt? (uid: 0)
[  570.279414] BUG: Unable to handle kernel instruction fetch
[  570.284822] Faulting instruction address: 0xbd42c198
[  570.289734] Oops: Kernel access of bad area, sig: 11 [#1]
[  570.295062] BE PAGE_SIZE=16K PREEMPT CMPC885
[  570.302811] Modules linked in: test_bpf(+) test_module
[  570.307891] CPU: 0 PID: 559 Comm: insmod Not tainted 
6.3.0-rc1-s3k-dev-g4ae0418b3500 #258
[  570.315975] Hardware name: MIAE 8xx 0x500000 CMPC885
[  570.320882] NIP:  bd42c198 LR: be8180ec CTR: be818010
[  570.325873] REGS: cae2bc40 TRAP: 0400   Not tainted 
(6.3.0-rc1-s3k-dev-g4ae0418b3500)
[  570.333704] MSR:  40009032 <EE,ME,IR,DR,RI>  CR: 88008222  XER: 00000000
[  570.340503]
[  570.340503] GPR00: be806eac cae2bd00 c2977340 00000000 c2c40900 
00000000 c1a18a80 00000000
[  570.340503] GPR08: 00000002 00000001 00000000 00000000 ffffffff 
100d815e ca6a0000 00000001
[  570.340503] GPR16: 1234aaaa ca242250 c1180000 00000001 1234aaab 
c9050030 00000000 00000000
[  570.340503] GPR24: c2c40900 00000000 ffffffff 00000000 c1a18a80 
00000000 00000002 ca24225c
[  570.376819] NIP [bd42c198] 0xbd42c198
[  570.380436] LR [be8180ec] 0xbe8180ec
[  570.383965] Call Trace:
[  570.386373] [cae2bd00] [0000000b] 0xb (unreliable)
[  570.391107] [cae2bd50] [be806eac] __run_one+0x58/0x224 [test_bpf]
[  570.397390] [cae2bd90] [be80ca94] test_bpf_init+0x8d8/0x1010 [test_bpf]
[  570.404189] [cae2be20] [c00049f0] do_one_initcall+0x38/0x1e4
[  570.409782] [cae2be80] [c0090aa8] do_init_module+0x50/0x234
[  570.415291] [cae2bea0] [c0092e08] sys_finit_module+0xb4/0xf8
[  570.420884] [cae2bf20] [c000e344] system_call_exception+0x94/0x150
[  570.426995] [cae2bf30] [c00120a8] ret_from_syscall+0x0/0x28
[  570.432502] --- interrupt: c00 at 0xfd5fca0
[  570.436632] NIP:  0fd5fca0 LR: 10014568 CTR: 10013294
[  570.441625] REGS: cae2bf40 TRAP: 0c00   Not tainted 
(6.3.0-rc1-s3k-dev-g4ae0418b3500)
[  570.449455] MSR:  0000d032 <EE,PR,ME,IR,DR,RI>  CR: 44002224  XER: 
00000000
[  570.456513]
[  570.456513] GPR00: 00000161 7f868d30 77ed34d0 00000003 100bc4ef 
00000000 0fd51868 0000d032
[  570.456513] GPR08: 000007b1 10013294 00000000 00000002 52454753 
100d815e 100a44b8 00000000
[  570.456513] GPR16: 100d167c 100b0000 1198426c 119854cd 100d0000 
100d0000 00000000 100a4498
[  570.456513] GPR24: ffffffa2 ffffffff 11984244 00000003 1198426c 
100bc4ef 11984288 1198426c
[  570.492828] NIP [0fd5fca0] 0xfd5fca0
[  570.496358] LR [10014568] 0x10014568
[  570.499887] --- interrupt: c00
[  570.502902] Code: XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX 
XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX 
XXXXXXXX XXXXXXXX XXXXXXXX
[  570.517973] ---[ end trace 0000000000000000 ]---
[  570.522523]
[  570.523986] note: insmod[559] exited with irqs disabled
Segmentation fault

Christophe

> 
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>   arch/powerpc/net/bpf_jit.h        |   7 +-
>   arch/powerpc/net/bpf_jit_comp.c   | 104 +++++++++++++++++++++---------
>   arch/powerpc/net/bpf_jit_comp32.c |   4 +-
>   arch/powerpc/net/bpf_jit_comp64.c |   6 +-
>   4 files changed, 83 insertions(+), 38 deletions(-)
> 
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index d767e39d5645..a8b7480c4d43 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -168,15 +168,16 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
>   
>   void bpf_jit_init_reg_mapping(struct codegen_context *ctx);
>   int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
> -int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
> +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
>   		       u32 *addrs, int pass, bool extra_pass);
>   void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
>   void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
>   void bpf_jit_realloc_regs(struct codegen_context *ctx);
>   int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
>   
> -int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
> -			  int insn_idx, int jmp_off, int dst_reg);
> +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
> +			  struct codegen_context *ctx, int insn_idx,
> +			  int jmp_off, int dst_reg);
>   
>   #endif
>   
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index d1794d9f0154..ece75c829499 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -42,10 +42,11 @@ int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg,
>   	return 0;
>   }
>   
> -struct powerpc64_jit_data {
> -	struct bpf_binary_header *header;
> +struct powerpc_jit_data {
> +	struct bpf_binary_header *hdr;
> +	struct bpf_binary_header *fhdr;
>   	u32 *addrs;
> -	u8 *image;
> +	u8 *fimage;
>   	u32 proglen;
>   	struct codegen_context ctx;
>   };
> @@ -62,15 +63,18 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   	u8 *image = NULL;
>   	u32 *code_base;
>   	u32 *addrs;
> -	struct powerpc64_jit_data *jit_data;
> +	struct powerpc_jit_data *jit_data;
>   	struct codegen_context cgctx;
>   	int pass;
>   	int flen;
> -	struct bpf_binary_header *bpf_hdr;
> +	struct bpf_binary_header *fhdr = NULL;
> +	struct bpf_binary_header *hdr = NULL;
>   	struct bpf_prog *org_fp = fp;
>   	struct bpf_prog *tmp_fp;
>   	bool bpf_blinded = false;
>   	bool extra_pass = false;
> +	u8 *fimage = NULL;
> +	u32 *fcode_base;
>   	u32 extable_len;
>   	u32 fixup_len;
>   
> @@ -100,9 +104,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   	addrs = jit_data->addrs;
>   	if (addrs) {
>   		cgctx = jit_data->ctx;
> -		image = jit_data->image;
> -		bpf_hdr = jit_data->header;
> +		fimage = jit_data->fimage;
> +		fhdr = jit_data->fhdr;
>   		proglen = jit_data->proglen;
> +		hdr = jit_data->hdr;
> +		image = (void *)hdr + ((void *)fimage - (void *)fhdr);
>   		extra_pass = true;
>   		goto skip_init_ctx;
>   	}
> @@ -120,7 +126,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
>   
>   	/* Scouting faux-generate pass 0 */
> -	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
> +	if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
>   		/* We hit something illegal or unsupported. */
>   		fp = org_fp;
>   		goto out_addrs;
> @@ -135,7 +141,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   	 */
>   	if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
>   		cgctx.idx = 0;
> -		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
> +		if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
>   			fp = org_fp;
>   			goto out_addrs;
>   		}
> @@ -157,17 +163,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   	proglen = cgctx.idx * 4;
>   	alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
>   
> -	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
> -	if (!bpf_hdr) {
> +	fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
> +					      bpf_jit_fill_ill_insns);
> +	if (!fhdr) {
>   		fp = org_fp;
>   		goto out_addrs;
>   	}
>   
>   	if (extable_len)
> -		fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len;
> +		fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;
>   
>   skip_init_ctx:
>   	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
> +	fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);
>   
>   	/* Code generation passes 1-2 */
>   	for (pass = 1; pass < 3; pass++) {
> @@ -175,8 +183,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   		cgctx.idx = 0;
>   		cgctx.alt_exit_addr = 0;
>   		bpf_jit_build_prologue(code_base, &cgctx);
> -		if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass, extra_pass)) {
> -			bpf_jit_binary_free(bpf_hdr);
> +		if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass, extra_pass)) {
> +			bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
> +			bpf_jit_binary_pack_free(fhdr, hdr);
>   			fp = org_fp;
>   			goto out_addrs;
>   		}
> @@ -192,21 +201,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   		 * Note that we output the base address of the code_base
>   		 * rather than image, since opcodes are in code_base.
>   		 */
> -		bpf_jit_dump(flen, proglen, pass, code_base);
> +		bpf_jit_dump(flen, proglen, pass, fcode_base);
>   
>   #ifdef CONFIG_PPC64_ELF_ABI_V1
>   	/* Function descriptor nastiness: Address + TOC */
> -	((u64 *)image)[0] = (u64)code_base;
> +	((u64 *)image)[0] = (u64)fcode_base;
>   	((u64 *)image)[1] = local_paca->kernel_toc;
>   #endif
>   
> -	fp->bpf_func = (void *)image;
> +	fp->bpf_func = (void *)fimage;
>   	fp->jited = 1;
>   	fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
>   
> -	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + bpf_hdr->size);
>   	if (!fp->is_func || extra_pass) {
> -		bpf_jit_binary_lock_ro(bpf_hdr);
> +		if (bpf_jit_binary_pack_finalize(fp, fhdr, hdr)) {
> +			fp = org_fp;
> +			goto out_addrs;
> +		}
>   		bpf_prog_fill_jited_linfo(fp, addrs);
>   out_addrs:
>   		kfree(addrs);
> @@ -216,8 +227,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   		jit_data->addrs = addrs;
>   		jit_data->ctx = cgctx;
>   		jit_data->proglen = proglen;
> -		jit_data->image = image;
> -		jit_data->header = bpf_hdr;
> +		jit_data->fimage = fimage;
> +		jit_data->fhdr = fhdr;
> +		jit_data->hdr = hdr;
>   	}
>   
>   out:
> @@ -231,12 +243,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>    * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
>    * this function, as this only applies to BPF_PROBE_MEM, for now.
>    */
> -int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
> -			  int insn_idx, int jmp_off, int dst_reg)
> +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
> +			  struct codegen_context *ctx, int insn_idx, int jmp_off,
> +			  int dst_reg)
>   {
>   	off_t offset;
>   	unsigned long pc;
> -	struct exception_table_entry *ex;
> +	struct exception_table_entry *ex, *ex_entry;
>   	u32 *fixup;
>   
>   	/* Populate extable entries only in the last pass */
> @@ -247,9 +260,16 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>   	    WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
>   		return -EINVAL;
>   
> +	/*
> +	 * Program is firt written to image before copying to the
> +	 * final location (fimage). Accordingly, update in the image first.
> +	 * As all offsets used are relative, copying as is to the
> +	 * final location should be alright.
> +	 */
>   	pc = (unsigned long)&image[insn_idx];
> +	ex = (void *)fp->aux->extable - (void *)fimage + (void *)image;
>   
> -	fixup = (void *)fp->aux->extable -
> +	fixup = (void *)ex -
>   		(fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
>   		(ctx->exentry_idx * BPF_FIXUP_LEN * 4);
>   
> @@ -260,17 +280,17 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>   	fixup[BPF_FIXUP_LEN - 1] =
>   		PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);
>   
> -	ex = &fp->aux->extable[ctx->exentry_idx];
> +	ex_entry = &ex[ctx->exentry_idx];
>   
> -	offset = pc - (long)&ex->insn;
> +	offset = pc - (long)&ex_entry->insn;
>   	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
>   		return -ERANGE;
> -	ex->insn = offset;
> +	ex_entry->insn = offset;
>   
> -	offset = (long)fixup - (long)&ex->fixup;
> +	offset = (long)fixup - (long)&ex_entry->fixup;
>   	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
>   		return -ERANGE;
> -	ex->fixup = offset;
> +	ex_entry->fixup = offset;
>   
>   	ctx->exentry_idx++;
>   	return 0;
> @@ -308,3 +328,27 @@ int bpf_arch_text_invalidate(void *dst, size_t len)
>   
>   	return ret;
>   }
> +
> +void bpf_jit_free(struct bpf_prog *fp)
> +{
> +	if (fp->jited) {
> +		struct powerpc_jit_data *jit_data = fp->aux->jit_data;
> +		struct bpf_binary_header *hdr;
> +
> +		/*
> +		 * If we fail the final pass of JIT (from jit_subprogs),
> +		 * the program may not be finalized yet. Call finalize here
> +		 * before freeing it.
> +		 */
> +		if (jit_data) {
> +			bpf_jit_binary_pack_finalize(fp, jit_data->fhdr, jit_data->hdr);
> +			kvfree(jit_data->addrs);
> +			kfree(jit_data);
> +		}
> +		hdr = bpf_jit_binary_pack_hdr(fp);
> +		bpf_jit_binary_pack_free(hdr, NULL);
> +		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
> +	}
> +
> +	bpf_prog_unlock_free(fp);
> +}
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index 7f91ea064c08..fb2761b54d64 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -278,7 +278,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
>   }
>   
>   /* Assemble the body code between the prologue & epilogue */
> -int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
> +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
>   		       u32 *addrs, int pass, bool extra_pass)
>   {
>   	const struct bpf_insn *insn = fp->insnsi;
> @@ -997,7 +997,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   					jmp_off += 4;
>   				}
>   
> -				ret = bpf_add_extable_entry(fp, image, pass, ctx, insn_idx,
> +				ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx, insn_idx,
>   							    jmp_off, dst_reg);
>   				if (ret)
>   					return ret;
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 8dd3cabaa83a..37a8970a7065 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -343,7 +343,7 @@ asm (
>   );
>   
>   /* Assemble the body code between the prologue & epilogue */
> -int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
> +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
>   		       u32 *addrs, int pass, bool extra_pass)
>   {
>   	enum stf_barrier_type stf_barrier = stf_barrier_type_get();
> @@ -922,8 +922,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   				addrs[++i] = ctx->idx * 4;
>   
>   			if (BPF_MODE(code) == BPF_PROBE_MEM) {
> -				ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1,
> -							    4, dst_reg);
> +				ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx,
> +							    ctx->idx - 1, 4, dst_reg);
>   				if (ret)
>   					return ret;
>   			}
Hari Bathini Aug. 25, 2023, 3:29 p.m. UTC | #3
On 11/03/23 3:46 pm, Christophe Leroy wrote:
> 
> 
> On 09/03/2023 at 19:02, Hari Bathini wrote:
>> Use bpf_jit_binary_pack_alloc in powerpc jit. The jit engine first
>> writes the program to the rw buffer. When the jit is done, the program
>> is copied to the final location with bpf_jit_binary_pack_finalize.
>> With multiple jit_subprogs, bpf_jit_free is called on some subprograms
>> that haven't got bpf_jit_binary_pack_finalize() yet. Implement custom
>> bpf_jit_free() like in commit 1d5f82d9dd47 ("bpf, x86: fix freeing of
>> not-finalized bpf_prog_pack") to call bpf_jit_binary_pack_finalize(),
>> if necessary. While here, correct the misnomer powerpc64_jit_data to
>> powerpc_jit_data as it is meant for both ppc32 and ppc64.
> 
> root@vgoip:~# echo 1 > /proc/sys/net/core/bpf_jit_enable
> root@vgoip:~# insmod test_bpf.ko
> [  570.270983] kernel tried to execute exec-protected page (bd42c198) -
> exploit attempt? (uid: 0)
> [  570.279414] BUG: Unable to handle kernel instruction fetch
> [  570.284822] Faulting instruction address: 0xbd42c198
> [  570.289734] Oops: Kernel access of bad area, sig: 11 [#1]
> [  570.295062] BE PAGE_SIZE=16K PREEMPT CMPC885
> [  570.302811] Modules linked in: test_bpf(+) test_module
> [  570.307891] CPU: 0 PID: 559 Comm: insmod Not tainted
> 6.3.0-rc1-s3k-dev-g4ae0418b3500 #258
> [  570.315975] Hardware name: MIAE 8xx 0x500000 CMPC885
> [  570.320882] NIP:  bd42c198 LR: be8180ec CTR: be818010
> [  570.325873] REGS: cae2bc40 TRAP: 0400   Not tainted
> (6.3.0-rc1-s3k-dev-g4ae0418b3500)
> [  570.333704] MSR:  40009032 <EE,ME,IR,DR,RI>  CR: 88008222  XER: 00000000
> [  570.340503]
> [  570.340503] GPR00: be806eac cae2bd00 c2977340 00000000 c2c40900
> 00000000 c1a18a80 00000000
> [  570.340503] GPR08: 00000002 00000001 00000000 00000000 ffffffff
> 100d815e ca6a0000 00000001
> [  570.340503] GPR16: 1234aaaa ca242250 c1180000 00000001 1234aaab
> c9050030 00000000 00000000
> [  570.340503] GPR24: c2c40900 00000000 ffffffff 00000000 c1a18a80
> 00000000 00000002 ca24225c
> [  570.376819] NIP [bd42c198] 0xbd42c198
> [  570.380436] LR [be8180ec] 0xbe8180ec
> [  570.383965] Call Trace:
> [  570.386373] [cae2bd00] [0000000b] 0xb (unreliable)
> [  570.391107] [cae2bd50] [be806eac] __run_one+0x58/0x224 [test_bpf]
> [  570.397390] [cae2bd90] [be80ca94] test_bpf_init+0x8d8/0x1010 [test_bpf]
> [  570.404189] [cae2be20] [c00049f0] do_one_initcall+0x38/0x1e4
> [  570.409782] [cae2be80] [c0090aa8] do_init_module+0x50/0x234
> [  570.415291] [cae2bea0] [c0092e08] sys_finit_module+0xb4/0xf8
> [  570.420884] [cae2bf20] [c000e344] system_call_exception+0x94/0x150
> [  570.426995] [cae2bf30] [c00120a8] ret_from_syscall+0x0/0x28
> [  570.432502] --- interrupt: c00 at 0xfd5fca0
> [  570.436632] NIP:  0fd5fca0 LR: 10014568 CTR: 10013294
> [  570.441625] REGS: cae2bf40 TRAP: 0c00   Not tainted
> (6.3.0-rc1-s3k-dev-g4ae0418b3500)
> [  570.449455] MSR:  0000d032 <EE,PR,ME,IR,DR,RI>  CR: 44002224  XER:
> 00000000
> [  570.456513]
> [  570.456513] GPR00: 00000161 7f868d30 77ed34d0 00000003 100bc4ef
> 00000000 0fd51868 0000d032
> [  570.456513] GPR08: 000007b1 10013294 00000000 00000002 52454753
> 100d815e 100a44b8 00000000
> [  570.456513] GPR16: 100d167c 100b0000 1198426c 119854cd 100d0000
> 100d0000 00000000 100a4498
> [  570.456513] GPR24: ffffffa2 ffffffff 11984244 00000003 1198426c
> 100bc4ef 11984288 1198426c
> [  570.492828] NIP [0fd5fca0] 0xfd5fca0
> [  570.496358] LR [10014568] 0x10014568
> [  570.499887] --- interrupt: c00
> [  570.502902] Code: XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX
> XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX
> XXXXXXXX XXXXXXXX XXXXXXXX
> [  570.517973] ---[ end trace 0000000000000000 ]---
> [  570.522523]
> [  570.523986] note: insmod[559] exited with irqs disabled
> Segmentation fault

Thanks a lot for reviewing v2, Christophe.
Posted v3 for review.

Thanks
Hari
Hari Bathini Aug. 25, 2023, 3:40 p.m. UTC | #4
On 11/03/23 4:05 am, Song Liu wrote:
> On Thu, Mar 9, 2023 at 10:03 AM Hari Bathini <hbathini@linux.ibm.com> wrote:
>>
>> Use bpf_jit_binary_pack_alloc in powerpc jit. The jit engine first
>> writes the program to the rw buffer. When the jit is done, the program
>> is copied to the final location with bpf_jit_binary_pack_finalize.
>> With multiple jit_subprogs, bpf_jit_free is called on some subprograms
>> that haven't got bpf_jit_binary_pack_finalize() yet. Implement custom
>> bpf_jit_free() like in commit 1d5f82d9dd47 ("bpf, x86: fix freeing of
>> not-finalized bpf_prog_pack") to call bpf_jit_binary_pack_finalize(),
>> if necessary. While here, correct the misnomer powerpc64_jit_data to
>> powerpc_jit_data as it is meant for both ppc32 and ppc64.
>>
>> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
>> ---
>>   arch/powerpc/net/bpf_jit.h        |   7 +-
>>   arch/powerpc/net/bpf_jit_comp.c   | 104 +++++++++++++++++++++---------
>>   arch/powerpc/net/bpf_jit_comp32.c |   4 +-
>>   arch/powerpc/net/bpf_jit_comp64.c |   6 +-
>>   4 files changed, 83 insertions(+), 38 deletions(-)
>>
>> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
>> index d767e39d5645..a8b7480c4d43 100644
>> --- a/arch/powerpc/net/bpf_jit.h
>> +++ b/arch/powerpc/net/bpf_jit.h
>> @@ -168,15 +168,16 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
>>
>>   void bpf_jit_init_reg_mapping(struct codegen_context *ctx);
>>   int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
>> -int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
>> +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
>>                         u32 *addrs, int pass, bool extra_pass);
>>   void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
>>   void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
>>   void bpf_jit_realloc_regs(struct codegen_context *ctx);
>>   int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
>>
>> -int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
>> -                         int insn_idx, int jmp_off, int dst_reg);
>> +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
>> +                         struct codegen_context *ctx, int insn_idx,
>> +                         int jmp_off, int dst_reg);
>>
>>   #endif
>>
>> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
>> index d1794d9f0154..ece75c829499 100644
>> --- a/arch/powerpc/net/bpf_jit_comp.c
>> +++ b/arch/powerpc/net/bpf_jit_comp.c
>> @@ -42,10 +42,11 @@ int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg,
>>          return 0;
>>   }
>>
>> -struct powerpc64_jit_data {
>> -       struct bpf_binary_header *header;
>> +struct powerpc_jit_data {
>> +       struct bpf_binary_header *hdr;
>> +       struct bpf_binary_header *fhdr;
>>          u32 *addrs;
>> -       u8 *image;
>> +       u8 *fimage;
>>          u32 proglen;
>>          struct codegen_context ctx;
>>   };
> 
> Some comments about the f- prefix will be helpful. (Yes, I should have done a
> better job adding comments for the x86 counterpart.)
> 
>> @@ -62,15 +63,18 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>          u8 *image = NULL;
>>          u32 *code_base;
>>          u32 *addrs;
>> -       struct powerpc64_jit_data *jit_data;
>> +       struct powerpc_jit_data *jit_data;
>>          struct codegen_context cgctx;
>>          int pass;
>>          int flen;
>> -       struct bpf_binary_header *bpf_hdr;
>> +       struct bpf_binary_header *fhdr = NULL;
>> +       struct bpf_binary_header *hdr = NULL;
>>          struct bpf_prog *org_fp = fp;
>>          struct bpf_prog *tmp_fp;
>>          bool bpf_blinded = false;
>>          bool extra_pass = false;
>> +       u8 *fimage = NULL;
>> +       u32 *fcode_base;
>>          u32 extable_len;
>>          u32 fixup_len;
>>
>> @@ -100,9 +104,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>          addrs = jit_data->addrs;
>>          if (addrs) {
>>                  cgctx = jit_data->ctx;
>> -               image = jit_data->image;
>> -               bpf_hdr = jit_data->header;
>> +               fimage = jit_data->fimage;
>> +               fhdr = jit_data->fhdr;
>>                  proglen = jit_data->proglen;
>> +               hdr = jit_data->hdr;
>> +               image = (void *)hdr + ((void *)fimage - (void *)fhdr);
>>                  extra_pass = true;
>>                  goto skip_init_ctx;
>>          }
>> @@ -120,7 +126,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>          cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
>>
>>          /* Scouting faux-generate pass 0 */
>> -       if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
>> +       if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
>>                  /* We hit something illegal or unsupported. */
>>                  fp = org_fp;
>>                  goto out_addrs;
>> @@ -135,7 +141,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>           */
>>          if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
>>                  cgctx.idx = 0;
>> -               if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
>> +               if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
>>                          fp = org_fp;
>>                          goto out_addrs;
>>                  }
>> @@ -157,17 +163,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>          proglen = cgctx.idx * 4;
>>          alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
>>
>> -       bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
>> -       if (!bpf_hdr) {
>> +       fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
>> +                                             bpf_jit_fill_ill_insns);
>> +       if (!fhdr) {
>>                  fp = org_fp;
>>                  goto out_addrs;
>>          }
>>
>>          if (extable_len)
>> -               fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len;
>> +               fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;
>>
>>   skip_init_ctx:
>>          code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
>> +       fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);
>>
>>          /* Code generation passes 1-2 */
>>          for (pass = 1; pass < 3; pass++) {
>> @@ -175,8 +183,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>                  cgctx.idx = 0;
>>                  cgctx.alt_exit_addr = 0;
>>                  bpf_jit_build_prologue(code_base, &cgctx);
>> -               if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass, extra_pass)) {
>> -                       bpf_jit_binary_free(bpf_hdr);
>> +               if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass, extra_pass)) {
>> +                       bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
>> +                       bpf_jit_binary_pack_free(fhdr, hdr);
>>                          fp = org_fp;
>>                          goto out_addrs;
>>                  }
>> @@ -192,21 +201,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>                   * Note that we output the base address of the code_base
>>                   * rather than image, since opcodes are in code_base.
>>                   */
> Maybe update the comment above with fcode_base to avoid
> confusion.
> 
>> -               bpf_jit_dump(flen, proglen, pass, code_base);
>> +               bpf_jit_dump(flen, proglen, pass, fcode_base);
>>
>>   #ifdef CONFIG_PPC64_ELF_ABI_V1
>>          /* Function descriptor nastiness: Address + TOC */
>> -       ((u64 *)image)[0] = (u64)code_base;
>> +       ((u64 *)image)[0] = (u64)fcode_base;
>>          ((u64 *)image)[1] = local_paca->kernel_toc;
>>   #endif
>>
>> -       fp->bpf_func = (void *)image;
>> +       fp->bpf_func = (void *)fimage;
>>          fp->jited = 1;
>>          fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
>>
>> -       bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + bpf_hdr->size);
>>          if (!fp->is_func || extra_pass) {
>> -               bpf_jit_binary_lock_ro(bpf_hdr);
>> +               if (bpf_jit_binary_pack_finalize(fp, fhdr, hdr)) {
>> +                       fp = org_fp;
>> +                       goto out_addrs;
>> +               }
>>                  bpf_prog_fill_jited_linfo(fp, addrs);
>>   out_addrs:
>>                  kfree(addrs);
>> @@ -216,8 +227,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>                  jit_data->addrs = addrs;
>>                  jit_data->ctx = cgctx;
>>                  jit_data->proglen = proglen;
>> -               jit_data->image = image;
>> -               jit_data->header = bpf_hdr;
>> +               jit_data->fimage = fimage;
>> +               jit_data->fhdr = fhdr;
>> +               jit_data->hdr = hdr;
>>          }
>>
>>   out:
>> @@ -231,12 +243,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>    * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
>>    * this function, as this only applies to BPF_PROBE_MEM, for now.
>>    */
>> -int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
>> -                         int insn_idx, int jmp_off, int dst_reg)
>> +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
>> +                         struct codegen_context *ctx, int insn_idx, int jmp_off,
>> +                         int dst_reg)
>>   {
>>          off_t offset;
>>          unsigned long pc;
>> -       struct exception_table_entry *ex;
>> +       struct exception_table_entry *ex, *ex_entry;
>>          u32 *fixup;
>>
>>          /* Populate extable entries only in the last pass */
>> @@ -247,9 +260,16 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>>              WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
>>                  return -EINVAL;
>>
>> +       /*
>> +        * Program is firt written to image before copying to the
> s/firt/first/
> 
>> +        * final location (fimage). Accordingly, update in the image first.
>> +        * As all offsets used are relative, copying as is to the
>> +        * final location should be alright.
>> +        */
>>          pc = (unsigned long)&image[insn_idx];
>> +       ex = (void *)fp->aux->extable - (void *)fimage + (void *)image;
>>
>> -       fixup = (void *)fp->aux->extable -
>> +       fixup = (void *)ex -
>>                  (fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
>>                  (ctx->exentry_idx * BPF_FIXUP_LEN * 4);
>>
>> @@ -260,17 +280,17 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>>          fixup[BPF_FIXUP_LEN - 1] =
>>                  PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);
>>
>> -       ex = &fp->aux->extable[ctx->exentry_idx];
>> +       ex_entry = &ex[ctx->exentry_idx];
>>
>> -       offset = pc - (long)&ex->insn;
>> +       offset = pc - (long)&ex_entry->insn;
>>          if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
>>                  return -ERANGE;
>> -       ex->insn = offset;
>> +       ex_entry->insn = offset;
>>
>> -       offset = (long)fixup - (long)&ex->fixup;
>> +       offset = (long)fixup - (long)&ex_entry->fixup;
>>          if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
>>                  return -ERANGE;
>> -       ex->fixup = offset;
>> +       ex_entry->fixup = offset;
>>
>>          ctx->exentry_idx++;
>>          return 0;
>> @@ -308,3 +328,27 @@ int bpf_arch_text_invalidate(void *dst, size_t len)
>>
>>          return ret;
>>   }
>> +
>> +void bpf_jit_free(struct bpf_prog *fp)
>> +{
>> +       if (fp->jited) {
>> +               struct powerpc_jit_data *jit_data = fp->aux->jit_data;
>> +               struct bpf_binary_header *hdr;
>> +
>> +               /*
>> +                * If we fail the final pass of JIT (from jit_subprogs),
>> +                * the program may not be finalized yet. Call finalize here
>> +                * before freeing it.
>> +                */
>> +               if (jit_data) {
>> +                       bpf_jit_binary_pack_finalize(fp, jit_data->fhdr, jit_data->hdr);
> 
> I just realized x86 is the same. But I think we only need the following
> here?
> 
> bpf_arch_text_copy(&jit_data->fhdr->size, &jit_data->hdr->size,
> sizeof(jit_data->hdr->size));
> 
> Right?

Thanks for reviewing.
Better off sticking with bpf_jit_binary_pack_finalize(), probably?
Kept it that way in v3, which is now posted. Please review.

Thanks
Hari

Patch

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index d767e39d5645..a8b7480c4d43 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -168,15 +168,16 @@  static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
 
 void bpf_jit_init_reg_mapping(struct codegen_context *ctx);
 int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
-int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
 		       u32 *addrs, int pass, bool extra_pass);
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_realloc_regs(struct codegen_context *ctx);
 int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
 
-int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
-			  int insn_idx, int jmp_off, int dst_reg);
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
+			  struct codegen_context *ctx, int insn_idx,
+			  int jmp_off, int dst_reg);
 
 #endif
 
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index d1794d9f0154..ece75c829499 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -42,10 +42,11 @@  int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg,
 	return 0;
 }
 
-struct powerpc64_jit_data {
-	struct bpf_binary_header *header;
+struct powerpc_jit_data {
+	struct bpf_binary_header *hdr;
+	struct bpf_binary_header *fhdr;
 	u32 *addrs;
-	u8 *image;
+	u8 *fimage;
 	u32 proglen;
 	struct codegen_context ctx;
 };
@@ -62,15 +63,18 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	u8 *image = NULL;
 	u32 *code_base;
 	u32 *addrs;
-	struct powerpc64_jit_data *jit_data;
+	struct powerpc_jit_data *jit_data;
 	struct codegen_context cgctx;
 	int pass;
 	int flen;
-	struct bpf_binary_header *bpf_hdr;
+	struct bpf_binary_header *fhdr = NULL;
+	struct bpf_binary_header *hdr = NULL;
 	struct bpf_prog *org_fp = fp;
 	struct bpf_prog *tmp_fp;
 	bool bpf_blinded = false;
 	bool extra_pass = false;
+	u8 *fimage = NULL;
+	u32 *fcode_base;
 	u32 extable_len;
 	u32 fixup_len;
 
@@ -100,9 +104,11 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	addrs = jit_data->addrs;
 	if (addrs) {
 		cgctx = jit_data->ctx;
-		image = jit_data->image;
-		bpf_hdr = jit_data->header;
+		fimage = jit_data->fimage;
+		fhdr = jit_data->fhdr;
 		proglen = jit_data->proglen;
+		hdr = jit_data->hdr;
+		image = (void *)hdr + ((void *)fimage - (void *)fhdr);
 		extra_pass = true;
 		goto skip_init_ctx;
 	}
@@ -120,7 +126,7 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
 
 	/* Scouting faux-generate pass 0 */
-	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
+	if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
 		/* We hit something illegal or unsupported. */
 		fp = org_fp;
 		goto out_addrs;
@@ -135,7 +141,7 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	 */
 	if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
 		cgctx.idx = 0;
-		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
+		if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
 			fp = org_fp;
 			goto out_addrs;
 		}
@@ -157,17 +163,19 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	proglen = cgctx.idx * 4;
 	alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
 
-	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
-	if (!bpf_hdr) {
+	fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
+					      bpf_jit_fill_ill_insns);
+	if (!fhdr) {
 		fp = org_fp;
 		goto out_addrs;
 	}
 
 	if (extable_len)
-		fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len;
+		fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;
 
 skip_init_ctx:
 	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
+	fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);
 
 	/* Code generation passes 1-2 */
 	for (pass = 1; pass < 3; pass++) {
@@ -175,8 +183,9 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		cgctx.idx = 0;
 		cgctx.alt_exit_addr = 0;
 		bpf_jit_build_prologue(code_base, &cgctx);
-		if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass, extra_pass)) {
-			bpf_jit_binary_free(bpf_hdr);
+		if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass, extra_pass)) {
+			bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
+			bpf_jit_binary_pack_free(fhdr, hdr);
 			fp = org_fp;
 			goto out_addrs;
 		}
@@ -192,21 +201,23 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		 * Note that we output the base address of the code_base
 		 * rather than image, since opcodes are in code_base.
 		 */
-		bpf_jit_dump(flen, proglen, pass, code_base);
+		bpf_jit_dump(flen, proglen, pass, fcode_base);
 
 #ifdef CONFIG_PPC64_ELF_ABI_V1
 	/* Function descriptor nastiness: Address + TOC */
-	((u64 *)image)[0] = (u64)code_base;
+	((u64 *)image)[0] = (u64)fcode_base;
 	((u64 *)image)[1] = local_paca->kernel_toc;
 #endif
 
-	fp->bpf_func = (void *)image;
+	fp->bpf_func = (void *)fimage;
 	fp->jited = 1;
 	fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
 
-	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + bpf_hdr->size);
 	if (!fp->is_func || extra_pass) {
-		bpf_jit_binary_lock_ro(bpf_hdr);
+		if (bpf_jit_binary_pack_finalize(fp, fhdr, hdr)) {
+			fp = org_fp;
+			goto out_addrs;
+		}
 		bpf_prog_fill_jited_linfo(fp, addrs);
 out_addrs:
 		kfree(addrs);
@@ -216,8 +227,9 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		jit_data->addrs = addrs;
 		jit_data->ctx = cgctx;
 		jit_data->proglen = proglen;
-		jit_data->image = image;
-		jit_data->header = bpf_hdr;
+		jit_data->fimage = fimage;
+		jit_data->fhdr = fhdr;
+		jit_data->hdr = hdr;
 	}
 
 out:
@@ -231,12 +243,13 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
  * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
  * this function, as this only applies to BPF_PROBE_MEM, for now.
  */
-int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
-			  int insn_idx, int jmp_off, int dst_reg)
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
+			  struct codegen_context *ctx, int insn_idx, int jmp_off,
+			  int dst_reg)
 {
 	off_t offset;
 	unsigned long pc;
-	struct exception_table_entry *ex;
+	struct exception_table_entry *ex, *ex_entry;
 	u32 *fixup;
 
 	/* Populate extable entries only in the last pass */
@@ -247,9 +260,16 @@  int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
 	    WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
 		return -EINVAL;
 
+	/*
+	 * Program is firt written to image before copying to the
+	 * final location (fimage). Accordingly, update in the image first.
+	 * As all offsets used are relative, copying as is to the
+	 * final location should be alright.
+	 */
 	pc = (unsigned long)&image[insn_idx];
+	ex = (void *)fp->aux->extable - (void *)fimage + (void *)image;
 
-	fixup = (void *)fp->aux->extable -
+	fixup = (void *)ex -
 		(fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
 		(ctx->exentry_idx * BPF_FIXUP_LEN * 4);
 
@@ -260,17 +280,17 @@  int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
 	fixup[BPF_FIXUP_LEN - 1] =
 		PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);
 
-	ex = &fp->aux->extable[ctx->exentry_idx];
+	ex_entry = &ex[ctx->exentry_idx];
 
-	offset = pc - (long)&ex->insn;
+	offset = pc - (long)&ex_entry->insn;
 	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
 		return -ERANGE;
-	ex->insn = offset;
+	ex_entry->insn = offset;
 
-	offset = (long)fixup - (long)&ex->fixup;
+	offset = (long)fixup - (long)&ex_entry->fixup;
 	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
 		return -ERANGE;
-	ex->fixup = offset;
+	ex_entry->fixup = offset;
 
 	ctx->exentry_idx++;
 	return 0;
@@ -308,3 +328,27 @@  int bpf_arch_text_invalidate(void *dst, size_t len)
 
 	return ret;
 }
+
+void bpf_jit_free(struct bpf_prog *fp)
+{
+	if (fp->jited) {
+		struct powerpc_jit_data *jit_data = fp->aux->jit_data;
+		struct bpf_binary_header *hdr;
+
+		/*
+		 * If we fail the final pass of JIT (from jit_subprogs),
+		 * the program may not be finalized yet. Call finalize here
+		 * before freeing it.
+		 */
+		if (jit_data) {
+			bpf_jit_binary_pack_finalize(fp, jit_data->fhdr, jit_data->hdr);
+			kvfree(jit_data->addrs);
+			kfree(jit_data);
+		}
+		hdr = bpf_jit_binary_pack_hdr(fp);
+		bpf_jit_binary_pack_free(hdr, NULL);
+		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
+	}
+
+	bpf_prog_unlock_free(fp);
+}
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 7f91ea064c08..fb2761b54d64 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -278,7 +278,7 @@  static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
 }
 
 /* Assemble the body code between the prologue & epilogue */
-int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
 		       u32 *addrs, int pass, bool extra_pass)
 {
 	const struct bpf_insn *insn = fp->insnsi;
@@ -997,7 +997,7 @@  int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 					jmp_off += 4;
 				}
 
-				ret = bpf_add_extable_entry(fp, image, pass, ctx, insn_idx,
+				ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx, insn_idx,
 							    jmp_off, dst_reg);
 				if (ret)
 					return ret;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 8dd3cabaa83a..37a8970a7065 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -343,7 +343,7 @@  asm (
 );
 
 /* Assemble the body code between the prologue & epilogue */
-int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
 		       u32 *addrs, int pass, bool extra_pass)
 {
 	enum stf_barrier_type stf_barrier = stf_barrier_type_get();
@@ -922,8 +922,8 @@  int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 				addrs[++i] = ctx->idx * 4;
 
 			if (BPF_MODE(code) == BPF_PROBE_MEM) {
-				ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1,
-							    4, dst_reg);
+				ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx,
+							    ctx->idx - 1, 4, dst_reg);
 				if (ret)
 					return ret;
 			}