diff mbox series

[bpf-next,2/3] bpf: Keep im address consistent between dry run and real patching

Message ID 20240123103241.2282122-3-pulehui@huaweicloud.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series Use bpf_prog_pack for RV64 bpf trampoline | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success SINGLE THREAD; Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2639 this patch: 2639
netdev/build_tools success Errors and warnings before: 1 this patch: 0
netdev/cc_maintainers success CCed 0 of 0 maintainers
netdev/build_clang success Errors and warnings before: 1241 this patch: 1241
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 2704 this patch: 2704
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 1 this patch: 1
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat

Commit Message

Pu Lehui Jan. 23, 2024, 10:32 a.m. UTC
From: Pu Lehui <pulehui@huawei.com>

In __arch_prepare_bpf_trampoline, we emit instructions to store the
address of im to register and then pass it to __bpf_tramp_enter and
__bpf_tramp_exit functions. Currently we use fake im in
arch_bpf_trampoline_size for the dry run, and then allocate new im for
the real patching. This is fine for architectures that use fixed
instructions to generate addresses. However, for architectures that use
dynamic instructions to generate addresses, this may make the front and
rear images inconsistent, leading to patching overflow. We can extract
the im allocation ahead of the dry run and pass the allocated im to
arch_bpf_trampoline_size, so that we can ensure that im is consistent in
dry run and real patching.

Signed-off-by: Pu Lehui <pulehui@huawei.com>
---
 arch/arm64/net/bpf_jit_comp.c   |  7 +++---
 arch/riscv/net/bpf_jit_comp64.c |  7 +++---
 arch/s390/net/bpf_jit_comp.c    |  7 +++---
 arch/x86/net/bpf_jit_comp.c     |  7 +++---
 include/linux/bpf.h             |  4 +--
 kernel/bpf/bpf_struct_ops.c     |  2 +-
 kernel/bpf/trampoline.c         | 43 ++++++++++++++++-----------------
 7 files changed, 36 insertions(+), 41 deletions(-)

Comments

Song Liu Jan. 29, 2024, 5:58 p.m. UTC | #1
On Tue, Jan 23, 2024 at 2:32 AM Pu Lehui <pulehui@huaweicloud.com> wrote:
>
> From: Pu Lehui <pulehui@huawei.com>
>
> In __arch_prepare_bpf_trampoline, we emit instructions to store the
> address of im to register and then pass it to __bpf_tramp_enter and
> __bpf_tramp_exit functions. Currently we use fake im in
> arch_bpf_trampoline_size for the dry run, and then allocate new im for
> the real patching. This is fine for architectures that use fixed
> instructions to generate addresses. However, for architectures that use
> dynamic instructions to generate addresses, this may make the front and
> rear images inconsistent, leading to patching overflow. We can extract
> the im allocation ahead of the dry run and pass the allocated im to
> arch_bpf_trampoline_size, so that we can ensure that im is consistent in
> dry run and real patching.

IIUC, this is required because emit_imm() for riscv may generate variable-
size instructions (depending on the value of im). I wonder whether we can fix
this by simply setting a special value for the fake im in
arch_bpf_trampoline_size(), so that emit_imm() always emits the largest
possible instruction sequence for the fake im.

>
> Signed-off-by: Pu Lehui <pulehui@huawei.com>
> ---
[...]
>
>  static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
> @@ -432,23 +425,27 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
>                 tr->flags |= BPF_TRAMP_F_ORIG_STACK;
>  #endif
>
> -       size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
> +       im = kzalloc(sizeof(*im), GFP_KERNEL);
> +       if (!im) {
> +               err = -ENOMEM;
> +               goto out;
> +       }
> +
> +       size = arch_bpf_trampoline_size(im, &tr->func.model, tr->flags,
>                                         tlinks, tr->func.addr);
>         if (size < 0) {
>                 err = size;
> -               goto out;
> +               goto out_free_im;
>         }
>
>         if (size > PAGE_SIZE) {
>                 err = -E2BIG;
> -               goto out;
> +               goto out_free_im;
>         }
>
> -       im = bpf_tramp_image_alloc(tr->key, size);
> -       if (IS_ERR(im)) {
> -               err = PTR_ERR(im);
> -               goto out;
> -       }
> +       err = bpf_tramp_image_alloc(im, tr->key, size);
> +       if (err < 0)
> +               goto out_free_im;

I feel this change just makes bpf_trampoline_update() even
more confusing.

>
>         err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
>                                           &tr->func.model, tr->flags, tlinks,
> @@ -496,6 +493,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
>
>  out_free:
>         bpf_tramp_image_free(im);
> +out_free_im:
> +       kfree_rcu(im, rcu);

If we goto out_free above, we will call kfree_rcu(im, rcu)
twice, right? Once in bpf_tramp_image_free(), and again
here.

Thanks,
Song

[...]
Pu Lehui Jan. 30, 2024, 3:19 a.m. UTC | #2
On 2024/1/30 1:58, Song Liu wrote:
> On Tue, Jan 23, 2024 at 2:32 AM Pu Lehui <pulehui@huaweicloud.com> wrote:
>>
>> From: Pu Lehui <pulehui@huawei.com>
>>
>> In __arch_prepare_bpf_trampoline, we emit instructions to store the
>> address of im to register and then pass it to __bpf_tramp_enter and
>> __bpf_tramp_exit functions. Currently we use fake im in
>> arch_bpf_trampoline_size for the dry run, and then allocate new im for
>> the real patching. This is fine for architectures that use fixed
>> instructions to generate addresses. However, for architectures that use
>> dynamic instructions to generate addresses, this may make the front and
>> rear images inconsistent, leading to patching overflow. We can extract
>> the im allocation ahead of the dry run and pass the allocated im to
>> arch_bpf_trampoline_size, so that we can ensure that im is consistent in
>> dry run and real patching.
> 
> IIUC, this is required because emit_imm() for riscv may generate variable
> size instructions (depends on the value of im). I wonder we can fix this by
> simply set a special value for fake im in arch_bpf_trampoline_size() to
> so that emit_imm() always gives biggest value for the fake im.
> 

Hi Song,

Thanks for your review. Yes, I had the same idea as you at first: emit the 
biggest possible number of instructions when ctx->insns is NULL, but this may 
lead to memory waste. So I tried moving the im allocation out instead, to get 
a fixed im address; maybe other architectures require it too. If you feel it 
is inappropriate, I will withdraw it.

>>
>> Signed-off-by: Pu Lehui <pulehui@huawei.com>
>> ---
> [...]
>>
>>   static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
>> @@ -432,23 +425,27 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
>>                  tr->flags |= BPF_TRAMP_F_ORIG_STACK;
>>   #endif
>>
>> -       size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
>> +       im = kzalloc(sizeof(*im), GFP_KERNEL);
>> +       if (!im) {
>> +               err = -ENOMEM;
>> +               goto out;
>> +       }
>> +
>> +       size = arch_bpf_trampoline_size(im, &tr->func.model, tr->flags,
>>                                          tlinks, tr->func.addr);
>>          if (size < 0) {
>>                  err = size;
>> -               goto out;
>> +               goto out_free_im;
>>          }
>>
>>          if (size > PAGE_SIZE) {
>>                  err = -E2BIG;
>> -               goto out;
>> +               goto out_free_im;
>>          }
>>
>> -       im = bpf_tramp_image_alloc(tr->key, size);
>> -       if (IS_ERR(im)) {
>> -               err = PTR_ERR(im);
>> -               goto out;
>> -       }
>> +       err = bpf_tramp_image_alloc(im, tr->key, size);
>> +       if (err < 0)
>> +               goto out_free_im;
> 
> I feel this change just makes bpf_trampoline_update() even
> more confusing.
> 
>>
>>          err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
>>                                            &tr->func.model, tr->flags, tlinks,
>> @@ -496,6 +493,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
>>
>>   out_free:
>>          bpf_tramp_image_free(im);
>> +out_free_im:
>> +       kfree_rcu(im, rcu);
> 
> If we goto out_free above, we will call kfree_rcu(im, rcu)
> twice, right? Once in bpf_tramp_image_free(), and again
> here.
> 

Oops, sorry, forgot to remove kfree_rcu in bpf_tramp_image_free in this 
version.

> Thanks,
> Song
> 
> [...]
>
diff mbox series

Patch

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 8955da5c47cf..fad760f14a96 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2041,14 +2041,13 @@  static int btf_func_model_nregs(const struct btf_func_model *m)
 	return nregs;
 }
 
-int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
-			     struct bpf_tramp_links *tlinks, void *func_addr)
+int arch_bpf_trampoline_size(struct bpf_tramp_image *im, const struct btf_func_model *m,
+			     u32 flags, struct bpf_tramp_links *tlinks, void *func_addr)
 {
 	struct jit_ctx ctx = {
 		.image = NULL,
 		.idx = 0,
 	};
-	struct bpf_tramp_image im;
 	int nregs, ret;
 
 	nregs = btf_func_model_nregs(m);
@@ -2056,7 +2055,7 @@  int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
 	if (nregs > 8)
 		return -ENOTSUPP;
 
-	ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags);
+	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
 	if (ret < 0)
 		return ret;
 
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 719a97e7edb2..5c4e0ac389d0 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -1030,17 +1030,16 @@  static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 	return ret;
 }
 
-int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
-			     struct bpf_tramp_links *tlinks, void *func_addr)
+int arch_bpf_trampoline_size(struct bpf_tramp_image *im, const struct btf_func_model *m,
+			     u32 flags, struct bpf_tramp_links *tlinks, void *func_addr)
 {
-	struct bpf_tramp_image im;
 	struct rv_jit_context ctx;
 	int ret;
 
 	ctx.ninsns = 0;
 	ctx.insns = NULL;
 	ctx.ro_insns = NULL;
-	ret = __arch_prepare_bpf_trampoline(&im, m, tlinks, func_addr, flags, &ctx);
+	ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
 
 	return ret < 0 ? ret : ninsns_rvoff(ctx.ninsns);
 }
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index b418333bb086..adf289eee6cd 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -2638,16 +2638,15 @@  static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 	return 0;
 }
 
-int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
-			     struct bpf_tramp_links *tlinks, void *orig_call)
+int arch_bpf_trampoline_size(struct bpf_tramp_image *im, const struct btf_func_model *m,
+			     u32 flags, struct bpf_tramp_links *tlinks, void *orig_call)
 {
-	struct bpf_tramp_image im;
 	struct bpf_tramp_jit tjit;
 	int ret;
 
 	memset(&tjit, 0, sizeof(tjit));
 
-	ret = __arch_prepare_bpf_trampoline(&im, &tjit, m, flags,
+	ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
 					    tlinks, orig_call);
 
 	return ret < 0 ? ret : tjit.common.prg;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e1390d1e331b..fdef44913643 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -2817,10 +2817,9 @@  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	return ret;
 }
 
-int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
-			     struct bpf_tramp_links *tlinks, void *func_addr)
+int arch_bpf_trampoline_size(struct bpf_tramp_image *im, const struct btf_func_model *m,
+			     u32 flags, struct bpf_tramp_links *tlinks, void *func_addr)
 {
-	struct bpf_tramp_image im;
 	void *image;
 	int ret;
 
@@ -2835,7 +2834,7 @@  int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
 	if (!image)
 		return -ENOMEM;
 
-	ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
+	ret = __arch_prepare_bpf_trampoline(im, image, image + PAGE_SIZE, image,
 					    m, flags, tlinks, func_addr);
 	bpf_jit_free_exec(image);
 	return ret;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 377857b232c6..d3a486e12b17 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1114,8 +1114,8 @@  void *arch_alloc_bpf_trampoline(unsigned int size);
 void arch_free_bpf_trampoline(void *image, unsigned int size);
 void arch_protect_bpf_trampoline(void *image, unsigned int size);
 void arch_unprotect_bpf_trampoline(void *image, unsigned int size);
-int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
-			     struct bpf_tramp_links *tlinks, void *func_addr);
+int arch_bpf_trampoline_size(struct bpf_tramp_image *im, const struct btf_func_model *m,
+			     u32 flags, struct bpf_tramp_links *tlinks, void *func_addr);
 
 u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
 					     struct bpf_tramp_run_ctx *run_ctx);
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index e2e1bf3c69a3..8b3c6cc7ea94 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -363,7 +363,7 @@  int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
 	if (model->ret_size > 0)
 		flags |= BPF_TRAMP_F_RET_FENTRY_RET;
 
-	size = arch_bpf_trampoline_size(model, flags, tlinks, NULL);
+	size = arch_bpf_trampoline_size(NULL, model, flags, tlinks, NULL);
 	if (size < 0)
 		return size;
 	if (size > (unsigned long)image_end - (unsigned long)image)
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index d382f5ebe06c..25621d97f3ca 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -349,20 +349,15 @@  static void bpf_tramp_image_put(struct bpf_tramp_image *im)
 	call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
 }
 
-static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
+static int bpf_tramp_image_alloc(struct bpf_tramp_image *im, u64 key, int size)
 {
-	struct bpf_tramp_image *im;
 	struct bpf_ksym *ksym;
 	void *image;
-	int err = -ENOMEM;
-
-	im = kzalloc(sizeof(*im), GFP_KERNEL);
-	if (!im)
-		goto out;
+	int err;
 
 	err = bpf_jit_charge_modmem(size);
 	if (err)
-		goto out_free_im;
+		goto out;
 	im->size = size;
 
 	err = -ENOMEM;
@@ -378,16 +373,14 @@  static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
 	INIT_LIST_HEAD_RCU(&ksym->lnode);
 	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key);
 	bpf_image_ksym_add(image, size, ksym);
-	return im;
+	return 0;
 
 out_free_image:
 	arch_free_bpf_trampoline(im->image, im->size);
 out_uncharge:
 	bpf_jit_uncharge_modmem(size);
-out_free_im:
-	kfree(im);
 out:
-	return ERR_PTR(err);
+	return err;
 }
 
 static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
@@ -432,23 +425,27 @@  static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
 		tr->flags |= BPF_TRAMP_F_ORIG_STACK;
 #endif
 
-	size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
+	im = kzalloc(sizeof(*im), GFP_KERNEL);
+	if (!im) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	size = arch_bpf_trampoline_size(im, &tr->func.model, tr->flags,
 					tlinks, tr->func.addr);
 	if (size < 0) {
 		err = size;
-		goto out;
+		goto out_free_im;
 	}
 
 	if (size > PAGE_SIZE) {
 		err = -E2BIG;
-		goto out;
+		goto out_free_im;
 	}
 
-	im = bpf_tramp_image_alloc(tr->key, size);
-	if (IS_ERR(im)) {
-		err = PTR_ERR(im);
-		goto out;
-	}
+	err = bpf_tramp_image_alloc(im, tr->key, size);
+	if (err < 0)
+		goto out_free_im;
 
 	err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
 					  &tr->func.model, tr->flags, tlinks,
@@ -496,6 +493,8 @@  static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
 
 out_free:
 	bpf_tramp_image_free(im);
+out_free_im:
+	kfree_rcu(im, rcu);
 	goto out;
 }
 
@@ -1085,8 +1084,8 @@  void __weak arch_unprotect_bpf_trampoline(void *image, unsigned int size)
 	set_memory_rw((long)image, 1);
 }
 
-int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
-				    struct bpf_tramp_links *tlinks, void *func_addr)
+int __weak arch_bpf_trampoline_size(struct bpf_tramp_image *im, const struct btf_func_model *m,
+				    u32 flags, struct bpf_tramp_links *tlinks, void *func_addr)
 {
 	return -ENOTSUPP;
 }