diff mbox series

[bpf-next,2/3] bpf: Keep im address consistent between dry run and real patching

Message ID 20240123103241.2282122-3-pulehui@huaweicloud.com (mailing list archive)
State Handled Elsewhere
Headers show
Series Use bpf_prog_pack for RV64 bpf trampoline | expand

Checks

Context Check Description
conchuod/vmtest-for-next-PR success PR summary
conchuod/patch-2-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh
conchuod/patch-2-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh
conchuod/patch-2-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh
conchuod/patch-2-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh
conchuod/patch-2-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh
conchuod/patch-2-test-6 success .github/scripts/patches/tests/checkpatch.sh
conchuod/patch-2-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh
conchuod/patch-2-test-8 success .github/scripts/patches/tests/header_inline.sh
conchuod/patch-2-test-9 success .github/scripts/patches/tests/kdoc.sh
conchuod/patch-2-test-10 success .github/scripts/patches/tests/module_param.sh
conchuod/patch-2-test-11 success .github/scripts/patches/tests/verify_fixes.sh
conchuod/patch-2-test-12 success .github/scripts/patches/tests/verify_signedoff.sh

Commit Message

Pu Lehui Jan. 23, 2024, 10:32 a.m. UTC
From: Pu Lehui <pulehui@huawei.com>

In __arch_prepare_bpf_trampoline, we emit instructions to store the
address of im to register and then pass it to __bpf_tramp_enter and
__bpf_tramp_exit functions. Currently we use fake im in
arch_bpf_trampoline_size for the dry run, and then allocate new im for
the real patching. This is fine for architectures that use fixed
instructions to generate addresses. However, for architectures that use
dynamic instructions to generate addresses, this may make the front and
rear images inconsistent, leading to patching overflow. We can extract
the im allocation ahead of the dry run and pass the allocated im to
arch_bpf_trampoline_size, so that we can ensure that im is consistent in
dry run and real patching.

Signed-off-by: Pu Lehui <pulehui@huawei.com>
---
 arch/arm64/net/bpf_jit_comp.c   |  7 +++---
 arch/riscv/net/bpf_jit_comp64.c |  7 +++---
 arch/s390/net/bpf_jit_comp.c    |  7 +++---
 arch/x86/net/bpf_jit_comp.c     |  7 +++---
 include/linux/bpf.h             |  4 +--
 kernel/bpf/bpf_struct_ops.c     |  2 +-
 kernel/bpf/trampoline.c         | 43 ++++++++++++++++-----------------
 7 files changed, 36 insertions(+), 41 deletions(-)

Comments

Song Liu Jan. 29, 2024, 5:58 p.m. UTC | #1
On Tue, Jan 23, 2024 at 2:32 AM Pu Lehui <pulehui@huaweicloud.com> wrote:
>
> From: Pu Lehui <pulehui@huawei.com>
>
> In __arch_prepare_bpf_trampoline, we emit instructions to store the
> address of im to register and then pass it to __bpf_tramp_enter and
> __bpf_tramp_exit functions. Currently we use fake im in
> arch_bpf_trampoline_size for the dry run, and then allocate new im for
> the real patching. This is fine for architectures that use fixed
> instructions to generate addresses. However, for architectures that use
> dynamic instructions to generate addresses, this may make the front and
> rear images inconsistent, leading to patching overflow. We can extract
> the im allocation ahead of the dry run and pass the allocated im to
> arch_bpf_trampoline_size, so that we can ensure that im is consistent in
> dry run and real patching.

IIUC, this is required because emit_imm() for riscv may generate variable
size instructions (depends on the value of im). I wonder we can fix this by
simply set a special value for fake im in arch_bpf_trampoline_size() to
so that emit_imm() always gives biggest value for the fake im.

>
> Signed-off-by: Pu Lehui <pulehui@huawei.com>
> ---
[...]
>
>  static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
> @@ -432,23 +425,27 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
>                 tr->flags |= BPF_TRAMP_F_ORIG_STACK;
>  #endif
>
> -       size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
> +       im = kzalloc(sizeof(*im), GFP_KERNEL);
> +       if (!im) {
> +               err = -ENOMEM;
> +               goto out;
> +       }
> +
> +       size = arch_bpf_trampoline_size(im, &tr->func.model, tr->flags,
>                                         tlinks, tr->func.addr);
>         if (size < 0) {
>                 err = size;
> -               goto out;
> +               goto out_free_im;
>         }
>
>         if (size > PAGE_SIZE) {
>                 err = -E2BIG;
> -               goto out;
> +               goto out_free_im;
>         }
>
> -       im = bpf_tramp_image_alloc(tr->key, size);
> -       if (IS_ERR(im)) {
> -               err = PTR_ERR(im);
> -               goto out;
> -       }
> +       err = bpf_tramp_image_alloc(im, tr->key, size);
> +       if (err < 0)
> +               goto out_free_im;

I feel this change just makes bpf_trampoline_update() even
more confusing.

>
>         err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
>                                           &tr->func.model, tr->flags, tlinks,
> @@ -496,6 +493,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
>
>  out_free:
>         bpf_tramp_image_free(im);
> +out_free_im:
> +       kfree_rcu(im, rcu);

If we goto out_free above, we will call kfree_rcu(im, rcu)
twice, right? Once in bpf_tramp_image_free(), and again
here.

Thanks,
Song

[...]
Pu Lehui Jan. 30, 2024, 3:19 a.m. UTC | #2
On 2024/1/30 1:58, Song Liu wrote:
> On Tue, Jan 23, 2024 at 2:32 AM Pu Lehui <pulehui@huaweicloud.com> wrote:
>>
>> From: Pu Lehui <pulehui@huawei.com>
>>
>> In __arch_prepare_bpf_trampoline, we emit instructions to store the
>> address of im to register and then pass it to __bpf_tramp_enter and
>> __bpf_tramp_exit functions. Currently we use fake im in
>> arch_bpf_trampoline_size for the dry run, and then allocate new im for
>> the real patching. This is fine for architectures that use fixed
>> instructions to generate addresses. However, for architectures that use
>> dynamic instructions to generate addresses, this may make the front and
>> rear images inconsistent, leading to patching overflow. We can extract
>> the im allocation ahead of the dry run and pass the allocated im to
>> arch_bpf_trampoline_size, so that we can ensure that im is consistent in
>> dry run and real patching.
> 
> IIUC, this is required because emit_imm() for riscv may generate variable
> size instructions (depends on the value of im). I wonder we can fix this by
> simply set a special value for fake im in arch_bpf_trampoline_size() to
> so that emit_imm() always gives biggest value for the fake im.
> 

Hi Song,

Thanks for your review. Yes, I had the same idea as you at first, emit 
biggist count instructions when ctx->insns is NULL, but this may lead to 
memory waste. So try moving out of IM to get a fixed IM address, maybe 
other architectures require it too. If you feel it is inappropriate, I 
will withdraw it.

>>
>> Signed-off-by: Pu Lehui <pulehui@huawei.com>
>> ---
> [...]
>>
>>   static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
>> @@ -432,23 +425,27 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
>>                  tr->flags |= BPF_TRAMP_F_ORIG_STACK;
>>   #endif
>>
>> -       size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
>> +       im = kzalloc(sizeof(*im), GFP_KERNEL);
>> +       if (!im) {
>> +               err = -ENOMEM;
>> +               goto out;
>> +       }
>> +
>> +       size = arch_bpf_trampoline_size(im, &tr->func.model, tr->flags,
>>                                          tlinks, tr->func.addr);
>>          if (size < 0) {
>>                  err = size;
>> -               goto out;
>> +               goto out_free_im;
>>          }
>>
>>          if (size > PAGE_SIZE) {
>>                  err = -E2BIG;
>> -               goto out;
>> +               goto out_free_im;
>>          }
>>
>> -       im = bpf_tramp_image_alloc(tr->key, size);
>> -       if (IS_ERR(im)) {
>> -               err = PTR_ERR(im);
>> -               goto out;
>> -       }
>> +       err = bpf_tramp_image_alloc(im, tr->key, size);
>> +       if (err < 0)
>> +               goto out_free_im;
> 
> I feel this change just makes bpf_trampoline_update() even
> more confusing.
> 
>>
>>          err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
>>                                            &tr->func.model, tr->flags, tlinks,
>> @@ -496,6 +493,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
>>
>>   out_free:
>>          bpf_tramp_image_free(im);
>> +out_free_im:
>> +       kfree_rcu(im, rcu);
> 
> If we goto out_free above, we will call kfree_rcu(im, rcu)
> twice, right? Once in bpf_tramp_image_free(), and again
> here.
> 

Oops, sorry, forgot to remove kfree_rcu in bpf_tramp_image_free in this 
version.

> Thanks,
> Song
> 
> [...]
>
diff mbox series

Patch

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 8955da5c47cf..fad760f14a96 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2041,14 +2041,13 @@  static int btf_func_model_nregs(const struct btf_func_model *m)
 	return nregs;
 }
 
-int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
-			     struct bpf_tramp_links *tlinks, void *func_addr)
+int arch_bpf_trampoline_size(struct bpf_tramp_image *im, const struct btf_func_model *m,
+			     u32 flags, struct bpf_tramp_links *tlinks, void *func_addr)
 {
 	struct jit_ctx ctx = {
 		.image = NULL,
 		.idx = 0,
 	};
-	struct bpf_tramp_image im;
 	int nregs, ret;
 
 	nregs = btf_func_model_nregs(m);
@@ -2056,7 +2055,7 @@  int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
 	if (nregs > 8)
 		return -ENOTSUPP;
 
-	ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags);
+	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
 	if (ret < 0)
 		return ret;
 
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 719a97e7edb2..5c4e0ac389d0 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -1030,17 +1030,16 @@  static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 	return ret;
 }
 
-int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
-			     struct bpf_tramp_links *tlinks, void *func_addr)
+int arch_bpf_trampoline_size(struct bpf_tramp_image *im, const struct btf_func_model *m,
+			     u32 flags, struct bpf_tramp_links *tlinks, void *func_addr)
 {
-	struct bpf_tramp_image im;
 	struct rv_jit_context ctx;
 	int ret;
 
 	ctx.ninsns = 0;
 	ctx.insns = NULL;
 	ctx.ro_insns = NULL;
-	ret = __arch_prepare_bpf_trampoline(&im, m, tlinks, func_addr, flags, &ctx);
+	ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
 
 	return ret < 0 ? ret : ninsns_rvoff(ctx.ninsns);
 }
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index b418333bb086..adf289eee6cd 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -2638,16 +2638,15 @@  static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 	return 0;
 }
 
-int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
-			     struct bpf_tramp_links *tlinks, void *orig_call)
+int arch_bpf_trampoline_size(struct bpf_tramp_image *im, const struct btf_func_model *m,
+			     u32 flags, struct bpf_tramp_links *tlinks, void *orig_call)
 {
-	struct bpf_tramp_image im;
 	struct bpf_tramp_jit tjit;
 	int ret;
 
 	memset(&tjit, 0, sizeof(tjit));
 
-	ret = __arch_prepare_bpf_trampoline(&im, &tjit, m, flags,
+	ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
 					    tlinks, orig_call);
 
 	return ret < 0 ? ret : tjit.common.prg;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e1390d1e331b..fdef44913643 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -2817,10 +2817,9 @@  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	return ret;
 }
 
-int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
-			     struct bpf_tramp_links *tlinks, void *func_addr)
+int arch_bpf_trampoline_size(struct bpf_tramp_image *im, const struct btf_func_model *m,
+			     u32 flags, struct bpf_tramp_links *tlinks, void *func_addr)
 {
-	struct bpf_tramp_image im;
 	void *image;
 	int ret;
 
@@ -2835,7 +2834,7 @@  int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
 	if (!image)
 		return -ENOMEM;
 
-	ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
+	ret = __arch_prepare_bpf_trampoline(im, image, image + PAGE_SIZE, image,
 					    m, flags, tlinks, func_addr);
 	bpf_jit_free_exec(image);
 	return ret;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 377857b232c6..d3a486e12b17 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1114,8 +1114,8 @@  void *arch_alloc_bpf_trampoline(unsigned int size);
 void arch_free_bpf_trampoline(void *image, unsigned int size);
 void arch_protect_bpf_trampoline(void *image, unsigned int size);
 void arch_unprotect_bpf_trampoline(void *image, unsigned int size);
-int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
-			     struct bpf_tramp_links *tlinks, void *func_addr);
+int arch_bpf_trampoline_size(struct bpf_tramp_image *im, const struct btf_func_model *m,
+			     u32 flags, struct bpf_tramp_links *tlinks, void *func_addr);
 
 u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
 					     struct bpf_tramp_run_ctx *run_ctx);
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index e2e1bf3c69a3..8b3c6cc7ea94 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -363,7 +363,7 @@  int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
 	if (model->ret_size > 0)
 		flags |= BPF_TRAMP_F_RET_FENTRY_RET;
 
-	size = arch_bpf_trampoline_size(model, flags, tlinks, NULL);
+	size = arch_bpf_trampoline_size(NULL, model, flags, tlinks, NULL);
 	if (size < 0)
 		return size;
 	if (size > (unsigned long)image_end - (unsigned long)image)
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index d382f5ebe06c..25621d97f3ca 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -349,20 +349,15 @@  static void bpf_tramp_image_put(struct bpf_tramp_image *im)
 	call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
 }
 
-static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
+static int bpf_tramp_image_alloc(struct bpf_tramp_image *im, u64 key, int size)
 {
-	struct bpf_tramp_image *im;
 	struct bpf_ksym *ksym;
 	void *image;
-	int err = -ENOMEM;
-
-	im = kzalloc(sizeof(*im), GFP_KERNEL);
-	if (!im)
-		goto out;
+	int err;
 
 	err = bpf_jit_charge_modmem(size);
 	if (err)
-		goto out_free_im;
+		goto out;
 	im->size = size;
 
 	err = -ENOMEM;
@@ -378,16 +373,14 @@  static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
 	INIT_LIST_HEAD_RCU(&ksym->lnode);
 	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key);
 	bpf_image_ksym_add(image, size, ksym);
-	return im;
+	return 0;
 
 out_free_image:
 	arch_free_bpf_trampoline(im->image, im->size);
 out_uncharge:
 	bpf_jit_uncharge_modmem(size);
-out_free_im:
-	kfree(im);
 out:
-	return ERR_PTR(err);
+	return err;
 }
 
 static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
@@ -432,23 +425,27 @@  static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
 		tr->flags |= BPF_TRAMP_F_ORIG_STACK;
 #endif
 
-	size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
+	im = kzalloc(sizeof(*im), GFP_KERNEL);
+	if (!im) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	size = arch_bpf_trampoline_size(im, &tr->func.model, tr->flags,
 					tlinks, tr->func.addr);
 	if (size < 0) {
 		err = size;
-		goto out;
+		goto out_free_im;
 	}
 
 	if (size > PAGE_SIZE) {
 		err = -E2BIG;
-		goto out;
+		goto out_free_im;
 	}
 
-	im = bpf_tramp_image_alloc(tr->key, size);
-	if (IS_ERR(im)) {
-		err = PTR_ERR(im);
-		goto out;
-	}
+	err = bpf_tramp_image_alloc(im, tr->key, size);
+	if (err < 0)
+		goto out_free_im;
 
 	err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
 					  &tr->func.model, tr->flags, tlinks,
@@ -496,6 +493,8 @@  static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
 
 out_free:
 	bpf_tramp_image_free(im);
+out_free_im:
+	kfree_rcu(im, rcu);
 	goto out;
 }
 
@@ -1085,8 +1084,8 @@  void __weak arch_unprotect_bpf_trampoline(void *image, unsigned int size)
 	set_memory_rw((long)image, 1);
 }
 
-int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
-				    struct bpf_tramp_links *tlinks, void *func_addr)
+int __weak arch_bpf_trampoline_size(struct bpf_tramp_image *im, const struct btf_func_model *m,
+				    u32 flags, struct bpf_tramp_links *tlinks, void *func_addr)
 {
 	return -ENOTSUPP;
 }