diff mbox series

[bpf-next] arm64, bpf: Use bpf_prog_pack for arm64 bpf trampoline

Message ID 20240304202803.31400-1-puranjay12@gmail.com (mailing list archive)
State Accepted
Commit d6f98243392f0f4b40b1903736d6458234dedf3b
Delegated to: BPF
Headers show
Series [bpf-next] arm64, bpf: Use bpf_prog_pack for arm64 bpf trampoline | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 8 maintainers not CCed: jolsa@kernel.org eddyz87@gmail.com yonghong.song@linux.dev john.fastabend@gmail.com will@kernel.org haoluo@google.com zlim.lnx@gmail.com sdf@google.com
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-7 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-10 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py

Commit Message

Puranjay Mohan March 4, 2024, 8:28 p.m. UTC
We used bpf_prog_pack to aggregate bpf programs into huge pages to
relieve the iTLB pressure on the system. This was merged for ARM64[1].
We can apply it to bpf trampoline as well. This would increase the
performance of fentry and struct_ops programs.

[1] https://lore.kernel.org/bpf/20240228141824.119877-1-puranjay12@gmail.com/

Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
---
 arch/arm64/net/bpf_jit_comp.c | 55 +++++++++++++++++++++++++++++------
 1 file changed, 46 insertions(+), 9 deletions(-)

Comments

Pu Lehui March 7, 2024, 1:52 p.m. UTC | #1
On 2024/3/5 4:28, Puranjay Mohan wrote:
> We used bpf_prog_pack to aggregate bpf programs into huge page to
> relieve the iTLB pressure on the system. This was merged for ARM64[1]
> We can apply it to bpf trampoline as well. This would increase the
> performance of fentry and struct_ops programs.
> 
> [1] https://lore.kernel.org/bpf/20240228141824.119877-1-puranjay12@gmail.com/
> 
> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
> ---
>   arch/arm64/net/bpf_jit_comp.c | 55 +++++++++++++++++++++++++++++------
>   1 file changed, 46 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
> index 5afc7a525eca..c5b461dda438 100644
> --- a/arch/arm64/net/bpf_jit_comp.c
> +++ b/arch/arm64/net/bpf_jit_comp.c
> @@ -2076,7 +2076,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
>   		/* store return value */
>   		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
>   		/* reserve a nop for bpf_tramp_image_put */
> -		im->ip_after_call = ctx->image + ctx->idx;
> +		im->ip_after_call = ctx->ro_image + ctx->idx;
>   		emit(A64_NOP, ctx);
>   	}
>   
> @@ -2091,7 +2091,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
>   				run_ctx_off, false);
>   
>   	if (flags & BPF_TRAMP_F_CALL_ORIG) {
> -		im->ip_epilogue = ctx->image + ctx->idx;
> +		im->ip_epilogue = ctx->ro_image + ctx->idx;
>   		emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
>   		emit_call((const u64)__bpf_tramp_exit, ctx);
>   	}
> @@ -2124,9 +2124,6 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
>   		emit(A64_RET(A64_R(10)), ctx);
>   	}
>   
> -	if (ctx->image)
> -		bpf_flush_icache(ctx->image, ctx->image + ctx->idx);
> -
>   	kfree(branches);
>   
>   	return ctx->idx;
> @@ -2169,14 +2166,43 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
>   	return ret < 0 ? ret : ret * AARCH64_INSN_SIZE;
>   }
>   
> -int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
> -				void *image_end, const struct btf_func_model *m,
> +void *arch_alloc_bpf_trampoline(unsigned int size)
> +{
> +	return bpf_prog_pack_alloc(size, jit_fill_hole);
> +}
> +
> +void arch_free_bpf_trampoline(void *image, unsigned int size)
> +{
> +	bpf_prog_pack_free(image, size);
> +}
> +
> +void arch_protect_bpf_trampoline(void *image, unsigned int size)
> +{
> +}
> +
> +void arch_unprotect_bpf_trampoline(void *image, unsigned int size)
> +{
> +}
> +
> +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
> +				void *ro_image_end, const struct btf_func_model *m,
>   				u32 flags, struct bpf_tramp_links *tlinks,
>   				void *func_addr)
>   {
>   	int ret, nregs;
> +	void *image, *tmp;
> +	u32 size = ro_image_end - ro_image;
> +
> +	/* image doesn't need to be in module memory range, so we can
> +	 * use kvmalloc.
> +	 */
> +	image = kvmalloc(size, GFP_KERNEL);
> +	if (!image)
> +		return -ENOMEM;
> +
>   	struct jit_ctx ctx = {
>   		.image = image,
> +		.ro_image = ro_image,
>   		.idx = 0,
>   	};
>   
> @@ -2185,15 +2211,26 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
>   	if (nregs > 8)
>   		return -ENOTSUPP;
>   
> -	jit_fill_hole(image, (unsigned int)(image_end - image));
> +	jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
>   	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
>   
> -	if (ret > 0 && validate_code(&ctx) < 0)
> +	if (ret > 0 && validate_code(&ctx) < 0) {
>   		ret = -EINVAL;
> +		goto out;
> +	}
>   
>   	if (ret > 0)
>   		ret *= AARCH64_INSN_SIZE;
>   
> +	tmp = bpf_arch_text_copy(ro_image, image, size);
> +	if (IS_ERR(tmp)) {
> +		ret = PTR_ERR(tmp);
> +		goto out;
> +	}
> +
> +	bpf_flush_icache(ro_image, ro_image + size);
> +out:
> +	kvfree(image);
>   	return ret;
>   }
>   

Reviewed-by: Pu Lehui <pulehui@huawei.com>
patchwork-bot+netdevbpf@kernel.org March 8, 2024, 11 p.m. UTC | #2
Hello:

This patch was applied to bpf/bpf-next.git (master)
by Alexei Starovoitov <ast@kernel.org>:

On Mon,  4 Mar 2024 20:28:03 +0000 you wrote:
> We used bpf_prog_pack to aggregate bpf programs into huge page to
> relieve the iTLB pressure on the system. This was merged for ARM64[1]
> We can apply it to bpf trampoline as well. This would increase the
> performance of fentry and struct_ops programs.
> 
> [1] https://lore.kernel.org/bpf/20240228141824.119877-1-puranjay12@gmail.com/
> 
> [...]

Here is the summary with links:
  - [bpf-next] arm64, bpf: Use bpf_prog_pack for arm64 bpf trampoline
    https://git.kernel.org/bpf/bpf-next/c/d6f98243392f

You are awesome, thank you!
diff mbox series

Patch

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 5afc7a525eca..c5b461dda438 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2076,7 +2076,7 @@  static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 		/* store return value */
 		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
 		/* reserve a nop for bpf_tramp_image_put */
-		im->ip_after_call = ctx->image + ctx->idx;
+		im->ip_after_call = ctx->ro_image + ctx->idx;
 		emit(A64_NOP, ctx);
 	}
 
@@ -2091,7 +2091,7 @@  static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 				run_ctx_off, false);
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
-		im->ip_epilogue = ctx->image + ctx->idx;
+		im->ip_epilogue = ctx->ro_image + ctx->idx;
 		emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
 		emit_call((const u64)__bpf_tramp_exit, ctx);
 	}
@@ -2124,9 +2124,6 @@  static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 		emit(A64_RET(A64_R(10)), ctx);
 	}
 
-	if (ctx->image)
-		bpf_flush_icache(ctx->image, ctx->image + ctx->idx);
-
 	kfree(branches);
 
 	return ctx->idx;
@@ -2169,14 +2166,43 @@  int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
 	return ret < 0 ? ret : ret * AARCH64_INSN_SIZE;
 }
 
-int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
-				void *image_end, const struct btf_func_model *m,
+void *arch_alloc_bpf_trampoline(unsigned int size)
+{
+	return bpf_prog_pack_alloc(size, jit_fill_hole);
+}
+
+void arch_free_bpf_trampoline(void *image, unsigned int size)
+{
+	bpf_prog_pack_free(image, size);
+}
+
+void arch_protect_bpf_trampoline(void *image, unsigned int size)
+{
+}
+
+void arch_unprotect_bpf_trampoline(void *image, unsigned int size)
+{
+}
+
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
+				void *ro_image_end, const struct btf_func_model *m,
 				u32 flags, struct bpf_tramp_links *tlinks,
 				void *func_addr)
 {
 	int ret, nregs;
+	void *image, *tmp;
+	u32 size = ro_image_end - ro_image;
+
+	/* image doesn't need to be in module memory range, so we can
+	 * use kvmalloc.
+	 */
+	image = kvmalloc(size, GFP_KERNEL);
+	if (!image)
+		return -ENOMEM;
+
 	struct jit_ctx ctx = {
 		.image = image,
+		.ro_image = ro_image,
 		.idx = 0,
 	};
 
@@ -2185,15 +2211,26 @@  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
 	if (nregs > 8)
 		return -ENOTSUPP;
 
-	jit_fill_hole(image, (unsigned int)(image_end - image));
+	jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
 	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
 
-	if (ret > 0 && validate_code(&ctx) < 0)
+	if (ret > 0 && validate_code(&ctx) < 0) {
 		ret = -EINVAL;
+		goto out;
+	}
 
 	if (ret > 0)
 		ret *= AARCH64_INSN_SIZE;
 
+	tmp = bpf_arch_text_copy(ro_image, image, size);
+	if (IS_ERR(tmp)) {
+		ret = PTR_ERR(tmp);
+		goto out;
+	}
+
+	bpf_flush_icache(ro_image, ro_image + size);
+out:
+	kvfree(image);
 	return ret;
 }