diff mbox series

[bpf-next,v1,4/4] riscv, bpf: Add bpf trampoline support for RV64

Message ID 20230215135205.1411105-5-pulehui@huaweicloud.com (mailing list archive)
State Accepted
Delegated to: BPF
Headers show
Series Support bpf trampoline for RV64 | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 2 maintainers not CCed: aou@eecs.berkeley.edu paul.walmsley@sifive.com
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP WARNING: kfree(NULL) is safe and this check is probably not required WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_progs_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-PR fail merge-conflict
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-36 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for test_verifier on x86_64 with llvm-16

Commit Message

Pu Lehui Feb. 15, 2023, 1:52 p.m. UTC
From: Pu Lehui <pulehui@huawei.com>

BPF trampoline is the critical infrastructure of the bpf
subsystem, acting as a mediator between kernel functions
and BPF programs. Numerous important features, such as
using ebpf program for zero overhead kernel introspection,
rely on this key component. We can't wait to support bpf
trampoline on RV64. The related tests have passed, as well
as the test_verifier with no new failure ceses.

Signed-off-by: Pu Lehui <pulehui@huawei.com>
---
 arch/riscv/net/bpf_jit_comp64.c | 319 ++++++++++++++++++++++++++++++++
 1 file changed, 319 insertions(+)
diff mbox series

Patch

diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index b6b9bbcc977a..ef79c54a8eb2 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -695,6 +695,325 @@  int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
 	return ret;
 }
 
+static void store_args(int nregs, int args_off, struct rv_jit_context *ctx)
+{
+	int i;
+
+	for (i = 0; i < nregs; i++) {
+		emit_sd(RV_REG_FP, -args_off, RV_REG_A0 + i, ctx);
+		args_off -= 8;
+	}
+}
+
+static void restore_args(int nregs, int args_off, struct rv_jit_context *ctx)
+{
+	int i;
+
+	for (i = 0; i < nregs; i++) {
+		emit_ld(RV_REG_A0 + i, -args_off, RV_REG_FP, ctx);
+		args_off -= 8;
+	}
+}
+
+static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_off,
+			   int run_ctx_off, bool save_ret, struct rv_jit_context *ctx)
+{
+	int ret, branch_off;
+	struct bpf_prog *p = l->link.prog;
+	int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
+
+	if (l->cookie) {
+		emit_imm(RV_REG_T1, l->cookie, ctx);
+		emit_sd(RV_REG_FP, -run_ctx_off + cookie_off, RV_REG_T1, ctx);
+	} else {
+		emit_sd(RV_REG_FP, -run_ctx_off + cookie_off, RV_REG_ZERO, ctx);
+	}
+
+	/* arg1: prog */
+	emit_imm(RV_REG_A0, (const s64)p, ctx);
+	/* arg2: &run_ctx */
+	emit_addi(RV_REG_A1, RV_REG_FP, -run_ctx_off, ctx);
+	ret = emit_call((const u64)bpf_trampoline_enter(p), true, ctx);
+	if (ret)
+		return ret;
+
+	/* if (__bpf_prog_enter(prog) == 0)
+	 *	goto skip_exec_of_prog;
+	 */
+	branch_off = ctx->ninsns;
+	/* nop reserved for conditional jump */
+	emit(rv_nop(), ctx);
+
+	/* store prog start time */
+	emit_mv(RV_REG_S1, RV_REG_A0, ctx);
+
+	/* arg1: &args_off */
+	emit_addi(RV_REG_A0, RV_REG_FP, -args_off, ctx);
+	if (!p->jited)
+		/* arg2: progs[i]->insnsi for interpreter */
+		emit_imm(RV_REG_A1, (const s64)p->insnsi, ctx);
+	ret = emit_call((const u64)p->bpf_func, true, ctx);
+	if (ret)
+		return ret;
+
+	if (save_ret)
+		emit_sd(RV_REG_FP, -retval_off, regmap[BPF_REG_0], ctx);
+
+	/* update branch with beqz */
+	if (ctx->insns) {
+		int offset = ninsns_rvoff(ctx->ninsns - branch_off);
+		u32 insn = rv_beq(RV_REG_A0, RV_REG_ZERO, offset >> 1);
+		*(u32 *)(ctx->insns + branch_off) = insn;
+	}
+
+	/* arg1: prog */
+	emit_imm(RV_REG_A0, (const s64)p, ctx);
+	/* arg2: prog start time */
+	emit_mv(RV_REG_A1, RV_REG_S1, ctx);
+	/* arg3: &run_ctx */
+	emit_addi(RV_REG_A2, RV_REG_FP, -run_ctx_off, ctx);
+	ret = emit_call((const u64)bpf_trampoline_exit(p), true, ctx);
+
+	return ret;
+}
+
+static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
+					 const struct btf_func_model *m,
+					 struct bpf_tramp_links *tlinks,
+					 void *func_addr, u32 flags,
+					 struct rv_jit_context *ctx)
+{
+	int i, ret, offset;
+	int *branches_off = NULL;
+	int stack_size = 0, nregs = m->nr_args;
+	int retaddr_off, fp_off, retval_off, args_off;
+	int nregs_off, ip_off, run_ctx_off, sreg_off;
+	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
+	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+	void *orig_call = func_addr;
+	bool save_ret;
+	u32 insn;
+
+	/* Generated trampoline stack layout:
+	 *
+	 * FP - 8	    [ RA of parent func	] return address of parent
+	 *					  function
+	 * FP - retaddr_off [ RA of traced func	] return address of traced
+	 *					  function
+	 * FP - fp_off	    [ FP of parent func ]
+	 *
+	 * FP - retval_off  [ return value      ] BPF_TRAMP_F_CALL_ORIG or
+	 *					  BPF_TRAMP_F_RET_FENTRY_RET
+	 *                  [ argN              ]
+	 *                  [ ...               ]
+	 * FP - args_off    [ arg1              ]
+	 *
+	 * FP - nregs_off   [ regs count        ]
+	 *
+	 * FP - ip_off      [ traced func	] BPF_TRAMP_F_IP_ARG
+	 *
+	 * FP - run_ctx_off [ bpf_tramp_run_ctx ]
+	 *
+	 * FP - sreg_off    [ callee saved reg	]
+	 *
+	 *		    [ pads              ] pads for 16 bytes alignment
+	 */
+
+	if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
+		return -ENOTSUPP;
+
+	/* extra regiters for struct arguments */
+	for (i = 0; i < m->nr_args; i++)
+		if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
+			nregs += round_up(m->arg_size[i], 8) / 8 - 1;
+
+	/* 8 arguments passed by registers */
+	if (nregs > 8)
+		return -ENOTSUPP;
+
+	/* room for parent function return address */
+	stack_size += 8;
+
+	stack_size += 8;
+	retaddr_off = stack_size;
+
+	stack_size += 8;
+	fp_off = stack_size;
+
+	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
+	if (save_ret) {
+		stack_size += 8;
+		retval_off = stack_size;
+	}
+
+	stack_size += nregs * 8;
+	args_off = stack_size;
+
+	stack_size += 8;
+	nregs_off = stack_size;
+
+	if (flags & BPF_TRAMP_F_IP_ARG) {
+		stack_size += 8;
+		ip_off = stack_size;
+	}
+
+	stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
+	run_ctx_off = stack_size;
+
+	stack_size += 8;
+	sreg_off = stack_size;
+
+	stack_size = round_up(stack_size, 16);
+
+	emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx);
+
+	emit_sd(RV_REG_SP, stack_size - retaddr_off, RV_REG_RA, ctx);
+	emit_sd(RV_REG_SP, stack_size - fp_off, RV_REG_FP, ctx);
+
+	emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx);
+
+	/* callee saved register S1 to pass start time */
+	emit_sd(RV_REG_FP, -sreg_off, RV_REG_S1, ctx);
+
+	/* store ip address of the traced function */
+	if (flags & BPF_TRAMP_F_IP_ARG) {
+		emit_imm(RV_REG_T1, (const s64)func_addr, ctx);
+		emit_sd(RV_REG_FP, -ip_off, RV_REG_T1, ctx);
+	}
+
+	emit_li(RV_REG_T1, nregs, ctx);
+	emit_sd(RV_REG_FP, -nregs_off, RV_REG_T1, ctx);
+
+	store_args(nregs, args_off, ctx);
+
+	/* skip to actual body of traced function */
+	if (flags & BPF_TRAMP_F_SKIP_FRAME)
+		orig_call += 16;
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+		emit_imm(RV_REG_A0, (const s64)im, ctx);
+		ret = emit_call((const u64)__bpf_tramp_enter, true, ctx);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < fentry->nr_links; i++) {
+		ret = invoke_bpf_prog(fentry->links[i], args_off, retval_off, run_ctx_off,
+				      flags & BPF_TRAMP_F_RET_FENTRY_RET, ctx);
+		if (ret)
+			return ret;
+	}
+
+	if (fmod_ret->nr_links) {
+		branches_off = kcalloc(fmod_ret->nr_links, sizeof(int), GFP_KERNEL);
+		if (!branches_off)
+			return -ENOMEM;
+
+		/* cleanup to avoid garbage return value confusion */
+		emit_sd(RV_REG_FP, -retval_off, RV_REG_ZERO, ctx);
+		for (i = 0; i < fmod_ret->nr_links; i++) {
+			ret = invoke_bpf_prog(fmod_ret->links[i], args_off, retval_off,
+					      run_ctx_off, true, ctx);
+			if (ret)
+				goto out;
+			emit_ld(RV_REG_T1, -retval_off, RV_REG_FP, ctx);
+			branches_off[i] = ctx->ninsns;
+			/* nop reserved for conditional jump */
+			emit(rv_nop(), ctx);
+		}
+	}
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+		restore_args(nregs, args_off, ctx);
+		ret = emit_call((const u64)orig_call, true, ctx);
+		if (ret)
+			goto out;
+		emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx);
+		im->ip_after_call = ctx->insns + ctx->ninsns;
+		/* 2 nops reserved for auipc+jalr pair */
+		emit(rv_nop(), ctx);
+		emit(rv_nop(), ctx);
+	}
+
+	/* update branches saved in invoke_bpf_mod_ret with bnez */
+	for (i = 0; ctx->insns && i < fmod_ret->nr_links; i++) {
+		offset = ninsns_rvoff(ctx->ninsns - branches_off[i]);
+		insn = rv_bne(RV_REG_T1, RV_REG_ZERO, offset >> 1);
+		*(u32 *)(ctx->insns + branches_off[i]) = insn;
+	}
+
+	for (i = 0; i < fexit->nr_links; i++) {
+		ret = invoke_bpf_prog(fexit->links[i], args_off, retval_off,
+				      run_ctx_off, false, ctx);
+		if (ret)
+			goto out;
+	}
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+		im->ip_epilogue = ctx->insns + ctx->ninsns;
+		emit_imm(RV_REG_A0, (const s64)im, ctx);
+		ret = emit_call((const u64)__bpf_tramp_exit, true, ctx);
+		if (ret)
+			goto out;
+	}
+
+	if (flags & BPF_TRAMP_F_RESTORE_REGS)
+		restore_args(nregs, args_off, ctx);
+
+	if (save_ret)
+		emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx);
+
+	emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx);
+
+	if (flags & BPF_TRAMP_F_SKIP_FRAME)
+		/* return address of parent function */
+		emit_ld(RV_REG_RA, stack_size - 8, RV_REG_SP, ctx);
+	else
+		/* return address of traced function */
+		emit_ld(RV_REG_RA, stack_size - retaddr_off, RV_REG_SP, ctx);
+
+	emit_ld(RV_REG_FP, stack_size - fp_off, RV_REG_SP, ctx);
+	emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx);
+
+	emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx);
+
+	ret = ctx->ninsns;
+out:
+	if (branches_off)
+		kfree(branches_off);
+
+	return ret;
+}
+
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
+				void *image_end, const struct btf_func_model *m,
+				u32 flags, struct bpf_tramp_links *tlinks,
+				void *func_addr)
+{
+	int ret;
+	struct rv_jit_context ctx;
+
+	ctx.ninsns = 0;
+	ctx.insns = NULL;
+	ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
+	if (ret < 0)
+		return ret;
+
+	if (ninsns_rvoff(ret) > (long)image_end - (long)image)
+		return -EFBIG;
+
+	ctx.ninsns = 0;
+	ctx.insns = image;
+	ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
+	if (ret < 0)
+		return ret;
+
+	bpf_flush_icache(ctx.insns, ctx.insns + ctx.ninsns);
+
+	return ninsns_rvoff(ret);
+}
+
 int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 		      bool extra_pass)
 {