diff mbox series

[bpf-next,v2,1/2] bpf,riscv: Implement PROBE_MEM32 pseudo instructions

Message ID 20240325155434.65589-2-puranjay12@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf,riscv: Add support for BPF Arena | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-47 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 20 of 20 maintainers
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: line length of 96 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-13 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-15 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18

Commit Message

Puranjay Mohan March 25, 2024, 3:54 p.m. UTC
Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
instructions.  They are similar to PROBE_MEM instructions with the
following differences:
- PROBE_MEM32 supports store.
- PROBE_MEM32 relies on the verifier to clear upper 32-bit of the
  src/dst register
- PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in S7
  in the prologue). Due to bpf_arena constructions such S7 + reg +
  off16 access is guaranteed to be within arena virtual range, so no
  address check at run-time.
- S7 is a free callee-saved register, so it is used to store kern_vm_start
- PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When
  LDX faults the destination register is zeroed.

To support these on riscv, we do tmp = S7 + src/dst reg and then use
tmp as the new src/dst register. This allows us to reuse most of the
code for normal [LDX | STX | ST].

Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
---
 arch/riscv/net/bpf_jit.h        |   1 +
 arch/riscv/net/bpf_jit_comp64.c | 193 +++++++++++++++++++++++++++++++-
 arch/riscv/net/bpf_jit_core.c   |   1 +
 3 files changed, 192 insertions(+), 3 deletions(-)

Comments

Björn Töpel March 25, 2024, 4:53 p.m. UTC | #1
Puranjay Mohan <puranjay12@gmail.com> writes:

> Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
> instructions.  They are similar to PROBE_MEM instructions with the
> following differences:
> - PROBE_MEM32 supports store.
> - PROBE_MEM32 relies on the verifier to clear upper 32-bit of the
>   src/dst register
> - PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in S7
>   in the prologue). Due to bpf_arena constructions such S7 + reg +
>   off16 access is guaranteed to be within arena virtual range, so no
>   address check at run-time.
> - S7 is a free callee-saved register, so it is used to store kern_vm_start
> - PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When
>   LDX faults the destination register is zeroed.
>
> To support these on riscv, we do tmp = S7 + src/dst reg and then use
> tmp2 as the new src/dst register. This allows us to reuse most of the
> code for normal [LDX | STX | ST].

Cool to see the RV BPF JIT keeping up with x86 features! ;-) Nice work!

A couple of minor comments below.

> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
> ---
>  arch/riscv/net/bpf_jit.h        |   1 +
>  arch/riscv/net/bpf_jit_comp64.c | 193 +++++++++++++++++++++++++++++++-
>  arch/riscv/net/bpf_jit_core.c   |   1 +
>  3 files changed, 192 insertions(+), 3 deletions(-)
>
> diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
> index f4b6b3b9edda..8a47da08dd9c 100644
> --- a/arch/riscv/net/bpf_jit.h
> +++ b/arch/riscv/net/bpf_jit.h
> @@ -81,6 +81,7 @@ struct rv_jit_context {
>  	int nexentries;
>  	unsigned long flags;
>  	int stack_size;
> +	u64 arena_vm_start;
>  };
>  
>  /* Convert from ninsns to bytes. */
> diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
> index 1adf2f39ce59..0c0588e327af 100644
> --- a/arch/riscv/net/bpf_jit_comp64.c
> +++ b/arch/riscv/net/bpf_jit_comp64.c
> @@ -255,6 +255,10 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
>  		emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx);
>  		store_offset -= 8;
>  	}
> +	if (ctx->arena_vm_start) {
> +		emit_ld(RV_REG_S7, store_offset, RV_REG_SP, ctx);
> +		store_offset -= 8;
> +	}
>  
>  	emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx);
>  	/* Set return value. */
> @@ -548,6 +552,7 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
>  
>  #define BPF_FIXUP_OFFSET_MASK   GENMASK(26, 0)
>  #define BPF_FIXUP_REG_MASK      GENMASK(31, 27)
> +#define DONT_CLEAR		17	/* RV_REG_A7 unused in pt_regmap */

Hmm, so this is just a sentinel node, right? Isn't it more robust to
use, say REG_ZERO which will never be used? Maybe REG_DONT_CLEAR_MARKER
or smth, so it's obvious how it's used?


>  bool ex_handler_bpf(const struct exception_table_entry *ex,
>  		    struct pt_regs *regs)
> @@ -555,7 +560,8 @@ bool ex_handler_bpf(const struct exception_table_entry *ex,
>  	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
>  	int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
>  
> -	*(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0;
> +	if (regs_offset != DONT_CLEAR)
> +		*(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0;
>  	regs->epc = (unsigned long)&ex->fixup - offset;
>  
>  	return true;
> @@ -572,7 +578,8 @@ static int add_exception_handler(const struct bpf_insn *insn,
>  	off_t fixup_offset;
>  
>  	if (!ctx->insns || !ctx->ro_insns || !ctx->prog->aux->extable ||
> -	    (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX))
> +	    (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
> +	     BPF_MODE(insn->code) != BPF_PROBE_MEM32))
>  		return 0;
>  
>  	if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries))
> @@ -622,6 +629,9 @@ static int add_exception_handler(const struct bpf_insn *insn,
>  
>  	ex->insn = ins_offset;
>  
> +	if (BPF_CLASS(insn->code) != BPF_LDX)
> +		dst_reg = DONT_CLEAR;
> +

Instead of having a side-effect, and passing a dummy dst_reg for the
probe_mem32, just explicitly add DONT_CLEAR when calling
add_exception_handler(). It's more obvious to me at least.

>  	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
>  		FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
>  	ex->type = EX_TYPE_BPF;
> @@ -1063,7 +1073,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
>  		    BPF_CLASS(insn->code) == BPF_JMP;
>  	int s, e, rvoff, ret, i = insn - ctx->prog->insnsi;
>  	struct bpf_prog_aux *aux = ctx->prog->aux;
> -	u8 rd = -1, rs = -1, code = insn->code;
> +	u8 rd = -1, rs = -1, code = insn->code, reg_arena_vm_start = RV_REG_S7;
>  	s16 off = insn->off;
>  	s32 imm = insn->imm;
>  
> @@ -1539,6 +1549,11 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
>  	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
>  	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
>  	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
> +	/* LDX | PROBE_MEM32: dst = *(unsigned size *)(src + S7 + off)*/
> +	case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
> +	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
> +	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
> +	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
>  	{
>  		int insn_len, insns_start;
>  		bool sign_ext;
> @@ -1546,6 +1561,11 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
>  		sign_ext = BPF_MODE(insn->code) == BPF_MEMSX ||
>  			   BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
>  
> +		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
> +			emit_add(RV_REG_T2, rs, reg_arena_vm_start, ctx);
> +			rs = RV_REG_T2;
> +		}
> +
>  		switch (BPF_SIZE(code)) {
>  		case BPF_B:
>  			if (is_12b_int(off)) {
> @@ -1682,6 +1702,87 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
>  		emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
>  		break;
>  
> +	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
> +	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
> +	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
> +	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
> +	{
> +		int insn_len, insns_start;
> +
> +		emit_add(RV_REG_T3, rd, reg_arena_vm_start, ctx);
> +		rd = RV_REG_T3;
> +
> +		/* Load imm to a register then store it */
> +		emit_imm(RV_REG_T1, imm, ctx);
> +
> +		switch (BPF_SIZE(code)) {
> +		case BPF_B:
> +			if (is_12b_int(off)) {
> +				insns_start = ctx->ninsns;
> +				emit(rv_sb(rd, off, RV_REG_T1), ctx);
> +				insn_len = ctx->ninsns - insns_start;
> +				break;
> +			}
> +
> +			emit_imm(RV_REG_T2, off, ctx);
> +			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
> +			insns_start = ctx->ninsns;
> +			emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
> +			insn_len = ctx->ninsns - insns_start;
> +
> +			break;
> +
> +		case BPF_H:
> +			if (is_12b_int(off)) {
> +				insns_start = ctx->ninsns;
> +				emit(rv_sh(rd, off, RV_REG_T1), ctx);
> +				insn_len = ctx->ninsns - insns_start;
> +				break;
> +			}
> +
> +			emit_imm(RV_REG_T2, off, ctx);
> +			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
> +			insns_start = ctx->ninsns;
> +			emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
> +			insn_len = ctx->ninsns - insns_start;
> +			break;
> +		case BPF_W:
> +			if (is_12b_int(off)) {
> +				insns_start = ctx->ninsns;
> +				emit_sw(rd, off, RV_REG_T1, ctx);
> +				insn_len = ctx->ninsns - insns_start;
> +				break;
> +			}
> +
> +			emit_imm(RV_REG_T2, off, ctx);
> +			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
> +			insns_start = ctx->ninsns;
> +			emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
> +			insn_len = ctx->ninsns - insns_start;
> +			break;
> +		case BPF_DW:
> +			if (is_12b_int(off)) {
> +				insns_start = ctx->ninsns;
> +				emit_sd(rd, off, RV_REG_T1, ctx);
> +				insn_len = ctx->ninsns - insns_start;
> +				break;
> +			}
> +
> +			emit_imm(RV_REG_T2, off, ctx);
> +			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
> +			insns_start = ctx->ninsns;
> +			emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
> +			insn_len = ctx->ninsns - insns_start;
> +			break;
> +		}

A lot of similar code, with emit of different sizes. Possible to move
it out to a function, and wrap the emits? The main loop is hard to read
already!

> +
> +		ret = add_exception_handler(insn, ctx, rd, insn_len);
> +		if (ret)
> +			return ret;
> +
> +		break;
> +	}
> +
>  	/* STX: *(size *)(dst + off) = src */
>  	case BPF_STX | BPF_MEM | BPF_B:
>  		if (is_12b_int(off)) {
> @@ -1728,6 +1829,83 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
>  		emit_atomic(rd, rs, off, imm,
>  			    BPF_SIZE(code) == BPF_DW, ctx);
>  		break;
> +
> +	case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
> +	case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
> +	case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
> +	case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
> +	{
> +		int insn_len, insns_start;
> +
> +		emit_add(RV_REG_T2, rd, reg_arena_vm_start, ctx);
> +		rd = RV_REG_T2;
> +
> +		switch (BPF_SIZE(code)) {
> +		case BPF_B:
> +			if (is_12b_int(off)) {
> +				insns_start = ctx->ninsns;
> +				emit(rv_sb(rd, off, rs), ctx);
> +				insn_len = ctx->ninsns - insns_start;
> +				break;
> +			}
> +
> +			emit_imm(RV_REG_T1, off, ctx);
> +			emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
> +			insns_start = ctx->ninsns;
> +			emit(rv_sb(RV_REG_T1, 0, rs), ctx);
> +			insn_len = ctx->ninsns - insns_start;
> +			break;
> +		case BPF_H:
> +			if (is_12b_int(off)) {
> +				insns_start = ctx->ninsns;
> +				emit(rv_sh(rd, off, rs), ctx);
> +				insn_len = ctx->ninsns - insns_start;
> +				break;
> +			}
> +
> +			emit_imm(RV_REG_T1, off, ctx);
> +			emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
> +			insns_start = ctx->ninsns;
> +			emit(rv_sh(RV_REG_T1, 0, rs), ctx);
> +			insn_len = ctx->ninsns - insns_start;
> +			break;
> +		case BPF_W:
> +			if (is_12b_int(off)) {
> +				insns_start = ctx->ninsns;
> +				emit_sw(rd, off, rs, ctx);
> +				insn_len = ctx->ninsns - insns_start;
> +				break;
> +			}
> +
> +			emit_imm(RV_REG_T1, off, ctx);
> +			emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
> +			insns_start = ctx->ninsns;
> +			emit_sw(RV_REG_T1, 0, rs, ctx);
> +			insn_len = ctx->ninsns - insns_start;
> +			break;
> +		case BPF_DW:
> +			if (is_12b_int(off)) {
> +				insns_start = ctx->ninsns;
> +				emit_sd(rd, off, rs, ctx);
> +				insn_len = ctx->ninsns - insns_start;
> +				break;
> +			}
> +
> +			emit_imm(RV_REG_T1, off, ctx);
> +			emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
> +			insns_start = ctx->ninsns;
> +			emit_sd(RV_REG_T1, 0, rs, ctx);
> +			insn_len = ctx->ninsns - insns_start;
> +			break;
> +		}

Same comment as above.


Björn
Puranjay Mohan March 25, 2024, 5:15 p.m. UTC | #2
Björn Töpel <bjorn@kernel.org> writes:

> Puranjay Mohan <puranjay12@gmail.com> writes:
>
>> Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
>> instructions.  They are similar to PROBE_MEM instructions with the
>> following differences:
>> - PROBE_MEM32 supports store.
>> - PROBE_MEM32 relies on the verifier to clear upper 32-bit of the
>>   src/dst register
>> - PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in S7
>>   in the prologue). Due to bpf_arena constructions such S7 + reg +
>>   off16 access is guaranteed to be within arena virtual range, so no
>>   address check at run-time.
>> - S7 is a free callee-saved register, so it is used to store kern_vm_start
>> - PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When
>>   LDX faults the destination register is zeroed.
>>
>> To support these on riscv, we do tmp = S7 + src/dst reg and then use
>> tmp2 as the new src/dst register. This allows us to reuse most of the
>> code for normal [LDX | STX | ST].
>
> Cool to see the RV BPF JIT keeping up with x86 features! ;-) Nice work!

It is my self-proclaimed duty to make sure that all 64-bit JITs have
feature parity. :D

>
> A couple of minor comments below.
>
>> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
>> ---
>>  arch/riscv/net/bpf_jit.h        |   1 +
>>  arch/riscv/net/bpf_jit_comp64.c | 193 +++++++++++++++++++++++++++++++-
>>  arch/riscv/net/bpf_jit_core.c   |   1 +
>>  3 files changed, 192 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
>> index f4b6b3b9edda..8a47da08dd9c 100644
>> --- a/arch/riscv/net/bpf_jit.h
>> +++ b/arch/riscv/net/bpf_jit.h
>> @@ -81,6 +81,7 @@ struct rv_jit_context {
>>  	int nexentries;
>>  	unsigned long flags;
>>  	int stack_size;
>> +	u64 arena_vm_start;
>>  };
>>  
>>  /* Convert from ninsns to bytes. */
>> diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
>> index 1adf2f39ce59..0c0588e327af 100644
>> --- a/arch/riscv/net/bpf_jit_comp64.c
>> +++ b/arch/riscv/net/bpf_jit_comp64.c
>> @@ -255,6 +255,10 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
>>  		emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx);
>>  		store_offset -= 8;
>>  	}
>> +	if (ctx->arena_vm_start) {
>> +		emit_ld(RV_REG_S7, store_offset, RV_REG_SP, ctx);
>> +		store_offset -= 8;
>> +	}
>>  
>>  	emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx);
>>  	/* Set return value. */
>> @@ -548,6 +552,7 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
>>  
>>  #define BPF_FIXUP_OFFSET_MASK   GENMASK(26, 0)
>>  #define BPF_FIXUP_REG_MASK      GENMASK(31, 27)
>> +#define DONT_CLEAR		17	/* RV_REG_A7 unused in pt_regmap */
>
> Hmm, so this is just a a sentinel node, right? Isn't it more robust to
> use, say REG_ZERO which will never be used? Maybe REG_DONT_CLEAR_MARKER
> or smth, so it's obvious how it's used?

Yes, I agree, RV_REG_ZERO would be the best thing to use here.

>
>
>>  bool ex_handler_bpf(const struct exception_table_entry *ex,
>>  		    struct pt_regs *regs)
>> @@ -555,7 +560,8 @@ bool ex_handler_bpf(const struct exception_table_entry *ex,
>>  	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
>>  	int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
>>  
>> -	*(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0;
>> +	if (regs_offset != DONT_CLEAR)
>> +		*(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0;
>>  	regs->epc = (unsigned long)&ex->fixup - offset;
>>  
>>  	return true;
>> @@ -572,7 +578,8 @@ static int add_exception_handler(const struct bpf_insn *insn,
>>  	off_t fixup_offset;
>>  
>>  	if (!ctx->insns || !ctx->ro_insns || !ctx->prog->aux->extable ||
>> -	    (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX))
>> +	    (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
>> +	     BPF_MODE(insn->code) != BPF_PROBE_MEM32))
>>  		return 0;
>>  
>>  	if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries))
>> @@ -622,6 +629,9 @@ static int add_exception_handler(const struct bpf_insn *insn,
>>  
>>  	ex->insn = ins_offset;
>>  
>> +	if (BPF_CLASS(insn->code) != BPF_LDX)
>> +		dst_reg = DONT_CLEAR;
>> +
>
> Instead of having a side-effect, and passing a dummy dst_reg for the
> probe_mem32, just explicitly add DONT_CLEAR when calling
> add_exception_handler(). It's more obvious to me at least.

Sure, will do that in the next version.

>
>>  	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
>>  		FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
>>  	ex->type = EX_TYPE_BPF;
>> @@ -1063,7 +1073,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
>>  		    BPF_CLASS(insn->code) == BPF_JMP;
>>  	int s, e, rvoff, ret, i = insn - ctx->prog->insnsi;
>>  	struct bpf_prog_aux *aux = ctx->prog->aux;
>> -	u8 rd = -1, rs = -1, code = insn->code;
>> +	u8 rd = -1, rs = -1, code = insn->code, reg_arena_vm_start = RV_REG_S7;
>>  	s16 off = insn->off;
>>  	s32 imm = insn->imm;
>>  
>> @@ -1539,6 +1549,11 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
>>  	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
>>  	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
>>  	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
>> +	/* LDX | PROBE_MEM32: dst = *(unsigned size *)(src + S7 + off)*/
>> +	case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
>> +	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
>> +	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
>> +	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
>>  	{
>>  		int insn_len, insns_start;
>>  		bool sign_ext;
>> @@ -1546,6 +1561,11 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
>>  		sign_ext = BPF_MODE(insn->code) == BPF_MEMSX ||
>>  			   BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
>>  
>> +		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
>> +			emit_add(RV_REG_T2, rs, reg_arena_vm_start, ctx);
>> +			rs = RV_REG_T2;
>> +		}
>> +
>>  		switch (BPF_SIZE(code)) {
>>  		case BPF_B:
>>  			if (is_12b_int(off)) {
>> @@ -1682,6 +1702,87 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
>>  		emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
>>  		break;
>>  
>> +	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
>> +	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
>> +	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
>> +	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
>> +	{
>> +		int insn_len, insns_start;
>> +
>> +		emit_add(RV_REG_T3, rd, reg_arena_vm_start, ctx);
>> +		rd = RV_REG_T3;
>> +
>> +		/* Load imm to a register then store it */
>> +		emit_imm(RV_REG_T1, imm, ctx);
>> +
>> +		switch (BPF_SIZE(code)) {
>> +		case BPF_B:
>> +			if (is_12b_int(off)) {
>> +				insns_start = ctx->ninsns;
>> +				emit(rv_sb(rd, off, RV_REG_T1), ctx);
>> +				insn_len = ctx->ninsns - insns_start;
>> +				break;
>> +			}
>> +
>> +			emit_imm(RV_REG_T2, off, ctx);
>> +			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
>> +			insns_start = ctx->ninsns;
>> +			emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
>> +			insn_len = ctx->ninsns - insns_start;
>> +
>> +			break;
>> +
>> +		case BPF_H:
>> +			if (is_12b_int(off)) {
>> +				insns_start = ctx->ninsns;
>> +				emit(rv_sh(rd, off, RV_REG_T1), ctx);
>> +				insn_len = ctx->ninsns - insns_start;
>> +				break;
>> +			}
>> +
>> +			emit_imm(RV_REG_T2, off, ctx);
>> +			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
>> +			insns_start = ctx->ninsns;
>> +			emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
>> +			insn_len = ctx->ninsns - insns_start;
>> +			break;
>> +		case BPF_W:
>> +			if (is_12b_int(off)) {
>> +				insns_start = ctx->ninsns;
>> +				emit_sw(rd, off, RV_REG_T1, ctx);
>> +				insn_len = ctx->ninsns - insns_start;
>> +				break;
>> +			}
>> +
>> +			emit_imm(RV_REG_T2, off, ctx);
>> +			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
>> +			insns_start = ctx->ninsns;
>> +			emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
>> +			insn_len = ctx->ninsns - insns_start;
>> +			break;
>> +		case BPF_DW:
>> +			if (is_12b_int(off)) {
>> +				insns_start = ctx->ninsns;
>> +				emit_sd(rd, off, RV_REG_T1, ctx);
>> +				insn_len = ctx->ninsns - insns_start;
>> +				break;
>> +			}
>> +
>> +			emit_imm(RV_REG_T2, off, ctx);
>> +			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
>> +			insns_start = ctx->ninsns;
>> +			emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
>> +			insn_len = ctx->ninsns - insns_start;
>> +			break;
>> +		}
>
> A lot of similar code, with emit of different sizes. Possible to move
> move out to a function, and wrap the emits? The main loop is hard read
> already!

I thought about this as well. My plan is to refactor the whole thing in a
separate patch. I did not do it with this feature as it will cause a lot
of unrelated code churn.

Thanks,
Puranjay
Pu Lehui March 26, 2024, 3:15 a.m. UTC | #3
On 2024/3/26 1:15, Puranjay Mohan wrote:
> Björn Töpel <bjorn@kernel.org> writes:
> 
>> Puranjay Mohan <puranjay12@gmail.com> writes:
>>
>>> Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
>>> instructions.  They are similar to PROBE_MEM instructions with the
>>> following differences:
>>> - PROBE_MEM32 supports store.
>>> - PROBE_MEM32 relies on the verifier to clear upper 32-bit of the
>>>    src/dst register
>>> - PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in S7
>>>    in the prologue). Due to bpf_arena constructions such S7 + reg +
>>>    off16 access is guaranteed to be within arena virtual range, so no
>>>    address check at run-time.
>>> - S7 is a free callee-saved register, so it is used to store kern_vm_start
>>> - PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When
>>>    LDX faults the destination register is zeroed.
>>>
>>> To support these on riscv, we do tmp = S7 + src/dst reg and then use
>>> tmp2 as the new src/dst register. This allows us to reuse most of the
>>> code for normal [LDX | STX | ST].
>>
>> Cool to see the RV BPF JIT keeping up with x86 features! ;-) Nice work!
> 
> It is my self proclaimed duty to make sure that all 64-bit JITs have
> feature parity. :D
> 
>>
>> A couple of minor comments below.
>>
>>> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
>>> ---
>>>   arch/riscv/net/bpf_jit.h        |   1 +
>>>   arch/riscv/net/bpf_jit_comp64.c | 193 +++++++++++++++++++++++++++++++-
>>>   arch/riscv/net/bpf_jit_core.c   |   1 +
>>>   3 files changed, 192 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
>>> index f4b6b3b9edda..8a47da08dd9c 100644
>>> --- a/arch/riscv/net/bpf_jit.h
>>> +++ b/arch/riscv/net/bpf_jit.h
>>> @@ -81,6 +81,7 @@ struct rv_jit_context {
>>>   	int nexentries;
>>>   	unsigned long flags;
>>>   	int stack_size;
>>> +	u64 arena_vm_start;
>>>   };
>>>   
>>>   /* Convert from ninsns to bytes. */
>>> diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
>>> index 1adf2f39ce59..0c0588e327af 100644
>>> --- a/arch/riscv/net/bpf_jit_comp64.c
>>> +++ b/arch/riscv/net/bpf_jit_comp64.c
>>> @@ -255,6 +255,10 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
>>>   		emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx);
>>>   		store_offset -= 8;
>>>   	}
>>> +	if (ctx->arena_vm_start) {
>>> +		emit_ld(RV_REG_S7, store_offset, RV_REG_SP, ctx);
>>> +		store_offset -= 8;
>>> +	}

As RV_REG_S7 is only for bpf arena, how about defining this register as
below, like RV_REG_TCC?

#define RV_REG_ARENA RV_REG_S7

>>>   
>>>   	emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx);
>>>   	/* Set return value. */
>>> @@ -548,6 +552,7 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
>>>   
>>>   #define BPF_FIXUP_OFFSET_MASK   GENMASK(26, 0)
>>>   #define BPF_FIXUP_REG_MASK      GENMASK(31, 27)
>>> +#define DONT_CLEAR		17	/* RV_REG_A7 unused in pt_regmap */
>>
>> Hmm, so this is just a sentinel node, right? Isn't it more robust to
>> use, say REG_ZERO which will never be used? Maybe REG_DONT_CLEAR_MARKER
>> or smth, so it's obvious how it's used?
> 
> Yes, I agree, RV_REG_ZERO would be the best thing to use here.
> 
>>
>>
>>>   bool ex_handler_bpf(const struct exception_table_entry *ex,
>>>   		    struct pt_regs *regs)
>>> @@ -555,7 +560,8 @@ bool ex_handler_bpf(const struct exception_table_entry *ex,
>>>   	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
>>>   	int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
>>>   
>>> -	*(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0;
>>> +	if (regs_offset != DONT_CLEAR)
>>> +		*(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0;
>>>   	regs->epc = (unsigned long)&ex->fixup - offset;
>>>   
>>>   	return true;
>>> @@ -572,7 +578,8 @@ static int add_exception_handler(const struct bpf_insn *insn,
>>>   	off_t fixup_offset;
>>>   
>>>   	if (!ctx->insns || !ctx->ro_insns || !ctx->prog->aux->extable ||
>>> -	    (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX))
>>> +	    (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
>>> +	     BPF_MODE(insn->code) != BPF_PROBE_MEM32))
>>>   		return 0;
>>>   
>>>   	if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries))
>>> @@ -622,6 +629,9 @@ static int add_exception_handler(const struct bpf_insn *insn,
>>>   
>>>   	ex->insn = ins_offset;
>>>   
>>> +	if (BPF_CLASS(insn->code) != BPF_LDX)
>>> +		dst_reg = DONT_CLEAR;
>>> +
>>
>> Instead of having a side-effect, and passing a dummy dst_reg for the
>> probe_mem32, just explicitly add DONT_CLEAR when calling
>> add_exception_handler(). It's more obvious to me at least.
> 
> Sure, will do that in the next version.
> 
>>
>>>   	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
>>>   		FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
>>>   	ex->type = EX_TYPE_BPF;
>>> @@ -1063,7 +1073,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
>>>   		    BPF_CLASS(insn->code) == BPF_JMP;
>>>   	int s, e, rvoff, ret, i = insn - ctx->prog->insnsi;
>>>   	struct bpf_prog_aux *aux = ctx->prog->aux;
>>> -	u8 rd = -1, rs = -1, code = insn->code;
>>> +	u8 rd = -1, rs = -1, code = insn->code, reg_arena_vm_start = RV_REG_S7;
>>>   	s16 off = insn->off;
>>>   	s32 imm = insn->imm;
>>>   
>>> @@ -1539,6 +1549,11 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
>>>   	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
>>>   	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
>>>   	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
>>> +	/* LDX | PROBE_MEM32: dst = *(unsigned size *)(src + S7 + off)*/
>>> +	case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
>>> +	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
>>> +	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
>>> +	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
>>>   	{
>>>   		int insn_len, insns_start;
>>>   		bool sign_ext;
>>> @@ -1546,6 +1561,11 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
>>>   		sign_ext = BPF_MODE(insn->code) == BPF_MEMSX ||
>>>   			   BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
>>>   
>>> +		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
>>> +			emit_add(RV_REG_T2, rs, reg_arena_vm_start, ctx);
>>> +			rs = RV_REG_T2;
>>> +		}
>>> +
>>>   		switch (BPF_SIZE(code)) {
>>>   		case BPF_B:
>>>   			if (is_12b_int(off)) {
>>> @@ -1682,6 +1702,87 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
>>>   		emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
>>>   		break;
>>>   
>>> +	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
>>> +	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
>>> +	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
>>> +	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
>>> +	{
>>> +		int insn_len, insns_start;
>>> +
>>> +		emit_add(RV_REG_T3, rd, reg_arena_vm_start, ctx);
>>> +		rd = RV_REG_T3;
>>> +
>>> +		/* Load imm to a register then store it */
>>> +		emit_imm(RV_REG_T1, imm, ctx);
>>> +
>>> +		switch (BPF_SIZE(code)) {
>>> +		case BPF_B:
>>> +			if (is_12b_int(off)) {
>>> +				insns_start = ctx->ninsns;
>>> +				emit(rv_sb(rd, off, RV_REG_T1), ctx);
>>> +				insn_len = ctx->ninsns - insns_start;
>>> +				break;
>>> +			}
>>> +
>>> +			emit_imm(RV_REG_T2, off, ctx);
>>> +			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
>>> +			insns_start = ctx->ninsns;
>>> +			emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
>>> +			insn_len = ctx->ninsns - insns_start;
>>> +
>>> +			break;
>>> +
>>> +		case BPF_H:
>>> +			if (is_12b_int(off)) {
>>> +				insns_start = ctx->ninsns;
>>> +				emit(rv_sh(rd, off, RV_REG_T1), ctx);
>>> +				insn_len = ctx->ninsns - insns_start;
>>> +				break;
>>> +			}
>>> +
>>> +			emit_imm(RV_REG_T2, off, ctx);
>>> +			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
>>> +			insns_start = ctx->ninsns;
>>> +			emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
>>> +			insn_len = ctx->ninsns - insns_start;
>>> +			break;
>>> +		case BPF_W:
>>> +			if (is_12b_int(off)) {
>>> +				insns_start = ctx->ninsns;
>>> +				emit_sw(rd, off, RV_REG_T1, ctx);
>>> +				insn_len = ctx->ninsns - insns_start;
>>> +				break;
>>> +			}
>>> +
>>> +			emit_imm(RV_REG_T2, off, ctx);
>>> +			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
>>> +			insns_start = ctx->ninsns;
>>> +			emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
>>> +			insn_len = ctx->ninsns - insns_start;
>>> +			break;
>>> +		case BPF_DW:
>>> +			if (is_12b_int(off)) {
>>> +				insns_start = ctx->ninsns;
>>> +				emit_sd(rd, off, RV_REG_T1, ctx);
>>> +				insn_len = ctx->ninsns - insns_start;
>>> +				break;
>>> +			}
>>> +
>>> +			emit_imm(RV_REG_T2, off, ctx);
>>> +			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
>>> +			insns_start = ctx->ninsns;
>>> +			emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
>>> +			insn_len = ctx->ninsns - insns_start;
>>> +			break;
>>> +		}
>>
>> A lot of similar code, with emits of different sizes. Is it possible to
>> move this out to a function, and wrap the emits? The main loop is hard to
>> read already!
> 
> I thought about this as well. My plan is to refactor the whole thing in a
> separate patch. I did not do it with this feature as it will cause a lot
> of unrelated code churn.

Yeah, I think we could factor that out for LDX, ST, and STX; I had done it
before in another riscv bpf arena implementation. BUT, looking forward to your
implementation.
diff mbox series

Patch

diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index f4b6b3b9edda..8a47da08dd9c 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -81,6 +81,7 @@  struct rv_jit_context {
 	int nexentries;
 	unsigned long flags;
 	int stack_size;
+	u64 arena_vm_start;
 };
 
 /* Convert from ninsns to bytes. */
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 1adf2f39ce59..0c0588e327af 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -255,6 +255,10 @@  static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
 		emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx);
 		store_offset -= 8;
 	}
+	if (ctx->arena_vm_start) {
+		emit_ld(RV_REG_S7, store_offset, RV_REG_SP, ctx);
+		store_offset -= 8;
+	}
 
 	emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx);
 	/* Set return value. */
@@ -548,6 +552,7 @@  static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
 
 #define BPF_FIXUP_OFFSET_MASK   GENMASK(26, 0)
 #define BPF_FIXUP_REG_MASK      GENMASK(31, 27)
+#define DONT_CLEAR		17	/* RV_REG_A7 unused in pt_regmap */
 
 bool ex_handler_bpf(const struct exception_table_entry *ex,
 		    struct pt_regs *regs)
@@ -555,7 +560,8 @@  bool ex_handler_bpf(const struct exception_table_entry *ex,
 	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
 	int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
 
-	*(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0;
+	if (regs_offset != DONT_CLEAR)
+		*(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0;
 	regs->epc = (unsigned long)&ex->fixup - offset;
 
 	return true;
@@ -572,7 +578,8 @@  static int add_exception_handler(const struct bpf_insn *insn,
 	off_t fixup_offset;
 
 	if (!ctx->insns || !ctx->ro_insns || !ctx->prog->aux->extable ||
-	    (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX))
+	    (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
+	     BPF_MODE(insn->code) != BPF_PROBE_MEM32))
 		return 0;
 
 	if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries))
@@ -622,6 +629,9 @@  static int add_exception_handler(const struct bpf_insn *insn,
 
 	ex->insn = ins_offset;
 
+	if (BPF_CLASS(insn->code) != BPF_LDX)
+		dst_reg = DONT_CLEAR;
+
 	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
 		FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
 	ex->type = EX_TYPE_BPF;
@@ -1063,7 +1073,7 @@  int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 		    BPF_CLASS(insn->code) == BPF_JMP;
 	int s, e, rvoff, ret, i = insn - ctx->prog->insnsi;
 	struct bpf_prog_aux *aux = ctx->prog->aux;
-	u8 rd = -1, rs = -1, code = insn->code;
+	u8 rd = -1, rs = -1, code = insn->code, reg_arena_vm_start = RV_REG_S7;
 	s16 off = insn->off;
 	s32 imm = insn->imm;
 
@@ -1539,6 +1549,11 @@  int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
+	/* LDX | PROBE_MEM32: dst = *(unsigned size *)(src + S7 + off)*/
+	case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
+	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
+	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
+	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
 	{
 		int insn_len, insns_start;
 		bool sign_ext;
@@ -1546,6 +1561,11 @@  int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 		sign_ext = BPF_MODE(insn->code) == BPF_MEMSX ||
 			   BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
 
+		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+			emit_add(RV_REG_T2, rs, reg_arena_vm_start, ctx);
+			rs = RV_REG_T2;
+		}
+
 		switch (BPF_SIZE(code)) {
 		case BPF_B:
 			if (is_12b_int(off)) {
@@ -1682,6 +1702,87 @@  int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 		emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
 		break;
 
+	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
+	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
+	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
+	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
+	{
+		int insn_len, insns_start;
+
+		emit_add(RV_REG_T3, rd, reg_arena_vm_start, ctx);
+		rd = RV_REG_T3;
+
+		/* Load imm to a register then store it */
+		emit_imm(RV_REG_T1, imm, ctx);
+
+		switch (BPF_SIZE(code)) {
+		case BPF_B:
+			if (is_12b_int(off)) {
+				insns_start = ctx->ninsns;
+				emit(rv_sb(rd, off, RV_REG_T1), ctx);
+				insn_len = ctx->ninsns - insns_start;
+				break;
+			}
+
+			emit_imm(RV_REG_T2, off, ctx);
+			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
+			insns_start = ctx->ninsns;
+			emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
+			insn_len = ctx->ninsns - insns_start;
+
+			break;
+
+		case BPF_H:
+			if (is_12b_int(off)) {
+				insns_start = ctx->ninsns;
+				emit(rv_sh(rd, off, RV_REG_T1), ctx);
+				insn_len = ctx->ninsns - insns_start;
+				break;
+			}
+
+			emit_imm(RV_REG_T2, off, ctx);
+			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
+			insns_start = ctx->ninsns;
+			emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
+			insn_len = ctx->ninsns - insns_start;
+			break;
+		case BPF_W:
+			if (is_12b_int(off)) {
+				insns_start = ctx->ninsns;
+				emit_sw(rd, off, RV_REG_T1, ctx);
+				insn_len = ctx->ninsns - insns_start;
+				break;
+			}
+
+			emit_imm(RV_REG_T2, off, ctx);
+			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
+			insns_start = ctx->ninsns;
+			emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
+			insn_len = ctx->ninsns - insns_start;
+			break;
+		case BPF_DW:
+			if (is_12b_int(off)) {
+				insns_start = ctx->ninsns;
+				emit_sd(rd, off, RV_REG_T1, ctx);
+				insn_len = ctx->ninsns - insns_start;
+				break;
+			}
+
+			emit_imm(RV_REG_T2, off, ctx);
+			emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
+			insns_start = ctx->ninsns;
+			emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
+			insn_len = ctx->ninsns - insns_start;
+			break;
+		}
+
+		ret = add_exception_handler(insn, ctx, rd, insn_len);
+		if (ret)
+			return ret;
+
+		break;
+	}
+
 	/* STX: *(size *)(dst + off) = src */
 	case BPF_STX | BPF_MEM | BPF_B:
 		if (is_12b_int(off)) {
@@ -1728,6 +1829,83 @@  int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 		emit_atomic(rd, rs, off, imm,
 			    BPF_SIZE(code) == BPF_DW, ctx);
 		break;
+
+	case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+	case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+	case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+	case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+	{
+		int insn_len, insns_start;
+
+		emit_add(RV_REG_T2, rd, reg_arena_vm_start, ctx);
+		rd = RV_REG_T2;
+
+		switch (BPF_SIZE(code)) {
+		case BPF_B:
+			if (is_12b_int(off)) {
+				insns_start = ctx->ninsns;
+				emit(rv_sb(rd, off, rs), ctx);
+				insn_len = ctx->ninsns - insns_start;
+				break;
+			}
+
+			emit_imm(RV_REG_T1, off, ctx);
+			emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+			insns_start = ctx->ninsns;
+			emit(rv_sb(RV_REG_T1, 0, rs), ctx);
+			insn_len = ctx->ninsns - insns_start;
+			break;
+		case BPF_H:
+			if (is_12b_int(off)) {
+				insns_start = ctx->ninsns;
+				emit(rv_sh(rd, off, rs), ctx);
+				insn_len = ctx->ninsns - insns_start;
+				break;
+			}
+
+			emit_imm(RV_REG_T1, off, ctx);
+			emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+			insns_start = ctx->ninsns;
+			emit(rv_sh(RV_REG_T1, 0, rs), ctx);
+			insn_len = ctx->ninsns - insns_start;
+			break;
+		case BPF_W:
+			if (is_12b_int(off)) {
+				insns_start = ctx->ninsns;
+				emit_sw(rd, off, rs, ctx);
+				insn_len = ctx->ninsns - insns_start;
+				break;
+			}
+
+			emit_imm(RV_REG_T1, off, ctx);
+			emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+			insns_start = ctx->ninsns;
+			emit_sw(RV_REG_T1, 0, rs, ctx);
+			insn_len = ctx->ninsns - insns_start;
+			break;
+		case BPF_DW:
+			if (is_12b_int(off)) {
+				insns_start = ctx->ninsns;
+				emit_sd(rd, off, rs, ctx);
+				insn_len = ctx->ninsns - insns_start;
+				break;
+			}
+
+			emit_imm(RV_REG_T1, off, ctx);
+			emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+			insns_start = ctx->ninsns;
+			emit_sd(RV_REG_T1, 0, rs, ctx);
+			insn_len = ctx->ninsns - insns_start;
+			break;
+		}
+
+		ret = add_exception_handler(insn, ctx, rd, insn_len);
+		if (ret)
+			return ret;
+
+		break;
+	}
+
 	default:
 		pr_err("bpf-jit: unknown opcode %02x\n", code);
 		return -EINVAL;
@@ -1759,6 +1937,8 @@  void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog)
 		stack_adjust += 8;
 	if (seen_reg(RV_REG_S6, ctx))
 		stack_adjust += 8;
+	if (ctx->arena_vm_start)
+		stack_adjust += 8;
 
 	stack_adjust = round_up(stack_adjust, 16);
 	stack_adjust += bpf_stack_adjust;
@@ -1810,6 +1990,10 @@  void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog)
 		emit_sd(RV_REG_SP, store_offset, RV_REG_S6, ctx);
 		store_offset -= 8;
 	}
+	if (ctx->arena_vm_start) {
+		emit_sd(RV_REG_SP, store_offset, RV_REG_S7, ctx);
+		store_offset -= 8;
+	}
 
 	emit_addi(RV_REG_FP, RV_REG_SP, stack_adjust, ctx);
 
@@ -1823,6 +2007,9 @@  void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog)
 		emit_mv(RV_REG_TCC_SAVED, RV_REG_TCC, ctx);
 
 	ctx->stack_size = stack_adjust;
+
+	if (ctx->arena_vm_start)
+		emit_imm(RV_REG_S7, ctx->arena_vm_start, ctx);
 }
 
 void bpf_jit_build_epilogue(struct rv_jit_context *ctx)
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 6b3acac30c06..9ab739b9f9a2 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -80,6 +80,7 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		goto skip_init_ctx;
 	}
 
+	ctx->arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
 	ctx->prog = prog;
 	ctx->offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
 	if (!ctx->offset) {