Message ID | 1661999249-10258-4-git-send-email-yangtiezhu@loongson.cn (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Headers | show |
Series | Add BPF JIT support for LoongArch | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Clearly marked for bpf-next, async |
netdev/apply | fail | Patch does not apply to bpf-next |
bpf/vmtest-bpf-next-PR | fail | merge-conflict |
On Thu, Sep 1, 2022 at 10:27 AM Tiezhu Yang <yangtiezhu@loongson.cn> wrote: > > BPF programs are normally handled by a BPF interpreter, add BPF JIT > support for LoongArch to allow the kernel to generate native code > when a program is loaded into the kernel, this will significantly > speed-up processing of BPF programs. > > Co-developed-by: Youling Tang <tangyouling@loongson.cn> > Signed-off-by: Youling Tang <tangyouling@loongson.cn> > Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn> > --- > arch/loongarch/Kbuild | 1 + > arch/loongarch/Kconfig | 1 + > arch/loongarch/include/asm/inst.h | 221 +++++++ > arch/loongarch/net/Makefile | 7 + > arch/loongarch/net/bpf_jit.c | 1160 +++++++++++++++++++++++++++++++++++++ > arch/loongarch/net/bpf_jit.h | 282 +++++++++ > 6 files changed, 1672 insertions(+) > create mode 100644 arch/loongarch/net/Makefile > create mode 100644 arch/loongarch/net/bpf_jit.c > create mode 100644 arch/loongarch/net/bpf_jit.h > > diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild > index ab5373d..b01f5cd 100644 > --- a/arch/loongarch/Kbuild > +++ b/arch/loongarch/Kbuild > @@ -1,5 +1,6 @@ > obj-y += kernel/ > obj-y += mm/ > +obj-y += net/ > obj-y += vdso/ > > # for cleaning > diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig > index 26aeb14..cff7a0f 100644 > --- a/arch/loongarch/Kconfig > +++ b/arch/loongarch/Kconfig > @@ -82,6 +82,7 @@ config LOONGARCH > select HAVE_CONTEXT_TRACKING_USER > select HAVE_DEBUG_STACKOVERFLOW > select HAVE_DMA_CONTIGUOUS > + select HAVE_EBPF_JIT if 64BIT > select HAVE_EXIT_THREAD > select HAVE_FAST_GUP > select HAVE_GENERIC_VDSO > diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h > index 691170b..5e7a4d2 100644 > --- a/arch/loongarch/include/asm/inst.h > +++ b/arch/loongarch/include/asm/inst.h > @@ -318,4 +318,225 @@ static inline bool unsigned_imm_check(unsigned long val, unsigned int bit) > return val < (1UL << bit); > } > > +#define DEF_EMIT_REG0I26_FORMAT(NAME, OP) \ > +static inline void emit_##NAME(union loongarch_instruction *insn, \ > + int offset) \ > +{ \ > + unsigned int immediate_l, immediate_h; \ > + \ > + immediate_l = offset & 0xffff; \ > + offset >>= 16; \ > + immediate_h = offset & 0x3ff; \ > + \ > + insn->reg0i26_format.opcode = OP; \ > + insn->reg0i26_format.immediate_l = immediate_l; \ > + insn->reg0i26_format.immediate_h = immediate_h; \ > +} > + > +DEF_EMIT_REG0I26_FORMAT(b, b_op) > + > +#define DEF_EMIT_REG1I20_FORMAT(NAME, OP) \ > +static inline void emit_##NAME(union loongarch_instruction *insn, \ > + enum loongarch_gpr rd, int imm) \ > +{ \ > + insn->reg1i20_format.opcode = OP; \ > + insn->reg1i20_format.immediate = imm; \ > + insn->reg1i20_format.rd = rd; \ > +} > + > +DEF_EMIT_REG1I20_FORMAT(lu12iw, lu12iw_op) > +DEF_EMIT_REG1I20_FORMAT(lu32id, lu32id_op) > +DEF_EMIT_REG1I20_FORMAT(pcaddu18i, pcaddu18i_op) > + > +#define DEF_EMIT_REG2_FORMAT(NAME, OP) \ > +static inline void emit_##NAME(union loongarch_instruction *insn, \ > + enum loongarch_gpr rd, \ > + enum loongarch_gpr rj) \ > +{ \ > + insn->reg2_format.opcode = OP; \ > + insn->reg2_format.rd = rd; \ > + insn->reg2_format.rj = rj; \ > +} > + > +DEF_EMIT_REG2_FORMAT(revb2h, revb2h_op) > +DEF_EMIT_REG2_FORMAT(revb2w, revb2w_op) > +DEF_EMIT_REG2_FORMAT(revbd, revbd_op) > + > +#define DEF_EMIT_REG2I5_FORMAT(NAME, OP) \ > +static inline void emit_##NAME(union loongarch_instruction *insn, \ > + enum loongarch_gpr rd, \ > + enum loongarch_gpr rj, \ > + int imm) \ > +{ \ > + insn->reg2i5_format.opcode = OP; \ > + insn->reg2i5_format.immediate = imm; \ > + insn->reg2i5_format.rd = rd; \ > + insn->reg2i5_format.rj = rj; \ > +} > + > +DEF_EMIT_REG2I5_FORMAT(slliw, slliw_op) > +DEF_EMIT_REG2I5_FORMAT(srliw, srliw_op) > +DEF_EMIT_REG2I5_FORMAT(sraiw, sraiw_op) > + > +#define DEF_EMIT_REG2I6_FORMAT(NAME, OP) \ > +static inline void emit_##NAME(union loongarch_instruction *insn, \ > + enum loongarch_gpr rd, \ > + enum loongarch_gpr rj, \ > + int imm) \ > +{ \ > + insn->reg2i6_format.opcode = OP; \ > + insn->reg2i6_format.immediate = imm; \ > + insn->reg2i6_format.rd = rd; \ > + insn->reg2i6_format.rj = rj; \ > +} > + > +DEF_EMIT_REG2I6_FORMAT(sllid, sllid_op) > +DEF_EMIT_REG2I6_FORMAT(srlid, srlid_op) > +DEF_EMIT_REG2I6_FORMAT(sraid, sraid_op) > + > +#define DEF_EMIT_REG2I12_FORMAT(NAME, OP) \ > +static inline void emit_##NAME(union loongarch_instruction *insn, \ > + enum loongarch_gpr rd, \ > + enum loongarch_gpr rj, \ > + int imm) \ > +{ \ > + insn->reg2i12_format.opcode = OP; \ > + insn->reg2i12_format.immediate = imm; \ > + insn->reg2i12_format.rd = rd; \ > + insn->reg2i12_format.rj = rj; \ > +} > + > +DEF_EMIT_REG2I12_FORMAT(addiw, addiw_op) > +DEF_EMIT_REG2I12_FORMAT(addid, addid_op) > +DEF_EMIT_REG2I12_FORMAT(lu52id, lu52id_op) > +DEF_EMIT_REG2I12_FORMAT(andi, andi_op) > +DEF_EMIT_REG2I12_FORMAT(ori, ori_op) > +DEF_EMIT_REG2I12_FORMAT(xori, xori_op) > +DEF_EMIT_REG2I12_FORMAT(ldbu, ldbu_op) > +DEF_EMIT_REG2I12_FORMAT(ldhu, ldhu_op) > +DEF_EMIT_REG2I12_FORMAT(ldwu, ldwu_op) > +DEF_EMIT_REG2I12_FORMAT(ldd, ldd_op) > +DEF_EMIT_REG2I12_FORMAT(stb, stb_op) > +DEF_EMIT_REG2I12_FORMAT(sth, sth_op) > +DEF_EMIT_REG2I12_FORMAT(stw, stw_op) > +DEF_EMIT_REG2I12_FORMAT(std, std_op) > + > +#define DEF_EMIT_REG2I14_FORMAT(NAME, OP) \ > +static inline void emit_##NAME(union loongarch_instruction *insn, \ > + enum loongarch_gpr rd, \ > + enum loongarch_gpr rj, \ > + int imm) \ > +{ \ > + insn->reg2i14_format.opcode = OP; \ > + insn->reg2i14_format.immediate = imm; \ > + insn->reg2i14_format.rd = rd; \ > + insn->reg2i14_format.rj = rj; \ > +} > + > +DEF_EMIT_REG2I14_FORMAT(llw, llw_op) > +DEF_EMIT_REG2I14_FORMAT(scw, scw_op) > +DEF_EMIT_REG2I14_FORMAT(lld, lld_op) > +DEF_EMIT_REG2I14_FORMAT(scd, scd_op) > +DEF_EMIT_REG2I14_FORMAT(ldptrw, ldptrw_op) > +DEF_EMIT_REG2I14_FORMAT(stptrw, stptrw_op) > +DEF_EMIT_REG2I14_FORMAT(ldptrd, ldptrd_op) > +DEF_EMIT_REG2I14_FORMAT(stptrd, stptrd_op) > + > +#define DEF_EMIT_REG2I16_FORMAT(NAME, OP) \ > +static inline void emit_##NAME(union loongarch_instruction *insn, \ > + enum loongarch_gpr rj, \ > + enum loongarch_gpr rd, \ > + int offset) \ > +{ \ > + insn->reg2i16_format.opcode = OP; \ > + insn->reg2i16_format.immediate = offset; \ > + insn->reg2i16_format.rj = rj; \ > + insn->reg2i16_format.rd = rd; \ > +} > + > +DEF_EMIT_REG2I16_FORMAT(beq, beq_op) > +DEF_EMIT_REG2I16_FORMAT(bne, bne_op) > +DEF_EMIT_REG2I16_FORMAT(blt, blt_op) > +DEF_EMIT_REG2I16_FORMAT(bge, bge_op) > +DEF_EMIT_REG2I16_FORMAT(bltu, bltu_op) > +DEF_EMIT_REG2I16_FORMAT(bgeu, bgeu_op) > +DEF_EMIT_REG2I16_FORMAT(jirl, jirl_op) > + > +#define DEF_EMIT_REG2BSTRD_FORMAT(NAME, OP) \ > +static inline void emit_##NAME(union loongarch_instruction *insn, \ > + enum loongarch_gpr rd, \ > + enum loongarch_gpr rj, \ > + int msbd, \ > + int lsbd) \ > +{ \ > + insn->reg2bstrd_format.opcode = OP; \ > + insn->reg2bstrd_format.msbd = msbd; \ > + insn->reg2bstrd_format.lsbd = lsbd; \ > + insn->reg2bstrd_format.rj = rj; \ > + insn->reg2bstrd_format.rd = rd; \ > +} > + > +DEF_EMIT_REG2BSTRD_FORMAT(bstrpickd, bstrpickd_op) > + > +#define DEF_EMIT_REG3_FORMAT(NAME, OP) \ > +static inline void emit_##NAME(union loongarch_instruction *insn, \ > + enum loongarch_gpr rd, \ > + enum loongarch_gpr rj, \ > + enum loongarch_gpr rk) \ > +{ \ > + insn->reg3_format.opcode = OP; \ > + insn->reg3_format.rd = rd; \ > + insn->reg3_format.rj = rj; \ > + insn->reg3_format.rk = rk; \ > +} > + > +DEF_EMIT_REG3_FORMAT(addd, addd_op) > +DEF_EMIT_REG3_FORMAT(subd, subd_op) > +DEF_EMIT_REG3_FORMAT(muld, muld_op) > +DEF_EMIT_REG3_FORMAT(divdu, divdu_op) > +DEF_EMIT_REG3_FORMAT(moddu, moddu_op) > +DEF_EMIT_REG3_FORMAT(and, and_op) > +DEF_EMIT_REG3_FORMAT(or, or_op) > +DEF_EMIT_REG3_FORMAT(xor, xor_op) > +DEF_EMIT_REG3_FORMAT(sllw, sllw_op) > +DEF_EMIT_REG3_FORMAT(slld, slld_op) > +DEF_EMIT_REG3_FORMAT(srlw, srlw_op) > +DEF_EMIT_REG3_FORMAT(srld, srld_op) > +DEF_EMIT_REG3_FORMAT(sraw, sraw_op) > +DEF_EMIT_REG3_FORMAT(srad, srad_op) > +DEF_EMIT_REG3_FORMAT(ldxbu, ldxbu_op) > +DEF_EMIT_REG3_FORMAT(ldxhu, ldxhu_op) > +DEF_EMIT_REG3_FORMAT(ldxwu, ldxwu_op) > +DEF_EMIT_REG3_FORMAT(ldxd, ldxd_op) > +DEF_EMIT_REG3_FORMAT(stxb, stxb_op) > +DEF_EMIT_REG3_FORMAT(stxh, stxh_op) > +DEF_EMIT_REG3_FORMAT(stxw, stxw_op) > +DEF_EMIT_REG3_FORMAT(stxd, stxd_op) > +DEF_EMIT_REG3_FORMAT(amaddw, amaddw_op) > +DEF_EMIT_REG3_FORMAT(amaddd, amaddd_op) > +DEF_EMIT_REG3_FORMAT(amandw, amandw_op) > +DEF_EMIT_REG3_FORMAT(amandd, amandd_op) > +DEF_EMIT_REG3_FORMAT(amorw, amorw_op) > +DEF_EMIT_REG3_FORMAT(amord, amord_op) > +DEF_EMIT_REG3_FORMAT(amxorw, amxorw_op) > +DEF_EMIT_REG3_FORMAT(amxord, amxord_op) > +DEF_EMIT_REG3_FORMAT(amswapw, amswapw_op) > +DEF_EMIT_REG3_FORMAT(amswapd, amswapd_op) > + > +#define DEF_EMIT_REG3SA2_FORMAT(NAME, OP) \ > +static inline void emit_##NAME(union loongarch_instruction *insn, \ > + enum loongarch_gpr rd, \ > + enum loongarch_gpr rj, \ > + enum loongarch_gpr rk, \ > + int imm) \ > +{ \ > + insn->reg3sa2_format.opcode = OP; \ > + insn->reg3sa2_format.immediate = imm; \ > + insn->reg3sa2_format.rd = rd; \ > + insn->reg3sa2_format.rj = rj; \ > + insn->reg3sa2_format.rk = rk; \ > +} > + > +DEF_EMIT_REG3SA2_FORMAT(alsld, alsld_op) > + > #endif /* _ASM_INST_H */ > diff --git a/arch/loongarch/net/Makefile b/arch/loongarch/net/Makefile > new file mode 100644 > index 0000000..1ec12a0 > --- /dev/null > +++ b/arch/loongarch/net/Makefile > @@ -0,0 +1,7 @@ > +# SPDX-License-Identifier: GPL-2.0-only > +# > +# Makefile for arch/loongarch/net > +# > +# Copyright (C) 2022 Loongson Technology Corporation Limited > +# > +obj-$(CONFIG_BPF_JIT) += bpf_jit.o > diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c > new file mode 100644 > index 0000000..e9c9ce9 > --- /dev/null > +++ b/arch/loongarch/net/bpf_jit.c > @@ -0,0 +1,1160 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * BPF JIT compiler for LoongArch > + * > + * Copyright (C) 2022 Loongson Technology Corporation Limited > + */ > +#include "bpf_jit.h" > + > +#define REG_TCC LOONGARCH_GPR_A6 > +#define TCC_SAVED LOONGARCH_GPR_S5 > + > +#define SAVE_RA BIT(0) > +#define SAVE_TCC BIT(1) > + > +static const int regmap[] = { > + /* return value from in-kernel function, and exit value for eBPF program */ > + [BPF_REG_0] = LOONGARCH_GPR_A5, > + /* arguments from eBPF program to in-kernel function */ > + [BPF_REG_1] = LOONGARCH_GPR_A0, > + [BPF_REG_2] = LOONGARCH_GPR_A1, > + [BPF_REG_3] = LOONGARCH_GPR_A2, > + [BPF_REG_4] = LOONGARCH_GPR_A3, > + [BPF_REG_5] = LOONGARCH_GPR_A4, > + /* callee saved registers that in-kernel function will preserve */ > + [BPF_REG_6] = LOONGARCH_GPR_S0, > + [BPF_REG_7] = LOONGARCH_GPR_S1, > + [BPF_REG_8] = LOONGARCH_GPR_S2, > + [BPF_REG_9] = LOONGARCH_GPR_S3, > + /* read-only frame pointer to access stack */ > + [BPF_REG_FP] = LOONGARCH_GPR_S4, > + /* temporary register for blinding constants */ > + [BPF_REG_AX] = LOONGARCH_GPR_T0, > +}; > + > +static void mark_call(struct jit_ctx *ctx) > +{ > + ctx->flags |= SAVE_RA; > +} > + > +static void mark_tail_call(struct jit_ctx *ctx) > +{ > + ctx->flags |= SAVE_TCC; > +} > + > +static bool seen_call(struct jit_ctx *ctx) > +{ > + return (ctx->flags & SAVE_RA); > +} > + > +static bool seen_tail_call(struct jit_ctx *ctx) > +{ > + return (ctx->flags & SAVE_TCC); > +} > + > +static u8 tail_call_reg(struct jit_ctx *ctx) > +{ > + if (seen_call(ctx)) > + return TCC_SAVED; > + > + return REG_TCC; > +} > + > +/* > + * eBPF prog stack layout: > + * > + * high > + * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP > + * | $ra | > + * +-------------------------+ > + * | $fp | > + * +-------------------------+ > + * | $s0 | > + * +-------------------------+ > + * | $s1 | > + * +-------------------------+ > + * | $s2 | > + * +-------------------------+ > + * | $s3 | > + * +-------------------------+ > + * | $s4 | > + * +-------------------------+ > + * | $s5 | > + * +-------------------------+ <--BPF_REG_FP > + * | prog->aux->stack_depth | > + * | (optional) | > + * current $sp -------------> +-------------------------+ > + * low > + */ > +static void build_prologue(struct jit_ctx *ctx) > +{ > + int stack_adjust = 0, store_offset, bpf_stack_adjust; > + > + bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); > + > + /* To store ra, fp, s0, s1, s2, s3, s4 and s5. */ > + stack_adjust += sizeof(long) * 8; > + > + stack_adjust = round_up(stack_adjust, 16); > + stack_adjust += bpf_stack_adjust; > + > + /* > + * First instruction initializes the tail call count (TCC). > + * On tail call we skip this instruction, and the TCC is > + * passed in REG_TCC from the caller. > + */ > + emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT); > + > + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust); > + > + store_offset = stack_adjust - sizeof(long); > + emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset); > + > + store_offset -= sizeof(long); > + emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset); > + > + store_offset -= sizeof(long); > + emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset); > + > + store_offset -= sizeof(long); > + emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset); > + > + store_offset -= sizeof(long); > + emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset); > + > + store_offset -= sizeof(long); > + emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset); > + > + store_offset -= sizeof(long); > + emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset); > + > + store_offset -= sizeof(long); > + emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset); > + > + emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust); > + > + if (bpf_stack_adjust) > + emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust); > + > + /* > + * Program contains calls and tail calls, so REG_TCC need > + * to be saved across calls. > + */ > + if (seen_tail_call(ctx) && seen_call(ctx)) > + move_reg(ctx, TCC_SAVED, REG_TCC); > + > + ctx->stack_size = stack_adjust; > +} > + > +static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call) > +{ > + int stack_adjust = ctx->stack_size; > + int load_offset; > + > + load_offset = stack_adjust - sizeof(long); > + emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset); > + > + load_offset -= sizeof(long); > + emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset); > + > + load_offset -= sizeof(long); > + emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset); > + > + load_offset -= sizeof(long); > + emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset); > + > + load_offset -= sizeof(long); > + emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset); > + > + load_offset -= sizeof(long); > + emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset); > + > + load_offset -= sizeof(long); > + emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset); > + > + load_offset -= sizeof(long); > + emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset); > + > + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust); > + > + if (!is_tail_call) { > + /* Set return value */ > + move_reg(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0]); > + /* Return to the caller */ > + emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0); > + } else { > + /* > + * Call the next bpf prog and skip the first instruction > + * of TCC initialization. > + */ > + emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1); > + } > +} > + > +static void build_epilogue(struct jit_ctx *ctx) > +{ > + __build_epilogue(ctx, false); > +} > + > +bool bpf_jit_supports_kfunc_call(void) > +{ > + return true; > +} > + > +/* initialized on the first pass of build_body() */ > +static int out_offset = -1; > +static int emit_bpf_tail_call(struct jit_ctx *ctx) > +{ > + int off; > + u8 tcc = tail_call_reg(ctx); > + u8 a1 = LOONGARCH_GPR_A1; > + u8 a2 = LOONGARCH_GPR_A2; > + u8 t1 = LOONGARCH_GPR_T1; > + u8 t2 = LOONGARCH_GPR_T2; > + u8 t3 = LOONGARCH_GPR_T3; > + const int idx0 = ctx->idx; > + > +#define cur_offset (ctx->idx - idx0) > +#define jmp_offset (out_offset - (cur_offset)) > + > + /* > + * a0: &ctx > + * a1: &array > + * a2: index > + * > + * if (index >= array->map.max_entries) > + * goto out; > + */ > + off = offsetof(struct bpf_array, map.max_entries); > + emit_insn(ctx, ldwu, t1, a1, off); > + /* bgeu $a2, $t1, jmp_offset */ > + if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0) > + goto toofar; > + > + /* > + * if (--TCC < 0) > + * goto out; > + */ > + emit_insn(ctx, addid, REG_TCC, tcc, -1); > + if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0) > + goto toofar; > + > + /* > + * prog = array->ptrs[index]; > + * if (!prog) > + * goto out; > + */ > + emit_insn(ctx, alsld, t2, a2, a1, 2); > + off = offsetof(struct bpf_array, ptrs); > + emit_insn(ctx, ldd, t2, t2, off); > + /* beq $t2, $zero, jmp_offset */ > + if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0) > + goto toofar; > + > + /* goto *(prog->bpf_func + 4); */ > + off = offsetof(struct bpf_prog, bpf_func); > + emit_insn(ctx, ldd, t3, t2, off); > + __build_epilogue(ctx, true); > + > + /* out: */ > + if (out_offset == -1) > + out_offset = cur_offset; > + if (cur_offset != out_offset) { > + pr_err_once("tail_call out_offset = %d, expected %d!\n", > + cur_offset, out_offset); > + return -1; > + } > + > + return 0; > + > +toofar: > + pr_info_once("tail_call: jump too far\n"); > + return -1; > +#undef cur_offset > +#undef jmp_offset > +} > + > +static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) > +{ > + const u8 dst = regmap[insn->dst_reg]; > + const u8 src = regmap[insn->src_reg]; > + const u8 t1 = LOONGARCH_GPR_T1; > + const u8 t2 = LOONGARCH_GPR_T2; > + const u8 t3 = LOONGARCH_GPR_T3; > + const s16 off = insn->off; > + const s32 imm = insn->imm; > + const bool isdw = BPF_SIZE(insn->code) == BPF_DW; > + > + move_imm(ctx, t1, off, false); > + emit_insn(ctx, addd, t1, dst, t1); > + move_reg(ctx, t3, src); > + > + switch (imm) { > + /* lock *(size *)(dst + off) <op>= src */ > + case BPF_ADD: > + if (isdw) > + emit_insn(ctx, amaddd, t2, t1, src); > + else > + emit_insn(ctx, amaddw, t2, t1, src); > + break; > + case BPF_AND: > + if (isdw) > + emit_insn(ctx, amandd, t2, t1, src); > + else > + emit_insn(ctx, amandw, t2, t1, src); > + break; > + case BPF_OR: > + if (isdw) > + emit_insn(ctx, amord, t2, t1, src); > + else > + emit_insn(ctx, amorw, t2, t1, src); > + break; > + case BPF_XOR: > + if (isdw) > + emit_insn(ctx, amxord, t2, t1, src); > + else > + emit_insn(ctx, amxorw, t2, t1, src); > + break; > + /* src = atomic_fetch_<op>(dst + off, src) */ > + case BPF_ADD | BPF_FETCH: > + if (isdw) { > + emit_insn(ctx, amaddd, src, t1, t3); > + } else { > + emit_insn(ctx, amaddw, src, t1, t3); > + emit_zext_32(ctx, src, true); > + } > + break; > + case BPF_AND | BPF_FETCH: > + if (isdw) { > + emit_insn(ctx, amandd, src, t1, t3); > + } else { > + emit_insn(ctx, amandw, src, t1, t3); > + emit_zext_32(ctx, src, true); > + } > + break; > + case BPF_OR | BPF_FETCH: > + if (isdw) { > + emit_insn(ctx, amord, src, t1, t3); > + } else { > + emit_insn(ctx, amorw, src, t1, t3); > + emit_zext_32(ctx, src, true); > + } > + break; > + case BPF_XOR | BPF_FETCH: > + if (isdw) { > + emit_insn(ctx, amxord, src, t1, t3); > + } else { > + emit_insn(ctx, amxorw, src, t1, t3); > + emit_zext_32(ctx, src, true); > + } > + break; > + /* src = atomic_xchg(dst + off, src); */ > + case BPF_XCHG: > + if (isdw) { > + emit_insn(ctx, amswapd, src, t1, t3); > + } else { > + emit_insn(ctx, amswapw, src, t1, t3); > + emit_zext_32(ctx, src, true); > + } > + break; > + /* r0 = atomic_cmpxchg(dst + off, r0, src); */ > + case BPF_CMPXCHG: > + u8 r0 = regmap[BPF_REG_0]; > + > + move_reg(ctx, t2, r0); > + if (isdw) { > + emit_insn(ctx, lld, r0, t1, 0); > + emit_insn(ctx, bne, t2, r0, 4); > + move_reg(ctx, t3, src); > + emit_insn(ctx, scd, t3, t1, 0); > + emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4); > + } else { > + emit_insn(ctx, llw, r0, t1, 0); > + emit_zext_32(ctx, t2, true); > + emit_zext_32(ctx, r0, true); > + emit_insn(ctx, bne, t2, r0, 4); > + move_reg(ctx, t3, src); > + emit_insn(ctx, scw, t3, t1, 0); > + emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6); > + emit_zext_32(ctx, r0, true); > + } > + break; > + } > +} > + > +static bool is_signed_bpf_cond(u8 cond) > +{ > + return cond == BPF_JSGT || cond == BPF_JSLT || > + cond == BPF_JSGE || cond == BPF_JSLE; > +} > + > +static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass) > +{ > + const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || > + BPF_CLASS(insn->code) == BPF_JMP32; > + const u8 code = insn->code; > + const u8 cond = BPF_OP(code); > + const u8 dst = regmap[insn->dst_reg]; > + const u8 src = regmap[insn->src_reg]; > + const u8 t1 = LOONGARCH_GPR_T1; > + const u8 t2 = LOONGARCH_GPR_T2; > + const s16 off = insn->off; > + const s32 imm = insn->imm; > + int i = insn - ctx->prog->insnsi; > + int jmp_offset; > + > + switch (code) { > + /* dst = src */ > + case BPF_ALU | BPF_MOV | BPF_X: > + case BPF_ALU64 | BPF_MOV | BPF_X: > + move_reg(ctx, dst, src); > + emit_zext_32(ctx, dst, is32); > + break; > + /* dst = imm */ > + case BPF_ALU | BPF_MOV | BPF_K: > + case BPF_ALU64 | BPF_MOV | BPF_K: > + move_imm(ctx, dst, imm, is32); > + break; > + > + /* dst = dst + src */ > + case BPF_ALU | BPF_ADD | BPF_X: > + case BPF_ALU64 | BPF_ADD | BPF_X: > + emit_insn(ctx, addd, dst, dst, src); > + emit_zext_32(ctx, dst, is32); > + break; > + /* dst = dst + imm */ > + case BPF_ALU | BPF_ADD | BPF_K: > + case BPF_ALU64 | BPF_ADD | BPF_K: > + if (is_signed_imm12(imm)) { > + emit_insn(ctx, addid, dst, dst, imm); > + } else { > + move_imm(ctx, t1, imm, is32); > + emit_insn(ctx, addd, dst, dst, t1); > + } > + emit_zext_32(ctx, dst, is32); > + break; > + > + /* dst = dst - src */ > + case BPF_ALU | BPF_SUB | BPF_X: > + case BPF_ALU64 | BPF_SUB | BPF_X: > + emit_insn(ctx, subd, dst, dst, src); > + emit_zext_32(ctx, dst, is32); > + break; > + /* dst = dst - imm */ > + case BPF_ALU | BPF_SUB | BPF_K: > + case BPF_ALU64 | BPF_SUB | BPF_K: > + if (is_signed_imm12(-imm)) { > + emit_insn(ctx, addid, dst, dst, -imm); > + } else { > + move_imm(ctx, t1, imm, is32); > + emit_insn(ctx, subd, dst, dst, t1); > + } > + emit_zext_32(ctx, dst, is32); > + break; > + > + /* dst = dst * src */ > + case BPF_ALU | BPF_MUL | BPF_X: > + case BPF_ALU64 | BPF_MUL | BPF_X: > + emit_insn(ctx, muld, dst, dst, src); > + emit_zext_32(ctx, dst, is32); > + break; > + /* dst = dst * imm */ > + case BPF_ALU | BPF_MUL | BPF_K: > + case BPF_ALU64 | BPF_MUL | BPF_K: > + move_imm(ctx, t1, imm, is32); > + emit_insn(ctx, muld, dst, dst, t1); > + emit_zext_32(ctx, dst, is32); > + break; > + > + /* dst = dst / src */ > + case BPF_ALU | BPF_DIV | BPF_X: > + case BPF_ALU64 | BPF_DIV | BPF_X: > + emit_zext_32(ctx, dst, is32); > + move_reg(ctx, t1, src); > + emit_zext_32(ctx, t1, is32); > + emit_insn(ctx, divdu, dst, dst, t1); > + emit_zext_32(ctx, dst, is32); > + break; > + /* dst = dst / imm */ > + case BPF_ALU | BPF_DIV | BPF_K: > + case BPF_ALU64 | BPF_DIV | BPF_K: > + move_imm(ctx, t1, imm, is32); > + emit_zext_32(ctx, dst, is32); > + emit_insn(ctx, divdu, dst, dst, t1); > + emit_zext_32(ctx, dst, is32); > + break; > + > + /* dst = dst % src */ > + case BPF_ALU | BPF_MOD | BPF_X: > + case BPF_ALU64 | BPF_MOD | BPF_X: > + emit_zext_32(ctx, dst, is32); > + move_reg(ctx, t1, src); > + emit_zext_32(ctx, t1, is32); > + emit_insn(ctx, moddu, dst, dst, t1); > + emit_zext_32(ctx, dst, is32); > + break; > + /* dst = dst % imm */ > + case BPF_ALU | BPF_MOD | BPF_K: > + case BPF_ALU64 | BPF_MOD | BPF_K: > + move_imm(ctx, t1, imm, is32); > + emit_zext_32(ctx, dst, is32); > + emit_insn(ctx, moddu, dst, dst, t1); > + emit_zext_32(ctx, dst, is32); > + break; > + > + /* dst = -dst */ > + case BPF_ALU | BPF_NEG: > + case BPF_ALU64 | BPF_NEG: > + move_imm(ctx, t1, imm, is32); > + emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst); > + emit_zext_32(ctx, dst, is32); > + break; > + > + /* dst = dst & src */ > + case BPF_ALU | BPF_AND | BPF_X: > + case BPF_ALU64 | BPF_AND | BPF_X: > + emit_insn(ctx, and, dst, dst, src); > + emit_zext_32(ctx, dst, is32); > + break; > + /* dst = dst & imm */ > + case BPF_ALU | BPF_AND | BPF_K: > + case BPF_ALU64 | BPF_AND | BPF_K: > + if (is_unsigned_imm12(imm)) { > + emit_insn(ctx, andi, dst, dst, imm); > + } else { > + move_imm(ctx, t1, imm, is32); > + emit_insn(ctx, and, dst, dst, t1); > + } > + emit_zext_32(ctx, dst, is32); > + break; > + > + /* dst = dst | src */ > + case BPF_ALU | BPF_OR | BPF_X: > + case BPF_ALU64 | BPF_OR | BPF_X: > + emit_insn(ctx, or, dst, dst, src); > + emit_zext_32(ctx, dst, is32); > + break; > + /* dst = dst | imm */ > + case BPF_ALU | BPF_OR | BPF_K: > + case BPF_ALU64 | BPF_OR | BPF_K: > + if (is_unsigned_imm12(imm)) { > + emit_insn(ctx, ori, dst, dst, imm); > + } else { > + move_imm(ctx, t1, imm, is32); > + emit_insn(ctx, or, dst, dst, t1); > + } > + emit_zext_32(ctx, dst, is32); > + break; > + > + /* dst = dst ^ src */ > + case BPF_ALU | BPF_XOR | BPF_X: > + case BPF_ALU64 | BPF_XOR | BPF_X: > + emit_insn(ctx, xor, dst, dst, src); > + emit_zext_32(ctx, dst, is32); > + break; > + /* dst = dst ^ imm */ > + case BPF_ALU | BPF_XOR | BPF_K: > + case BPF_ALU64 | BPF_XOR | BPF_K: > + if (is_unsigned_imm12(imm)) { > + emit_insn(ctx, xori, dst, dst, imm); > + } else { > + move_imm(ctx, t1, imm, is32); > + emit_insn(ctx, xor, dst, dst, t1); > + } > + emit_zext_32(ctx, dst, is32); > + break; > + > + /* dst = dst << src (logical) */ > + case BPF_ALU | BPF_LSH | BPF_X: > + emit_insn(ctx, sllw, dst, dst, src); > + emit_zext_32(ctx, dst, is32); > + break; > + case BPF_ALU64 | BPF_LSH | BPF_X: > + emit_insn(ctx, slld, dst, dst, src); > + break; > + /* dst = dst << imm (logical) */ > + case BPF_ALU | BPF_LSH | BPF_K: > + emit_insn(ctx, slliw, dst, dst, imm); > + emit_zext_32(ctx, dst, is32); > + break; > + case BPF_ALU64 | BPF_LSH | BPF_K: > + emit_insn(ctx, sllid, dst, dst, imm); > + break; > + > + /* dst = dst >> src (logical) */ > + case BPF_ALU | BPF_RSH | BPF_X: > + emit_insn(ctx, srlw, dst, dst, src); > + emit_zext_32(ctx, dst, is32); > + break; > + case BPF_ALU64 | BPF_RSH | BPF_X: > + emit_insn(ctx, srld, dst, dst, src); > + break; > + /* dst = dst >> imm (logical) */ > + case BPF_ALU | BPF_RSH | BPF_K: > + emit_insn(ctx, srliw, dst, dst, imm); > + emit_zext_32(ctx, dst, is32); > + break; > + case BPF_ALU64 | BPF_RSH | BPF_K: > + emit_insn(ctx, srlid, dst, dst, imm); > + break; > + > + /* dst = dst >> src (arithmetic) */ > + case BPF_ALU | BPF_ARSH | BPF_X: > + emit_insn(ctx, sraw, dst, dst, src); > + emit_zext_32(ctx, dst, is32); > + break; > + case BPF_ALU64 | BPF_ARSH | BPF_X: > + emit_insn(ctx, srad, dst, dst, src); > + break; > + /* dst = dst >> imm (arithmetic) */ > + case BPF_ALU | BPF_ARSH | BPF_K: > + emit_insn(ctx, sraiw, dst, dst, imm); > + emit_zext_32(ctx, dst, is32); > + break; > + case BPF_ALU64 | BPF_ARSH | BPF_K: > + emit_insn(ctx, sraid, dst, dst, imm); > + break; > + > + /* dst = BSWAP##imm(dst) */ > + case BPF_ALU | BPF_END | BPF_FROM_LE: > + switch (imm) { > + case 16: > + /* zero-extend 16 bits into 64 bits */ > + emit_insn(ctx, bstrpickd, dst, dst, 15, 0); > + break; > + case 32: > + /* zero-extend 32 bits into 64 bits */ > + emit_zext_32(ctx, dst, is32); > + break; > + case 64: > + /* do nothing */ > + break; > + } > + break; > + case BPF_ALU | BPF_END | BPF_FROM_BE: > + switch (imm) { > + case 16: > + emit_insn(ctx, revb2h, dst, dst); > + /* zero-extend 16 bits into 64 bits */ > + emit_insn(ctx, bstrpickd, dst, dst, 15, 0); > + break; > + case 32: > + emit_insn(ctx, revb2w, dst, dst); > + /* zero-extend 32 bits into 64 bits */ > + emit_zext_32(ctx, dst, is32); > + break; > + case 64: > + emit_insn(ctx, revbd, dst, dst); > + break; > + } > + break; > + > + /* PC += off if dst cond src */ > + case BPF_JMP | BPF_JEQ | BPF_X: > + case BPF_JMP | BPF_JNE | BPF_X: > + case BPF_JMP | BPF_JGT | BPF_X: > + case BPF_JMP | BPF_JGE | BPF_X: > + case BPF_JMP | BPF_JLT | BPF_X: > + case BPF_JMP | BPF_JLE | BPF_X: > + case BPF_JMP | BPF_JSGT | BPF_X: > + case BPF_JMP | BPF_JSGE | BPF_X: > + case BPF_JMP | BPF_JSLT | BPF_X: > + case BPF_JMP | BPF_JSLE | BPF_X: > + case BPF_JMP32 | BPF_JEQ | BPF_X: > + case BPF_JMP32 | BPF_JNE | BPF_X: > + case BPF_JMP32 | BPF_JGT | BPF_X: > + case BPF_JMP32 | BPF_JGE | BPF_X: > + case BPF_JMP32 | BPF_JLT | BPF_X: > + case BPF_JMP32 | BPF_JLE | BPF_X: > + case BPF_JMP32 | BPF_JSGT | BPF_X: > + case BPF_JMP32 | BPF_JSGE | BPF_X: > + case BPF_JMP32 | BPF_JSLT | BPF_X: > + case BPF_JMP32 | BPF_JSLE | BPF_X: > + jmp_offset = bpf2la_offset(i, off, ctx); > + move_reg(ctx, t1, dst); > + move_reg(ctx, t2, src); > + if (is_signed_bpf_cond(BPF_OP(code))) { > + emit_sext_32(ctx, t1, is32); > + emit_sext_32(ctx, t2, is32); > + } else { > + emit_zext_32(ctx, t1, is32); > + emit_zext_32(ctx, t2, is32); > + } > + if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0) > + goto toofar; > + break; > + > + /* PC += off if dst cond imm */ > + case BPF_JMP | BPF_JEQ | BPF_K: > + case BPF_JMP | BPF_JNE | BPF_K: > + case BPF_JMP | BPF_JGT | BPF_K: > + case BPF_JMP | BPF_JGE | BPF_K: > + case BPF_JMP | BPF_JLT | BPF_K: > + case BPF_JMP | BPF_JLE | BPF_K: > + case BPF_JMP | BPF_JSGT | BPF_K: > + case BPF_JMP | BPF_JSGE | BPF_K: > + case BPF_JMP | BPF_JSLT | BPF_K: > + case BPF_JMP | BPF_JSLE | BPF_K: > + case BPF_JMP32 | BPF_JEQ | BPF_K: > + case BPF_JMP32 | BPF_JNE | BPF_K: > + case BPF_JMP32 | BPF_JGT | BPF_K: > + case BPF_JMP32 | BPF_JGE | BPF_K: > + case BPF_JMP32 | BPF_JLT | BPF_K: > + case BPF_JMP32 | BPF_JLE | BPF_K: > + case BPF_JMP32 | BPF_JSGT | BPF_K: > + case BPF_JMP32 | BPF_JSGE | BPF_K: > + case BPF_JMP32 | BPF_JSLT | BPF_K: > + case BPF_JMP32 | BPF_JSLE | BPF_K: > + u8 t7 = -1; > + jmp_offset = bpf2la_offset(i, off, ctx); > + if (imm) { > + move_imm(ctx, t1, imm, false); > + t7 = t1; > + } else { > + /* If imm is 0, simply use zero register. */ > + t7 = LOONGARCH_GPR_ZERO; > + } > + move_reg(ctx, t2, dst); > + if (is_signed_bpf_cond(BPF_OP(code))) { > + emit_sext_32(ctx, t7, is32); > + emit_sext_32(ctx, t2, is32); > + } else { > + emit_zext_32(ctx, t7, is32); > + emit_zext_32(ctx, t2, is32); > + } > + if (emit_cond_jmp(ctx, cond, t2, t7, jmp_offset) < 0) > + goto toofar; > + break; > + > + /* PC += off if dst & src */ > + case BPF_JMP | BPF_JSET | BPF_X: > + case BPF_JMP32 | BPF_JSET | BPF_X: > + jmp_offset = bpf2la_offset(i, off, ctx); > + emit_insn(ctx, and, t1, dst, src); > + emit_zext_32(ctx, t1, is32); > + if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0) > + goto toofar; > + break; > + /* PC += off if dst & imm */ > + case BPF_JMP | BPF_JSET | BPF_K: > + case BPF_JMP32 | BPF_JSET | BPF_K: > + jmp_offset = bpf2la_offset(i, off, ctx); > + move_imm(ctx, t1, imm, is32); > + emit_insn(ctx, and, t1, dst, t1); > + emit_zext_32(ctx, t1, is32); > + if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0) > + goto toofar; > + break; > + > + /* PC += off */ > + case BPF_JMP | BPF_JA: > + jmp_offset = bpf2la_offset(i, off, ctx); > + if (emit_uncond_jmp(ctx, jmp_offset) < 0) > + goto toofar; > + break; > + > + /* function call */ > + case BPF_JMP | BPF_CALL: > + bool func_addr_fixed; > + u64 func_addr; > + int ret; > + > + mark_call(ctx); > + ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, > + &func_addr, &func_addr_fixed); > + if (ret < 0) > + return ret; > + > + move_imm(ctx, t1, func_addr, is32); > + emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0); > + move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0); > + break; > + > + /* tail call */ > + case BPF_JMP | BPF_TAIL_CALL: > + mark_tail_call(ctx); > + if (emit_bpf_tail_call(ctx) < 0) > + return -EINVAL; > + break; > + > + /* function return */ > + case BPF_JMP | BPF_EXIT: > + emit_sext_32(ctx, regmap[BPF_REG_0], true); > + > + if (i == ctx->prog->len - 1) > + break; > + > + jmp_offset = epilogue_offset(ctx); > + if (emit_uncond_jmp(ctx, jmp_offset) < 0) > + goto toofar; > + break; > + > + /* dst = imm64 */ > + case BPF_LD | BPF_IMM | BPF_DW: > + u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm; > + > + move_imm(ctx, dst, imm64, is32); > + return 1; > + > + /* dst = *(size *)(src + off) */ > + case BPF_LDX | BPF_MEM | BPF_B: > + case BPF_LDX | BPF_MEM | BPF_H: > + case BPF_LDX | BPF_MEM | BPF_W: > + case BPF_LDX | BPF_MEM | BPF_DW: > + switch (BPF_SIZE(code)) { > + case BPF_B: > + if (is_signed_imm12(off)) { > + emit_insn(ctx, ldbu, dst, src, off); > + } else { > + move_imm(ctx, t1, off, is32); > + emit_insn(ctx, ldxbu, dst, src, t1); > + } > + break; > + case BPF_H: > + if (is_signed_imm12(off)) { > + emit_insn(ctx, ldhu, dst, src, off); > + } else { > + move_imm(ctx, t1, off, is32); > + emit_insn(ctx, ldxhu, dst, src, t1); > + } > + break; > + case BPF_W: > + if (is_signed_imm12(off)) { > + emit_insn(ctx, ldwu, dst, src, off); > + } else if (is_signed_imm14(off)) { > + emit_insn(ctx, ldptrw, dst, src, off); > + } else { > + move_imm(ctx, t1, off, is32); > + emit_insn(ctx, ldxwu, dst, src, t1); > + } > + break; > + case BPF_DW: > + if (is_signed_imm12(off)) { > + emit_insn(ctx, ldd, dst, src, off); > + } else if (is_signed_imm14(off)) { > + emit_insn(ctx, ldptrd, dst, src, off); > + } else { > + move_imm(ctx, t1, off, is32); > + emit_insn(ctx, ldxd, dst, src, t1); > + } > + break; > + } > + break; > + > + /* *(size *)(dst + off) = imm */ > + case BPF_ST | BPF_MEM | BPF_B: > + case BPF_ST | BPF_MEM | BPF_H: > + case BPF_ST | BPF_MEM | BPF_W: > + case BPF_ST | BPF_MEM | BPF_DW: > + switch (BPF_SIZE(code)) { > + case BPF_B: > + move_imm(ctx, t1, imm, is32); > + if (is_signed_imm12(off)) { > + emit_insn(ctx, stb, t1, dst, off); > + } else { > + move_imm(ctx, t2, off, is32); > + emit_insn(ctx, stxb, t1, dst, t2); > + } > + break; > + case BPF_H: > + move_imm(ctx, t1, imm, is32); > + if (is_signed_imm12(off)) { > + emit_insn(ctx, sth, t1, dst, off); > + } else { > + move_imm(ctx, t2, off, is32); > + emit_insn(ctx, stxh, t1, dst, t2); > + } > + break; > + case BPF_W: > + move_imm(ctx, t1, imm, is32); > + if (is_signed_imm12(off)) { > + emit_insn(ctx, stw, t1, dst, off); > + } else if (is_signed_imm14(off)) { > + emit_insn(ctx, stptrw, t1, dst, off); > + } else { > + move_imm(ctx, t2, off, is32); > + emit_insn(ctx, stxw, t1, dst, t2); > + } > + break; > + case BPF_DW: > + move_imm(ctx, t1, imm, is32); > + if (is_signed_imm12(off)) { > + emit_insn(ctx, std, t1, dst, off); > + } else if (is_signed_imm14(off)) { > + emit_insn(ctx, stptrd, t1, dst, off); > + } else { > + move_imm(ctx, t2, off, is32); > + emit_insn(ctx, stxd, t1, dst, t2); > + } > + break; > + } > + break; > + > + /* *(size *)(dst + off) = src */ > + case BPF_STX | BPF_MEM | BPF_B: > + case BPF_STX | BPF_MEM | BPF_H: > + case BPF_STX | BPF_MEM | BPF_W: > + case BPF_STX | BPF_MEM | BPF_DW: > + switch (BPF_SIZE(code)) { > + case BPF_B: > + if (is_signed_imm12(off)) { > + emit_insn(ctx, stb, src, dst, off); > + } else { > + move_imm(ctx, t1, off, is32); > + emit_insn(ctx, stxb, src, dst, t1); > + } > + break; > + case BPF_H: > + if (is_signed_imm12(off)) { > + emit_insn(ctx, sth, src, dst, off); > + } else { > + move_imm(ctx, t1, off, is32); > + emit_insn(ctx, stxh, src, dst, t1); > + } > + break; > + case BPF_W: > + if (is_signed_imm12(off)) { > + emit_insn(ctx, stw, src, dst, off); > + } else if (is_signed_imm14(off)) { > + emit_insn(ctx, stptrw, src, dst, off); > + } else { > + move_imm(ctx, t1, off, is32); > + emit_insn(ctx, stxw, src, dst, t1); > + } > + break; > + case BPF_DW: > + if (is_signed_imm12(off)) { > + emit_insn(ctx, std, src, dst, off); > + } else if (is_signed_imm14(off)) { > + emit_insn(ctx, stptrd, src, dst, off); > + } else { > + move_imm(ctx, t1, off, is32); > + emit_insn(ctx, stxd, src, dst, t1); > + } > + break; > + } > + break; > + > + case BPF_STX | BPF_ATOMIC | BPF_W: > + case BPF_STX | BPF_ATOMIC | BPF_DW: > + emit_atomic(insn, ctx); > + break; > + > + default: > + pr_err("bpf_jit: unknown opcode %02x\n", code); > + return -EINVAL; > + } > + > + return 0; > + > +toofar: > + pr_info_once("bpf_jit: opcode %02x, jump too far\n", code); > + return -E2BIG; > +} > + > +static int build_body(struct jit_ctx *ctx, bool extra_pass) > +{ > + const struct bpf_prog *prog = ctx->prog; > + int i; > + > + for (i = 0; i < prog->len; i++) { > + const struct bpf_insn *insn = &prog->insnsi[i]; > + int ret; > + > + if (ctx->image == NULL) > + ctx->offset[i] = ctx->idx; > + > + ret = build_insn(insn, ctx, extra_pass); > + if (ret > 0) { > + i++; > + if (ctx->image == NULL) > + ctx->offset[i] = ctx->idx; > + continue; > + } > + if (ret) > + return ret; > + } > + > + if (ctx->image == NULL) > + ctx->offset[i] = ctx->idx; > + > + return 0; > +} > + > +static inline void bpf_flush_icache(void *start, void *end) > +{ > + flush_icache_range((unsigned long)start, (unsigned long)end); > +} > + > +/* Fill space with illegal instructions */ > +static void jit_fill_hole(void *area, unsigned int size) > +{ > + u32 *ptr; > + > + /* We are guaranteed to have aligned memory */ > + for (ptr = area; size >= sizeof(u32); size -= sizeof(u32)) > + *ptr++ = INSN_BREAK; > +} > + > +static int validate_code(struct jit_ctx *ctx) > +{ > + int i; > + union loongarch_instruction insn; > + > + for (i = 0; i < ctx->idx; i++) { > + insn = ctx->image[i]; > + /* Check INSN_BREAK */ > + if (insn.word == INSN_BREAK) > + return -1; > + } > + > + return 0; > +} > + > +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) > +{ > + bool tmp_blinded = false, extra_pass = false; > + struct bpf_prog *tmp, *orig_prog = prog; > + struct bpf_binary_header *header; > + struct jit_data *jit_data; > + struct jit_ctx ctx; > + int image_size; > + u8 *image_ptr; > + > + /* > + * If BPF JIT was not enabled then we must fall back to > + * the interpreter. > + */ > + if (!prog->jit_requested) > + return orig_prog; > + > + tmp = bpf_jit_blind_constants(prog); > + /* > + * If blinding was requested and we failed during blinding, > + * we must fall back to the interpreter. Otherwise, we save > + * the new JITed code. > + */ > + if (IS_ERR(tmp)) > + return orig_prog; > + if (tmp != prog) { > + tmp_blinded = true; > + prog = tmp; > + } > + > + jit_data = prog->aux->jit_data; > + if (!jit_data) { > + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); > + if (!jit_data) { > + prog = orig_prog; > + goto out; > + } > + prog->aux->jit_data = jit_data; > + } > + if (jit_data->ctx.offset) { > + ctx = jit_data->ctx; > + image_ptr = jit_data->image; > + header = jit_data->header; > + extra_pass = true; > + image_size = sizeof(u32) * ctx.idx; > + goto skip_init_ctx; > + } > + > + memset(&ctx, 0, sizeof(ctx)); > + ctx.prog = prog; > + > + ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL); > + if (ctx.offset == NULL) { > + prog = orig_prog; > + goto out_offset; > + } > + > + /* 1. Initial fake pass to compute ctx->idx and set ctx->flags */ > + build_prologue(&ctx); > + if (build_body(&ctx, extra_pass)) { > + prog = orig_prog; > + goto out_offset; > + } > + ctx.epilogue_offset = ctx.idx; > + build_epilogue(&ctx); > + > + /* Now we know the actual image size. > + * As each LoongArch instruction is of length 32bit, > + * we are translating number of JITed intructions into > + * the size required to store these JITed code. > + */ > + image_size = sizeof(u32) * ctx.idx; > + /* Now we know the size of the structure to make */ > + header = bpf_jit_binary_alloc(image_size, &image_ptr, > + sizeof(u32), jit_fill_hole); > + if (header == NULL) { > + prog = orig_prog; > + goto out_offset; > + } > + > + /* 2. Now, the actual pass to generate final JIT code */ > + ctx.image = (union loongarch_instruction *)image_ptr; > +skip_init_ctx: > + ctx.idx = 0; > + > + build_prologue(&ctx); > + if (build_body(&ctx, extra_pass)) { > + bpf_jit_binary_free(header); > + prog = orig_prog; > + goto out_offset; > + } > + build_epilogue(&ctx); > + > + /* 3. Extra pass to validate JITed code */ > + if (validate_code(&ctx)) { > + bpf_jit_binary_free(header); > + prog = orig_prog; > + goto out_offset; > + } > + > + /* And we're done */ > + if (bpf_jit_enable > 1) > + bpf_jit_dump(prog->len, image_size, 2, ctx.image); > + > + /* Update the icache */ > + bpf_flush_icache(header, ctx.image + ctx.idx); > + > + if (!prog->is_func || extra_pass) { > + if (extra_pass && ctx.idx != jit_data->ctx.idx) { > + pr_err_once("multi-func JIT bug %d != %d\n", > + ctx.idx, jit_data->ctx.idx); > + bpf_jit_binary_free(header); > + prog->bpf_func = NULL; > + prog->jited = 0; > + prog->jited_len = 0; > + goto out_offset; > + } > + bpf_jit_binary_lock_ro(header); > + } else { > + jit_data->ctx = ctx; > + jit_data->image = image_ptr; > + jit_data->header = header; > + } > + prog->bpf_func = (void *)ctx.image; > + prog->jited = 1; > + prog->jited_len = image_size; > + > + if (!prog->is_func || extra_pass) { > + int i; > + > + /* offset[prog->len] is the size of program */ > + for (i = 0; i <= prog->len; i++) > + ctx.offset[i] *= LOONGARCH_INSN_SIZE; > + bpf_prog_fill_jited_linfo(prog, ctx.offset + 1); > +out_offset: > + kvfree(ctx.offset); > + kfree(jit_data); > + prog->aux->jit_data = NULL; > + } > +out: > + if (tmp_blinded) > + bpf_jit_prog_release_other(prog, prog == orig_prog ? > + tmp : orig_prog); > + > + out_offset = -1; > + return prog; > +} > diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h > new file mode 100644 > index 0000000..205a935 > --- /dev/null > +++ b/arch/loongarch/net/bpf_jit.h > @@ -0,0 +1,282 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +/* > + * BPF JIT compiler for LoongArch > + * > + * Copyright (C) 2022 Loongson Technology Corporation Limited > + */ > +#include <linux/bpf.h> > +#include <linux/filter.h> > +#include <asm/cacheflush.h> > +#include <asm/inst.h> > + > +struct jit_ctx { > + const struct bpf_prog *prog; > + unsigned int idx; > + unsigned int flags; > + unsigned int epilogue_offset; > + u32 *offset; > + union loongarch_instruction *image; > + u32 stack_size; > +}; > + > +struct jit_data { > + struct bpf_binary_header *header; > + u8 *image; > + struct jit_ctx ctx; > +}; > + > +#define emit_insn(ctx, func, ...) \ > +do { \ > + if (ctx->image != NULL) { \ > + union loongarch_instruction *insn = &ctx->image[ctx->idx]; \ > + emit_##func(insn, ##__VA_ARGS__); \ > + } \ > + ctx->idx++; \ > +} while (0) > + > +#define is_signed_imm12(val) signed_imm_check(val, 12) > +#define is_signed_imm14(val) signed_imm_check(val, 14) > +#define is_signed_imm16(val) signed_imm_check(val, 16) > +#define is_signed_imm26(val) signed_imm_check(val, 26) > +#define is_signed_imm32(val) signed_imm_check(val, 32) > +#define is_signed_imm52(val) signed_imm_check(val, 52) > +#define is_unsigned_imm12(val) unsigned_imm_check(val, 12) > + > +static inline int bpf2la_offset(int bpf_insn, int off, const struct jit_ctx *ctx) > +{ > + /* BPF JMP offset is relative to the next instruction */ > + bpf_insn++; > + /* > + * Whereas LoongArch branch instructions encode the offset > + * from the branch itself, so we must subtract 1 from the > + * instruction offset. > + */ > + return (ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1)); > +} > + > +static inline int epilogue_offset(const struct jit_ctx *ctx) > +{ > + int to = ctx->epilogue_offset; > + int from = ctx->idx; > + > + return (to - from); > +} > + > +/* Zero-extend 32 bits into 64 bits */ > +static inline void emit_zext_32(struct jit_ctx *ctx, enum loongarch_gpr reg, bool is32) > +{ > + if (!is32) > + return; > + > + emit_insn(ctx, lu32id, reg, 0); > +} > + > +/* Signed-extend 32 bits into 64 bits */ > +static inline void emit_sext_32(struct jit_ctx *ctx, enum loongarch_gpr reg, bool is32) > +{ > + if (!is32) > + return; > + > + emit_insn(ctx, addiw, reg, reg, 0); > +} > + > +static inline void move_imm(struct jit_ctx *ctx, enum loongarch_gpr rd, long imm, bool is32) > +{ > + long imm_11_0, imm_31_12, imm_51_32, imm_63_52, imm_51_0, imm_51_31; > + > + /* or rd, $zero, $zero */ > + if (imm == 0) { > + emit_insn(ctx, or, rd, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_ZERO); > + return; > + } > + > + /* addiw rd, $zero, imm_11_0 */ > + if (is_signed_imm12(imm)) { > + emit_insn(ctx, addiw, rd, LOONGARCH_GPR_ZERO, imm); > + goto zext; > + } > + > + /* ori rd, $zero, imm_11_0 */ > + if (is_unsigned_imm12(imm)) { > + emit_insn(ctx, ori, rd, LOONGARCH_GPR_ZERO, imm); > + goto zext; > + } > + > + /* lu52id rd, $zero, imm_63_52 */ > + imm_63_52 = (imm >> 52) & 0xfff; > + imm_51_0 = imm & 0xfffffffffffff; > + if (imm_63_52 != 0 && imm_51_0 == 0) { > + emit_insn(ctx, lu52id, rd, LOONGARCH_GPR_ZERO, imm_63_52); > + return; > + } > + > + /* lu12iw rd, imm_31_12 */ > + imm_31_12 = (imm >> 12) & 0xfffff; > + emit_insn(ctx, lu12iw, rd, imm_31_12); > + > + /* ori rd, rd, imm_11_0 */ > + imm_11_0 = imm & 0xfff; > + if (imm_11_0 != 0) > + emit_insn(ctx, ori, rd, rd, imm_11_0); > + > + if (!is_signed_imm32(imm)) { > + if (imm_51_0 != 0) { > + /* > + * If bit[51:31] is all 0 or all 1, > + * it means bit[51:32] is sign extended by lu12iw, > + * no need to call lu32id to do a new filled operation. > + */ > + imm_51_31 = (imm >> 31) & 0x1fffff; > + if (imm_51_31 != 0 || imm_51_31 != 0x1fffff) { > + /* lu32id rd, imm_51_32 */ > + imm_51_32 = (imm >> 32) & 0xfffff; > + emit_insn(ctx, lu32id, rd, imm_51_32); > + } > + } > + > + /* lu52id rd, rd, imm_63_52 */ > + if (!is_signed_imm52(imm)) > + emit_insn(ctx, lu52id, rd, rd, imm_63_52); > + } > + > +zext: > + emit_zext_32(ctx, rd, is32); > +} > + > +static inline void move_reg(struct jit_ctx *ctx, enum loongarch_gpr rd, > + enum loongarch_gpr rj) > +{ > + emit_insn(ctx, or, rd, rj, LOONGARCH_GPR_ZERO); > +} > + > +static inline int invert_jmp_cond(u8 cond) > +{ > + switch (cond) { > + case BPF_JEQ: > + return BPF_JNE; > + case BPF_JNE: > + case BPF_JSET: > + return BPF_JEQ; > + case BPF_JGT: > + return BPF_JLE; > + case BPF_JGE: > + return BPF_JLT; > + case BPF_JLT: > + return BPF_JGE; > + case BPF_JLE: > + return BPF_JGT; > + case BPF_JSGT: > + return BPF_JSLE; > + case BPF_JSGE: > + return BPF_JSLT; > + case BPF_JSLT: > + return BPF_JSGE; > + case BPF_JSLE: > + return BPF_JSGT; > + } > + return -1; > +} > + > +static inline void cond_jmp_offs16(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, > + enum loongarch_gpr rd, int jmp_offset) > +{ > + switch (cond) { > + case BPF_JEQ: > + /* PC += jmp_offset if rj == rd */ > + emit_insn(ctx, beq, rj, rd, jmp_offset); > + return; > + case BPF_JNE: > + case BPF_JSET: > + /* PC += jmp_offset if rj != rd */ > + emit_insn(ctx, bne, rj, rd, jmp_offset); > + return; > + case BPF_JGT: > + /* PC += jmp_offset if rj > rd (unsigned) */ > + emit_insn(ctx, bltu, rd, rj, jmp_offset); > + return; > + case BPF_JLT: > + /* PC += jmp_offset if rj < rd (unsigned) */ > + emit_insn(ctx, bltu, rj, rd, jmp_offset); > + return; > + case BPF_JGE: > + /* PC += jmp_offset if rj >= rd (unsigned) */ > + emit_insn(ctx, bgeu, rj, rd, jmp_offset); > + return; > + case BPF_JLE: > + /* PC += jmp_offset if rj <= rd (unsigned) */ > + emit_insn(ctx, bgeu, rd, rj, jmp_offset); > + return; > + case BPF_JSGT: > + /* PC += jmp_offset if rj > rd (signed) */ > + emit_insn(ctx, blt, rd, rj, jmp_offset); > + return; > + case BPF_JSLT: > + /* PC += jmp_offset if rj < rd (signed) */ > + emit_insn(ctx, blt, rj, rd, jmp_offset); > + return; > + case BPF_JSGE: > + /* PC += jmp_offset if rj >= rd (signed) */ > + emit_insn(ctx, bge, rj, rd, jmp_offset); > + return; > + case BPF_JSLE: > + /* PC += jmp_offset if rj <= rd (signed) */ > + emit_insn(ctx, bge, rd, rj, jmp_offset); > + return; > + } > +} > + > +static inline void cond_jmp_offs26(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, > + enum loongarch_gpr rd, int jmp_offset) > +{ > + cond = invert_jmp_cond(cond); > + cond_jmp_offs16(ctx, cond, rj, rd, 2); > + emit_insn(ctx, b, jmp_offset); > +} > + > +static inline void uncond_jmp_offs26(struct jit_ctx *ctx, int jmp_offset) > +{ > + emit_insn(ctx, b, jmp_offset); > +} > + > +static inline int emit_cond_jmp(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, > + enum loongarch_gpr rd, int jmp_offset) > +{ > + /* > + * A large PC-relative jump offset may overflow the immediate field of > + * the native conditional branch instruction, triggering a conversion > + * to use an absolute jump instead, this jump sequence is particularly > + * nasty. For now, use cond_jmp_offs26() directly to keep it simple. > + * In the future, maybe we can add support for far branching, the branch > + * relaxation requires more than two passes to converge, the code seems > + * too complex to understand, not quite sure whether it is necessary and > + * worth the extra pain. Anyway, just leave it as it is to enhance code > + * readability now. Oh, no. I don't think this is a very difficult problem, because the old version has already solved [1]. Please improve your code and send V4. BTW, I have committed V3 with some small modifications in https://github.com/loongson/linux/commits/loongarch-next, please make V4 based on that. [1] https://github.com/loongson/linux/commit/e20b2353f40cd13720996524e1df6d0ca086eeb8#diff-6d2f4f5a862a5dce12f8eb0feeca095825c4ed1c2e7151b0905fb8d03c98922e --------code in the old version-------- static inline void emit_cond_jump(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, enum loongarch_gpr rd, int jmp_offset) { if (is_signed_imm16(jmp_offset)) cond_jump_offs16(ctx, cond, rj, rd, jmp_offset); else if (is_signed_imm26(jmp_offset)) cond_jump_offs26(ctx, cond, rj, rd, jmp_offset); else cond_jump_offs32(ctx, cond, rj, rd, jmp_offset); } static inline void emit_uncond_jump(struct jit_ctx *ctx, int jmp_offset, bool is_exit) { if (is_signed_imm26(jmp_offset)) uncond_jump_offs26(ctx, jmp_offset); else uncond_jump_offs32(ctx, jmp_offset, is_exit); } --------end of code-------- Huacai > + */ > + if (is_signed_imm26(jmp_offset)) { > + cond_jmp_offs26(ctx, cond, rj, rd, jmp_offset); > + return 0; > + } > + > + return -EINVAL; > +} > + > +static inline int emit_uncond_jmp(struct jit_ctx *ctx, int jmp_offset) > +{ > + if (is_signed_imm26(jmp_offset)) { > + uncond_jmp_offs26(ctx, jmp_offset); > + return 0; > + } > + > + return -EINVAL; > +} > + > +static inline int emit_tailcall_jmp(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, > + enum loongarch_gpr rd, int jmp_offset) > +{ > + if (is_signed_imm16(jmp_offset)) { > + cond_jmp_offs16(ctx, cond, rj, rd, jmp_offset); > + return 0; > + } > + > + return -EINVAL; > +} > -- > 2.1.0 > >
On 09/03/2022 04:32 PM, Huacai Chen wrote: > On Thu, Sep 1, 2022 at 10:27 AM Tiezhu Yang <yangtiezhu@loongson.cn> wrote: >> >> BPF programs are normally handled by a BPF interpreter, add BPF JIT >> support for LoongArch to allow the kernel to generate native code >> when a program is loaded into the kernel, this will significantly >> speed-up processing of BPF programs. [...] >> + >> +static inline int emit_cond_jmp(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, >> + enum loongarch_gpr rd, int jmp_offset) >> +{ >> + /* >> + * A large PC-relative jump offset may overflow the immediate field of >> + * the native conditional branch instruction, triggering a conversion >> + * to use an absolute jump instead, this jump sequence is particularly >> + * nasty. For now, use cond_jmp_offs26() directly to keep it simple. >> + * In the future, maybe we can add support for far branching, the branch >> + * relaxation requires more than two passes to converge, the code seems >> + * too complex to understand, not quite sure whether it is necessary and >> + * worth the extra pain. Anyway, just leave it as it is to enhance code >> + * readability now. > Oh, no. I don't think this is a very difficult problem, because the > old version has already solved [1]. Please improve your code and send > V4. > BTW, I have committed V3 with some small modifications in > https://github.com/loongson/linux/commits/loongarch-next, please make > V4 based on that. > > [1] https://github.com/loongson/linux/commit/e20b2353f40cd13720996524e1df6d0ca086eeb8#diff-6d2f4f5a862a5dce12f8eb0feeca095825c4ed1c2e7151b0905fb8d03c98922e > > --------code in the old version-------- > static inline void emit_cond_jump(struct jit_ctx *ctx, u8 cond, enum > loongarch_gpr rj, > enum loongarch_gpr rd, int jmp_offset) > { > if (is_signed_imm16(jmp_offset)) > cond_jump_offs16(ctx, cond, rj, rd, jmp_offset); > else if (is_signed_imm26(jmp_offset)) > cond_jump_offs26(ctx, cond, rj, rd, jmp_offset); > else > cond_jump_offs32(ctx, cond, rj, rd, jmp_offset); > } > > static inline void emit_uncond_jump(struct jit_ctx *ctx, int > jmp_offset, bool is_exit) > { > if (is_signed_imm26(jmp_offset)) > uncond_jump_offs26(ctx, jmp_offset); > else > uncond_jump_offs32(ctx, jmp_offset, is_exit); > } > --------end of code-------- > > Huacai > Hi Huacai, This change is to pass the special test cases: "a new type of jump test where the program jumps forwards and backwards with increasing offset. It mainly tests JITs where a relative jump may generate different JITed code depending on the offset size." They are introduced in commit a7d2e752e520 ("bpf/tests: Add staggered JMP and JMP32 tests") after the old internal version you mentioned. Here, we use the native instructions to enlarge the jump range to 26 bit directly rather than 16 bit first, and also take no account of more than 26 bit case because there is no native instruction and it needs to emulate. As the code comment said, this is to enhance code readability now. Thanks, Tiezhu
On Sat, Sep 3, 2022 at 6:11 PM Tiezhu Yang <yangtiezhu@loongson.cn> wrote: > > > > On 09/03/2022 04:32 PM, Huacai Chen wrote: > > On Thu, Sep 1, 2022 at 10:27 AM Tiezhu Yang <yangtiezhu@loongson.cn> wrote: > >> > >> BPF programs are normally handled by a BPF interpreter, add BPF JIT > >> support for LoongArch to allow the kernel to generate native code > >> when a program is loaded into the kernel, this will significantly > >> speed-up processing of BPF programs. > > [...] > > >> + > >> +static inline int emit_cond_jmp(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, > >> + enum loongarch_gpr rd, int jmp_offset) > >> +{ > >> + /* > >> + * A large PC-relative jump offset may overflow the immediate field of > >> + * the native conditional branch instruction, triggering a conversion > >> + * to use an absolute jump instead, this jump sequence is particularly > >> + * nasty. For now, use cond_jmp_offs26() directly to keep it simple. > >> + * In the future, maybe we can add support for far branching, the branch > >> + * relaxation requires more than two passes to converge, the code seems > >> + * too complex to understand, not quite sure whether it is necessary and > >> + * worth the extra pain. Anyway, just leave it as it is to enhance code > >> + * readability now. > > Oh, no. I don't think this is a very difficult problem, because the > > old version has already solved [1]. Please improve your code and send > > V4. > > BTW, I have committed V3 with some small modifications in > > https://github.com/loongson/linux/commits/loongarch-next, please make > > V4 based on that. > > > > [1] https://github.com/loongson/linux/commit/e20b2353f40cd13720996524e1df6d0ca086eeb8#diff-6d2f4f5a862a5dce12f8eb0feeca095825c4ed1c2e7151b0905fb8d03c98922e > > > > --------code in the old version-------- > > static inline void emit_cond_jump(struct jit_ctx *ctx, u8 cond, enum > > loongarch_gpr rj, > > enum loongarch_gpr rd, int jmp_offset) > > { > > if (is_signed_imm16(jmp_offset)) > > cond_jump_offs16(ctx, cond, rj, rd, jmp_offset); > > else if (is_signed_imm26(jmp_offset)) > > cond_jump_offs26(ctx, cond, rj, rd, jmp_offset); > > else > > cond_jump_offs32(ctx, cond, rj, rd, jmp_offset); > > } > > > > static inline void emit_uncond_jump(struct jit_ctx *ctx, int > > jmp_offset, bool is_exit) > > { > > if (is_signed_imm26(jmp_offset)) > > uncond_jump_offs26(ctx, jmp_offset); > > else > > uncond_jump_offs32(ctx, jmp_offset, is_exit); > > } > > --------end of code-------- > > > > Huacai > > > > Hi Huacai, > > This change is to pass the special test cases: > "a new type of jump test where the program jumps forwards > and backwards with increasing offset. It mainly tests JITs where a > relative jump may generate different JITed code depending on the offset > size." > > They are introduced in commit a7d2e752e520 ("bpf/tests: Add staggered > JMP and JMP32 tests") after the old internal version you mentioned. > > Here, we use the native instructions to enlarge the jump range to 26 bit > directly rather than 16 bit first, and also take no account of more than > 26 bit case because there is no native instruction and it needs to emulate. > > As the code comment said, this is to enhance code readability now. I'm not familiar with bpf, Daniel, Ruoyao and Xuerui, what do you think about it? Huacai > > Thanks, > Tiezhu >
Hi, On 9/4/22 17:04, Huacai Chen wrote: > On Sat, Sep 3, 2022 at 6:11 PM Tiezhu Yang <yangtiezhu@loongson.cn> wrote: >> >> >> On 09/03/2022 04:32 PM, Huacai Chen wrote: >>> On Thu, Sep 1, 2022 at 10:27 AM Tiezhu Yang <yangtiezhu@loongson.cn> wrote: >>>> BPF programs are normally handled by a BPF interpreter, add BPF JIT >>>> support for LoongArch to allow the kernel to generate native code >>>> when a program is loaded into the kernel, this will significantly >>>> speed-up processing of BPF programs. >> [...] >> >>>> + >>>> +static inline int emit_cond_jmp(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, >>>> + enum loongarch_gpr rd, int jmp_offset) >>>> +{ >>>> + /* >>>> + * A large PC-relative jump offset may overflow the immediate field of >>>> + * the native conditional branch instruction, triggering a conversion >>>> + * to use an absolute jump instead, this jump sequence is particularly >>>> + * nasty. For now, use cond_jmp_offs26() directly to keep it simple. >>>> + * In the future, maybe we can add support for far branching, the branch >>>> + * relaxation requires more than two passes to converge, the code seems >>>> + * too complex to understand, not quite sure whether it is necessary and >>>> + * worth the extra pain. Anyway, just leave it as it is to enhance code >>>> + * readability now. >>> Oh, no. I don't think this is a very difficult problem, because the >>> old version has already solved [1]. Please improve your code and send >>> V4. >>> BTW, I have committed V3 with some small modifications in >>> https://github.com/loongson/linux/commits/loongarch-next, please make >>> V4 based on that. >>> >>> [1] https://github.com/loongson/linux/commit/e20b2353f40cd13720996524e1df6d0ca086eeb8#diff-6d2f4f5a862a5dce12f8eb0feeca095825c4ed1c2e7151b0905fb8d03c98922e >>> >>> --------code in the old version-------- >>> static inline void emit_cond_jump(struct jit_ctx *ctx, u8 cond, enum >>> loongarch_gpr rj, >>> enum loongarch_gpr rd, int jmp_offset) >>> { >>> if (is_signed_imm16(jmp_offset)) >>> cond_jump_offs16(ctx, cond, rj, rd, jmp_offset); >>> else if (is_signed_imm26(jmp_offset)) >>> cond_jump_offs26(ctx, cond, rj, rd, jmp_offset); >>> else >>> cond_jump_offs32(ctx, cond, rj, rd, jmp_offset); >>> } >>> >>> static inline void emit_uncond_jump(struct jit_ctx *ctx, int >>> jmp_offset, bool is_exit) >>> { >>> if (is_signed_imm26(jmp_offset)) >>> uncond_jump_offs26(ctx, jmp_offset); >>> else >>> uncond_jump_offs32(ctx, jmp_offset, is_exit); >>> } >>> --------end of code-------- >>> >>> Huacai >>> >> Hi Huacai, >> >> This change is to pass the special test cases: >> "a new type of jump test where the program jumps forwards >> and backwards with increasing offset. It mainly tests JITs where a >> relative jump may generate different JITed code depending on the offset >> size." >> >> They are introduced in commit a7d2e752e520 ("bpf/tests: Add staggered >> JMP and JMP32 tests") after the old internal version you mentioned. >> >> Here, we use the native instructions to enlarge the jump range to 26 bit >> directly rather than 16 bit first, and also take no account of more than >> 26 bit case because there is no native instruction and it needs to emulate. >> >> As the code comment said, this is to enhance code readability now. > I'm not familiar with bpf, Daniel, Ruoyao and Xuerui, what do you > think about it? I'm not familiar with eBPF JITs either, but from a cursory look it seems the readability isn't that bad even for the original version? It is clear that the appropriate instructions are selected according to size of the immediate. And the current way of doing things doesn't really fix the problem, it relies on the fact that *normally* the negative branch offset is expressible in 16 bits (actually 18 bits). So, at the very least, you should keep the fallback logic towards cond_jump_offs32. At which point adding back enough code so you're back to the original version doesn't sound like it's too much anyway...
On 09/09/2022 12:51 AM, WANG Xuerui wrote: > Hi, > > On 9/4/22 17:04, Huacai Chen wrote: >> On Sat, Sep 3, 2022 at 6:11 PM Tiezhu Yang <yangtiezhu@loongson.cn> >> wrote: >>> >>> >>> On 09/03/2022 04:32 PM, Huacai Chen wrote: >>>> On Thu, Sep 1, 2022 at 10:27 AM Tiezhu Yang <yangtiezhu@loongson.cn> >>>> wrote: >>>>> BPF programs are normally handled by a BPF interpreter, add BPF JIT >>>>> support for LoongArch to allow the kernel to generate native code >>>>> when a program is loaded into the kernel, this will significantly >>>>> speed-up processing of BPF programs. >>> [...] >>> >>>>> + >>>>> +static inline int emit_cond_jmp(struct jit_ctx *ctx, u8 cond, enum >>>>> loongarch_gpr rj, >>>>> + enum loongarch_gpr rd, int jmp_offset) >>>>> +{ >>>>> + /* >>>>> + * A large PC-relative jump offset may overflow the >>>>> immediate field of >>>>> + * the native conditional branch instruction, triggering a >>>>> conversion >>>>> + * to use an absolute jump instead, this jump sequence is >>>>> particularly >>>>> + * nasty. For now, use cond_jmp_offs26() directly to keep >>>>> it simple. >>>>> + * In the future, maybe we can add support for far >>>>> branching, the branch >>>>> + * relaxation requires more than two passes to converge, >>>>> the code seems >>>>> + * too complex to understand, not quite sure whether it is >>>>> necessary and >>>>> + * worth the extra pain. Anyway, just leave it as it is to >>>>> enhance code >>>>> + * readability now. >>>> Oh, no. I don't think this is a very difficult problem, because the >>>> old version has already solved [1]. Please improve your code and send >>>> V4. >>>> BTW, I have committed V3 with some small modifications in >>>> https://github.com/loongson/linux/commits/loongarch-next, please make >>>> V4 based on that. >>>> >>>> [1] >>>> https://github.com/loongson/linux/commit/e20b2353f40cd13720996524e1df6d0ca086eeb8#diff-6d2f4f5a862a5dce12f8eb0feeca095825c4ed1c2e7151b0905fb8d03c98922e >>>> >>>> >>>> --------code in the old version-------- >>>> static inline void emit_cond_jump(struct jit_ctx *ctx, u8 cond, enum >>>> loongarch_gpr rj, >>>> enum loongarch_gpr rd, int >>>> jmp_offset) >>>> { >>>> if (is_signed_imm16(jmp_offset)) >>>> cond_jump_offs16(ctx, cond, rj, rd, jmp_offset); >>>> else if (is_signed_imm26(jmp_offset)) >>>> cond_jump_offs26(ctx, cond, rj, rd, jmp_offset); >>>> else >>>> cond_jump_offs32(ctx, cond, rj, rd, jmp_offset); >>>> } >>>> >>>> static inline void emit_uncond_jump(struct jit_ctx *ctx, int >>>> jmp_offset, bool is_exit) >>>> { >>>> if (is_signed_imm26(jmp_offset)) >>>> uncond_jump_offs26(ctx, jmp_offset); >>>> else >>>> uncond_jump_offs32(ctx, jmp_offset, is_exit); >>>> } >>>> --------end of code-------- >>>> >>>> Huacai >>>> >>> Hi Huacai, >>> >>> This change is to pass the special test cases: >>> "a new type of jump test where the program jumps forwards >>> and backwards with increasing offset. It mainly tests JITs where a >>> relative jump may generate different JITed code depending on the offset >>> size." >>> >>> They are introduced in commit a7d2e752e520 ("bpf/tests: Add staggered >>> JMP and JMP32 tests") after the old internal version you mentioned. >>> >>> Here, we use the native instructions to enlarge the jump range to 26 bit >>> directly rather than 16 bit first, and also take no account of more than >>> 26 bit case because there is no native instruction and it needs to >>> emulate. >>> >>> As the code comment said, this is to enhance code readability now. >> I'm not familiar with bpf, Daniel, Ruoyao and Xuerui, what do you >> think about it? > > I'm not familiar with eBPF JITs either, but from a cursory look it seems > the readability isn't that bad even for the original version? It is > clear that the appropriate instructions are selected according to size > of the immediate. And the current way of doing things doesn't really fix > the problem, it relies on the fact that *normally* the negative branch > offset is expressible in 16 bits (actually 18 bits). > > So, at the very least, you should keep the fallback logic towards > cond_jump_offs32. At which point adding back enough code so you're back > to the original version doesn't sound like it's too much anyway... > Hi Huacai and Xuerui, cond_jump_offs26() has the ability to handle the offs16 range, this is intended to pass a new type of jump test where the program jumps forwards and backwards with increasing offset. The case with larger jump offset is rare, it can fall back to the interpreter if jit failed. As we discussed offline, the current implementation is OK, the committed v3 with some small modifications by Huacai in https://github.com/loongson/linux/commits/loongarch-next looks good and works well, no need to do more modifications, thank you all for your reviews. Thanks, Tiezhu
diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild index ab5373d..b01f5cd 100644 --- a/arch/loongarch/Kbuild +++ b/arch/loongarch/Kbuild @@ -1,5 +1,6 @@ obj-y += kernel/ obj-y += mm/ +obj-y += net/ obj-y += vdso/ # for cleaning diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 26aeb14..cff7a0f 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -82,6 +82,7 @@ config LOONGARCH select HAVE_CONTEXT_TRACKING_USER select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS + select HAVE_EBPF_JIT if 64BIT select HAVE_EXIT_THREAD select HAVE_FAST_GUP select HAVE_GENERIC_VDSO diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 691170b..5e7a4d2 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -318,4 +318,225 @@ static inline bool unsigned_imm_check(unsigned long val, unsigned int bit) return val < (1UL << bit); } +#define DEF_EMIT_REG0I26_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + int offset) \ +{ \ + unsigned int immediate_l, immediate_h; \ + \ + immediate_l = offset & 0xffff; \ + offset >>= 16; \ + immediate_h = offset & 0x3ff; \ + \ + insn->reg0i26_format.opcode = OP; \ + insn->reg0i26_format.immediate_l = immediate_l; \ + insn->reg0i26_format.immediate_h = immediate_h; \ +} + +DEF_EMIT_REG0I26_FORMAT(b, b_op) + +#define DEF_EMIT_REG1I20_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, int imm) \ +{ \ + insn->reg1i20_format.opcode = OP; \ + insn->reg1i20_format.immediate = imm; \ + insn->reg1i20_format.rd = rd; \ +} + +DEF_EMIT_REG1I20_FORMAT(lu12iw, lu12iw_op) +DEF_EMIT_REG1I20_FORMAT(lu32id, lu32id_op) +DEF_EMIT_REG1I20_FORMAT(pcaddu18i, pcaddu18i_op) + +#define DEF_EMIT_REG2_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj) \ +{ \ + insn->reg2_format.opcode = OP; \ + insn->reg2_format.rd = rd; \ + insn->reg2_format.rj = rj; \ +} + +DEF_EMIT_REG2_FORMAT(revb2h, revb2h_op) +DEF_EMIT_REG2_FORMAT(revb2w, revb2w_op) +DEF_EMIT_REG2_FORMAT(revbd, revbd_op) + +#define DEF_EMIT_REG2I5_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + int imm) \ +{ \ + insn->reg2i5_format.opcode = OP; \ + insn->reg2i5_format.immediate = imm; \ + insn->reg2i5_format.rd = rd; \ + insn->reg2i5_format.rj = rj; \ +} + +DEF_EMIT_REG2I5_FORMAT(slliw, slliw_op) +DEF_EMIT_REG2I5_FORMAT(srliw, srliw_op) +DEF_EMIT_REG2I5_FORMAT(sraiw, sraiw_op) + +#define DEF_EMIT_REG2I6_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + int imm) \ +{ \ + insn->reg2i6_format.opcode = OP; \ + insn->reg2i6_format.immediate = imm; \ + insn->reg2i6_format.rd = rd; \ + insn->reg2i6_format.rj = rj; \ +} + +DEF_EMIT_REG2I6_FORMAT(sllid, sllid_op) +DEF_EMIT_REG2I6_FORMAT(srlid, srlid_op) +DEF_EMIT_REG2I6_FORMAT(sraid, sraid_op) + +#define DEF_EMIT_REG2I12_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + int imm) \ +{ \ + insn->reg2i12_format.opcode = OP; \ + insn->reg2i12_format.immediate = imm; \ + insn->reg2i12_format.rd = rd; \ + insn->reg2i12_format.rj = rj; \ +} + +DEF_EMIT_REG2I12_FORMAT(addiw, addiw_op) +DEF_EMIT_REG2I12_FORMAT(addid, addid_op) +DEF_EMIT_REG2I12_FORMAT(lu52id, lu52id_op) +DEF_EMIT_REG2I12_FORMAT(andi, andi_op) +DEF_EMIT_REG2I12_FORMAT(ori, ori_op) +DEF_EMIT_REG2I12_FORMAT(xori, xori_op) +DEF_EMIT_REG2I12_FORMAT(ldbu, ldbu_op) +DEF_EMIT_REG2I12_FORMAT(ldhu, ldhu_op) +DEF_EMIT_REG2I12_FORMAT(ldwu, ldwu_op) +DEF_EMIT_REG2I12_FORMAT(ldd, ldd_op) +DEF_EMIT_REG2I12_FORMAT(stb, stb_op) +DEF_EMIT_REG2I12_FORMAT(sth, sth_op) +DEF_EMIT_REG2I12_FORMAT(stw, stw_op) +DEF_EMIT_REG2I12_FORMAT(std, std_op) + +#define DEF_EMIT_REG2I14_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + int imm) \ +{ \ + insn->reg2i14_format.opcode = OP; \ + insn->reg2i14_format.immediate = imm; \ + insn->reg2i14_format.rd = rd; \ + insn->reg2i14_format.rj = rj; \ +} + +DEF_EMIT_REG2I14_FORMAT(llw, llw_op) +DEF_EMIT_REG2I14_FORMAT(scw, scw_op) +DEF_EMIT_REG2I14_FORMAT(lld, lld_op) +DEF_EMIT_REG2I14_FORMAT(scd, scd_op) +DEF_EMIT_REG2I14_FORMAT(ldptrw, ldptrw_op) +DEF_EMIT_REG2I14_FORMAT(stptrw, stptrw_op) +DEF_EMIT_REG2I14_FORMAT(ldptrd, ldptrd_op) +DEF_EMIT_REG2I14_FORMAT(stptrd, stptrd_op) + +#define DEF_EMIT_REG2I16_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rj, \ + enum loongarch_gpr rd, \ + int offset) \ +{ \ + insn->reg2i16_format.opcode = OP; \ + insn->reg2i16_format.immediate = offset; \ + insn->reg2i16_format.rj = rj; \ + insn->reg2i16_format.rd = rd; \ +} + +DEF_EMIT_REG2I16_FORMAT(beq, beq_op) +DEF_EMIT_REG2I16_FORMAT(bne, bne_op) +DEF_EMIT_REG2I16_FORMAT(blt, blt_op) +DEF_EMIT_REG2I16_FORMAT(bge, bge_op) +DEF_EMIT_REG2I16_FORMAT(bltu, bltu_op) +DEF_EMIT_REG2I16_FORMAT(bgeu, bgeu_op) +DEF_EMIT_REG2I16_FORMAT(jirl, jirl_op) + +#define DEF_EMIT_REG2BSTRD_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + int msbd, \ + int lsbd) \ +{ \ + insn->reg2bstrd_format.opcode = OP; \ + insn->reg2bstrd_format.msbd = msbd; \ + insn->reg2bstrd_format.lsbd = lsbd; \ + insn->reg2bstrd_format.rj = rj; \ + insn->reg2bstrd_format.rd = rd; \ +} + +DEF_EMIT_REG2BSTRD_FORMAT(bstrpickd, bstrpickd_op) + +#define DEF_EMIT_REG3_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + enum loongarch_gpr rk) \ +{ \ + insn->reg3_format.opcode = OP; \ + insn->reg3_format.rd = rd; \ + insn->reg3_format.rj = rj; \ + insn->reg3_format.rk = rk; \ +} + +DEF_EMIT_REG3_FORMAT(addd, addd_op) +DEF_EMIT_REG3_FORMAT(subd, subd_op) +DEF_EMIT_REG3_FORMAT(muld, muld_op) +DEF_EMIT_REG3_FORMAT(divdu, divdu_op) +DEF_EMIT_REG3_FORMAT(moddu, moddu_op) +DEF_EMIT_REG3_FORMAT(and, and_op) +DEF_EMIT_REG3_FORMAT(or, or_op) +DEF_EMIT_REG3_FORMAT(xor, xor_op) +DEF_EMIT_REG3_FORMAT(sllw, sllw_op) +DEF_EMIT_REG3_FORMAT(slld, slld_op) +DEF_EMIT_REG3_FORMAT(srlw, srlw_op) +DEF_EMIT_REG3_FORMAT(srld, srld_op) +DEF_EMIT_REG3_FORMAT(sraw, sraw_op) +DEF_EMIT_REG3_FORMAT(srad, srad_op) +DEF_EMIT_REG3_FORMAT(ldxbu, ldxbu_op) +DEF_EMIT_REG3_FORMAT(ldxhu, ldxhu_op) +DEF_EMIT_REG3_FORMAT(ldxwu, ldxwu_op) +DEF_EMIT_REG3_FORMAT(ldxd, ldxd_op) +DEF_EMIT_REG3_FORMAT(stxb, stxb_op) +DEF_EMIT_REG3_FORMAT(stxh, stxh_op) +DEF_EMIT_REG3_FORMAT(stxw, stxw_op) +DEF_EMIT_REG3_FORMAT(stxd, stxd_op) +DEF_EMIT_REG3_FORMAT(amaddw, amaddw_op) +DEF_EMIT_REG3_FORMAT(amaddd, amaddd_op) +DEF_EMIT_REG3_FORMAT(amandw, amandw_op) +DEF_EMIT_REG3_FORMAT(amandd, amandd_op) +DEF_EMIT_REG3_FORMAT(amorw, amorw_op) +DEF_EMIT_REG3_FORMAT(amord, amord_op) +DEF_EMIT_REG3_FORMAT(amxorw, amxorw_op) +DEF_EMIT_REG3_FORMAT(amxord, amxord_op) +DEF_EMIT_REG3_FORMAT(amswapw, amswapw_op) +DEF_EMIT_REG3_FORMAT(amswapd, amswapd_op) + +#define DEF_EMIT_REG3SA2_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + enum loongarch_gpr rk, \ + int imm) \ +{ \ + insn->reg3sa2_format.opcode = OP; \ + insn->reg3sa2_format.immediate = imm; \ + insn->reg3sa2_format.rd = rd; \ + insn->reg3sa2_format.rj = rj; \ + insn->reg3sa2_format.rk = rk; \ +} + +DEF_EMIT_REG3SA2_FORMAT(alsld, alsld_op) + #endif /* _ASM_INST_H */ diff --git a/arch/loongarch/net/Makefile b/arch/loongarch/net/Makefile new file mode 100644 index 0000000..1ec12a0 --- /dev/null +++ b/arch/loongarch/net/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for arch/loongarch/net +# +# Copyright (C) 2022 Loongson Technology Corporation Limited +# +obj-$(CONFIG_BPF_JIT) += bpf_jit.o diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c new file mode 100644 index 0000000..e9c9ce9 --- /dev/null +++ b/arch/loongarch/net/bpf_jit.c @@ -0,0 +1,1160 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * BPF JIT compiler for LoongArch + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ +#include "bpf_jit.h" + +#define REG_TCC LOONGARCH_GPR_A6 +#define TCC_SAVED LOONGARCH_GPR_S5 + +#define SAVE_RA BIT(0) +#define SAVE_TCC BIT(1) + +static const int regmap[] = { + /* return value from in-kernel function, and exit value for eBPF program */ + [BPF_REG_0] = LOONGARCH_GPR_A5, + /* arguments from eBPF program to in-kernel function */ + [BPF_REG_1] = LOONGARCH_GPR_A0, + [BPF_REG_2] = LOONGARCH_GPR_A1, + [BPF_REG_3] = LOONGARCH_GPR_A2, + [BPF_REG_4] = LOONGARCH_GPR_A3, + [BPF_REG_5] = LOONGARCH_GPR_A4, + /* callee saved registers that in-kernel function will preserve */ + [BPF_REG_6] = LOONGARCH_GPR_S0, + [BPF_REG_7] = LOONGARCH_GPR_S1, + [BPF_REG_8] = LOONGARCH_GPR_S2, + [BPF_REG_9] = LOONGARCH_GPR_S3, + /* read-only frame pointer to access stack */ + [BPF_REG_FP] = LOONGARCH_GPR_S4, + /* temporary register for blinding constants */ + [BPF_REG_AX] = LOONGARCH_GPR_T0, +}; + +static void mark_call(struct jit_ctx *ctx) +{ + ctx->flags |= SAVE_RA; +} + +static void mark_tail_call(struct jit_ctx *ctx) +{ + ctx->flags |= SAVE_TCC; +} + +static bool seen_call(struct jit_ctx *ctx) +{ + return (ctx->flags & SAVE_RA); +} + +static bool seen_tail_call(struct jit_ctx *ctx) +{ + return (ctx->flags & SAVE_TCC); +} + +static u8 tail_call_reg(struct jit_ctx *ctx) +{ + if (seen_call(ctx)) + return TCC_SAVED; + + return REG_TCC; +} + +/* + * eBPF prog stack layout: + * + * high + * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP + * | $ra | + * +-------------------------+ + * | $fp | + * +-------------------------+ + * | $s0 | + * +-------------------------+ + * | $s1 | + * +-------------------------+ + * | $s2 | + * +-------------------------+ + * | $s3 | + * +-------------------------+ + * | $s4 | + * +-------------------------+ + * | $s5 | + * +-------------------------+ <--BPF_REG_FP + * | prog->aux->stack_depth | + * | (optional) | + * current $sp -------------> +-------------------------+ + * low + */ +static void build_prologue(struct jit_ctx *ctx) +{ + int stack_adjust = 0, store_offset, bpf_stack_adjust; + + bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); + + /* To store ra, fp, s0, s1, s2, s3, s4 and s5. */ + stack_adjust += sizeof(long) * 8; + + stack_adjust = round_up(stack_adjust, 16); + stack_adjust += bpf_stack_adjust; + + /* + * First instruction initializes the tail call count (TCC). + * On tail call we skip this instruction, and the TCC is + * passed in REG_TCC from the caller. + */ + emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT); + + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust); + + store_offset = stack_adjust - sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset); + + emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust); + + if (bpf_stack_adjust) + emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust); + + /* + * Program contains calls and tail calls, so REG_TCC need + * to be saved across calls. + */ + if (seen_tail_call(ctx) && seen_call(ctx)) + move_reg(ctx, TCC_SAVED, REG_TCC); + + ctx->stack_size = stack_adjust; +} + +static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call) +{ + int stack_adjust = ctx->stack_size; + int load_offset; + + load_offset = stack_adjust - sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset); + + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust); + + if (!is_tail_call) { + /* Set return value */ + move_reg(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0]); + /* Return to the caller */ + emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0); + } else { + /* + * Call the next bpf prog and skip the first instruction + * of TCC initialization. + */ + emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1); + } +} + +static void build_epilogue(struct jit_ctx *ctx) +{ + __build_epilogue(ctx, false); +} + +bool bpf_jit_supports_kfunc_call(void) +{ + return true; +} + +/* initialized on the first pass of build_body() */ +static int out_offset = -1; +static int emit_bpf_tail_call(struct jit_ctx *ctx) +{ + int off; + u8 tcc = tail_call_reg(ctx); + u8 a1 = LOONGARCH_GPR_A1; + u8 a2 = LOONGARCH_GPR_A2; + u8 t1 = LOONGARCH_GPR_T1; + u8 t2 = LOONGARCH_GPR_T2; + u8 t3 = LOONGARCH_GPR_T3; + const int idx0 = ctx->idx; + +#define cur_offset (ctx->idx - idx0) +#define jmp_offset (out_offset - (cur_offset)) + + /* + * a0: &ctx + * a1: &array + * a2: index + * + * if (index >= array->map.max_entries) + * goto out; + */ + off = offsetof(struct bpf_array, map.max_entries); + emit_insn(ctx, ldwu, t1, a1, off); + /* bgeu $a2, $t1, jmp_offset */ + if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0) + goto toofar; + + /* + * if (--TCC < 0) + * goto out; + */ + emit_insn(ctx, addid, REG_TCC, tcc, -1); + if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0) + goto toofar; + + /* + * prog = array->ptrs[index]; + * if (!prog) + * goto out; + */ + emit_insn(ctx, alsld, t2, a2, a1, 2); + off = offsetof(struct bpf_array, ptrs); + emit_insn(ctx, ldd, t2, t2, off); + /* beq $t2, $zero, jmp_offset */ + if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0) + goto toofar; + + /* goto *(prog->bpf_func + 4); */ + off = offsetof(struct bpf_prog, bpf_func); + emit_insn(ctx, ldd, t3, t2, off); + __build_epilogue(ctx, true); + + /* out: */ + if (out_offset == -1) + out_offset = cur_offset; + if (cur_offset != out_offset) { + pr_err_once("tail_call out_offset = %d, expected %d!\n", + cur_offset, out_offset); + return -1; + } + + return 0; + +toofar: + pr_info_once("tail_call: jump too far\n"); + return -1; +#undef cur_offset +#undef jmp_offset +} + +static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) +{ + const u8 dst = regmap[insn->dst_reg]; + const u8 src = regmap[insn->src_reg]; + const u8 t1 = LOONGARCH_GPR_T1; + const u8 t2 = LOONGARCH_GPR_T2; + const u8 t3 = LOONGARCH_GPR_T3; + const s16 off = insn->off; + const s32 imm = insn->imm; + const bool isdw = BPF_SIZE(insn->code) == BPF_DW; + + move_imm(ctx, t1, off, false); + emit_insn(ctx, addd, t1, dst, t1); + move_reg(ctx, t3, src); + + switch (imm) { + /* lock *(size *)(dst + off) <op>= src */ + case BPF_ADD: + if (isdw) + emit_insn(ctx, amaddd, t2, t1, src); + else + emit_insn(ctx, amaddw, t2, t1, src); + break; + case BPF_AND: + if (isdw) + emit_insn(ctx, amandd, t2, t1, src); + else + emit_insn(ctx, amandw, t2, t1, src); + break; + case BPF_OR: + if (isdw) + emit_insn(ctx, amord, t2, t1, src); + else + emit_insn(ctx, amorw, t2, t1, src); + break; + case BPF_XOR: + if (isdw) + emit_insn(ctx, amxord, t2, t1, src); + else + emit_insn(ctx, amxorw, t2, t1, src); + break; + /* src = atomic_fetch_<op>(dst + off, src) */ + case BPF_ADD | BPF_FETCH: + if (isdw) { + emit_insn(ctx, amaddd, src, t1, t3); + } else { + emit_insn(ctx, amaddw, src, t1, t3); + emit_zext_32(ctx, src, true); + } + break; + case BPF_AND | BPF_FETCH: + if (isdw) { + emit_insn(ctx, amandd, src, t1, t3); + } else { + emit_insn(ctx, amandw, src, t1, t3); + emit_zext_32(ctx, src, true); + } + break; + case BPF_OR | BPF_FETCH: + if (isdw) { + emit_insn(ctx, amord, src, t1, t3); + } else { + emit_insn(ctx, amorw, src, t1, t3); + emit_zext_32(ctx, src, true); + } + break; + case BPF_XOR | BPF_FETCH: + if (isdw) { + emit_insn(ctx, amxord, src, t1, t3); + } else { + emit_insn(ctx, amxorw, src, t1, t3); + emit_zext_32(ctx, src, true); + } + break; + /* src = atomic_xchg(dst + off, src); */ + case BPF_XCHG: + if (isdw) { + emit_insn(ctx, amswapd, src, t1, t3); + } else { + emit_insn(ctx, amswapw, src, t1, t3); + emit_zext_32(ctx, src, true); + } + break; + /* r0 = atomic_cmpxchg(dst + off, r0, src); */ + case BPF_CMPXCHG: + u8 r0 = regmap[BPF_REG_0]; + + move_reg(ctx, t2, r0); + if (isdw) { + emit_insn(ctx, lld, r0, t1, 0); + emit_insn(ctx, bne, t2, r0, 4); + move_reg(ctx, t3, src); + emit_insn(ctx, scd, t3, t1, 0); + emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4); + } else { + emit_insn(ctx, llw, r0, t1, 0); + emit_zext_32(ctx, t2, true); + emit_zext_32(ctx, r0, true); + emit_insn(ctx, bne, t2, r0, 4); + move_reg(ctx, t3, src); + emit_insn(ctx, scw, t3, t1, 0); + emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6); + emit_zext_32(ctx, r0, true); + } + break; + } +} + +static bool is_signed_bpf_cond(u8 cond) +{ + return cond == BPF_JSGT || cond == BPF_JSLT || + cond == BPF_JSGE || cond == BPF_JSLE; +} + +static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass) +{ + const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || + BPF_CLASS(insn->code) == BPF_JMP32; + const u8 code = insn->code; + const u8 cond = BPF_OP(code); + const u8 dst = regmap[insn->dst_reg]; + const u8 src = regmap[insn->src_reg]; + const u8 t1 = LOONGARCH_GPR_T1; + const u8 t2 = LOONGARCH_GPR_T2; + const s16 off = insn->off; + const s32 imm = insn->imm; + int i = insn - ctx->prog->insnsi; + int jmp_offset; + + switch (code) { + /* dst = src */ + case BPF_ALU | BPF_MOV | BPF_X: + case BPF_ALU64 | BPF_MOV | BPF_X: + move_reg(ctx, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = imm */ + case BPF_ALU | BPF_MOV | BPF_K: + case BPF_ALU64 | BPF_MOV | BPF_K: + move_imm(ctx, dst, imm, is32); + break; + + /* dst = dst + src */ + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_X: + emit_insn(ctx, addd, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst + imm */ + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_ADD | BPF_K: + if (is_signed_imm12(imm)) { + emit_insn(ctx, addid, dst, dst, imm); + } else { + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, addd, dst, dst, t1); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst - src */ + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_X: + emit_insn(ctx, subd, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst - imm */ + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + if (is_signed_imm12(-imm)) { + emit_insn(ctx, addid, dst, dst, -imm); + } else { + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, subd, dst, dst, t1); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst * src */ + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU64 | BPF_MUL | BPF_X: + emit_insn(ctx, muld, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst * imm */ + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU64 | BPF_MUL | BPF_K: + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, muld, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst / src */ + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_DIV | BPF_X: + emit_zext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_zext_32(ctx, t1, is32); + emit_insn(ctx, divdu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst / imm */ + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_DIV | BPF_K: + move_imm(ctx, t1, imm, is32); + emit_zext_32(ctx, dst, is32); + emit_insn(ctx, divdu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst % src */ + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_X: + emit_zext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_zext_32(ctx, t1, is32); + emit_insn(ctx, moddu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst % imm */ + case BPF_ALU | BPF_MOD | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_K: + move_imm(ctx, t1, imm, is32); + emit_zext_32(ctx, dst, is32); + emit_insn(ctx, moddu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = -dst */ + case BPF_ALU | BPF_NEG: + case BPF_ALU64 | BPF_NEG: + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst & src */ + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_AND | BPF_X: + emit_insn(ctx, and, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst & imm */ + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_AND | BPF_K: + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, andi, dst, dst, imm); + } else { + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, and, dst, dst, t1); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst | src */ + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + emit_insn(ctx, or, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst | imm */ + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, ori, dst, dst, imm); + } else { + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, or, dst, dst, t1); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst ^ src */ + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + emit_insn(ctx, xor, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst ^ imm */ + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, xori, dst, dst, imm); + } else { + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, xor, dst, dst, t1); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst << src (logical) */ + case BPF_ALU | BPF_LSH | BPF_X: + emit_insn(ctx, sllw, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_LSH | BPF_X: + emit_insn(ctx, slld, dst, dst, src); + break; + /* dst = dst << imm (logical) */ + case BPF_ALU | BPF_LSH | BPF_K: + emit_insn(ctx, slliw, dst, dst, imm); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_LSH | BPF_K: + emit_insn(ctx, sllid, dst, dst, imm); + break; + + /* dst = dst >> src (logical) */ + case BPF_ALU | BPF_RSH | BPF_X: + emit_insn(ctx, srlw, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_RSH | BPF_X: + emit_insn(ctx, srld, dst, dst, src); + break; + /* dst = dst >> imm (logical) */ + case BPF_ALU | BPF_RSH | BPF_K: + emit_insn(ctx, srliw, dst, dst, imm); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_RSH | BPF_K: + emit_insn(ctx, srlid, dst, dst, imm); + break; + + /* dst = dst >> src (arithmetic) */ + case BPF_ALU | BPF_ARSH | BPF_X: + emit_insn(ctx, sraw, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_ARSH | BPF_X: + emit_insn(ctx, srad, dst, dst, src); + break; + /* dst = dst >> imm (arithmetic) */ + case BPF_ALU | BPF_ARSH | BPF_K: + emit_insn(ctx, sraiw, dst, dst, imm); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_ARSH | BPF_K: + emit_insn(ctx, sraid, dst, dst, imm); + break; + + /* dst = BSWAP##imm(dst) */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + switch (imm) { + case 16: + /* zero-extend 16 bits into 64 bits */ + emit_insn(ctx, bstrpickd, dst, dst, 15, 0); + break; + case 32: + /* zero-extend 32 bits into 64 bits */ + emit_zext_32(ctx, dst, is32); + break; + case 64: + /* do nothing */ + break; + } + break; + case BPF_ALU | BPF_END | BPF_FROM_BE: + switch (imm) { + case 16: + emit_insn(ctx, revb2h, dst, dst); + /* zero-extend 16 bits into 64 bits */ + emit_insn(ctx, bstrpickd, dst, dst, 15, 0); + break; + case 32: + emit_insn(ctx, revb2w, dst, dst); + /* zero-extend 32 bits into 64 bits */ + emit_zext_32(ctx, dst, is32); + break; + case 64: + emit_insn(ctx, revbd, dst, dst); + break; + } + break; + + /* PC += off if dst cond src */ + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + jmp_offset = bpf2la_offset(i, off, ctx); + move_reg(ctx, t1, dst); + move_reg(ctx, t2, src); + if (is_signed_bpf_cond(BPF_OP(code))) { + emit_sext_32(ctx, t1, is32); + emit_sext_32(ctx, t2, is32); + } else { + emit_zext_32(ctx, t1, is32); + emit_zext_32(ctx, t2, is32); + } + if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0) + goto toofar; + break; + + /* PC += off if dst cond imm */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + u8 t7 = -1; + jmp_offset = bpf2la_offset(i, off, ctx); + if (imm) { + move_imm(ctx, t1, imm, false); + t7 = t1; + } else { + /* If imm is 0, simply use zero register. */ + t7 = LOONGARCH_GPR_ZERO; + } + move_reg(ctx, t2, dst); + if (is_signed_bpf_cond(BPF_OP(code))) { + emit_sext_32(ctx, t7, is32); + emit_sext_32(ctx, t2, is32); + } else { + emit_zext_32(ctx, t7, is32); + emit_zext_32(ctx, t2, is32); + } + if (emit_cond_jmp(ctx, cond, t2, t7, jmp_offset) < 0) + goto toofar; + break; + + /* PC += off if dst & src */ + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_X: + jmp_offset = bpf2la_offset(i, off, ctx); + emit_insn(ctx, and, t1, dst, src); + emit_zext_32(ctx, t1, is32); + if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0) + goto toofar; + break; + /* PC += off if dst & imm */ + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + jmp_offset = bpf2la_offset(i, off, ctx); + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, and, t1, dst, t1); + emit_zext_32(ctx, t1, is32); + if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0) + goto toofar; + break; + + /* PC += off */ + case BPF_JMP | BPF_JA: + jmp_offset = bpf2la_offset(i, off, ctx); + if (emit_uncond_jmp(ctx, jmp_offset) < 0) + goto toofar; + break; + + /* function call */ + case BPF_JMP | BPF_CALL: + bool func_addr_fixed; + u64 func_addr; + int ret; + + mark_call(ctx); + ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, + &func_addr, &func_addr_fixed); + if (ret < 0) + return ret; + + move_imm(ctx, t1, func_addr, is32); + emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0); + move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0); + break; + + /* tail call */ + case BPF_JMP | BPF_TAIL_CALL: + mark_tail_call(ctx); + if (emit_bpf_tail_call(ctx) < 0) + return -EINVAL; + break; + + /* function return */ + case BPF_JMP | BPF_EXIT: + emit_sext_32(ctx, regmap[BPF_REG_0], true); + + if (i == ctx->prog->len - 1) + break; + + jmp_offset = epilogue_offset(ctx); + if (emit_uncond_jmp(ctx, jmp_offset) < 0) + goto toofar; + break; + + /* dst = imm64 */ + case BPF_LD | BPF_IMM | BPF_DW: + u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm; + + move_imm(ctx, dst, imm64, is32); + return 1; + + /* dst = *(size *)(src + off) */ + case BPF_LDX | BPF_MEM | BPF_B: + case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_DW: + switch (BPF_SIZE(code)) { + case BPF_B: + if (is_signed_imm12(off)) { + emit_insn(ctx, ldbu, dst, src, off); + } else { + move_imm(ctx, t1, off, is32); + emit_insn(ctx, ldxbu, dst, src, t1); + } + break; + case BPF_H: + if (is_signed_imm12(off)) { + emit_insn(ctx, ldhu, dst, src, off); + } else { + move_imm(ctx, t1, off, is32); + emit_insn(ctx, ldxhu, dst, src, t1); + } + break; + case BPF_W: + if (is_signed_imm12(off)) { + emit_insn(ctx, ldwu, dst, src, off); + } else if (is_signed_imm14(off)) { + emit_insn(ctx, ldptrw, dst, src, off); + } else { + move_imm(ctx, t1, off, is32); + emit_insn(ctx, ldxwu, dst, src, t1); + } + break; + case BPF_DW: + if (is_signed_imm12(off)) { + emit_insn(ctx, ldd, dst, src, off); + } else if (is_signed_imm14(off)) { + emit_insn(ctx, ldptrd, dst, src, off); + } else { + move_imm(ctx, t1, off, is32); + emit_insn(ctx, ldxd, dst, src, t1); + } + break; + } + break; + + /* *(size *)(dst + off) = imm */ + case BPF_ST | BPF_MEM | BPF_B: + case BPF_ST | BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_DW: + switch (BPF_SIZE(code)) { + case BPF_B: + move_imm(ctx, t1, imm, is32); + if (is_signed_imm12(off)) { + emit_insn(ctx, stb, t1, dst, off); + } else { + move_imm(ctx, t2, off, is32); + emit_insn(ctx, stxb, t1, dst, t2); + } + break; + case BPF_H: + move_imm(ctx, t1, imm, is32); + if (is_signed_imm12(off)) { + emit_insn(ctx, sth, t1, dst, off); + } else { + move_imm(ctx, t2, off, is32); + emit_insn(ctx, stxh, t1, dst, t2); + } + break; + case BPF_W: + move_imm(ctx, t1, imm, is32); + if (is_signed_imm12(off)) { + emit_insn(ctx, stw, t1, dst, off); + } else if (is_signed_imm14(off)) { + emit_insn(ctx, stptrw, t1, dst, off); + } else { + move_imm(ctx, t2, off, is32); + emit_insn(ctx, stxw, t1, dst, t2); + } + break; + case BPF_DW: + move_imm(ctx, t1, imm, is32); + if (is_signed_imm12(off)) { + emit_insn(ctx, std, t1, dst, off); + } else if (is_signed_imm14(off)) { + emit_insn(ctx, stptrd, t1, dst, off); + } else { + move_imm(ctx, t2, off, is32); + emit_insn(ctx, stxd, t1, dst, t2); + } + break; + } + break; + + /* *(size *)(dst + off) = src */ + case BPF_STX | BPF_MEM | BPF_B: + case BPF_STX | BPF_MEM | BPF_H: + case BPF_STX | BPF_MEM | BPF_W: + case BPF_STX | BPF_MEM | BPF_DW: + switch (BPF_SIZE(code)) { + case BPF_B: + if (is_signed_imm12(off)) { + emit_insn(ctx, stb, src, dst, off); + } else { + move_imm(ctx, t1, off, is32); + emit_insn(ctx, stxb, src, dst, t1); + } + break; + case BPF_H: + if (is_signed_imm12(off)) { + emit_insn(ctx, sth, src, dst, off); + } else { + move_imm(ctx, t1, off, is32); + emit_insn(ctx, stxh, src, dst, t1); + } + break; + case BPF_W: + if (is_signed_imm12(off)) { + emit_insn(ctx, stw, src, dst, off); + } else if (is_signed_imm14(off)) { + emit_insn(ctx, stptrw, src, dst, off); + } else { + move_imm(ctx, t1, off, is32); + emit_insn(ctx, stxw, src, dst, t1); + } + break; + case BPF_DW: + if (is_signed_imm12(off)) { + emit_insn(ctx, std, src, dst, off); + } else if (is_signed_imm14(off)) { + emit_insn(ctx, stptrd, src, dst, off); + } else { + move_imm(ctx, t1, off, is32); + emit_insn(ctx, stxd, src, dst, t1); + } + break; + } + break; + + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + emit_atomic(insn, ctx); + break; + + default: + pr_err("bpf_jit: unknown opcode %02x\n", code); + return -EINVAL; + } + + return 0; + +toofar: + pr_info_once("bpf_jit: opcode %02x, jump too far\n", code); + return -E2BIG; +} + +static int build_body(struct jit_ctx *ctx, bool extra_pass) +{ + const struct bpf_prog *prog = ctx->prog; + int i; + + for (i = 0; i < prog->len; i++) { + const struct bpf_insn *insn = &prog->insnsi[i]; + int ret; + + if (ctx->image == NULL) + ctx->offset[i] = ctx->idx; + + ret = build_insn(insn, ctx, extra_pass); + if (ret > 0) { + i++; + if (ctx->image == NULL) + ctx->offset[i] = ctx->idx; + continue; + } + if (ret) + return ret; + } + + if (ctx->image == NULL) + ctx->offset[i] = ctx->idx; + + return 0; +} + +static inline void bpf_flush_icache(void *start, void *end) +{ + flush_icache_range((unsigned long)start, (unsigned long)end); +} + +/* Fill space with illegal instructions */ +static void jit_fill_hole(void *area, unsigned int size) +{ + u32 *ptr; + + /* We are guaranteed to have aligned memory */ + for (ptr = area; size >= sizeof(u32); size -= sizeof(u32)) + *ptr++ = INSN_BREAK; +} + +static int validate_code(struct jit_ctx *ctx) +{ + int i; + union loongarch_instruction insn; + + for (i = 0; i < ctx->idx; i++) { + insn = ctx->image[i]; + /* Check INSN_BREAK */ + if (insn.word == INSN_BREAK) + return -1; + } + + return 0; +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +{ + bool tmp_blinded = false, extra_pass = false; + struct bpf_prog *tmp, *orig_prog = prog; + struct bpf_binary_header *header; + struct jit_data *jit_data; + struct jit_ctx ctx; + int image_size; + u8 *image_ptr; + + /* + * If BPF JIT was not enabled then we must fall back to + * the interpreter. + */ + if (!prog->jit_requested) + return orig_prog; + + tmp = bpf_jit_blind_constants(prog); + /* + * If blinding was requested and we failed during blinding, + * we must fall back to the interpreter. Otherwise, we save + * the new JITed code. + */ + if (IS_ERR(tmp)) + return orig_prog; + if (tmp != prog) { + tmp_blinded = true; + prog = tmp; + } + + jit_data = prog->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + prog = orig_prog; + goto out; + } + prog->aux->jit_data = jit_data; + } + if (jit_data->ctx.offset) { + ctx = jit_data->ctx; + image_ptr = jit_data->image; + header = jit_data->header; + extra_pass = true; + image_size = sizeof(u32) * ctx.idx; + goto skip_init_ctx; + } + + memset(&ctx, 0, sizeof(ctx)); + ctx.prog = prog; + + ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL); + if (ctx.offset == NULL) { + prog = orig_prog; + goto out_offset; + } + + /* 1. Initial fake pass to compute ctx->idx and set ctx->flags */ + build_prologue(&ctx); + if (build_body(&ctx, extra_pass)) { + prog = orig_prog; + goto out_offset; + } + ctx.epilogue_offset = ctx.idx; + build_epilogue(&ctx); + + /* Now we know the actual image size. + * As each LoongArch instruction is of length 32bit, + * we are translating number of JITed intructions into + * the size required to store these JITed code. + */ + image_size = sizeof(u32) * ctx.idx; + /* Now we know the size of the structure to make */ + header = bpf_jit_binary_alloc(image_size, &image_ptr, + sizeof(u32), jit_fill_hole); + if (header == NULL) { + prog = orig_prog; + goto out_offset; + } + + /* 2. Now, the actual pass to generate final JIT code */ + ctx.image = (union loongarch_instruction *)image_ptr; +skip_init_ctx: + ctx.idx = 0; + + build_prologue(&ctx); + if (build_body(&ctx, extra_pass)) { + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_offset; + } + build_epilogue(&ctx); + + /* 3. Extra pass to validate JITed code */ + if (validate_code(&ctx)) { + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_offset; + } + + /* And we're done */ + if (bpf_jit_enable > 1) + bpf_jit_dump(prog->len, image_size, 2, ctx.image); + + /* Update the icache */ + bpf_flush_icache(header, ctx.image + ctx.idx); + + if (!prog->is_func || extra_pass) { + if (extra_pass && ctx.idx != jit_data->ctx.idx) { + pr_err_once("multi-func JIT bug %d != %d\n", + ctx.idx, jit_data->ctx.idx); + bpf_jit_binary_free(header); + prog->bpf_func = NULL; + prog->jited = 0; + prog->jited_len = 0; + goto out_offset; + } + bpf_jit_binary_lock_ro(header); + } else { + jit_data->ctx = ctx; + jit_data->image = image_ptr; + jit_data->header = header; + } + prog->bpf_func = (void *)ctx.image; + prog->jited = 1; + prog->jited_len = image_size; + + if (!prog->is_func || extra_pass) { + int i; + + /* offset[prog->len] is the size of program */ + for (i = 0; i <= prog->len; i++) + ctx.offset[i] *= LOONGARCH_INSN_SIZE; + bpf_prog_fill_jited_linfo(prog, ctx.offset + 1); +out_offset: + kvfree(ctx.offset); + kfree(jit_data); + prog->aux->jit_data = NULL; + } +out: + if (tmp_blinded) + bpf_jit_prog_release_other(prog, prog == orig_prog ? + tmp : orig_prog); + + out_offset = -1; + return prog; +} diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h new file mode 100644 index 0000000..205a935 --- /dev/null +++ b/arch/loongarch/net/bpf_jit.h @@ -0,0 +1,282 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * BPF JIT compiler for LoongArch + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ +#include <linux/bpf.h> +#include <linux/filter.h> +#include <asm/cacheflush.h> +#include <asm/inst.h> + +struct jit_ctx { + const struct bpf_prog *prog; + unsigned int idx; + unsigned int flags; + unsigned int epilogue_offset; + u32 *offset; + union loongarch_instruction *image; + u32 stack_size; +}; + +struct jit_data { + struct bpf_binary_header *header; + u8 *image; + struct jit_ctx ctx; +}; + +#define emit_insn(ctx, func, ...) \ +do { \ + if (ctx->image != NULL) { \ + union loongarch_instruction *insn = &ctx->image[ctx->idx]; \ + emit_##func(insn, ##__VA_ARGS__); \ + } \ + ctx->idx++; \ +} while (0) + +#define is_signed_imm12(val) signed_imm_check(val, 12) +#define is_signed_imm14(val) signed_imm_check(val, 14) +#define is_signed_imm16(val) signed_imm_check(val, 16) +#define is_signed_imm26(val) signed_imm_check(val, 26) +#define is_signed_imm32(val) signed_imm_check(val, 32) +#define is_signed_imm52(val) signed_imm_check(val, 52) +#define is_unsigned_imm12(val) unsigned_imm_check(val, 12) + +static inline int bpf2la_offset(int bpf_insn, int off, const struct jit_ctx *ctx) +{ + /* BPF JMP offset is relative to the next instruction */ + bpf_insn++; + /* + * Whereas LoongArch branch instructions encode the offset + * from the branch itself, so we must subtract 1 from the + * instruction offset. + */ + return (ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1)); +} + +static inline int epilogue_offset(const struct jit_ctx *ctx) +{ + int to = ctx->epilogue_offset; + int from = ctx->idx; + + return (to - from); +} + +/* Zero-extend 32 bits into 64 bits */ +static inline void emit_zext_32(struct jit_ctx *ctx, enum loongarch_gpr reg, bool is32) +{ + if (!is32) + return; + + emit_insn(ctx, lu32id, reg, 0); +} + +/* Signed-extend 32 bits into 64 bits */ +static inline void emit_sext_32(struct jit_ctx *ctx, enum loongarch_gpr reg, bool is32) +{ + if (!is32) + return; + + emit_insn(ctx, addiw, reg, reg, 0); +} + +static inline void move_imm(struct jit_ctx *ctx, enum loongarch_gpr rd, long imm, bool is32) +{ + long imm_11_0, imm_31_12, imm_51_32, imm_63_52, imm_51_0, imm_51_31; + + /* or rd, $zero, $zero */ + if (imm == 0) { + emit_insn(ctx, or, rd, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_ZERO); + return; + } + + /* addiw rd, $zero, imm_11_0 */ + if (is_signed_imm12(imm)) { + emit_insn(ctx, addiw, rd, LOONGARCH_GPR_ZERO, imm); + goto zext; + } + + /* ori rd, $zero, imm_11_0 */ + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, ori, rd, LOONGARCH_GPR_ZERO, imm); + goto zext; + } + + /* lu52id rd, $zero, imm_63_52 */ + imm_63_52 = (imm >> 52) & 0xfff; + imm_51_0 = imm & 0xfffffffffffff; + if (imm_63_52 != 0 && imm_51_0 == 0) { + emit_insn(ctx, lu52id, rd, LOONGARCH_GPR_ZERO, imm_63_52); + return; + } + + /* lu12iw rd, imm_31_12 */ + imm_31_12 = (imm >> 12) & 0xfffff; + emit_insn(ctx, lu12iw, rd, imm_31_12); + + /* ori rd, rd, imm_11_0 */ + imm_11_0 = imm & 0xfff; + if (imm_11_0 != 0) + emit_insn(ctx, ori, rd, rd, imm_11_0); + + if (!is_signed_imm32(imm)) { + if (imm_51_0 != 0) { + /* + * If bit[51:31] is all 0 or all 1, + * it means bit[51:32] is sign extended by lu12iw, + * no need to call lu32id to do a new filled operation. + */ + imm_51_31 = (imm >> 31) & 0x1fffff; + if (imm_51_31 != 0 || imm_51_31 != 0x1fffff) { + /* lu32id rd, imm_51_32 */ + imm_51_32 = (imm >> 32) & 0xfffff; + emit_insn(ctx, lu32id, rd, imm_51_32); + } + } + + /* lu52id rd, rd, imm_63_52 */ + if (!is_signed_imm52(imm)) + emit_insn(ctx, lu52id, rd, rd, imm_63_52); + } + +zext: + emit_zext_32(ctx, rd, is32); +} + +static inline void move_reg(struct jit_ctx *ctx, enum loongarch_gpr rd, + enum loongarch_gpr rj) +{ + emit_insn(ctx, or, rd, rj, LOONGARCH_GPR_ZERO); +} + +static inline int invert_jmp_cond(u8 cond) +{ + switch (cond) { + case BPF_JEQ: + return BPF_JNE; + case BPF_JNE: + case BPF_JSET: + return BPF_JEQ; + case BPF_JGT: + return BPF_JLE; + case BPF_JGE: + return BPF_JLT; + case BPF_JLT: + return BPF_JGE; + case BPF_JLE: + return BPF_JGT; + case BPF_JSGT: + return BPF_JSLE; + case BPF_JSGE: + return BPF_JSLT; + case BPF_JSLT: + return BPF_JSGE; + case BPF_JSLE: + return BPF_JSGT; + } + return -1; +} + +static inline void cond_jmp_offs16(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + switch (cond) { + case BPF_JEQ: + /* PC += jmp_offset if rj == rd */ + emit_insn(ctx, beq, rj, rd, jmp_offset); + return; + case BPF_JNE: + case BPF_JSET: + /* PC += jmp_offset if rj != rd */ + emit_insn(ctx, bne, rj, rd, jmp_offset); + return; + case BPF_JGT: + /* PC += jmp_offset if rj > rd (unsigned) */ + emit_insn(ctx, bltu, rd, rj, jmp_offset); + return; + case BPF_JLT: + /* PC += jmp_offset if rj < rd (unsigned) */ + emit_insn(ctx, bltu, rj, rd, jmp_offset); + return; + case BPF_JGE: + /* PC += jmp_offset if rj >= rd (unsigned) */ + emit_insn(ctx, bgeu, rj, rd, jmp_offset); + return; + case BPF_JLE: + /* PC += jmp_offset if rj <= rd (unsigned) */ + emit_insn(ctx, bgeu, rd, rj, jmp_offset); + return; + case BPF_JSGT: + /* PC += jmp_offset if rj > rd (signed) */ + emit_insn(ctx, blt, rd, rj, jmp_offset); + return; + case BPF_JSLT: + /* PC += jmp_offset if rj < rd (signed) */ + emit_insn(ctx, blt, rj, rd, jmp_offset); + return; + case BPF_JSGE: + /* PC += jmp_offset if rj >= rd (signed) */ + emit_insn(ctx, bge, rj, rd, jmp_offset); + return; + case BPF_JSLE: + /* PC += jmp_offset if rj <= rd (signed) */ + emit_insn(ctx, bge, rd, rj, jmp_offset); + return; + } +} + +static inline void cond_jmp_offs26(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + cond = invert_jmp_cond(cond); + cond_jmp_offs16(ctx, cond, rj, rd, 2); + emit_insn(ctx, b, jmp_offset); +} + +static inline void uncond_jmp_offs26(struct jit_ctx *ctx, int jmp_offset) +{ + emit_insn(ctx, b, jmp_offset); +} + +static inline int emit_cond_jmp(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + /* + * A large PC-relative jump offset may overflow the immediate field of + * the native conditional branch instruction, triggering a conversion + * to use an absolute jump instead, this jump sequence is particularly + * nasty. For now, use cond_jmp_offs26() directly to keep it simple. + * In the future, maybe we can add support for far branching, the branch + * relaxation requires more than two passes to converge, the code seems + * too complex to understand, not quite sure whether it is necessary and + * worth the extra pain. Anyway, just leave it as it is to enhance code + * readability now. + */ + if (is_signed_imm26(jmp_offset)) { + cond_jmp_offs26(ctx, cond, rj, rd, jmp_offset); + return 0; + } + + return -EINVAL; +} + +static inline int emit_uncond_jmp(struct jit_ctx *ctx, int jmp_offset) +{ + if (is_signed_imm26(jmp_offset)) { + uncond_jmp_offs26(ctx, jmp_offset); + return 0; + } + + return -EINVAL; +} + +static inline int emit_tailcall_jmp(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + if (is_signed_imm16(jmp_offset)) { + cond_jmp_offs16(ctx, cond, rj, rd, jmp_offset); + return 0; + } + + return -EINVAL; +}