Message ID | 2a45e43866e9ff2e53e3efd2675c0b027aa07aac.1740009184.git.yepeilin@google.com (mailing list archive) |
---|---|
State | New |
Series | Introduce load-acquire and store-release BPF instructions |

On 2/20/2025 9:21 AM, Peilin Ye wrote:
> Support BPF load-acquire (BPF_LOAD_ACQ) and store-release
> (BPF_STORE_REL) instructions in the arm64 JIT compiler. For example
> (assuming little-endian):
>
>   db 10 00 00 00 01 00 00  r0 = load_acquire((u64 *)(r1 + 0x0))
>   95 00 00 00 00 00 00 00  exit
>
>   opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX
>   imm (0x00000100): BPF_LOAD_ACQ
>
> The JIT compiler would emit an LDAR instruction for the above, e.g.:
>
>   ldar x7, [x0]
>
> Similarly, consider the following 16-bit store-release:
>
>   cb 21 00 00 10 01 00 00  store_release((u16 *)(r1 + 0x0), w2)
>   95 00 00 00 00 00 00 00  exit
>
>   opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX
>   imm (0x00000110): BPF_STORE_REL
>
> An STLRH instruction would be emitted, e.g.:
>
>   stlrh w1, [x0]
>
> For a complete mapping:
>
>   load-acquire      8-bit  LDARB
>   (BPF_LOAD_ACQ)   16-bit  LDARH
>                    32-bit  LDAR (32-bit)
>                    64-bit  LDAR (64-bit)
>   store-release     8-bit  STLRB
>   (BPF_STORE_REL)  16-bit  STLRH
>                    32-bit  STLR (32-bit)
>                    64-bit  STLR (64-bit)
>
> Arena accesses are supported.
> bpf_jit_supports_insn(..., /*in_arena=*/true) always returns true for
> BPF_LOAD_ACQ and BPF_STORE_REL instructions, as they don't depend on
> ARM64_HAS_LSE_ATOMICS.
>
> Signed-off-by: Peilin Ye <yepeilin@google.com>
> ---
>  arch/arm64/net/bpf_jit.h      | 20 ++++++++
>  arch/arm64/net/bpf_jit_comp.c | 91 ++++++++++++++++++++++++++++++++---
>  2 files changed, 105 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
> index b22ab2f97a30..a3b0e693a125 100644
> --- a/arch/arm64/net/bpf_jit.h
> +++ b/arch/arm64/net/bpf_jit.h
> @@ -119,6 +119,26 @@
>  	aarch64_insn_gen_load_store_ex(Rt, Rn, Rs, A64_SIZE(sf), \
>  				       AARCH64_INSN_LDST_STORE_REL_EX)
>
> +/* Load-acquire & store-release */
> +#define A64_LDAR(Rt, Rn, size) \
> +	aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \
> +					    AARCH64_INSN_LDST_LOAD_ACQ)
> +#define A64_STLR(Rt, Rn, size) \
> +	aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \
> +					    AARCH64_INSN_LDST_STORE_REL)
> +
> +/* Rt = [Rn] (load acquire) */
> +#define A64_LDARB(Wt, Xn)	A64_LDAR(Wt, Xn, 8)
> +#define A64_LDARH(Wt, Xn)	A64_LDAR(Wt, Xn, 16)
> +#define A64_LDAR32(Wt, Xn)	A64_LDAR(Wt, Xn, 32)
> +#define A64_LDAR64(Xt, Xn)	A64_LDAR(Xt, Xn, 64)
> +
> +/* [Rn] = Rt (store release) */
> +#define A64_STLRB(Wt, Xn)	A64_STLR(Wt, Xn, 8)
> +#define A64_STLRH(Wt, Xn)	A64_STLR(Wt, Xn, 16)
> +#define A64_STLR32(Wt, Xn)	A64_STLR(Wt, Xn, 32)
> +#define A64_STLR64(Xt, Xn)	A64_STLR(Xt, Xn, 64)
> +
>  /*
>   * LSE atomics
>   *
> diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
> index 8c3b47d9e441..25562bdb8eb5 100644
> --- a/arch/arm64/net/bpf_jit_comp.c
> +++ b/arch/arm64/net/bpf_jit_comp.c
> @@ -647,6 +647,82 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
>  	return 0;
>  }
>
> +static int emit_atomic_load_store(const struct bpf_insn *insn,
> +				  struct jit_ctx *ctx)
> +{
> +	const s32 imm = insn->imm;
> +	const s16 off = insn->off;
> +	const u8 code = insn->code;
> +	const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC;
> +	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
> +	const u8 dst = bpf2a64[insn->dst_reg];
> +	const u8 src = bpf2a64[insn->src_reg];
> +	const u8 tmp = bpf2a64[TMP_REG_1];
> +	u8 reg;
> +
> +	switch (imm) {
> +	case BPF_LOAD_ACQ:
> +		reg = src;
> +		break;
> +	case BPF_STORE_REL:
> +		reg = dst;
> +		break;
> +	default:
> +		pr_err_once("unknown atomic load/store op code %02x\n", imm);
> +		return -EINVAL;
> +	}
> +
> +	if (off) {
> +		emit_a64_add_i(1, tmp, reg, tmp, off, ctx);
> +		reg = tmp;
> +	}
> +	if (arena) {
> +		emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx);
> +		reg = tmp;
> +	}
> +
> +	switch (imm) {
> +	case BPF_LOAD_ACQ:
> +		switch (BPF_SIZE(code)) {
> +		case BPF_B:
> +			emit(A64_LDARB(dst, reg), ctx);
> +			break;
> +		case BPF_H:
> +			emit(A64_LDARH(dst, reg), ctx);
> +			break;
> +		case BPF_W:
> +			emit(A64_LDAR32(dst, reg), ctx);
> +			break;
> +		case BPF_DW:
> +			emit(A64_LDAR64(dst, reg), ctx);
> +			break;
> +		}
> +		break;
> +	case BPF_STORE_REL:
> +		switch (BPF_SIZE(code)) {
> +		case BPF_B:
> +			emit(A64_STLRB(src, reg), ctx);
> +			break;
> +		case BPF_H:
> +			emit(A64_STLRH(src, reg), ctx);
> +			break;
> +		case BPF_W:
> +			emit(A64_STLR32(src, reg), ctx);
> +			break;
> +		case BPF_DW:
> +			emit(A64_STLR64(src, reg), ctx);
> +			break;
> +		}
> +		break;
> +	default:
> +		pr_err_once("unexpected atomic load/store op code %02x\n",
> +			    imm);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
>  #ifdef CONFIG_ARM64_LSE_ATOMICS
>  static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
>  {
> @@ -1641,11 +1717,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
>  			return ret;
>  		break;
>
> +	case BPF_STX | BPF_ATOMIC | BPF_B:
> +	case BPF_STX | BPF_ATOMIC | BPF_H:
>  	case BPF_STX | BPF_ATOMIC | BPF_W:
>  	case BPF_STX | BPF_ATOMIC | BPF_DW:
> +	case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
> +	case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
>  	case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
>  	case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
> -		if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
> +		if (bpf_atomic_is_load_store(insn))
> +			ret = emit_atomic_load_store(insn, ctx);
> +		else if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
>  			ret = emit_lse_atomic(insn, ctx);
>  		else
>  			ret = emit_ll_sc_atomic(insn, ctx);
> @@ -2667,13 +2749,10 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
>  	if (!in_arena)
>  		return true;
>  	switch (insn->code) {
> -	case BPF_STX | BPF_ATOMIC | BPF_B:
> -	case BPF_STX | BPF_ATOMIC | BPF_H:
>  	case BPF_STX | BPF_ATOMIC | BPF_W:
>  	case BPF_STX | BPF_ATOMIC | BPF_DW:
> -		if (bpf_atomic_is_load_store(insn))
> -			return false;
> -		if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
> +		if (!bpf_atomic_is_load_store(insn) &&
> +		    !cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
>  			return false;
>  	}
>  	return true;

Acked-by: Xu Kuohai <xukuohai@huawei.com>
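
[Editor's note] As a quick cross-check of the two encodings quoted above, the opcode and imm
fields can be recomputed from the UAPI opcode macros. The program below is an illustrative
sketch, not part of the patch; it assumes the BPF_LOAD_ACQ (0x100) and BPF_STORE_REL (0x110)
values introduced by this series and defines them locally in case the installed <linux/bpf.h>
predates it.

  #include <stdio.h>
  #include <linux/bpf.h>

  /* Ordering values introduced by this series; local fallbacks in case
   * the installed UAPI headers are older. */
  #ifndef BPF_LOAD_ACQ
  #define BPF_LOAD_ACQ	0x100
  #endif
  #ifndef BPF_STORE_REL
  #define BPF_STORE_REL	0x110
  #endif

  int main(void)
  {
          /* r0 = load_acquire((u64 *)(r1 + 0x0)): expect opcode 0xdb, imm 0x00000100 */
          printf("load-acquire : opcode 0x%02x, imm 0x%08x\n",
                 BPF_ATOMIC | BPF_DW | BPF_STX, BPF_LOAD_ACQ);

          /* store_release((u16 *)(r1 + 0x0), w2): expect opcode 0xcb, imm 0x00000110 */
          printf("store-release: opcode 0x%02x, imm 0x%08x\n",
                 BPF_ATOMIC | BPF_H | BPF_STX, BPF_STORE_REL);

          return 0;
  }

Running it should print opcode 0xdb / imm 0x00000100 and opcode 0xcb / imm 0x00000110,
matching the two disassembled instructions in the commit message.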
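[Editor's note] For the arena case, the hunks above first materialize the effective address
and only then emit the acquire/release access. Roughly, for a BPF_PROBE_ATOMIC 32-bit
load-acquire with a non-zero offset, the emitted sequence looks like the sketch below; the
offset and register numbers are illustrative only, not the exact bpf2a64 mapping:

  add  x10, x0, #0x4      // emit_a64_add_i(): tmp = address reg + insn->off
  add  x10, x10, x28      // A64_ADD(): rebase into the arena mapping (BPF_PROBE_ATOMIC only)
  ldar w7, [x10]          // A64_LDAR32(): acquire load into the destination register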