Message ID | 20240627090900.20017-9-iii@linux.ibm.com
---|---
State | Superseded
Delegated to: | BPF
Series | s390/bpf: Implement arena
On Thu, Jun 27, 2024 at 2:09 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> s390x supports most BPF atomics using single instructions, which
> makes implementing arena support a matter of adding the arena address
> to the base register (unfortunately atomics do not support index
> registers), and wrapping the respective native instruction in probing
> sequences.
>
> An exception is BPF_XCHG, which is implemented using two different
> memory accesses and a loop. Make sure there are enough extable
> entries for both instructions. Compute the base address once for both
> memory accesses. Since on exception we need to land after the loop,
> emit the nops manually.
>
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> ---
>  arch/s390/net/bpf_jit_comp.c | 100 +++++++++++++++++++++++++++++++----
>  1 file changed, 91 insertions(+), 9 deletions(-)

[...]

> +bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
> +{
> +	/*
> +	 * Currently the verifier uses this function only to check which
> +	 * atomic stores to arena are supported, and they all are.
> +	 */
> +	return true;

Including all the multi insn instructions that are implemented as loops?
On x86 I left out atomic+fetch+[and|or|xor], because they're tricky
with looping.
Just checking that when an exception happens the loop is not going to
become infinite?
If I'm reading the code correctly the exception handling will not only
skip one insn, but will skip the whole loop?
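The sequence at issue can be modeled in plain C roughly as follows (a
sketch of the semantics only, not the JIT's actual output; the compiler
builtin __atomic_compare_exchange_n stands in for the s390x COMPARE AND
SWAP, {csy|csg}):

#include <stdbool.h>

/* Model of the JITed BPF_XCHG: an initial load, then a compare-and-swap
 * loop. Both memory accesses can fault on an unmapped arena page; the
 * extable fixups land after the loop, so a fault cannot re-enter it. */
static unsigned long xchg_model(unsigned long *addr, unsigned long val)
{
	unsigned long old = *addr;	/* {ly|lg}: first probed access */

	/* {csy|csg} loop: retries only while another CPU races us; on a
	 * failed compare, 'old' is refreshed with the value observed. */
	while (!__atomic_compare_exchange_n(addr, &old, val, false,
					    __ATOMIC_SEQ_CST,
					    __ATOMIC_SEQ_CST))
		;
	return old;
}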
On Thu, 2024-06-27 at 17:43 -0700, Alexei Starovoitov wrote:
> On Thu, Jun 27, 2024 at 2:09 AM Ilya Leoshkevich <iii@linux.ibm.com>
> wrote:
> >
> > s390x supports most BPF atomics using single instructions, which
> > makes implementing arena support a matter of adding the arena
> > address to the base register (unfortunately atomics do not support
> > index registers), and wrapping the respective native instruction in
> > probing sequences.
> >
> > An exception is BPF_XCHG, which is implemented using two different
> > memory accesses and a loop. Make sure there are enough extable
> > entries for both instructions. Compute the base address once for
> > both memory accesses. Since on exception we need to land after the
> > loop, emit the nops manually.
> >
> > Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> > ---
> >  arch/s390/net/bpf_jit_comp.c | 100 +++++++++++++++++++++++++++++++----
> >  1 file changed, 91 insertions(+), 9 deletions(-)

[...]

> > +bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
> > +{
> > +	/*
> > +	 * Currently the verifier uses this function only to check
> > +	 * which atomic stores to arena are supported, and they all
> > +	 * are.
> > +	 */
> > +	return true;
>
> Including all the multi insn instructions that are implemented as
> loops?
> On x86 I left out atomic+fetch+[and|or|xor], because they're tricky
> with looping.
> Just checking that when an exception happens the loop is not going to
> become infinite?
> If I'm reading the code correctly the exception handling will not
> only skip one insn, but will skip the whole loop?

On s390x only BPF_XCHG needs to be implemented as a loop; the rest are
single instructions. For example, there is LOAD AND EXCLUSIVE OR, which
is atomic, updates memory, and puts the original value into a register.

For BPF_XCHG the exception handler will skip the entire loop after an
exception.

BPF_XCHG has two memory accesses: the initial LOAD, and then the
COMPARE AND SWAP loop. I wasn't able to test the exception handling for
COMPARE AND SWAP, because I would have to inject a race that would free
the arena page after the initial LOAD. Now that you asked, I added the
following temporary patch to skip the LOAD:

--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1598,10 +1598,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		struct bpf_jit_probe load_probe = probe;
 
 		bpf_jit_probe_atomic_pre(jit, insn, &load_probe);
-		/* {ly|lg} %w0,off(%arena) */
-		EMIT6_DISP_LH(0xe3000000,
-			      is32 ? 0x0058 : 0x0004, REG_W0, REG_0,
-			      load_probe.arena_reg, off);
+		/* bcr 0,%0 (nop) */
+		_EMIT2(0x0700);
 		bpf_jit_probe_emit_nop(jit, &load_probe);
 		/* Reuse {ly|lg}'s arena_reg for {csy|csg}. */
 		if (load_probe.prg != -1) {

This is still a valid BPF_XCHG implementation, just less efficient in
the non-contended case. The exception handling works, but I found a
bug: the hard-coded offset in

	/* brc 4,0b */
	EMIT4_PCREL_RIC(0xa7040000, 4, jit->prg - 6);

is no longer valid due to the extra nop added by this patch. I will fix
this and resend.
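One plausible shape for that fix (an untested sketch, not necessarily
what the resend will do; loop_start is a hypothetical local) is to
record the loop start instead of hard-coding the 6-byte distance:

			int loop_start;
			...
			/* 0: {csy|csg} %w0,%src,off(%arena) */
			loop_start = jit->prg;
			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
				      REG_W0, src_reg, probe.arena_reg, off);
			bpf_jit_probe_emit_nop(jit, &probe);
			/* brc 4,0b - branch back to the recorded offset */
			EMIT4_PCREL_RIC(0xa7040000, 4, loop_start);

Branching back to the {csy|csg} itself, skipping the nop, is safe
because on a failed compare the instruction reloads %w0 with the value
it observed, so the retry sees fresh data.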
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 1dd359c25ada..12293689ad60 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -704,6 +704,10 @@ static void bpf_jit_probe_init(struct bpf_jit_probe *probe)
 static void bpf_jit_probe_emit_nop(struct bpf_jit *jit,
 				   struct bpf_jit_probe *probe)
 {
+	if (probe->prg == -1 || probe->nop_prg != -1)
+		/* The probe is not armed or nop is already emitted. */
+		return;
+
 	probe->nop_prg = jit->prg;
 	/* bcr 0,%0 */
 	_EMIT2(0x0700);
@@ -738,6 +742,21 @@ static void bpf_jit_probe_store_pre(struct bpf_jit *jit, struct bpf_insn *insn,
 	probe->prg = jit->prg;
 }
 
+static void bpf_jit_probe_atomic_pre(struct bpf_jit *jit,
+				     struct bpf_insn *insn,
+				     struct bpf_jit_probe *probe)
+{
+	if (BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
+		return;
+
+	/* lgrl %r1,kern_arena */
+	EMIT6_PCREL_RILB(0xc4080000, REG_W1, jit->kern_arena);
+	/* agr %r1,%dst */
+	EMIT4(0xb9080000, REG_W1, insn->dst_reg);
+	probe->arena_reg = REG_W1;
+	probe->prg = jit->prg;
+}
+
 static int bpf_jit_probe_post(struct bpf_jit *jit, struct bpf_prog *fp,
 			      struct bpf_jit_probe *probe)
 {
@@ -1523,15 +1542,30 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 	 */
 	case BPF_STX | BPF_ATOMIC | BPF_DW:
 	case BPF_STX | BPF_ATOMIC | BPF_W:
+	case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
+	case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
 	{
 		bool is32 = BPF_SIZE(insn->code) == BPF_W;
 
+		/*
+		 * Unlike loads and stores, atomics have only a base register,
+		 * but no index register. For the non-arena case, simply use
+		 * %dst as a base. For the arena case, use the work register
+		 * %r1: first, load the arena base into it, and then add %dst
+		 * to it.
+		 */
+		probe.arena_reg = dst_reg;
+
 		switch (insn->imm) {
-/* {op32|op64} {%w0|%src},%src,off(%dst) */
 #define EMIT_ATOMIC(op32, op64) do {					\
+	bpf_jit_probe_atomic_pre(jit, insn, &probe);			\
+	/* {op32|op64} {%w0|%src},%src,off(%arena) */			\
 	EMIT6_DISP_LH(0xeb000000, is32 ? (op32) : (op64),		\
 		      (insn->imm & BPF_FETCH) ? src_reg : REG_W0,	\
-		      src_reg, dst_reg, off);				\
+		      src_reg, probe.arena_reg, off);			\
+	err = bpf_jit_probe_post(jit, fp, &probe);			\
+	if (err < 0)							\
+		return err;						\
 	if (insn->imm & BPF_FETCH) {					\
 		/* bcr 14,0 - see atomic_fetch_{add,and,or,xor}() */	\
 		_EMIT2(0x07e0);						\
@@ -1560,25 +1594,48 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 			EMIT_ATOMIC(0x00f7, 0x00e7);
 			break;
 #undef EMIT_ATOMIC
-		case BPF_XCHG:
-			/* {ly|lg} %w0,off(%dst) */
+		case BPF_XCHG: {
+			struct bpf_jit_probe load_probe = probe;
+
+			bpf_jit_probe_atomic_pre(jit, insn, &load_probe);
+			/* {ly|lg} %w0,off(%arena) */
 			EMIT6_DISP_LH(0xe3000000,
 				      is32 ? 0x0058 : 0x0004, REG_W0, REG_0,
-				      dst_reg, off);
-			/* 0: {csy|csg} %w0,%src,off(%dst) */
+				      load_probe.arena_reg, off);
+			bpf_jit_probe_emit_nop(jit, &load_probe);
+			/* Reuse {ly|lg}'s arena_reg for {csy|csg}. */
+			if (load_probe.prg != -1) {
+				probe.prg = jit->prg;
+				probe.arena_reg = load_probe.arena_reg;
+			}
+			/* 0: {csy|csg} %w0,%src,off(%arena) */
 			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
-				      REG_W0, src_reg, dst_reg, off);
+				      REG_W0, src_reg, probe.arena_reg, off);
+			bpf_jit_probe_emit_nop(jit, &probe);
 			/* brc 4,0b */
 			EMIT4_PCREL_RIC(0xa7040000, 4, jit->prg - 6);
 			/* {llgfr|lgr} %src,%w0 */
 			EMIT4(is32 ? 0xb9160000 : 0xb9040000, src_reg, REG_W0);
+			/* Both probes should land here on exception. */
+			err = bpf_jit_probe_post(jit, fp, &load_probe);
+			if (err < 0)
+				return err;
+			err = bpf_jit_probe_post(jit, fp, &probe);
+			if (err < 0)
+				return err;
 			if (is32 && insn_is_zext(&insn[1]))
 				insn_count = 2;
 			break;
+		}
 		case BPF_CMPXCHG:
-			/* 0: {csy|csg} %b0,%src,off(%dst) */
+			bpf_jit_probe_atomic_pre(jit, insn, &probe);
+			/* 0: {csy|csg} %b0,%src,off(%arena) */
 			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
-				      BPF_REG_0, src_reg, dst_reg, off);
+				      BPF_REG_0, src_reg,
+				      probe.arena_reg, off);
+			err = bpf_jit_probe_post(jit, fp, &probe);
+			if (err < 0)
+				return err;
 			break;
 		default:
 			pr_err("Unknown atomic operation %02x\n", insn->imm);
@@ -2142,9 +2199,25 @@ static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
 					       struct bpf_prog *fp)
 {
 	struct bpf_binary_header *header;
+	struct bpf_insn *insn;
 	u32 extable_size;
 	u32 code_size;
+	int i;
 
+	for (i = 0; i < fp->len; i++) {
+		insn = &fp->insnsi[i];
+
+		if (BPF_CLASS(insn->code) == BPF_STX &&
+		    BPF_MODE(insn->code) == BPF_PROBE_ATOMIC &&
+		    (BPF_SIZE(insn->code) == BPF_DW ||
+		     BPF_SIZE(insn->code) == BPF_W) &&
+		    insn->imm == BPF_XCHG)
+			/*
+			 * bpf_jit_insn() emits a load and a compare-and-swap,
+			 * both of which need to be probed.
+			 */
+			fp->aux->num_exentries += 1;
+	}
 	/* We need two entries per insn. */
 	fp->aux->num_exentries *= 2;
 
@@ -2825,3 +2898,12 @@ bool bpf_jit_supports_arena(void)
 {
 	return true;
 }
+
+bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
+{
+	/*
+	 * Currently the verifier uses this function only to check which
+	 * atomic stores to arena are supported, and they all are.
+	 */
+	return true;
+}
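For context on what this enables from the BPF side, a program along the
following lines (a sketch modeled on the kernel's arena_atomics
selftest; the map layout and names are illustrative, and the
arena-pointer plumbing from bpf_arena_common.h is elided) now JITs to a
probed load plus compare-and-swap loop on s390x:

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_ARENA);
	__uint(map_flags, BPF_F_MMAPABLE);
	__uint(max_entries, 1);		/* one arena page */
} arena SEC(".maps");

/* Arena objects live in LLVM address space 1. */
#define __arena __attribute__((address_space(1)))

__u64 __arena xchg64_value;

SEC("syscall")
int do_arena_xchg(void *ctx)
{
	/* Compiles to a BPF_XCHG on an arena address; the JIT wraps both
	 * the load and the compare-and-swap in probing sequences. */
	__sync_lock_test_and_set(&xchg64_value, 1);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";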
s390x supports most BPF atomics using single instructions, which makes
implementing arena support a matter of adding the arena address to the
base register (unfortunately atomics do not support index registers),
and wrapping the respective native instruction in probing sequences.

An exception is BPF_XCHG, which is implemented using two different
memory accesses and a loop. Make sure there are enough extable entries
for both instructions. Compute the base address once for both memory
accesses. Since on exception we need to land after the loop, emit the
nops manually.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
 arch/s390/net/bpf_jit_comp.c | 100 +++++++++++++++++++++++++++++++----
 1 file changed, 91 insertions(+), 9 deletions(-)
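Illustrative arithmetic for the extable sizing above: a program whose
only arena access is a single BPF_XCHG reaches bpf_jit_alloc() with
num_exentries = 1 (one probed instruction, as counted before JITing);
the new loop bumps that to 2 for the load and the compare-and-swap, and
the existing doubling then reserves (1 + 1) * 2 = 4 extable entries.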