Message ID | 20230825151810.164418-6-hbathini@linux.ibm.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Series | powerpc/bpf: use BPF prog pack allocator |
Context | Check | Description |
---|---|---|
bpf/vmtest-bpf-next-PR | pending | PR summary |
bpf/vmtest-bpf-next-VM_Test-0 | success | Logs for ShellCheck |
bpf/vmtest-bpf-next-VM_Test-5 | success | Logs for set-matrix |
bpf/vmtest-bpf-next-VM_Test-3 | success | Logs for build for x86_64 with gcc |
bpf/vmtest-bpf-next-VM_Test-1 | success | Logs for build for aarch64 with gcc |
bpf/vmtest-bpf-next-VM_Test-4 | success | Logs for build for x86_64 with llvm-16 |
netdev/tree_selection | success | Not a local patch, async |
bpf/vmtest-bpf-next-VM_Test-2 | success | Logs for build for s390x with gcc |
bpf/vmtest-bpf-next-VM_Test-7 | pending | Logs for test_maps on s390x with gcc |
bpf/vmtest-bpf-next-VM_Test-16 | success | Logs for test_progs_no_alu32 on x86_64 with gcc |
bpf/vmtest-bpf-next-VM_Test-6 | success | Logs for test_maps on aarch64 with gcc |
bpf/vmtest-bpf-next-VM_Test-8 | success | Logs for test_maps on x86_64 with gcc |
bpf/vmtest-bpf-next-VM_Test-9 | success | Logs for test_maps on x86_64 with llvm-16 |
bpf/vmtest-bpf-next-VM_Test-17 | success | Logs for test_progs_no_alu32 on x86_64 with llvm-16 |
bpf/vmtest-bpf-next-VM_Test-18 | success | Logs for test_progs_no_alu32_parallel on aarch64 with gcc |
bpf/vmtest-bpf-next-VM_Test-19 | success | Logs for test_progs_no_alu32_parallel on x86_64 with gcc |
bpf/vmtest-bpf-next-VM_Test-20 | success | Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16 |
bpf/vmtest-bpf-next-VM_Test-21 | success | Logs for test_progs_parallel on aarch64 with gcc |
bpf/vmtest-bpf-next-VM_Test-22 | success | Logs for test_progs_parallel on x86_64 with gcc |
bpf/vmtest-bpf-next-VM_Test-23 | success | Logs for test_progs_parallel on x86_64 with llvm-16 |
bpf/vmtest-bpf-next-VM_Test-24 | success | Logs for test_verifier on aarch64 with gcc |
bpf/vmtest-bpf-next-VM_Test-26 | success | Logs for test_verifier on x86_64 with gcc |
bpf/vmtest-bpf-next-VM_Test-27 | success | Logs for test_verifier on x86_64 with llvm-16 |
bpf/vmtest-bpf-next-VM_Test-28 | success | Logs for veristat |
bpf/vmtest-bpf-next-VM_Test-10 | success | Logs for test_progs on aarch64 with gcc |
bpf/vmtest-bpf-next-VM_Test-13 | success | Logs for test_progs on x86_64 with llvm-16 |
bpf/vmtest-bpf-next-VM_Test-12 | success | Logs for test_progs on x86_64 with gcc |
bpf/vmtest-bpf-next-VM_Test-14 | success | Logs for test_progs_no_alu32 on aarch64 with gcc |
bpf/vmtest-bpf-next-VM_Test-25 | success | Logs for test_verifier on s390x with gcc |
bpf/vmtest-bpf-next-VM_Test-15 | success | Logs for test_progs_no_alu32 on s390x with gcc |
bpf/vmtest-bpf-next-VM_Test-11 | success | Logs for test_progs on s390x with gcc |
Le 25/08/2023 à 17:18, Hari Bathini a écrit :
> Use the newly introduced patch_instructions() that handles patching
> multiple instructions with one call. This improves speed of execution
> for JIT'ing bpf programs.
>
> Without this patch (on a POWER9 lpar):
>
>   # time modprobe test_bpf
>   real    2m59.681s
>   user    0m0.000s
>   sys     1m44.160s
>   #
>
> With this patch (on a POWER9 lpar):
>
>   # time modprobe test_bpf
>   real    0m5.013s
>   user    0m0.000s
>   sys     0m4.216s
>   #

Right, significant improvement. Forget my comment to patch 1, I should
have read the series up to the end. Just wondering why you don't just
put patch 4 up front?

Christophe

> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>  arch/powerpc/net/bpf_jit_comp.c | 30 ++++--------------------------
>  1 file changed, 4 insertions(+), 26 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index c60d7570e05d..1e5000d18321 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -26,28 +26,6 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
>  	memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
>  }
>  
> -/*
> - * Patch 'len' bytes of instructions from opcode to addr, one instruction
> - * at a time. Returns addr on success. ERR_PTR(-EINVAL), otherwise.
> - */
> -static void *bpf_patch_instructions(void *addr, void *opcode, size_t len, bool fill_insn)
> -{
> -	while (len > 0) {
> -		ppc_inst_t insn = ppc_inst_read(opcode);
> -		int ilen = ppc_inst_len(insn);
> -
> -		if (patch_instruction(addr, insn))
> -			return ERR_PTR(-EINVAL);
> -
> -		len -= ilen;
> -		addr = addr + ilen;
> -		if (!fill_insn)
> -			opcode = opcode + ilen;
> -	}
> -
> -	return addr;
> -}
> -
>  int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr)
>  {
>  	if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) {
> @@ -330,16 +308,16 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass
>  
>  void *bpf_arch_text_copy(void *dst, void *src, size_t len)
>  {
> -	void *ret;
> +	int err;
>  
>  	if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
>  		return ERR_PTR(-EINVAL);
>  
>  	mutex_lock(&text_mutex);
> -	ret = bpf_patch_instructions(dst, src, len, false);
> +	err = patch_instructions(dst, src, len, false);
>  	mutex_unlock(&text_mutex);
>  
> -	return ret;
> +	return err ? ERR_PTR(err) : dst;
>  }
>  
>  int bpf_arch_text_invalidate(void *dst, size_t len)
> @@ -351,7 +329,7 @@ int bpf_arch_text_invalidate(void *dst, size_t len)
>  		return -EINVAL;
>  
>  	mutex_lock(&text_mutex);
> -	ret = IS_ERR(bpf_patch_instructions(dst, &insn, len, true));
> +	ret = patch_instructions(dst, &insn, len, true);
>  	mutex_unlock(&text_mutex);
>  
>  	return ret;
On 25/08/23 9:16 pm, Christophe Leroy wrote:
>
> Le 25/08/2023 à 17:18, Hari Bathini a écrit :
>> Use the newly introduced patch_instructions() that handles patching
>> multiple instructions with one call. This improves speed of execution
>> for JIT'ing bpf programs.
>>
>> Without this patch (on a POWER9 lpar):
>>
>>   # time modprobe test_bpf
>>   real    2m59.681s
>>   user    0m0.000s
>>   sys     1m44.160s
>>   #
>>
>> With this patch (on a POWER9 lpar):
>>
>>   # time modprobe test_bpf
>>   real    0m5.013s
>>   user    0m0.000s
>>   sys     0m4.216s
>>   #
>
> Right, significant improvement. Forget my comment to patch 1, I should
> have read the series up to the end. Just wondering why you don't just
> put patch 4 up front?

I wanted to avoid making the bpf_prog_pack enablement patches depend on
this improvement, just in case..

- Hari
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index c60d7570e05d..1e5000d18321 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -26,28 +26,6 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
 	memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
 }
 
-/*
- * Patch 'len' bytes of instructions from opcode to addr, one instruction
- * at a time. Returns addr on success. ERR_PTR(-EINVAL), otherwise.
- */
-static void *bpf_patch_instructions(void *addr, void *opcode, size_t len, bool fill_insn)
-{
-	while (len > 0) {
-		ppc_inst_t insn = ppc_inst_read(opcode);
-		int ilen = ppc_inst_len(insn);
-
-		if (patch_instruction(addr, insn))
-			return ERR_PTR(-EINVAL);
-
-		len -= ilen;
-		addr = addr + ilen;
-		if (!fill_insn)
-			opcode = opcode + ilen;
-	}
-
-	return addr;
-}
-
 int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr)
 {
 	if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) {
@@ -330,16 +308,16 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass
 
 void *bpf_arch_text_copy(void *dst, void *src, size_t len)
 {
-	void *ret;
+	int err;
 
 	if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
 		return ERR_PTR(-EINVAL);
 
 	mutex_lock(&text_mutex);
-	ret = bpf_patch_instructions(dst, src, len, false);
+	err = patch_instructions(dst, src, len, false);
 	mutex_unlock(&text_mutex);
 
-	return ret;
+	return err ? ERR_PTR(err) : dst;
 }
 
 int bpf_arch_text_invalidate(void *dst, size_t len)
@@ -351,7 +329,7 @@ int bpf_arch_text_invalidate(void *dst, size_t len)
 		return -EINVAL;
 
 	mutex_lock(&text_mutex);
-	ret = IS_ERR(bpf_patch_instructions(dst, &insn, len, true));
+	ret = patch_instructions(dst, &insn, len, true);
 	mutex_unlock(&text_mutex);
 
 	return ret;
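A note on the return-value change above: patch_instructions() reports failure
as a plain int (0 or a negative errno), while bpf_arch_text_copy() has to hand
back a pointer, either the destination on success or the error encoded via
ERR_PTR(), hence the new "return err ? ERR_PTR(err) : dst;". For the same
reason bpf_arch_text_invalidate() gets simpler: it already returns an int, so
the helper's error code can be passed straight through instead of being
collapsed to IS_ERR()'s 0/1. The sketch below is a minimal user-space model of
that pointer-encoding convention only; the ERR_PTR/IS_ERR/PTR_ERR definitions
are simplified stand-ins for the kernel's err.h helpers, and copy_or_err() is
a made-up name, not a kernel function.

#include <stdio.h>
#include <errno.h>

/* Simplified stand-ins for the kernel's ERR_PTR()/IS_ERR()/PTR_ERR(). */
#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

/* Models the new flow: take 0/-errno from the patch step, return a pointer. */
static void *copy_or_err(void *dst, int err)
{
	return err ? ERR_PTR(err) : dst;
}

int main(void)
{
	char image[16];
	void *ret;

	ret = copy_or_err(image, 0);		/* success: destination back */
	printf("ok:   IS_ERR=%d\n", IS_ERR(ret));

	ret = copy_or_err(image, -EINVAL);	/* failure: encoded errno */
	if (IS_ERR(ret))
		printf("fail: errno=%ld\n", -PTR_ERR(ret));
	return 0;
}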
Use the newly introduced patch_instructions() that handles patching
multiple instructions with one call. This improves speed of execution
for JIT'ing bpf programs.

Without this patch (on a POWER9 lpar):

  # time modprobe test_bpf
  real    2m59.681s
  user    0m0.000s
  sys     1m44.160s
  #

With this patch (on a POWER9 lpar):

  # time modprobe test_bpf
  real    0m5.013s
  user    0m0.000s
  sys     0m4.216s
  #

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
 arch/powerpc/net/bpf_jit_comp.c | 30 ++++--------------------------
 1 file changed, 4 insertions(+), 26 deletions(-)
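For context on where the speed-up comes from: the removed
bpf_patch_instructions() walked the JIT image and called patch_instruction()
once per instruction, whereas patch_instructions(), added earlier in this
series, handles the whole range in one call. It either copies a block of code
or, when its repeat flag is set, replicates a single instruction across the
range, which is how bpf_arch_text_invalidate() fills freed JIT space with
traps. The user-space sketch below models only those semantics;
mock_patch_instructions() is an illustrative stand-in that assumes fixed
4-byte instructions and ignores the prefixed instructions and write-protected
text mapping the real helper deals with.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Toy model of patch_instructions(addr, code, len, repeat_instr):
 * repeat_instr == 0 -> copy len bytes of code into addr in one go;
 * repeat_instr == 1 -> replicate the single instruction at *code across
 * the whole range (used to fill freed JIT space with trap instructions).
 */
static int mock_patch_instructions(uint32_t *addr, uint32_t *code,
				   size_t len, int repeat_instr)
{
	size_t i, n = len / sizeof(uint32_t);

	if (repeat_instr) {
		for (i = 0; i < n; i++)
			addr[i] = *code;
	} else {
		memcpy(addr, code, len);
	}
	return 0;			/* 0 on success, -errno on failure */
}

static void dump(const char *tag, uint32_t *insns, size_t n)
{
	size_t i;

	printf("%s:", tag);
	for (i = 0; i < n; i++)
		printf(" %08x", (unsigned int)insns[i]);
	printf("\n");
}

int main(void)
{
	/* nop, li r3,1, blr, nop: just recognisable 4-byte values */
	uint32_t code[4] = { 0x60000000, 0x38600001, 0x4e800020, 0x60000000 };
	uint32_t trap = 0x7fe00008;	/* tw 31,0,0 style filler */
	uint32_t image[4] = { 0 };

	mock_patch_instructions(image, code, sizeof(code), 0);	 /* bulk copy */
	dump("copied", image, 4);

	mock_patch_instructions(image, &trap, sizeof(image), 1); /* fill */
	dump("filled", image, 4);
	return 0;
}

As I read the earlier patch in the series, the gain in the modprobe test_bpf
numbers above comes from paying the map/unmap overhead of the text-patching
area per patched block rather than once per 4-byte instruction.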