Message ID | 20220414162220.1985095-5-xukuohai@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | bpf trampoline for arm64 | expand |
Hi, On 4/15/2022 12:22 AM, Xu Kuohai wrote: > Impelment bpf_arch_text_poke() for arm64, so bpf trampoline code can use > it to replace nop with jump, or replace jump with nop. > > Signed-off-by: Xu Kuohai <xukuohai@huawei.com> > Acked-by: Song Liu <songliubraving@fb.com> > --- > arch/arm64/net/bpf_jit_comp.c | 52 +++++++++++++++++++++++++++++++++++ > 1 file changed, 52 insertions(+) > > diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c > index 8ab4035dea27..1a1c3ea75ee2 100644 > --- a/arch/arm64/net/bpf_jit_comp.c > +++ b/arch/arm64/net/bpf_jit_comp.c > @@ -9,6 +9,7 @@ > > #include <linux/bitfield.h> > #include <linux/bpf.h> > +#include <linux/memory.h> > #include <linux/filter.h> > #include <linux/printk.h> > #include <linux/slab.h> > @@ -18,6 +19,7 @@ > #include <asm/cacheflush.h> > #include <asm/debug-monitors.h> > #include <asm/insn.h> > +#include <asm/patching.h> > #include <asm/set_memory.h> > > #include "bpf_jit.h" > @@ -1529,3 +1531,53 @@ void bpf_jit_free_exec(void *addr) > { > return vfree(addr); > } > + > +static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip, > + void *addr, u32 *insn) > +{ > + if (!addr) > + *insn = aarch64_insn_gen_nop(); > + else > + *insn = aarch64_insn_gen_branch_imm((unsigned long)ip, > + (unsigned long)addr, > + type); > + > + return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT; > +} > + > +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, > + void *old_addr, void *new_addr) > +{ > + int ret; > + u32 old_insn; > + u32 new_insn; > + u32 replaced; > + enum aarch64_insn_branch_type branch_type; > + In bpf_arch_text_poke() of x86, it disables the poking of kernel module, can you explain why it is OK to do so in arm64 ? Because there is no test cases for fentry on linux kernel module, could you please add some tests for it ? 
> + if (poke_type == BPF_MOD_CALL) > + branch_type = AARCH64_INSN_BRANCH_LINK; > + else > + branch_type = AARCH64_INSN_BRANCH_NOLINK; > + > + if (gen_branch_or_nop(branch_type, ip, old_addr, &old_insn) < 0) > + return -EFAULT; > + > + if (gen_branch_or_nop(branch_type, ip, new_addr, &new_insn) < 0) > + return -EFAULT; > + > + mutex_lock(&text_mutex); > + if (aarch64_insn_read(ip, &replaced)) { > + ret = -EFAULT; > + goto out; > + } > + > + if (replaced != old_insn) { > + ret = -EFAULT; > + goto out; > + } > + > + ret = aarch64_insn_patch_text_nosync((void *)ip, new_insn); > +out: > + mutex_unlock(&text_mutex); > + return ret; > +}
On 4/15/2022 10:34 AM, Hou Tao wrote: > Hi, > > On 4/15/2022 12:22 AM, Xu Kuohai wrote: >> Impelment bpf_arch_text_poke() for arm64, so bpf trampoline code can use >> it to replace nop with jump, or replace jump with nop. >> >> Signed-off-by: Xu Kuohai <xukuohai@huawei.com> >> Acked-by: Song Liu <songliubraving@fb.com> >> --- >> arch/arm64/net/bpf_jit_comp.c | 52 +++++++++++++++++++++++++++++++++++ >> 1 file changed, 52 insertions(+) >> >> diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c >> index 8ab4035dea27..1a1c3ea75ee2 100644 >> --- a/arch/arm64/net/bpf_jit_comp.c >> +++ b/arch/arm64/net/bpf_jit_comp.c >> @@ -9,6 +9,7 @@ >> >> #include <linux/bitfield.h> >> #include <linux/bpf.h> >> +#include <linux/memory.h> >> #include <linux/filter.h> >> #include <linux/printk.h> >> #include <linux/slab.h> >> @@ -18,6 +19,7 @@ >> #include <asm/cacheflush.h> >> #include <asm/debug-monitors.h> >> #include <asm/insn.h> >> +#include <asm/patching.h> >> #include <asm/set_memory.h> >> >> #include "bpf_jit.h" >> @@ -1529,3 +1531,53 @@ void bpf_jit_free_exec(void *addr) >> { >> return vfree(addr); >> } >> + >> +static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip, >> + void *addr, u32 *insn) >> +{ >> + if (!addr) >> + *insn = aarch64_insn_gen_nop(); >> + else >> + *insn = aarch64_insn_gen_branch_imm((unsigned long)ip, >> + (unsigned long)addr, >> + type); >> + >> + return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT; >> +} >> + >> +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, >> + void *old_addr, void *new_addr) >> +{ >> + int ret; >> + u32 old_insn; >> + u32 new_insn; >> + u32 replaced; >> + enum aarch64_insn_branch_type branch_type; >> + > In bpf_arch_text_poke() of x86, it disables the poking of kernel module, can you > explain why it is OK to do so in arm64 ? Because there is no test cases for > fentry on linux kernel module, could you please add some tests for it ? Oops, I forget to check this condition. 
It's not safe to patch a ko unless ko unloading is disabled. For arm64, the fentry is only patched by ftrace since the nop instruction to be instrumented is not the first instruction, so bpf_text_poke() fails when comparing the old instruction (pointed to by the "old_addr") with the nop. Since the nop in fentry is reserved by ftrace, I don't think it's reasonable to patch the nop by another interface not provided by ftrace. Besides, for long jumps outside the range of 128MB, a single branch instruction is not sufficient; perhaps we could use ftrace trampoline or some other method to support long jumps. >> + if (poke_type == BPF_MOD_CALL) >> + branch_type = AARCH64_INSN_BRANCH_LINK; >> + else >> + branch_type = AARCH64_INSN_BRANCH_NOLINK; >> + >> + if (gen_branch_or_nop(branch_type, ip, old_addr, &old_insn) < 0) >> + return -EFAULT; >> + >> + if (gen_branch_or_nop(branch_type, ip, new_addr, &new_insn) < 0) >> + return -EFAULT; >> + >> + mutex_lock(&text_mutex); >> + if (aarch64_insn_read(ip, &replaced)) { >> + ret = -EFAULT; >> + goto out; >> + } >> + >> + if (replaced != old_insn) { >> + ret = -EFAULT; >> + goto out; >> + } >> + >> + ret = aarch64_insn_patch_text_nosync((void *)ip, new_insn); >> +out: >> + mutex_unlock(&text_mutex); >> + return ret; >> +} > > .
Hi Xu, Thanks for working on this. We are also looking forward to using fentry hooks on arm64. In particular, attaching to entry/exit into/from XDP progs. On Thu, Apr 14, 2022 at 12:22 PM -04, Xu Kuohai wrote: > Impelment bpf_arch_text_poke() for arm64, so bpf trampoline code can use > it to replace nop with jump, or replace jump with nop. > > Signed-off-by: Xu Kuohai <xukuohai@huawei.com> > Acked-by: Song Liu <songliubraving@fb.com> > --- > arch/arm64/net/bpf_jit_comp.c | 52 +++++++++++++++++++++++++++++++++++ > 1 file changed, 52 insertions(+) > > diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c > index 8ab4035dea27..1a1c3ea75ee2 100644 > --- a/arch/arm64/net/bpf_jit_comp.c > +++ b/arch/arm64/net/bpf_jit_comp.c > @@ -9,6 +9,7 @@ > > #include <linux/bitfield.h> > #include <linux/bpf.h> > +#include <linux/memory.h> > #include <linux/filter.h> > #include <linux/printk.h> > #include <linux/slab.h> > @@ -18,6 +19,7 @@ > #include <asm/cacheflush.h> > #include <asm/debug-monitors.h> > #include <asm/insn.h> > +#include <asm/patching.h> > #include <asm/set_memory.h> > > #include "bpf_jit.h" > @@ -1529,3 +1531,53 @@ void bpf_jit_free_exec(void *addr) > { > return vfree(addr); > } > + > +static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip, > + void *addr, u32 *insn) > +{ > + if (!addr) > + *insn = aarch64_insn_gen_nop(); > + else > + *insn = aarch64_insn_gen_branch_imm((unsigned long)ip, > + (unsigned long)addr, > + type); > + > + return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT; > +} > + > +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, > + void *old_addr, void *new_addr) > +{ > + int ret; > + u32 old_insn; > + u32 new_insn; > + u32 replaced; > + enum aarch64_insn_branch_type branch_type; > + > + if (poke_type == BPF_MOD_CALL) > + branch_type = AARCH64_INSN_BRANCH_LINK; This path, bpf_arch_text_poke(<ip>, BPF_MOD_CALL, ...), is what we hit when attaching a BPF program entry. 
It is exercised by selftest #232 xdp_bpf2bpf. However, with this patchset alone it will not work because we don't yet emit the ftrace patch (MOV X9, LR; NOP) as a part of the BPF prog prologue, like ftrace_init_nop() does. So the patching attempt will fail. I think that is what you referred to in your reply to Hou [1]. So my question is - is support for attaching to BPF progs in scope for this patchset? If not, then perhaps it would be better for now to fail early with something like -EOPNOTSUPP when poke_type is BPF_MOD_CALL, rather than attempt to patch the code. If you plan to enable it as a part of this patchset, then I've given it a quick try, and it seems that not a lot is needed to get fentry to BPF attachment to work. I'm including the diff for my quick and dirty attempt below. With that patch on top, the xdp_bpf2bpf tests pass: #232 xdp_bpf2bpf:OK [1] https://lore.kernel.org/bpf/d8c4f1fb-a020-9457-44e2-dc63982a9213@huawei.com/ > + else > + branch_type = AARCH64_INSN_BRANCH_NOLINK; > + > + if (gen_branch_or_nop(branch_type, ip, old_addr, &old_insn) < 0) > + return -EFAULT; > + > + if (gen_branch_or_nop(branch_type, ip, new_addr, &new_insn) < 0) > + return -EFAULT; > + > + mutex_lock(&text_mutex); > + if (aarch64_insn_read(ip, &replaced)) { > + ret = -EFAULT; > + goto out; > + } > + > + if (replaced != old_insn) { > + ret = -EFAULT; > + goto out; > + } > + > + ret = aarch64_insn_patch_text_nosync((void *)ip, new_insn); > +out: > + mutex_unlock(&text_mutex); The body of this critical section is identical to ftrace_modify_code(). Perhaps we could export it and reuse?
> + return ret; > +} --- diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 5f6bd755050f..94d8251500ab 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -240,9 +240,9 @@ static bool is_lsi_offset(int offset, int scale) /* Tail call offset to jump into */ #if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) || \ IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) -#define PROLOGUE_OFFSET 9 +#define PROLOGUE_OFFSET 11 #else -#define PROLOGUE_OFFSET 8 +#define PROLOGUE_OFFSET 10 #endif static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) @@ -281,6 +281,10 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) * */ + /* Set up ftrace patch (initially in disabled state) */ + emit(A64_MOV(1, A64_R(9), A64_LR), ctx); + emit(A64_NOP, ctx); + /* Sign lr */ if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) emit(A64_PACIASP, ctx); @@ -1888,10 +1892,16 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, u32 replaced; enum aarch64_insn_branch_type branch_type; - if (poke_type == BPF_MOD_CALL) + if (poke_type == BPF_MOD_CALL) { branch_type = AARCH64_INSN_BRANCH_LINK; - else + /* + * Adjust addr to point at the BL in the callsite. + * See ftrace_init_nop() for the callsite sequence. + */ + ip = (void *)((unsigned long)ip + AARCH64_INSN_SIZE); + } else { branch_type = AARCH64_INSN_BRANCH_NOLINK; + } if (gen_branch_or_nop(branch_type, ip, old_addr, &old_insn) < 0) return -EFAULT;
On 4/22/2022 6:54 PM, Jakub Sitnicki wrote: > Hi Xu, > > Thanks for working on this. > > We are also looking forward to using fentry hooks on arm64. > In particular, attaching to entry/exit into/from XDP progs. > > On Thu, Apr 14, 2022 at 12:22 PM -04, Xu Kuohai wrote: >> Impelment bpf_arch_text_poke() for arm64, so bpf trampoline code can use >> it to replace nop with jump, or replace jump with nop. >> >> Signed-off-by: Xu Kuohai <xukuohai@huawei.com> >> Acked-by: Song Liu <songliubraving@fb.com> >> --- >> arch/arm64/net/bpf_jit_comp.c | 52 +++++++++++++++++++++++++++++++++++ >> 1 file changed, 52 insertions(+) >> >> diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c >> index 8ab4035dea27..1a1c3ea75ee2 100644 >> --- a/arch/arm64/net/bpf_jit_comp.c >> +++ b/arch/arm64/net/bpf_jit_comp.c >> @@ -9,6 +9,7 @@ >> >> #include <linux/bitfield.h> >> #include <linux/bpf.h> >> +#include <linux/memory.h> >> #include <linux/filter.h> >> #include <linux/printk.h> >> #include <linux/slab.h> >> @@ -18,6 +19,7 @@ >> #include <asm/cacheflush.h> >> #include <asm/debug-monitors.h> >> #include <asm/insn.h> >> +#include <asm/patching.h> >> #include <asm/set_memory.h> >> >> #include "bpf_jit.h" >> @@ -1529,3 +1531,53 @@ void bpf_jit_free_exec(void *addr) >> { >> return vfree(addr); >> } >> + >> +static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip, >> + void *addr, u32 *insn) >> +{ >> + if (!addr) >> + *insn = aarch64_insn_gen_nop(); >> + else >> + *insn = aarch64_insn_gen_branch_imm((unsigned long)ip, >> + (unsigned long)addr, >> + type); >> + >> + return *insn != AARCH64_BREAK_FAULT ? 
0 : -EFAULT; >> +} >> + >> +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, >> + void *old_addr, void *new_addr) >> +{ >> + int ret; >> + u32 old_insn; >> + u32 new_insn; >> + u32 replaced; >> + enum aarch64_insn_branch_type branch_type; >> + >> + if (poke_type == BPF_MOD_CALL) >> + branch_type = AARCH64_INSN_BRANCH_LINK; > > This path, bpf_arch_text_poke(<ip>, BPF_MOD_CALL, ...), is what we hit > when attaching a BPF program entry. It is exercised by selftest #232 > xdp_bpf2bpf. > > However, with this patchset alone it will not work because we don't > emit, yet, the ftrace patch (MOV X9, LR; NOP) as a part of BPF prog > prologue, like ftrace_init_nop() does. So patching attempt will fail. > > I think that is what you mentioned to in your reply to Hou [1] > > So my question is - is support for attaching to BPF progs in scope for > this patchset? > > If no, then perhaps it would be better for now to fail early with > something like -EOPNOTSUPP when poke_type is BPF_MOD_CALL, rather then > attempt to patch the code. > > If you plan to enable it as a part of this patchset, then I've given it > a quick try, and it seems that not a lot is needed get fentry to BPF > attachment to work. > > I'm including the diff for my quick and dirty attempt below. With that > patch on top, the xdp_bpf2bpf tests pass: > > #232 xdp_bpf2bpf:OK > > [1] https://lore.kernel.org/bpf/d8c4f1fb-a020-9457-44e2-dc63982a9213@huawei.com/ > Hi Jakub, Thanks for your testing and suggestion! I added bpf2bpf poking to this series and rebased it to [2] a few days ago, so there are some conflicts with the bpf-next branch. I'll rebase it to bpf-next and send v3. 
[2] https://lore.kernel.org/bpf/20220416042940.656344-1-kuifeng@fb.com/ >> + else >> + branch_type = AARCH64_INSN_BRANCH_NOLINK; >> + >> + if (gen_branch_or_nop(branch_type, ip, old_addr, &old_insn) < 0) >> + return -EFAULT; >> + >> + if (gen_branch_or_nop(branch_type, ip, new_addr, &new_insn) < 0) >> + return -EFAULT; >> + >> + mutex_lock(&text_mutex); >> + if (aarch64_insn_read(ip, &replaced)) { >> + ret = -EFAULT; >> + goto out; >> + } >> + >> + if (replaced != old_insn) { >> + ret = -EFAULT; >> + goto out; >> + } >> + >> + ret = aarch64_insn_patch_text_nosync((void *)ip, new_insn); >> +out: >> + mutex_unlock(&text_mutex); > > The body of this critical section is identical as ftrace_modify_code(). > Perhaps we could export it and reuse? > ftrace_modify_code() is defined in the arch code, and the prototypes are not consistent across archs, so it doesn't seem appropriate to export ftrace_modify_code() as a public interface. >> + return ret; >> +} > > --- > diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c > index 5f6bd755050f..94d8251500ab 100644 > --- a/arch/arm64/net/bpf_jit_comp.c > +++ b/arch/arm64/net/bpf_jit_comp.c > @@ -240,9 +240,9 @@ static bool is_lsi_offset(int offset, int scale) > /* Tail call offset to jump into */ > #if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) || \ > IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) > -#define PROLOGUE_OFFSET 9 > +#define PROLOGUE_OFFSET 11 > #else > -#define PROLOGUE_OFFSET 8 > +#define PROLOGUE_OFFSET 10 > #endif > > static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) > @@ -281,6 +281,10 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) > * > */ > > + /* Set up ftrace patch (initially in disabled state) */ > + emit(A64_MOV(1, A64_R(9), A64_LR), ctx); > + emit(A64_NOP, ctx); > > /* Sign lr */ > if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) > emit(A64_PACIASP, ctx); > @@ -1888,10 +1892,16 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, > u32 
replaced; > enum aarch64_insn_branch_type branch_type; > > - if (poke_type == BPF_MOD_CALL) > + if (poke_type == BPF_MOD_CALL) { > branch_type = AARCH64_INSN_BRANCH_LINK; > - else > + /* > + * Adjust addr to point at the BL in the callsite. > + * See ftrace_init_nop() for the callsite sequence. > + */ > + ip = (void *)((unsigned long)ip + AARCH64_INSN_SIZE); > + } else { > branch_type = AARCH64_INSN_BRANCH_NOLINK; > + } > > if (gen_branch_or_nop(branch_type, ip, old_addr, &old_insn) < 0) > return -EFAULT; > .
On Sun, Apr 24, 2022 at 01:05 PM +08, Xu Kuohai wrote: > Thanks for your testing and suggestion! I added bpf2bpf poking to this > series and rebased it to [2] a few days ago, so there are some conflicts > with the bpf-next branch. I'll rebase it to bpf-next and send v3. > > [2] https://lore.kernel.org/bpf/20220416042940.656344-1-kuifeng@fb.com/ Looking forward to it. I think it would be okay to post v3 saying that it depends on the "Attach a cookie to a tracing program" series and won't apply cleanly to bpf-next without it. It would give us more time to review.
On 4/25/2022 10:26 PM, Jakub Sitnicki wrote: > On Sun, Apr 24, 2022 at 01:05 PM +08, Xu Kuohai wrote: >> Thanks for your testing and suggestion! I added bpf2bpf poking to this >> series and rebased it to [2] a few days ago, so there are some conflicts >> with the bpf-next branch. I'll rebase it to bpf-next and send v3. >> >> [2] https://lore.kernel.org/bpf/20220416042940.656344-1-kuifeng@fb.com/ > > Looking forward to it. > > I think it would be okay to post v3 saying that it depends on the > "Attach a cookie to a tracing program" series and won't apply cleanly to > bpf-next with out. > > It would give us more time to review. > . Ah, already sent v3 based on bpf-next :(, will send an update after [2] is merged.
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 8ab4035dea27..1a1c3ea75ee2 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -9,6 +9,7 @@ #include <linux/bitfield.h> #include <linux/bpf.h> +#include <linux/memory.h> #include <linux/filter.h> #include <linux/printk.h> #include <linux/slab.h> @@ -18,6 +19,7 @@ #include <asm/cacheflush.h> #include <asm/debug-monitors.h> #include <asm/insn.h> +#include <asm/patching.h> #include <asm/set_memory.h> #include "bpf_jit.h" @@ -1529,3 +1531,53 @@ void bpf_jit_free_exec(void *addr) { return vfree(addr); } + +static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip, + void *addr, u32 *insn) +{ + if (!addr) + *insn = aarch64_insn_gen_nop(); + else + *insn = aarch64_insn_gen_branch_imm((unsigned long)ip, + (unsigned long)addr, + type); + + return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT; +} + +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, + void *old_addr, void *new_addr) +{ + int ret; + u32 old_insn; + u32 new_insn; + u32 replaced; + enum aarch64_insn_branch_type branch_type; + + if (poke_type == BPF_MOD_CALL) + branch_type = AARCH64_INSN_BRANCH_LINK; + else + branch_type = AARCH64_INSN_BRANCH_NOLINK; + + if (gen_branch_or_nop(branch_type, ip, old_addr, &old_insn) < 0) + return -EFAULT; + + if (gen_branch_or_nop(branch_type, ip, new_addr, &new_insn) < 0) + return -EFAULT; + + mutex_lock(&text_mutex); + if (aarch64_insn_read(ip, &replaced)) { + ret = -EFAULT; + goto out; + } + + if (replaced != old_insn) { + ret = -EFAULT; + goto out; + } + + ret = aarch64_insn_patch_text_nosync((void *)ip, new_insn); +out: + mutex_unlock(&text_mutex); + return ret; +}