| Message ID | 20231020141358.643575-2-hbathini@linux.ibm.com (mailing list archive) |
|---|---|
| State | Not Applicable |
| Delegated to: | BPF |
| Series | powerpc/bpf: use BPF prog pack allocator |

| Context | Check | Description |
|---|---|---|
| bpf/vmtest-bpf-next-PR | success | PR summary |
| bpf/vmtest-bpf-next-VM_Test-0 | success | Logs for ShellCheck |
| netdev/tree_selection | success | Not a local patch, async |

Hari Bathini <hbathini@linux.ibm.com> writes:

> patch_instruction() entails setting up pte, patching the instruction,
> clearing the pte and flushing the tlb. If multiple instructions need
> to be patched, every instruction would have to go through the above
> drill unnecessarily. Instead, introduce patch_instructions() function
> that sets up the pte, clears the pte and flushes the tlb only once
> per page range of instructions to be patched. Duplicate most of the
> patch_instruction() code instead of merging with it, to avoid the
> performance degradation observed on ppc32, for patch_instruction(),
> with the code path merged. Also, setup poking_init() always as BPF
> expects poking_init() to be setup even when STRICT_KERNEL_RWX is off.
>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> Acked-by: Song Liu <song@kernel.org>
>

A lot of this is a duplicate of patch_instruction(). Can we consolidate
things between them?

> ---
>
> Changes in v7:
> * Fixed crash observed with !STRICT_RWX.
>
>  arch/powerpc/include/asm/code-patching.h |   1 +
>  arch/powerpc/lib/code-patching.c         | 141 ++++++++++++++++++++++-
>  2 files changed, 139 insertions(+), 3 deletions(-)
>
> [...]
>
> +/*
> + * Patch 'addr' with 'len' bytes of instructions from 'code'.
> + *
> + * If repeat_instr is true, the same instruction is filled for
> + * 'len' bytes.
> + */
> +int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
> +{

Will this break with prefix instructions?

> +	while (len > 0) {
> [...]
> --
> 2.41.0
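
For context, a minimal sketch of the consolidation being asked about here, assuming the patch_instructions() added by this patch: patch_instruction() could, in principle, become a thin wrapper over it. This is illustrative only and is not what the patch does; as the reply below explains, the series keeps the two paths separate because routing patch_instruction() through the common code measurably slowed it down on ppc32.

```c
/*
 * Illustrative sketch only, not part of this patch: one possible way
 * to consolidate the two entry points, at the cost of extra overhead
 * in the hot single-instruction path.
 */
int patch_instruction(u32 *addr, ppc_inst_t instr)
{
	u32 buf[2];

	/* ppc_inst_write() stores the instruction word and, if prefixed, the suffix word */
	ppc_inst_write(buf, instr);

	/* ppc_inst_len() is 8 for prefixed instructions, 4 otherwise */
	return patch_instructions(addr, buf, ppc_inst_len(instr), false);
}
```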

Hi Aneesh,

On 30/10/23 6:32 pm, Aneesh Kumar K.V wrote:
> Hari Bathini <hbathini@linux.ibm.com> writes:
>
>> patch_instruction() entails setting up pte, patching the instruction,
>> clearing the pte and flushing the tlb. If multiple instructions need
>> to be patched, every instruction would have to go through the above
>> drill unnecessarily. Instead, introduce patch_instructions() function
>> that sets up the pte, clears the pte and flushes the tlb only once
>> per page range of instructions to be patched. Duplicate most of the
>> patch_instruction() code instead of merging with it, to avoid the
>> performance degradation observed on ppc32, for patch_instruction(),
>> with the code path merged. Also, setup poking_init() always as BPF
>> expects poking_init() to be setup even when STRICT_KERNEL_RWX is off.
>>
>> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
>> Acked-by: Song Liu <song@kernel.org>
>
> A lot of this is a duplicate of patch_instruction(). Can we consolidate
> things between them?

True. The code was consolidated till v5, but most of it had to be
duplicated to avoid the performance degradation reported on ppc32:

https://lore.kernel.org/all/6cceb564-8b52-4d98-9118-92a914f4871e@csgroup.eu/

>> [...]
>>
>> +/*
>> + * Patch 'addr' with 'len' bytes of instructions from 'code'.
>> + *
>> + * If repeat_instr is true, the same instruction is filled for
>> + * 'len' bytes.
>> + */
>> +int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
>> +{
>
> Will this break with prefix instructions?

No, afaics, unless the caller fails to set up the code buffer
appropriately.

Thanks
Hari
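
To make the prefix-instruction point concrete, here is a caller-side sketch of what "setting up the code buffer appropriately" means for the repeat case. In the patch, __patch_instructions() reads a single instruction from 'code' via ppc_inst_read() and, when it is prefixed, replicates it with memset64(), so the buffer only needs to hold the prefix and suffix words and 'len' should be a multiple of 8. The helper name below is hypothetical and only illustrates the expected layout.

```c
/* Hypothetical helper, shown only to illustrate the expected buffer layout. */
static int fill_region_with_insn(u32 *dst, ppc_inst_t insn, size_t len)
{
	u32 buf[2];

	/* prefixed instructions are replicated 8 bytes at a time */
	if (ppc_inst_prefixed(insn) && (len % 8 || (unsigned long)dst % 8))
		return -EINVAL;

	ppc_inst_write(buf, insn);	/* prefix word (and suffix word, if any) */

	return patch_instructions(dst, buf, len, true);
}
```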

diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 3f881548fb61..0e29ccf903d0 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -74,6 +74,7 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
 int patch_branch(u32 *addr, unsigned long target, int flags);
 int patch_instruction(u32 *addr, ppc_inst_t instr);
 int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
+int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr);
 
 static inline unsigned long patch_site_addr(s32 *site)
 {
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index b00112d7ad46..e1c1fd9246d8 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -204,9 +204,6 @@ void __init poking_init(void)
 {
 	int ret;
 
-	if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
-		return;
-
 	if (mm_patch_enabled())
 		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
 					"powerpc/text_poke_mm:online",
@@ -378,6 +375,144 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
 }
 NOKPROBE_SYMBOL(patch_instruction);
 
+static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool repeat_instr)
+{
+	unsigned long start = (unsigned long)patch_addr;
+
+	/* Repeat instruction */
+	if (repeat_instr) {
+		ppc_inst_t instr = ppc_inst_read(code);
+
+		if (ppc_inst_prefixed(instr)) {
+			u64 val = ppc_inst_as_ulong(instr);
+
+			memset64((u64 *)patch_addr, val, len / 8);
+		} else {
+			u32 val = ppc_inst_val(instr);
+
+			memset32(patch_addr, val, len / 4);
+		}
+	} else {
+		memcpy(patch_addr, code, len);
+	}
+
+	smp_wmb();	/* smp write barrier */
+	flush_icache_range(start, start + len);
+	return 0;
+}
+
+/*
+ * A page is mapped and instructions that fit the page are patched.
+ * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
+ */
+static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool repeat_instr)
+{
+	struct mm_struct *patching_mm, *orig_mm;
+	unsigned long pfn = get_patch_pfn(addr);
+	unsigned long text_poke_addr;
+	spinlock_t *ptl;
+	u32 *patch_addr;
+	pte_t *pte;
+	int err;
+
+	patching_mm = __this_cpu_read(cpu_patching_context.mm);
+	text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
+	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+	pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
+	if (!pte)
+		return -ENOMEM;
+
+	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+
+	/* order PTE update before use, also serves as the hwsync */
+	asm volatile("ptesync" ::: "memory");
+
+	/* order context switch after arbitrary prior code */
+	isync();
+
+	orig_mm = start_using_temp_mm(patching_mm);
+
+	err = __patch_instructions(patch_addr, code, len, repeat_instr);
+
+	/* context synchronisation performed by __patch_instructions */
+	stop_using_temp_mm(patching_mm, orig_mm);
+
+	pte_clear(patching_mm, text_poke_addr, pte);
+	/*
+	 * ptesync to order PTE update before TLB invalidation done
+	 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
+	 */
+	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
+
+	pte_unmap_unlock(pte, ptl);
+
+	return err;
+}
+
+/*
+ * A page is mapped and instructions that fit the page are patched.
+ * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
+ */
+static int __do_patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
+{
+	unsigned long pfn = get_patch_pfn(addr);
+	unsigned long text_poke_addr;
+	u32 *patch_addr;
+	pte_t *pte;
+	int err;
+
+	text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
+	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+	pte = __this_cpu_read(cpu_patching_context.pte);
+	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+	/* See ptesync comment in radix__set_pte_at() */
+	if (radix_enabled())
+		asm volatile("ptesync" ::: "memory");
+
+	err = __patch_instructions(patch_addr, code, len, repeat_instr);
+
+	pte_clear(&init_mm, text_poke_addr, pte);
+	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
+
+	return err;
+}
+
+/*
+ * Patch 'addr' with 'len' bytes of instructions from 'code'.
+ *
+ * If repeat_instr is true, the same instruction is filled for
+ * 'len' bytes.
+ */
+int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
+{
+	while (len > 0) {
+		unsigned long flags;
+		size_t plen;
+		int err;
+
+		plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len);
+
+		local_irq_save(flags);
+		if (mm_patch_enabled())
+			err = __do_patch_instructions_mm(addr, code, plen, repeat_instr);
+		else
+			err = __do_patch_instructions(addr, code, plen, repeat_instr);
+		local_irq_restore(flags);
+		if (err)
+			return err;
+
+		len -= plen;
+		addr = (u32 *)((unsigned long)addr + plen);
+		if (!repeat_instr)
+			code = (u32 *)((unsigned long)code + plen);
+	}
+
+	return 0;
+}
+NOKPROBE_SYMBOL(patch_instructions);
+
 int patch_branch(u32 *addr, unsigned long target, int flags)
 {
 	ppc_inst_t instr;
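
For completeness, a rough sketch of how the BPF prog pack side of this series might consume patch_instructions(): copying JITed code into the read-only prog pack with repeat_instr = false, and poisoning freed regions by repeating a single trap instruction. This is only a sketch of the intended usage; the names follow the generic BPF arch hooks, and the real implementations live in the other patch of the series, which is not shown on this page.

```c
void *bpf_arch_text_copy(void *dst, void *src, size_t len)
{
	int err;

	/* serialize with other text patching, as patch_instruction() callers do */
	mutex_lock(&text_mutex);
	err = patch_instructions(dst, src, len, false);
	mutex_unlock(&text_mutex);

	return err ? ERR_PTR(err) : dst;
}

int bpf_arch_text_invalidate(void *dst, size_t len)
{
	u32 insn = BREAKPOINT_INSTRUCTION;	/* trap */
	int ret;

	/* fill the freed range with one repeated trap instruction */
	mutex_lock(&text_mutex);
	ret = patch_instructions(dst, &insn, len, true);
	mutex_unlock(&text_mutex);

	return ret;
}
```

Because the pte setup, tlb flush, and icache flush happen once per page range rather than once per instruction, this is where the series gets its speedup when populating or invalidating large prog pack regions.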