Message ID | 168899127520.80889.15418363018799407058.stgit@devnote2 (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | x86: kprobes: Fix CFI_CLANG related issues | expand |
On Mon, Jul 10, 2023 at 09:14:35PM +0900, Masami Hiramatsu (Google) wrote: > + if (IS_ENABLED(CONFIG_CFI_CLANG)) { > + /* > + * The compiler generates the following instruction sequence > + * for indirect call checks and cfi.c decodes this; > + * > + * movl -<id>, %r10d ; 6 bytes > + * addl -4(%reg), %r10d ; 4 bytes > + * je .Ltmp1 ; 2 bytes > + * ud2 ; <- regs->ip > + * .Ltmp1: > + * > + * Also, these movl and addl are used for showing expected > + * type. So those must not be touched. > + */ > + __addr = recover_probed_instruction(buf, addr); > + if (!__addr) > + return 0; > + > + if (insn_decode_kernel(&insn, (void *)__addr) < 0) > + return 0; > + > + if (insn.opcode.value == 0xBA) > + offset = 12; > + else if (insn.opcode.value == 0x3) > + offset = 6; > + else > + goto out; Notably, the JE will already be avoided? > + > + /* This movl/addl is used for decoding CFI. */ > + if (is_cfi_trap(addr + offset)) > + return 0; > + } > > +out: > return (addr == paddr); > } Hmm, so I was thinking something like the below, which also catches things when we rewrite kCFI to FineIBT, except I don't think we care if the FineIBT callsite gets re-written. FineIBT only relies on the __cfi_ symbol not getting poked at (which the previous patches should ensure). Additionally is_cfi_trap() is horrifically slow -- it's a full linear search of the entire kcfi_traps array, it doesn't have any smarts on (#UD can hardly be considered a fast path). So I tihnk I'm ok with the above, just adding the below for reference (completely untested and everything). Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> --- diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index f7f6042eb7e6..b812dee76909 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -293,6 +293,11 @@ static int can_probe(unsigned long paddr) #endif addr += insn.length; } + /* + * Don't allow poking the kCFI/FineIBT callsites. + */ + if (IS_ENABLED(CONFIG_CFI_CLANG) && cfi_call_site(addr)) + return 0; return (addr == paddr); } diff --git a/kernel/cfi.c b/kernel/cfi.c index 08caad776717..2656e6ffa013 100644 --- a/kernel/cfi.c +++ b/kernel/cfi.c @@ -31,16 +31,22 @@ static inline unsigned long trap_address(s32 *p) return (unsigned long)((long)p + (long)*p); } -static bool is_trap(unsigned long addr, s32 *start, s32 *end) +static long cfi_trap_distance(unsigned long addr, s32 *start, s32 *end) { + long dist = LONG_MAX; s32 *p; for (p = start; p < end; ++p) { - if (trap_address(p) == addr) - return true; + long d = trap_address(p) - addr; + + if (abs(dist) < abs(d)) { + dist = d; + if (dist == 0) + return 0; + } } - return false; + return dist; } #ifdef CONFIG_MODULES @@ -66,25 +72,25 @@ void module_cfi_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, } } -static bool is_module_cfi_trap(unsigned long addr) +static long module_cfi_trap_distance(unsigned long addr) { struct module *mod; - bool found = false; + long dist = LONG_MAX; rcu_read_lock_sched_notrace(); mod = __module_address(addr); if (mod) - found = is_trap(addr, mod->kcfi_traps, mod->kcfi_traps_end); + dist = cfi_trap_distance(addr, mod->kcfi_traps, mod->kcfi_traps_end); rcu_read_unlock_sched_notrace(); return found; } #else /* CONFIG_MODULES */ -static inline bool is_module_cfi_trap(unsigned long addr) +static long module_cfi_trap_distance(unsigned long addr) { - return false; + return LONG_MAX; } #endif /* CONFIG_MODULES */ @@ -93,9 +99,24 @@ extern s32 __stop___kcfi_traps[]; bool is_cfi_trap(unsigned long addr) { - if (is_trap(addr, __start___kcfi_traps, __stop___kcfi_traps)) + long dist = cfi_trap_distance(addr, __start___kcfi_traps, __stop___kcfi_traps); + if (!dist) + return true; + + return module_cfi_trap_distance(addr) == 0; +} + +bool cfi_call_site(unsigned long addr) +{ + long dist = cfi_trap_distance(addr, __start___kcfi_traps, __stop___kcfi_traps); + if (dist >= -12 && dist <= 0) + return true; + + dist = module_cfi_trap_distance(addr); + if (dist >= -12 && dist <= 0) return true; - return is_module_cfi_trap(addr); + return false; } + #endif /* CONFIG_ARCH_USES_CFI_TRAPS */
On Mon, Jul 10, 2023 at 06:16:43PM +0200, Peter Zijlstra wrote: > diff --git a/kernel/cfi.c b/kernel/cfi.c > index 08caad776717..2656e6ffa013 100644 > --- a/kernel/cfi.c > +++ b/kernel/cfi.c > @@ -31,16 +31,22 @@ static inline unsigned long trap_address(s32 *p) > return (unsigned long)((long)p + (long)*p); > } > > -static bool is_trap(unsigned long addr, s32 *start, s32 *end) > +static long cfi_trap_distance(unsigned long addr, s32 *start, s32 *end) > { > + long dist = LONG_MAX; > s32 *p; > > for (p = start; p < end; ++p) { > - if (trap_address(p) == addr) > - return true; > + long d = trap_address(p) - addr; > + > + if (abs(dist) < abs(d)) { Not that I expect anybody will care, but that should obviously be: abs(d) < abs(dist) > + dist = d; > + if (dist == 0) > + return 0; > + } > } > > - return false; > + return dist; > }
On Mon, 10 Jul 2023 18:16:43 +0200 Peter Zijlstra <peterz@infradead.org> wrote: > On Mon, Jul 10, 2023 at 09:14:35PM +0900, Masami Hiramatsu (Google) wrote: > > + if (IS_ENABLED(CONFIG_CFI_CLANG)) { > > + /* > > + * The compiler generates the following instruction sequence > > + * for indirect call checks and cfi.c decodes this; > > + * > > + * movl -<id>, %r10d ; 6 bytes > > + * addl -4(%reg), %r10d ; 4 bytes > > + * je .Ltmp1 ; 2 bytes > > + * ud2 ; <- regs->ip > > + * .Ltmp1: > > + * > > + * Also, these movl and addl are used for showing expected > > + * type. So those must not be touched. > > + */ > > + __addr = recover_probed_instruction(buf, addr); > > + if (!__addr) > > + return 0; > > + > > + if (insn_decode_kernel(&insn, (void *)__addr) < 0) > > + return 0; > > + > > + if (insn.opcode.value == 0xBA) > > + offset = 12; > > + else if (insn.opcode.value == 0x3) > > + offset = 6; > > + else > > + goto out; > > Notably, the JE will already be avoided? > > > + > > + /* This movl/addl is used for decoding CFI. */ > > + if (is_cfi_trap(addr + offset)) > > + return 0; > > + } > > > > +out: > > return (addr == paddr); > > } > > Hmm, so I was thinking something like the below, which also catches > things when we rewrite kCFI to FineIBT, except I don't think we care if > the FineIBT callsite gets re-written. FineIBT only relies on the __cfi_ > symbol not getting poked at (which the previous patches should ensure). Oh, is FineIBT different from kCFI? I thought those are same. But anyway for kCFI=y && FineIBT=n case, I think this code still needed. > > Additionally is_cfi_trap() is horrifically slow -- it's a full linear > search of the entire kcfi_traps array, it doesn't have any smarts on > (#UD can hardly be considered a fast path). Yeah, register_kprobe() is not a fast path in most cases. So I think this is OK at this point. We can speed it up by sorting the array by address and binary search later. > > So I tihnk I'm ok with the above, just adding the below for reference > (completely untested and everything). I wonder the distance can be used outside of x86. CFI implementation depends on the arch? > > > Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Thanks! > > > --- > diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c > index f7f6042eb7e6..b812dee76909 100644 > --- a/arch/x86/kernel/kprobes/core.c > +++ b/arch/x86/kernel/kprobes/core.c > @@ -293,6 +293,11 @@ static int can_probe(unsigned long paddr) > #endif > addr += insn.length; > } > + /* > + * Don't allow poking the kCFI/FineIBT callsites. > + */ > + if (IS_ENABLED(CONFIG_CFI_CLANG) && cfi_call_site(addr)) > + return 0; > > return (addr == paddr); > } > diff --git a/kernel/cfi.c b/kernel/cfi.c > index 08caad776717..2656e6ffa013 100644 > --- a/kernel/cfi.c > +++ b/kernel/cfi.c > @@ -31,16 +31,22 @@ static inline unsigned long trap_address(s32 *p) > return (unsigned long)((long)p + (long)*p); > } > > -static bool is_trap(unsigned long addr, s32 *start, s32 *end) > +static long cfi_trap_distance(unsigned long addr, s32 *start, s32 *end) > { > + long dist = LONG_MAX; > s32 *p; > > for (p = start; p < end; ++p) { > - if (trap_address(p) == addr) > - return true; > + long d = trap_address(p) - addr; > + > + if (abs(dist) < abs(d)) { > + dist = d; > + if (dist == 0) > + return 0; > + } > } > > - return false; > + return dist; > } > > #ifdef CONFIG_MODULES > @@ -66,25 +72,25 @@ void module_cfi_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, > } > } > > -static bool is_module_cfi_trap(unsigned long addr) > +static long module_cfi_trap_distance(unsigned long addr) > { > struct module *mod; > - bool found = false; > + long dist = LONG_MAX; > > rcu_read_lock_sched_notrace(); > > mod = __module_address(addr); > if (mod) > - found = is_trap(addr, mod->kcfi_traps, mod->kcfi_traps_end); > + dist = cfi_trap_distance(addr, mod->kcfi_traps, mod->kcfi_traps_end); > > rcu_read_unlock_sched_notrace(); > > return found; > } > #else /* CONFIG_MODULES */ > -static inline bool is_module_cfi_trap(unsigned long addr) > +static long module_cfi_trap_distance(unsigned long addr) > { > - return false; > + return LONG_MAX; > } > #endif /* CONFIG_MODULES */ > > @@ -93,9 +99,24 @@ extern s32 __stop___kcfi_traps[]; > > bool is_cfi_trap(unsigned long addr) > { > - if (is_trap(addr, __start___kcfi_traps, __stop___kcfi_traps)) > + long dist = cfi_trap_distance(addr, __start___kcfi_traps, __stop___kcfi_traps); > + if (!dist) > + return true; > + > + return module_cfi_trap_distance(addr) == 0; > +} > + > +bool cfi_call_site(unsigned long addr) > +{ > + long dist = cfi_trap_distance(addr, __start___kcfi_traps, __stop___kcfi_traps); > + if (dist >= -12 && dist <= 0) > + return true; > + > + dist = module_cfi_trap_distance(addr); > + if (dist >= -12 && dist <= 0) > return true; > > - return is_module_cfi_trap(addr); > + return false; > } > + > #endif /* CONFIG_ARCH_USES_CFI_TRAPS */
On Tue, Jul 11, 2023 at 08:58:37AM +0900, Masami Hiramatsu wrote: > Oh, is FineIBT different from kCFI? I thought those are same. But anyway > for kCFI=y && FineIBT=n case, I think this code still needed. Yeah, FineIBT relies on kCFI and IBT (and selects CALL_PADDING) and dynamically re-writes the kCFI infrastructure. All the gory details are in arch/x86/kernel/alternative.c, search for CONFIG_FINEIBT, I've tried to write a big comment, but please let me know if something isn't clear and I'll write some actual words :-). But basically kCFI is a pure software solution and does the check before the indirect control transfer (it has to). While FineIBT relies on the IBT hardware in order to do the check after. As such, FineIBT can do the CFI check without memory loads, which is good for performance. Another useful property of FineIBT is that it is a speculation barrier.
On Tue, Jul 11, 2023 at 08:58:37AM +0900, Masami Hiramatsu wrote: > > Hmm, so I was thinking something like the below, which also catches > > things when we rewrite kCFI to FineIBT, except I don't think we care if > > the FineIBT callsite gets re-written. FineIBT only relies on the __cfi_ > > symbol not getting poked at (which the previous patches should ensure). > > Oh, is FineIBT different from kCFI? I thought those are same. But anyway > for kCFI=y && FineIBT=n case, I think this code still needed. Ah, I forgot to answer your other question; FineIBT is dynamically patched since it relies on optional hardware features, only if the hardware has IBT support can FineIBT work. So yeah, your check is definitely needed. And I think it'll 'gracefully' fail when FineIBT is patched in because the instructions aren't exactly the same. And since FineIBT has most of the magic in the __cfi_ prefix, which isn't patched per the previous patch, I think we're good on the call-site. > > So I tihnk I'm ok with the above, just adding the below for reference > > (completely untested and everything). > > I wonder the distance can be used outside of x86. CFI implementation > depends on the arch? Currently only arm64 and x86_64 have CFI, and while the particulars are a little different, they do have a lot in common, including the reporting. IIRC the arm64 variant is a little simpler because of fixed instruction size. There is no worry someone will jump in the middle of an instruction stream.
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index f7f6042eb7e6..fa8c2b41cbaf 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -54,6 +54,7 @@ #include <asm/insn.h> #include <asm/debugreg.h> #include <asm/ibt.h> +#include <asm/cfi.h> #include "common.h" @@ -293,7 +294,40 @@ static int can_probe(unsigned long paddr) #endif addr += insn.length; } + if (IS_ENABLED(CONFIG_CFI_CLANG)) { + /* + * The compiler generates the following instruction sequence + * for indirect call checks and cfi.c decodes this; + * + * movl -<id>, %r10d ; 6 bytes + * addl -4(%reg), %r10d ; 4 bytes + * je .Ltmp1 ; 2 bytes + * ud2 ; <- regs->ip + * .Ltmp1: + * + * Also, these movl and addl are used for showing expected + * type. So those must not be touched. + */ + __addr = recover_probed_instruction(buf, addr); + if (!__addr) + return 0; + + if (insn_decode_kernel(&insn, (void *)__addr) < 0) + return 0; + + if (insn.opcode.value == 0xBA) + offset = 12; + else if (insn.opcode.value == 0x3) + offset = 6; + else + goto out; + + /* This movl/addl is used for decoding CFI. */ + if (is_cfi_trap(addr + offset)) + return 0; + } +out: return (addr == paddr); }