| Message ID | 1499724283-30719-3-git-send-email-labbott@redhat.com (mailing list archive) |
|---|---|
| State | New, archived |
Hi Laura, On Mon, Jul 10, 2017 at 03:04:43PM -0700, Laura Abbott wrote: > Implementation of stackleak based heavily on the x86 version Nice! > Signed-off-by: Laura Abbott <labbott@redhat.com> > --- > The biggest points that need review here: > - Is the extra offsetting (subtracting/adding from the stack) correct? I think it's a little off; more on that below. > - Where else do we need to clear the stack? I guess we might need to clear (all of the remainder of) the stack after invoking EFI runtime services -- those can run in task context, might leave sensitive values on the stack, and they're uninstrumented. The same would apply for x86. I think we can ignore garbage left on the stack by idle/hotplug, since that happens in the idle thread, so we shouldn't be doing uaccess transfers on those stacks. > - The assembly can almost certainly be optimized. I tried to keep the > behavior of the x86 'repe scasq' and the like where possible. I'm also a > terrible register allocator. I tried to steer clear of code golf here, but I didn't entirely manage. I also don't know x86 asm, so it's very possible I've just managed to confuse myself. > --- > arch/arm64/Kconfig | 1 + > arch/arm64/include/asm/processor.h | 3 ++ > arch/arm64/kernel/asm-offsets.c | 3 ++ > arch/arm64/kernel/entry.S | 92 +++++++++++++++++++++++++++++++++++ > arch/arm64/kernel/process.c | 18 +++++++ > drivers/firmware/efi/libstub/Makefile | 3 +- > scripts/Makefile.gcc-plugins | 5 +- > 7 files changed, 123 insertions(+), 2 deletions(-) > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index 8addb85..0b65bfc 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -17,6 +17,7 @@ config ARM64 > select ARCH_HAS_KCOV > select ARCH_HAS_SET_MEMORY > select ARCH_HAS_SG_CHAIN > + select ARCH_HAS_STACKLEAK > select ARCH_HAS_STRICT_KERNEL_RWX > select ARCH_HAS_STRICT_MODULE_RWX > select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST > diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h > index 64c9e78..76f2738 100644 > --- a/arch/arm64/include/asm/processor.h > +++ b/arch/arm64/include/asm/processor.h > @@ -88,6 +88,9 @@ struct thread_struct { > unsigned long fault_address; /* fault info */ > unsigned long fault_code; /* ESR_EL1 value */ > struct debug_info debug; /* debugging */ > +#ifdef CONFIG_STACKLEAK > + unsigned long lowest_stack; > +#endif > }; > > #ifdef CONFIG_COMPAT > diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c > index b3bb7ef..e0a5ae2 100644 > --- a/arch/arm64/kernel/asm-offsets.c > +++ b/arch/arm64/kernel/asm-offsets.c > @@ -43,6 +43,9 @@ int main(void) > DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); > #endif > DEFINE(TSK_STACK, offsetof(struct task_struct, stack)); > +#ifdef CONFIG_STACKLEAK > + DEFINE(TSK_TI_LOWEST_STACK, offsetof(struct task_struct, thread.lowest_stack)); > +#endif > BLANK(); > DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context)); > BLANK(); > diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S > index b738880..e573633 100644 > --- a/arch/arm64/kernel/entry.S > +++ b/arch/arm64/kernel/entry.S > @@ -744,6 +744,7 @@ ENDPROC(cpu_switch_to) > */ > ret_fast_syscall: > disable_irq // disable interrupts > + bl erase_kstack > str x0, [sp, #S_X0] // returned x0 > ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing > and x2, x1, #_TIF_SYSCALL_WORK > @@ -772,6 +773,7 @@ work_pending: > */ > ret_to_user: > disable_irq // disable interrupts > + bl erase_kstack 
> ldr x1, [tsk, #TSK_TI_FLAGS] > and x2, x1, #_TIF_WORK_MASK > cbnz x2, work_pending > @@ -865,3 +867,93 @@ ENTRY(sys_rt_sigreturn_wrapper) > mov x0, sp > b sys_rt_sigreturn > ENDPROC(sys_rt_sigreturn_wrapper) > + > +#ifdef CONFIG_STACKLEAK > +ENTRY(erase_kstack) > + /* save x0 for the fast path */ > + mov x10, x0 > + > + get_thread_info x0 > + ldr x1, [x0, #TSK_TI_LOWEST_STACK] > + > + /* get the number of bytes to check for lowest stack */ > + mov x3, x1 > + and x3, x3, #THREAD_SIZE - 1 > + lsr x3, x3, #3 > + > + /* now generate the start of the stack */ > + mov x4, sp > + movn x2, #THREAD_SIZE - 1 > + and x1, x4, x2 > + > + mov x2, #-0xBEEF /* stack poison */ > + > + cmp x3, #0 > + b.eq 4f /* Found nothing, go poison */ > + > +1: > + ldr x4, [x1, x3, lsl #3] > + cmp x4, x2 /* did we find the poison? */ > + b.eq 2f /* yes we did, go check it */ > + > + sub x3, x3, #1 > + cmp x3, #0 > + b.eq 4f /* Found nothing, go poison */ > + b 1b /* loop again */ > + > +2: IIUC, at this point, x1 was the start (lowest addr) of the stack, and x3 was the quadword index of the first poison on that stack. The x86 asm implicitly held that [x1, x3, lsl #3] address in RDI, but we don't have a copy... > + cmp x3, #16 > + b.ls 4f /* Go poison if there are less than 16 things left? */ > + > + mov x3, #16 ... and here we blat the index without saving it, meaning we always jump to close to the start of the stack, which I don't think was intentional. So then we fall though to the below... > +3: > + ldr x4, [x1, x3, lsl #3] > + cmp x4, x2 /* did we find the poison? */ > + b.ne 1b /* nope we have to check deeper */ > + > + sub x3, x3, #1 > + cmp x3, #0 > + b.eq 4f /* Found nothing, go poison */ > + b 3b /* loop again */ ... where we either: * find 16 contiguous poison entries, leaving x3 == 0, or: * we immediately find a non-poison value, and jump back to 1b. If there are 16 non-poison values, we're left with x3 == 0, otherwise we bail out and jump to 4f with x3 in the interval [0,15]. .... or I've completely confused myself, as suggested above. We might not have x86's fancy string instructions, but we could do something equally impenetrable: mov x5, #0 1: cbz x3, 4f ldr x4, [x1, x3, lsl #3] cmp x4, x2 csinc x5, xzr, x5, ne tbz x5, #4, 4f // found 16 poisons? sub x3, x3, #1 b 1b That doesn't stop 16 slots early, so it can return any value of x3 all the way down to zero. > + > + /* The poison function */ > +4: > + /* Generate the address we found */ > + add x5, x1, x3, lsl #3 > + orr x5, x5, #16 Have you figured out what the forced 16 byte offset is for? I've not managed to figure out why it's there, and it looks like Alexander couldn't either, judging by his comments in the x86 asm. > + > + mov x4, sp > + /* subtrace the current pointer */ > + sub x8, x4, x5 > + > + /* subtract one more so we don't touch the current sp */ > + sub x8, x8, #1 > + > + /* sanity check */ > + cmp x8, #THREAD_SIZE > + b.lo 5f > +999: > + b 999b > + > +5: > + lsr x8, x8, #3 > + mov x3, x8 > +6: > + cmp x3, #0 > + b.eq 7f > + > + str x2, [x1, x3, lsl #3] > + sub x3, x3, #1 > + b 6b > + > + /* Reset the lowest stack to the top of the stack */ > +7: > + ldr x1, [x0, TSK_STACK] > + add x1, x1, #THREAD_SIZE > + sub x1, x1, #256 > + str x1, [x0, #TSK_TI_LOWEST_STACK] I take it this is the offsetting you were querying? I don't think it's quite right. 
Our stack looks like: +---+ <- task_stack_page(p) + THREAD_SIZE | | +---+ <- task_stack_page(p) + THREAD_START_SP | | | | +---+ <- task_pt_regs(p) | | | | | | ~~~~~ ~~~~~ | | | | | | +---+ <- task_stack_page(p) At the point we return to userspace, sp == task_pt_regs(p). Judging by a generated asm-offsets.h, sizeof(struct_pt_regs) is 304 bytes currently. THREAD_SIZE - THREAD_START_SP == 16. We probably want to give that 16 a mnemonic (e.g FRAME_PADDING), and have something like: ldr x1, [x0, TSK_STACK] add x1, x1, #THREAD_SIZE sub x1, x1, #(S_FRAME_SIZE + FRAME_PADDING) str x1, [x0, #TSK_TI_LOWEST_STACK] > + > + mov x0, x10 > + ret > +ENDPROC(erase_kstack) > +#endif > diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c > index 659ae80..1b6cca2 100644 > --- a/arch/arm64/kernel/process.c > +++ b/arch/arm64/kernel/process.c > @@ -293,6 +293,12 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, > p->thread.cpu_context.pc = (unsigned long)ret_from_fork; > p->thread.cpu_context.sp = (unsigned long)childregs; > > +#ifdef CONFIG_STACKLEAK > + p->thread.lowest_stack = (unsigned long)task_stack_page(p) + > + 2 * sizeof(unsigned long); > +#endif I see this follows the x86 logic, but I don't see why it's necessary to offset this end of the stack. Do you have an idea as to what this is for? > + > + > ptrace_hw_copy_thread(p); > > return 0; > @@ -417,3 +423,15 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) > else > return randomize_page(mm->brk, SZ_1G); > } > + > +#ifdef CONFIG_STACKLEAK > +void __used check_alloca(unsigned long size) > +{ > + unsigned long sp = (unsigned long)&sp, stack_left; Nit: please use current_stack_pointer. Thanks, Mark.
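For reference, check_alloca() from this patch with Mark's current_stack_pointer
nit applied would look roughly like the following. This is only a sketch: it
assumes arm64's current_stack_pointer register alias is visible here, and it
reuses the 256-byte guard from the patch unchanged.

    void __used check_alloca(unsigned long size)
    {
            unsigned long sp = current_stack_pointer;
            unsigned long stack_left;

            /* all kernel stacks are of the same size */
            stack_left = sp & (THREAD_SIZE - 1);

            /* keep a 256-byte guard and refuse allocas that would exceed it */
            BUG_ON(stack_left < 256 || size >= stack_left - 256);
    }
    EXPORT_SYMBOL(check_alloca);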
On Tue, Jul 11, 2017 at 08:51:55PM +0100, Mark Rutland wrote:
> On Mon, Jul 10, 2017 at 03:04:43PM -0700, Laura Abbott wrote:
> > +	/* Reset the lowest stack to the top of the stack */
> > +7:
> > +	ldr x1, [x0, TSK_STACK]
> > +	add x1, x1, #THREAD_SIZE
> > +	sub x1, x1, #256
> > +	str x1, [x0, #TSK_TI_LOWEST_STACK]
>
> I take it this is the offsetting you were querying?
>
> I don't think it's quite right. Our stack looks like:
>
> +---+ <- task_stack_page(p) + THREAD_SIZE
> |   |
> +---+ <- task_stack_page(p) + THREAD_START_SP
> |   |
> |   |
> +---+ <- task_pt_regs(p)
> |   |
> |   |
> |   |
> ~~~~~
>
> ~~~~~
> |   |
> |   |
> |   |
> +---+ <- task_stack_page(p)
>
> At the point we return to userspace, sp == task_pt_regs(p).
>
> Judging by a generated asm-offsets.h, sizeof(struct_pt_regs) is 304
> bytes currently. THREAD_SIZE - THREAD_START_SP == 16.
>
> We probably want to give that 16 a mnemonic (e.g FRAME_PADDING), and
> have something like:
>
> 	ldr x1, [x0, TSK_STACK]
> 	add x1, x1, #THREAD_SIZE
> 	sub x1, x1, #(S_FRAME_SIZE + FRAME_PADDING)
> 	str x1, [x0, #TSK_TI_LOWEST_STACK]

Thinking about it, given that sp == task_pt_regs(p), we could just do:

	mov x1, sp
	str x1, [x0, #TSK_TI_LOWEST_STACK]

... unless I've managed to lose the plot here.

Mark.
Hello Mark,

On 11.07.2017 22:51, Mark Rutland wrote:
> On Mon, Jul 10, 2017 at 03:04:43PM -0700, Laura Abbott wrote:
>> - Where else do we need to clear the stack?
>
> I guess we might need to clear (all of the remainder of) the stack after
> invoking EFI runtime services -- those can run in task context, might
> leave sensitive values on the stack, and they're uninstrumented. The
> same would apply for x86.

Thanks, I've added this to the TODO list.

> I think we can ignore garbage left on the stack by idle/hotplug, since
> that happens in the idle thread, so we shouldn't be doing uaccess
> transfers on those stacks.

Excuse me, I didn't understand what you mean. erase_kstack() is called at the
end of a syscall, before returning to userspace.

Best regards,
Alexander
On 07/11/2017 12:51 PM, Mark Rutland wrote: > Hi Laura, > > On Mon, Jul 10, 2017 at 03:04:43PM -0700, Laura Abbott wrote: >> Implementation of stackleak based heavily on the x86 version > > Nice! > >> Signed-off-by: Laura Abbott <labbott@redhat.com> >> --- >> The biggest points that need review here: >> - Is the extra offsetting (subtracting/adding from the stack) correct? > > I think it's a little off; more on that below. > >> - Where else do we need to clear the stack? > > I guess we might need to clear (all of the remainder of) the stack after > invoking EFI runtime services -- those can run in task context, might > leave sensitive values on the stack, and they're uninstrumented. The > same would apply for x86. > > I think we can ignore garbage left on the stack by idle/hotplug, since > that happens in the idle thread, so we shouldn't be doing uaccess > transfers on those stacks. > >> - The assembly can almost certainly be optimized. I tried to keep the >> behavior of the x86 'repe scasq' and the like where possible. I'm also a >> terrible register allocator. > > I tried to steer clear of code golf here, but I didn't entirely manage. > > I also don't know x86 asm, so it's very possible I've just managed to > confuse myself. > I know enough x86 asm to confuse myself as you can see below >> --- >> arch/arm64/Kconfig | 1 + >> arch/arm64/include/asm/processor.h | 3 ++ >> arch/arm64/kernel/asm-offsets.c | 3 ++ >> arch/arm64/kernel/entry.S | 92 +++++++++++++++++++++++++++++++++++ >> arch/arm64/kernel/process.c | 18 +++++++ >> drivers/firmware/efi/libstub/Makefile | 3 +- >> scripts/Makefile.gcc-plugins | 5 +- >> 7 files changed, 123 insertions(+), 2 deletions(-) >> >> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >> index 8addb85..0b65bfc 100644 >> --- a/arch/arm64/Kconfig >> +++ b/arch/arm64/Kconfig >> @@ -17,6 +17,7 @@ config ARM64 >> select ARCH_HAS_KCOV >> select ARCH_HAS_SET_MEMORY >> select ARCH_HAS_SG_CHAIN >> + select ARCH_HAS_STACKLEAK >> select ARCH_HAS_STRICT_KERNEL_RWX >> select ARCH_HAS_STRICT_MODULE_RWX >> select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST >> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h >> index 64c9e78..76f2738 100644 >> --- a/arch/arm64/include/asm/processor.h >> +++ b/arch/arm64/include/asm/processor.h >> @@ -88,6 +88,9 @@ struct thread_struct { >> unsigned long fault_address; /* fault info */ >> unsigned long fault_code; /* ESR_EL1 value */ >> struct debug_info debug; /* debugging */ >> +#ifdef CONFIG_STACKLEAK >> + unsigned long lowest_stack; >> +#endif >> }; >> >> #ifdef CONFIG_COMPAT >> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c >> index b3bb7ef..e0a5ae2 100644 >> --- a/arch/arm64/kernel/asm-offsets.c >> +++ b/arch/arm64/kernel/asm-offsets.c >> @@ -43,6 +43,9 @@ int main(void) >> DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); >> #endif >> DEFINE(TSK_STACK, offsetof(struct task_struct, stack)); >> +#ifdef CONFIG_STACKLEAK >> + DEFINE(TSK_TI_LOWEST_STACK, offsetof(struct task_struct, thread.lowest_stack)); >> +#endif >> BLANK(); >> DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context)); >> BLANK(); >> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S >> index b738880..e573633 100644 >> --- a/arch/arm64/kernel/entry.S >> +++ b/arch/arm64/kernel/entry.S >> @@ -744,6 +744,7 @@ ENDPROC(cpu_switch_to) >> */ >> ret_fast_syscall: >> disable_irq // disable interrupts >> + bl erase_kstack >> str x0, [sp, #S_X0] 
// returned x0 >> ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing >> and x2, x1, #_TIF_SYSCALL_WORK >> @@ -772,6 +773,7 @@ work_pending: >> */ >> ret_to_user: >> disable_irq // disable interrupts >> + bl erase_kstack >> ldr x1, [tsk, #TSK_TI_FLAGS] >> and x2, x1, #_TIF_WORK_MASK >> cbnz x2, work_pending >> @@ -865,3 +867,93 @@ ENTRY(sys_rt_sigreturn_wrapper) >> mov x0, sp >> b sys_rt_sigreturn >> ENDPROC(sys_rt_sigreturn_wrapper) >> + >> +#ifdef CONFIG_STACKLEAK >> +ENTRY(erase_kstack) >> + /* save x0 for the fast path */ >> + mov x10, x0 >> + >> + get_thread_info x0 >> + ldr x1, [x0, #TSK_TI_LOWEST_STACK] >> + >> + /* get the number of bytes to check for lowest stack */ >> + mov x3, x1 >> + and x3, x3, #THREAD_SIZE - 1 >> + lsr x3, x3, #3 >> + >> + /* now generate the start of the stack */ >> + mov x4, sp >> + movn x2, #THREAD_SIZE - 1 >> + and x1, x4, x2 >> + >> + mov x2, #-0xBEEF /* stack poison */ >> + >> + cmp x3, #0 >> + b.eq 4f /* Found nothing, go poison */ >> + >> +1: >> + ldr x4, [x1, x3, lsl #3] >> + cmp x4, x2 /* did we find the poison? */ >> + b.eq 2f /* yes we did, go check it */ >> + >> + sub x3, x3, #1 >> + cmp x3, #0 >> + b.eq 4f /* Found nothing, go poison */ >> + b 1b /* loop again */ >> + >> +2: > > IIUC, at this point, x1 was the start (lowest addr) of the stack, and x3 > was the quadword index of the first poison on that stack. > > The x86 asm implicitly held that [x1, x3, lsl #3] address in RDI, but we > don't have a copy... > >> + cmp x3, #16 >> + b.ls 4f /* Go poison if there are less than 16 things left? */ >> + >> + mov x3, #16 > > ... and here we blat the index without saving it, meaning we always jump > to close to the start of the stack, which I don't think was intentional. > Urgh. You are right. I had an index register when I first started out and then a different bug caused me to erroneously remove it. > So then we fall though to the below... > >> +3: >> + ldr x4, [x1, x3, lsl #3] >> + cmp x4, x2 /* did we find the poison? */ >> + b.ne 1b /* nope we have to check deeper */ >> + >> + sub x3, x3, #1 >> + cmp x3, #0 >> + b.eq 4f /* Found nothing, go poison */ >> + b 3b /* loop again */ > > ... where we either: > > * find 16 contiguous poison entries, leaving x3 == 0, or: > > * we immediately find a non-poison value, and jump back to 1b. If there > are 16 non-poison values, we're left with x3 == 0, otherwise we bail > out and jump to 4f with x3 in the interval [0,15]. > > .... or I've completely confused myself, as suggested above. > > We might not have x86's fancy string instructions, but we could do > something equally impenetrable: > > mov x5, #0 > 1: > cbz x3, 4f > ldr x4, [x1, x3, lsl #3] > cmp x4, x2 > csinc x5, xzr, x5, ne > tbz x5, #4, 4f // found 16 poisons? > sub x3, x3, #1 > b 1b > > That doesn't stop 16 slots early, so it can return any value of x3 all > the way down to zero. > Seems to work for my testing. >> + >> + /* The poison function */ >> +4: >> + /* Generate the address we found */ >> + add x5, x1, x3, lsl #3 >> + orr x5, x5, #16 > > Have you figured out what the forced 16 byte offset is for? > > I've not managed to figure out why it's there, and it looks like > Alexander couldn't either, judging by his comments in the x86 asm. > From get_wchan in arch/x86/kernel/process.c, it might be be to account for the start of the frame correctly? I don't have a definitive answer though and plan on just removing this for the next version. 
>> >> + mov x4, sp >> + /* subtrace the current pointer */ >> + sub x8, x4, x5 >> + >> + /* subtract one more so we don't touch the current sp */ >> + sub x8, x8, #1 >> + >> + /* sanity check */ >> + cmp x8, #THREAD_SIZE >> + b.lo 5f >> +999: >> + b 999b >> + >> +5: >> + lsr x8, x8, #3 >> + mov x3, x8 >> +6: >> + cmp x3, #0 >> + b.eq 7f >> + >> + str x2, [x1, x3, lsl #3] >> + sub x3, x3, #1 >> + b 6b >> + >> + /* Reset the lowest stack to the top of the stack */ >> +7: >> + ldr x1, [x0, TSK_STACK] >> + add x1, x1, #THREAD_SIZE >> + sub x1, x1, #256 >> + str x1, [x0, #TSK_TI_LOWEST_STACK] > > I take it this is the offsetting you were querying? > > I don't think it's quite right. Our stack looks like: > > +---+ <- task_stack_page(p) + THREAD_SIZE > | | > +---+ <- task_stack_page(p) + THREAD_START_SP > | | > | | > +---+ <- task_pt_regs(p) > | | > | | > | | > ~~~~~ > > ~~~~~ > | | > | | > | | > +---+ <- task_stack_page(p) > > At the point we return to userspace, sp == task_pt_regs(p). > > Judging by a generated asm-offsets.h, sizeof(struct_pt_regs) is 304 > bytes currently. THREAD_SIZE - THREAD_START_SP == 16. > > We probably want to give that 16 a mnemonic (e.g FRAME_PADDING), and > have something like: > > ldr x1, [x0, TSK_STACK] > add x1, x1, #THREAD_SIZE > sub x1, x1, #(S_FRAME_SIZE + FRAME_PADDING) > str x1, [x0, #TSK_TI_LOWEST_STACK] > Yes, that looks cleaner. I suspect that even though we weren't subtracting enough, it still worked because the lowest stack would get overwritten in track_stack later. >> + >> + mov x0, x10 >> + ret >> +ENDPROC(erase_kstack) >> +#endif >> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c >> index 659ae80..1b6cca2 100644 >> --- a/arch/arm64/kernel/process.c >> +++ b/arch/arm64/kernel/process.c >> @@ -293,6 +293,12 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, >> p->thread.cpu_context.pc = (unsigned long)ret_from_fork; >> p->thread.cpu_context.sp = (unsigned long)childregs; >> >> +#ifdef CONFIG_STACKLEAK >> + p->thread.lowest_stack = (unsigned long)task_stack_page(p) + >> + 2 * sizeof(unsigned long); >> +#endif > > I see this follows the x86 logic, but I don't see why it's necessary to > offset this end of the stack. > > Do you have an idea as to what this is for? > Again, no and I think I'll again remove it. >> + >> + >> ptrace_hw_copy_thread(p); >> >> return 0; >> @@ -417,3 +423,15 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) >> else >> return randomize_page(mm->brk, SZ_1G); >> } >> + >> +#ifdef CONFIG_STACKLEAK >> +void __used check_alloca(unsigned long size) >> +{ >> + unsigned long sp = (unsigned long)&sp, stack_left; > > Nit: please use current_stack_pointer. > Ack. This should also be cleaned up in the the track_stack function as well. > Thanks, > Mark. > Thanks, Laura
Hello Laura and Mark, [+ Tycho Andersen and Pax Team] On 14.07.2017 23:51, Laura Abbott wrote: > On 07/11/2017 12:51 PM, Mark Rutland wrote: >> On Mon, Jul 10, 2017 at 03:04:43PM -0700, Laura Abbott wrote: >>> + /* The poison function */ >>> +4: >>> + /* Generate the address we found */ >>> + add x5, x1, x3, lsl #3 >>> + orr x5, x5, #16 >> >> Have you figured out what the forced 16 byte offset is for? >> >> I've not managed to figure out why it's there, and it looks like >> Alexander couldn't either, judging by his comments in the x86 asm. > > From get_wchan in arch/x86/kernel/process.c, it might be be to > account for the start of the frame correctly? I don't have a > definitive answer though and plan on just removing this for the > next version. I've investigated it carefully: we should not remove this 16-byte offset. I looked at the bottom of the kernel stack with the debugger and found a non-zero value 0x57AC6E9D. It is STACK_END_MAGIC, which is defined in include/uapi/linux/magic.h. This value is checked if we enable CONFIG_SCHED_STACK_END_CHECK. Then I removed this 16-byte offset in stackleak patch (including the OR instruction in erase_kstack()) to see how the first erase_kstack() call happily overwrites that magic value: [ 1.574655] Freeing unused kernel memory: 1244K [ 1.575026] Kernel memory protection disabled. [ 1.578575] Kernel panic - not syncing: corrupted stack end detected inside scheduler [ 1.578575] [ 1.579420] CPU: 0 PID: 1 Comm: init Not tainted 4.12.0+ #9 [ 1.579420] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 1.579420] Call Trace: [ 1.579420] dump_stack+0x63/0x81 [ 1.579420] panic+0xd0/0x212 [ 1.579420] __schedule+0x6d8/0x6e0 [ 1.579420] schedule+0x31/0x80 [ 1.579420] io_schedule+0x11/0x40 [ 1.579420] __lock_page_or_retry+0x17d/0x2b0 [ 1.579420] ? page_cache_tree_insert+0x90/0x90 [ 1.579420] filemap_fault+0x3aa/0x5c0 [ 1.579420] ? filemap_map_pages+0x1a6/0x280 [ 1.579420] ext4_filemap_fault+0x2d/0x40 [ 1.579420] __do_fault+0x1b/0x60 [ 1.579420] __handle_mm_fault+0x641/0x8b0 [ 1.579420] handle_mm_fault+0x87/0x130 [ 1.579420] __do_page_fault+0x25f/0x4a0 [ 1.579420] trace_do_page_fault+0x37/0xd0 [ 1.579420] do_async_page_fault+0x23/0x80 [ 1.579420] async_page_fault+0x28/0x30 [ 1.579420] RIP: 0033:0x7f81be514ab0 [ 1.579420] RSP: 002b:00007ffc8de73768 EFLAGS: 00010202 [ 1.579420] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 1.579420] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00007ffc8de73770 [ 1.579420] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 [ 1.579420] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 1.579420] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 1.579420] Dumping ftrace buffer: [ 1.579420] (ftrace buffer empty) [ 1.579420] Kernel Offset: disabled [ 1.579420] Rebooting in 86400 seconds.. So let's keep it not to break CONFIG_SCHED_STACK_END_CHECK. Again, I want to say kudos to PaX Team for his awesome code. Best regards, Alexander
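For readers who haven't looked at CONFIG_SCHED_STACK_END_CHECK, this is roughly
how the magic value gets planted and checked in mainline. It is paraphrased
from kernel/fork.c and kernel/sched/core.c around v4.12 and trimmed to the
relevant lines, so treat it as a sketch rather than the exact code.

    /* kernel/fork.c: the lowest word of every task stack is stamped with
     * STACK_END_MAGIC (0x57AC6E9D) when the task is set up. */
    void set_task_stack_end_magic(struct task_struct *tsk)
    {
            unsigned long *stackend;

            stackend = end_of_stack(tsk);
            *stackend = STACK_END_MAGIC;    /* for overflow detection */
    }

    /* kernel/sched/core.c: with CONFIG_SCHED_STACK_END_CHECK enabled, the
     * scheduler panics as soon as that word has been overwritten -- which is
     * exactly what the over-eager erase_kstack() above triggered. */
    static inline void schedule_debug(struct task_struct *prev)
    {
    #ifdef CONFIG_SCHED_STACK_END_CHECK
            if (task_stack_end_corrupted(prev))
                    panic("corrupted stack end detected inside scheduler\n");
    #endif
    }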
On 07/21/2017 09:56 AM, Alexander Popov wrote: > Hello Laura and Mark, > > [+ Tycho Andersen and Pax Team] > > On 14.07.2017 23:51, Laura Abbott wrote: >> On 07/11/2017 12:51 PM, Mark Rutland wrote: >>> On Mon, Jul 10, 2017 at 03:04:43PM -0700, Laura Abbott wrote: >>>> + /* The poison function */ >>>> +4: >>>> + /* Generate the address we found */ >>>> + add x5, x1, x3, lsl #3 >>>> + orr x5, x5, #16 >>> >>> Have you figured out what the forced 16 byte offset is for? >>> >>> I've not managed to figure out why it's there, and it looks like >>> Alexander couldn't either, judging by his comments in the x86 asm. >> >> From get_wchan in arch/x86/kernel/process.c, it might be be to >> account for the start of the frame correctly? I don't have a >> definitive answer though and plan on just removing this for the >> next version. > > I've investigated it carefully: we should not remove this 16-byte offset. > > I looked at the bottom of the kernel stack with the debugger and found a > non-zero value 0x57AC6E9D. It is STACK_END_MAGIC, which is defined in > include/uapi/linux/magic.h. This value is checked if we enable > CONFIG_SCHED_STACK_END_CHECK. > > Then I removed this 16-byte offset in stackleak patch (including the OR > instruction in erase_kstack()) to see how the first erase_kstack() call happily > overwrites that magic value: > > [ 1.574655] Freeing unused kernel memory: 1244K > [ 1.575026] Kernel memory protection disabled. > [ 1.578575] Kernel panic - not syncing: corrupted stack end detected inside > scheduler > [ 1.578575] > [ 1.579420] CPU: 0 PID: 1 Comm: init Not tainted 4.12.0+ #9 > [ 1.579420] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS > Ubuntu-1.8.2-1ubuntu1 04/01/2014 > [ 1.579420] Call Trace: > [ 1.579420] dump_stack+0x63/0x81 > [ 1.579420] panic+0xd0/0x212 > [ 1.579420] __schedule+0x6d8/0x6e0 > [ 1.579420] schedule+0x31/0x80 > [ 1.579420] io_schedule+0x11/0x40 > [ 1.579420] __lock_page_or_retry+0x17d/0x2b0 > [ 1.579420] ? page_cache_tree_insert+0x90/0x90 > [ 1.579420] filemap_fault+0x3aa/0x5c0 > [ 1.579420] ? filemap_map_pages+0x1a6/0x280 > [ 1.579420] ext4_filemap_fault+0x2d/0x40 > [ 1.579420] __do_fault+0x1b/0x60 > [ 1.579420] __handle_mm_fault+0x641/0x8b0 > [ 1.579420] handle_mm_fault+0x87/0x130 > [ 1.579420] __do_page_fault+0x25f/0x4a0 > [ 1.579420] trace_do_page_fault+0x37/0xd0 > [ 1.579420] do_async_page_fault+0x23/0x80 > [ 1.579420] async_page_fault+0x28/0x30 > [ 1.579420] RIP: 0033:0x7f81be514ab0 > [ 1.579420] RSP: 002b:00007ffc8de73768 EFLAGS: 00010202 > [ 1.579420] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 > [ 1.579420] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00007ffc8de73770 > [ 1.579420] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 > [ 1.579420] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 > [ 1.579420] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 > [ 1.579420] Dumping ftrace buffer: > [ 1.579420] (ftrace buffer empty) > [ 1.579420] Kernel Offset: disabled > [ 1.579420] Rebooting in 86400 seconds.. > > > So let's keep it not to break CONFIG_SCHED_STACK_END_CHECK. > That makes sense, good find! I wonder if CONFIG_SCHED_STACK_END_CHECK should go on the list of hardening options and/or if we can enhance it somehow? I'm not sure why it requires two words though since the poison only seems to be 32-bits? > Again, I want to say kudos to PaX Team for his awesome code. Agreed! > Best regards, > Alexander > Thanks, Laura
On 22.07.2017 03:23, Laura Abbott wrote:
> On 07/21/2017 09:56 AM, Alexander Popov wrote:
>> So let's keep it not to break CONFIG_SCHED_STACK_END_CHECK.
>
> That makes sense, good find! I wonder if CONFIG_SCHED_STACK_END_CHECK
> should go on the list of hardening options and/or if we can enhance
> it somehow?

Do you mean this list?
http://www.kernsec.org/wiki/index.php/Kernel_Self_Protection_Project/Recommended_Settings

> I'm not sure why it requires two words though since the
> poison only seems to be 32-bits?

On x86_64 end_of_stack() returns the pointer to unsigned long, so we need at
least 8 bytes to avoid breaking CONFIG_SCHED_STACK_END_CHECK. Don't know about 8
more bytes.

Best regards,
Alexander
On Mon, Jul 24, 2017 at 1:19 AM, Alexander Popov <alex.popov@linux.com> wrote:
> On 22.07.2017 03:23, Laura Abbott wrote:
>> On 07/21/2017 09:56 AM, Alexander Popov wrote:
>>> So let's keep it not to break CONFIG_SCHED_STACK_END_CHECK.
>>
>> That makes sense, good find! I wonder if CONFIG_SCHED_STACK_END_CHECK
>> should go on the list of hardening options and/or if we can enhance
>> it somehow?
>
> Do you mean this list?
> http://www.kernsec.org/wiki/index.php/Kernel_Self_Protection_Project/Recommended_Settings
>
>> I'm not sure why it requires two words though since the
>> poison only seems to be 32-bits?
>
> On x86_64 end_of_stack() returns the pointer to unsigned long, so we need at
> least 8 bytes to avoid breaking CONFIG_SCHED_STACK_END_CHECK. Don't know about 8
> more bytes.

Seems like this should be a random value like the per-frame stack
canary, but it looks like a relatively cheap check. It's probably not
in the cache, though, since most stack operations should be pretty far
from the end of the stack...

-Kees
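Kees's randomization idea isn't implemented in any tree mentioned in this
thread; a purely hypothetical sketch of one way it could look follows. The
symbol stack_end_magic and the initcall are invented for illustration; only
get_random_long(), early_initcall() and __ro_after_init are existing kernel
facilities, and a real version would also have to re-stamp tasks created
before the initcall runs.

    #include <linux/init.h>
    #include <linux/random.h>
    #include <linux/cache.h>
    #include <linux/magic.h>

    /* Hypothetical per-boot end-of-stack marker, analogous to the per-boot
     * stack-protector canary; keeps the fixed constant if the RNG happens
     * to return zero. */
    static unsigned long stack_end_magic __ro_after_init = STACK_END_MAGIC;

    static int __init randomize_stack_end_magic(void)
    {
            unsigned long val = get_random_long();

            if (val)
                    stack_end_magic = val;
            /* NOTE: init_task and early kthreads were stamped before this
             * point and would still carry STACK_END_MAGIC unless re-stamped
             * here. */
            return 0;
    }
    early_initcall(randomize_stack_end_magic);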
Hello Kees and Laura,

On 25.07.2017 06:34, Kees Cook wrote:
> On Mon, Jul 24, 2017 at 1:19 AM, Alexander Popov <alex.popov@linux.com> wrote:
>> On 22.07.2017 03:23, Laura Abbott wrote:
>>> On 07/21/2017 09:56 AM, Alexander Popov wrote:
>>>> So let's keep it not to break CONFIG_SCHED_STACK_END_CHECK.
>>>
>>> That makes sense, good find! I wonder if CONFIG_SCHED_STACK_END_CHECK
>>> should go on the list of hardening options and/or if we can enhance
>>> it somehow?
>>
>> Do you mean this list?
>> http://www.kernsec.org/wiki/index.php/Kernel_Self_Protection_Project/Recommended_Settings
>>
>>> I'm not sure why it requires two words though since the
>>> poison only seems to be 32-bits?
>>
>> On x86_64 end_of_stack() returns the pointer to unsigned long, so we need at
>> least 8 bytes to avoid breaking CONFIG_SCHED_STACK_END_CHECK. Don't know about 8
>> more bytes.
>
> Seems like this should be a random value like the per-frame stack
> canary, but it looks like a relatively cheap check. It's probably not
> in the cache, though, since most stack operations should be pretty far
> from the end of the stack...

It seems that the idea you describe is not implemented in the Grsecurity/PaX
patch.

On x86_64 the bottom of the stack for the mainline kernel looks like this:

0xffffc900004c8000: 0x57ac6e9d 0x00000000 0x00000000 0x00000000
0xffffc900004c8010: 0xffff4111 0xffffffff 0xffff4111 0xffffffff
...

But for the Grsecurity kernel it looks like this:

0xffffc90000324000: 0x00000000 0x00000000 0x57ac6e9d 0x00000000
0xffffc90000324010: 0xffff4111 0xffffffff 0xffff4111 0xffffffff
...

That is because the Grsecurity/PaX patch has this change:

 static inline unsigned long *end_of_stack(const struct task_struct *task)
 {
-	return task->stack;
+	return (unsigned long *)task->stack + 1;
 }

So that is their reason for having 16 bytes reserved at the bottom of the
kernel stack. However, right now I don't understand the purpose of patching
end_of_stack().

What do you think? Should mainline have it?

Best regards,
Alexander
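For comparison with the Grsecurity change quoted above, the mainline
definitions around v4.12 (with CONFIG_THREAD_INFO_IN_TASK) look roughly like
this, paraphrased from include/linux/sched/task_stack.h:

    /* Stacks grow down on x86_64 and arm64, so task->stack -- the lowest
     * address of the allocation -- is the end that an overflow hits first. */
    static inline unsigned long *end_of_stack(const struct task_struct *task)
    {
            return task->stack;
    }

    /* CONFIG_SCHED_STACK_END_CHECK compares exactly that word: */
    #define task_stack_end_corrupted(task) \
            (*(end_of_stack(task)) != STACK_END_MAGIC)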
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8addb85..0b65bfc 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -17,6 +17,7 @@ config ARM64 select ARCH_HAS_KCOV select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN + select ARCH_HAS_STACKLEAK select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 64c9e78..76f2738 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -88,6 +88,9 @@ struct thread_struct { unsigned long fault_address; /* fault info */ unsigned long fault_code; /* ESR_EL1 value */ struct debug_info debug; /* debugging */ +#ifdef CONFIG_STACKLEAK + unsigned long lowest_stack; +#endif }; #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index b3bb7ef..e0a5ae2 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -43,6 +43,9 @@ int main(void) DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); #endif DEFINE(TSK_STACK, offsetof(struct task_struct, stack)); +#ifdef CONFIG_STACKLEAK + DEFINE(TSK_TI_LOWEST_STACK, offsetof(struct task_struct, thread.lowest_stack)); +#endif BLANK(); DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context)); BLANK(); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b738880..e573633 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -744,6 +744,7 @@ ENDPROC(cpu_switch_to) */ ret_fast_syscall: disable_irq // disable interrupts + bl erase_kstack str x0, [sp, #S_X0] // returned x0 ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing and x2, x1, #_TIF_SYSCALL_WORK @@ -772,6 +773,7 @@ work_pending: */ ret_to_user: disable_irq // disable interrupts + bl erase_kstack ldr x1, [tsk, #TSK_TI_FLAGS] and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending @@ -865,3 +867,93 @@ ENTRY(sys_rt_sigreturn_wrapper) mov x0, sp b sys_rt_sigreturn ENDPROC(sys_rt_sigreturn_wrapper) + +#ifdef CONFIG_STACKLEAK +ENTRY(erase_kstack) + /* save x0 for the fast path */ + mov x10, x0 + + get_thread_info x0 + ldr x1, [x0, #TSK_TI_LOWEST_STACK] + + /* get the number of bytes to check for lowest stack */ + mov x3, x1 + and x3, x3, #THREAD_SIZE - 1 + lsr x3, x3, #3 + + /* now generate the start of the stack */ + mov x4, sp + movn x2, #THREAD_SIZE - 1 + and x1, x4, x2 + + mov x2, #-0xBEEF /* stack poison */ + + cmp x3, #0 + b.eq 4f /* Found nothing, go poison */ + +1: + ldr x4, [x1, x3, lsl #3] + cmp x4, x2 /* did we find the poison? */ + b.eq 2f /* yes we did, go check it */ + + sub x3, x3, #1 + cmp x3, #0 + b.eq 4f /* Found nothing, go poison */ + b 1b /* loop again */ + +2: + cmp x3, #16 + b.ls 4f /* Go poison if there are less than 16 things left? */ + + mov x3, #16 +3: + ldr x4, [x1, x3, lsl #3] + cmp x4, x2 /* did we find the poison? 
*/ + b.ne 1b /* nope we have to check deeper */ + + sub x3, x3, #1 + cmp x3, #0 + b.eq 4f /* Found nothing, go poison */ + b 3b /* loop again */ + + /* The poison function */ +4: + /* Generate the address we found */ + add x5, x1, x3, lsl #3 + orr x5, x5, #16 + + mov x4, sp + /* subtrace the current pointer */ + sub x8, x4, x5 + + /* subtract one more so we don't touch the current sp */ + sub x8, x8, #1 + + /* sanity check */ + cmp x8, #THREAD_SIZE + b.lo 5f +999: + b 999b + +5: + lsr x8, x8, #3 + mov x3, x8 +6: + cmp x3, #0 + b.eq 7f + + str x2, [x1, x3, lsl #3] + sub x3, x3, #1 + b 6b + + /* Reset the lowest stack to the top of the stack */ +7: + ldr x1, [x0, TSK_STACK] + add x1, x1, #THREAD_SIZE + sub x1, x1, #256 + str x1, [x0, #TSK_TI_LOWEST_STACK] + + mov x0, x10 + ret +ENDPROC(erase_kstack) +#endif diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 659ae80..1b6cca2 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -293,6 +293,12 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, p->thread.cpu_context.pc = (unsigned long)ret_from_fork; p->thread.cpu_context.sp = (unsigned long)childregs; +#ifdef CONFIG_STACKLEAK + p->thread.lowest_stack = (unsigned long)task_stack_page(p) + + 2 * sizeof(unsigned long); +#endif + + ptrace_hw_copy_thread(p); return 0; @@ -417,3 +423,15 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) else return randomize_page(mm->brk, SZ_1G); } + +#ifdef CONFIG_STACKLEAK +void __used check_alloca(unsigned long size) +{ + unsigned long sp = (unsigned long)&sp, stack_left; + + /* all kernel stacks are of the same size */ + stack_left = sp & (THREAD_SIZE - 1); + BUG_ON(stack_left < 256 || size >= stack_left - 256); +} +EXPORT_SYMBOL(check_alloca); +#endif diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index f742596..cb378fa 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -18,7 +18,8 @@ cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \ $(call cc-option,-ffreestanding) \ - $(call cc-option,-fno-stack-protector) + $(call cc-option,-fno-stack-protector) \ + $(DISABLE_STACKLEAK_PLUGIN) GCOV_PROFILE := n KASAN_SANITIZE := n diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins index 5c86f64..0eb8dbc 100644 --- a/scripts/Makefile.gcc-plugins +++ b/scripts/Makefile.gcc-plugins @@ -35,11 +35,14 @@ ifdef CONFIG_GCC_PLUGINS gcc-plugin-$(CONFIG_STACKLEAK) += stackleak_plugin.so gcc-plugin-cflags-$(CONFIG_STACKLEAK) += -DSTACKLEAK_PLUGIN -fplugin-arg-stackleak_plugin-track-lowest-sp=100 + ifdef CONFIG_STACKLEAK + DISABLE_STACKLEAK_PLUGIN += -fplugin-arg-stackleak_plugin-disable + endif GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y)) export PLUGINCC GCC_PLUGINS_CFLAGS GCC_PLUGIN GCC_PLUGIN_SUBDIR - export SANCOV_PLUGIN DISABLE_LATENT_ENTROPY_PLUGIN + export SANCOV_PLUGIN DISABLE_LATENT_ENTROPY_PLUGIN DISABLE_STACKLEAK_PLUGIN ifneq ($(PLUGINCC),) # SANCOV_PLUGIN can be only in CFLAGS_KCOV because avoid duplication.
Implementation of stackleak based heavily on the x86 version

Signed-off-by: Laura Abbott <labbott@redhat.com>
---
The biggest points that need review here:
- Is the extra offsetting (subtracting/adding from the stack) correct?
- Where else do we need to clear the stack?
- The assembly can almost certainly be optimized. I tried to keep the
  behavior of the x86 'repe scasq' and the like where possible. I'm also a
  terrible register allocator.
---
 arch/arm64/Kconfig                    |  1 +
 arch/arm64/include/asm/processor.h    |  3 ++
 arch/arm64/kernel/asm-offsets.c       |  3 ++
 arch/arm64/kernel/entry.S             | 92 +++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/process.c           | 18 +++++++
 drivers/firmware/efi/libstub/Makefile |  3 +-
 scripts/Makefile.gcc-plugins          |  5 +-
 7 files changed, 123 insertions(+), 2 deletions(-)
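As background for the plugin flags the patch touches, here is a rough
hand-written approximation of what the stackleak instrumentation amounts to at
a call site. It is not the plugin's actual output: track_stack() and
check_alloca() are the helpers discussed in this thread, while the example
function and its body are invented for illustration.

    /* Hand-written approximation of the stackleak GCC plugin's effect; the
     * real calls are inserted by the compiler, not by the programmer. */
    extern void track_stack(void);
    extern void check_alloca(unsigned long size);

    static long example_instrumented_helper(unsigned long n)
    {
            long ret;

            /*
             * Functions whose frame exceeds the plugin's track-lowest-sp
             * threshold (100 bytes, per the -fplugin-arg above) get a call
             * that lowers current->thread.lowest_stack when sp dips below
             * it, so erase_kstack() later knows how deep to poison.
             */
            track_stack();

            /*
             * Every dynamic stack allocation is preceded by a size check, so
             * an oversized or attacker-controlled alloca hits BUG_ON()
             * instead of silently running off the end of the kernel stack.
             */
            check_alloca(n);
            {
                    char *buf = __builtin_alloca(n);

                    buf[0] = 0;
                    ret = buf[0];
            }

            return ret;
    }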