diff mbox

[v5,3/9] x86/head: Move early exception panic code into early_fixup_exception

Message ID 4085070316fc3ab29538d3fcfe282648d1d4ee2e.1459605520.git.luto@kernel.org (mailing list archive)
State New, archived
Headers show

Commit Message

Andy Lutomirski April 2, 2016, 2:01 p.m. UTC
This removes a bunch of assembly and adds some C code instead.  It
changes the actual printouts on both 32-bit and 64-bit kernels, but
they still seem okay.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/include/asm/uaccess.h |  2 +-
 arch/x86/kernel/head_32.S      | 49 +++++-------------------------------------
 arch/x86/kernel/head_64.S      | 45 ++------------------------------------
 arch/x86/mm/extable.c          | 29 ++++++++++++++++++++-----
 4 files changed, 32 insertions(+), 93 deletions(-)

Comments

Borislav Petkov April 2, 2016, 6:39 p.m. UTC | #1
On Sat, Apr 02, 2016 at 07:01:34AM -0700, Andy Lutomirski wrote:
> This removes a bunch of assembly and adds some C code instead.  It
> changes the actual printouts on both 32-bit and 64-bit kernels, but
> they still seem okay.
> 
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>  arch/x86/include/asm/uaccess.h |  2 +-
>  arch/x86/kernel/head_32.S      | 49 +++++-------------------------------------
>  arch/x86/kernel/head_64.S      | 45 ++------------------------------------
>  arch/x86/mm/extable.c          | 29 ++++++++++++++++++++-----
>  4 files changed, 32 insertions(+), 93 deletions(-)

...

> @@ -99,21 +101,38 @@ int __init early_fixup_exception(struct pt_regs *regs, int trapnr)
>  
>  	/* Ignore early NMIs. */
>  	if (trapnr == X86_TRAP_NMI)
> -		return 1;
> +		return;
> +
> +	if (early_recursion_flag > 2)
> +		goto halt_loop;
> +
> +	if (regs->cs != __KERNEL_CS)
> +		goto fail;
>  
>  	e = search_exception_tables(regs->ip);
>  	if (!e)
> -		return 0;
> +		goto fail;
>  
>  	new_ip  = ex_fixup_addr(e);
>  	handler = ex_fixup_handler(e);
>  
>  	/* special handling not supported during early boot */
>  	if (handler != ex_handler_default)
> -		return 0;
> +		goto fail;
>  
>  	regs->ip = new_ip;
> -	return 1;
> +	return;
> +
> +fail:
> +	early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
> +		     (unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
> +		     regs->orig_ax, read_cr2());
> +
> +	show_regs(regs);

To make this even better, it could call something like early_show_regs(),
a simplified version of __show_regs() that works on both 32-bit and 64-bit
but which calls early_printk().

This way you'll be able to get out stuff to the console as early as
possible.

Btw, you don't need to dump rIP, CR2, etc in the PANIC message above
since you're going to early_show_regs() anyway.
Andy Lutomirski April 2, 2016, 8:13 p.m. UTC | #2
On Sat, Apr 2, 2016 at 11:39 AM, Borislav Petkov <bp@alien8.de> wrote:
> On Sat, Apr 02, 2016 at 07:01:34AM -0700, Andy Lutomirski wrote:
>> This removes a bunch of assembly and adds some C code instead.  It
>> changes the actual printouts on both 32-bit and 64-bit kernels, but
>> they still seem okay.
>>
>> Signed-off-by: Andy Lutomirski <luto@kernel.org>
>> ---
>>  arch/x86/include/asm/uaccess.h |  2 +-
>>  arch/x86/kernel/head_32.S      | 49 +++++-------------------------------------
>>  arch/x86/kernel/head_64.S      | 45 ++------------------------------------
>>  arch/x86/mm/extable.c          | 29 ++++++++++++++++++++-----
>>  4 files changed, 32 insertions(+), 93 deletions(-)
>
> ...
>
>> @@ -99,21 +101,38 @@ int __init early_fixup_exception(struct pt_regs *regs, int trapnr)
>>
>>       /* Ignore early NMIs. */
>>       if (trapnr == X86_TRAP_NMI)
>> -             return 1;
>> +             return;
>> +
>> +     if (early_recursion_flag > 2)
>> +             goto halt_loop;
>> +
>> +     if (regs->cs != __KERNEL_CS)
>> +             goto fail;
>>
>>       e = search_exception_tables(regs->ip);
>>       if (!e)
>> -             return 0;
>> +             goto fail;
>>
>>       new_ip  = ex_fixup_addr(e);
>>       handler = ex_fixup_handler(e);
>>
>>       /* special handling not supported during early boot */
>>       if (handler != ex_handler_default)
>> -             return 0;
>> +             goto fail;
>>
>>       regs->ip = new_ip;
>> -     return 1;
>> +     return;
>> +
>> +fail:
>> +     early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
>> +                  (unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
>> +                  regs->orig_ax, read_cr2());
>> +
>> +     show_regs(regs);
>
> To make this even better, it could be something called early_show_regs()
> or so and be a simplified version of __show_regs() on both bitness but
> which calls early_printk().
>
> This way you'll be able to get out stuff to the console as early as
> possible.
>
> Btw, you don't need to dump rIP, CR2, etc in the PANIC message above
> since you're going to early_show_regs() anyway.

Given that this isn't really a regression with my patches (it
probably never worked much better on 32-bit and the regs never would
have shown at all on 64-bit), I propose a different approach: make
printk work earlier.  Something like:

if (early) {
    early_printk(args);
}

or early_vprintk or whatever.

If the cost of a branch mattered, this could be alternative-patched
out later on, but that seems silly.  I also bet that a more sensible
fallback could be created in which printk would try to use an early
console if there's no real console.

--Andy
Borislav Petkov April 2, 2016, 8:47 p.m. UTC | #3
On Sat, Apr 02, 2016 at 01:13:37PM -0700, Andy Lutomirski wrote:
> Given that I this isn't really a regression with my patches (it
> probably never worked much better on 32-bit and the regs never would
> have shown at all on 64-bit),

You're right. That thing calls printk *and* early_printk, WTF:

#ifdef CONFIG_EARLY_PRINTK

	call early_printk
	...

	call dump_stack

	...

	call __print_symbol

those last two call printk. Great.

> I propose a different approach: make
> printk work earlier.  Something like:
> 
> if (early) {
>     early_printk(args);
> }
> 
> or early_vprintk or whatever.
> 
> If the cost of a branch mattered, this could be alternative-patched
> out later on, but that seems silly.  I also bet that a more sensible
> fallback could be created in which printk would try to use an early
> console if there's no real console.

So how about this:

printk() does

	vprintk_func = this_cpu_read(printk_func);

and that's

DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default

I guess we can make that function be early_printk-something and once
printk is initialized, we overwrite it with vprintk_default.

Elegant and no need for if branches and alternatives.

Hmmm.
Andy Lutomirski April 2, 2016, 8:58 p.m. UTC | #4
[cc Jan Kara]

On Sat, Apr 2, 2016 at 1:47 PM, Borislav Petkov <bp@alien8.de> wrote:
> On Sat, Apr 02, 2016 at 01:13:37PM -0700, Andy Lutomirski wrote:
>> Given that I this isn't really a regression with my patches (it
>> probably never worked much better on 32-bit and the regs never would
>> have shown at all on 64-bit),
>
> You're right. That thing calls printk *and* early_printk, WTF:
>
> #ifdef CONFIG_EARLY_PRINTK
>
>         call early_printk
>         ...
>
>         call dump_stack
>
>         ...
>
>         call __print_symbol
>
> those last two call printk. Great.
>
>> I propose a different approach: make
>> printk work earlier.  Something like:
>>
>> if (early) {
>>     early_printk(args);
>> }
>>
>> or early_vprintk or whatever.
>>
>> If the cost of a branch mattered, this could be alternative-patched
>> out later on, but that seems silly.  I also bet that a more sensible
>> fallback could be created in which printk would try to use an early
>> console if there's no real console.
>
> So how about this:
>
> printk() does
>
>         vprintk_func = this_cpu_read(printk_func);
>
> and that's
>
> DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default
>
> I guess we can make that function be early_printk-something and once
> printk is initialized, we overwrite it with vprintk_default.
>
> Elegant and no need for if branches and alternatives.
>
> Hmmm.

Jan, IIRC you were looking at printk recently-ish.  Any thoughts here?

--Andy
Jan Kara April 4, 2016, 11:52 a.m. UTC | #5
On Sat 02-04-16 13:58:19, Andy Lutomirski wrote:
> [cc Jan Kara]
> 
> On Sat, Apr 2, 2016 at 1:47 PM, Borislav Petkov <bp@alien8.de> wrote:
> > On Sat, Apr 02, 2016 at 01:13:37PM -0700, Andy Lutomirski wrote:
> >> Given that I this isn't really a regression with my patches (it
> >> probably never worked much better on 32-bit and the regs never would
> >> have shown at all on 64-bit),
> >
> > You're right. That thing calls printk *and* early_printk, WTF:
> >
> > #ifdef CONFIG_EARLY_PRINTK
> >
> >         call early_printk
> >         ...
> >
> >         call dump_stack
> >
> >         ...
> >
> >         call __print_symbol
> >
> > those last two call printk. Great.
> >
> >> I propose a different approach: make
> >> printk work earlier.  Something like:
> >>
> >> if (early) {
> >>     early_printk(args);
> >> }
> >>
> >> or early_vprintk or whatever.
> >>
> >> If the cost of a branch mattered, this could be alternative-patched
> >> out later on, but that seems silly.  I also bet that a more sensible
> >> fallback could be created in which printk would try to use an early
> >> console if there's no real console.
> >
> > So how about this:
> >
> > printk() does
> >
> >         vprintk_func = this_cpu_read(printk_func);
> >
> > and that's
> >
> > DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default
> >
> > I guess we can make that function be early_printk-something and once
> > printk is initialized, we overwrite it with vprintk_default.
> >
> > Elegant and no need for if branches and alternatives.
> >
> > Hmmm.
> 
> Jan, IIRC you were looking at printk recently-ish.  Any thoughts here?

Sounds like a good idea to me. I've also discussed this with Petr Mladek
(added to CC), who is using the printk_func per-cpu variable in his
printk-from-NMI patches, and he doesn't see a problem with this either.

I was just wondering about one thing - this way we add more early printks
if I understand your intention right. Are we guaranteed that they happen
only from a single CPU? Because currently there is no locking in
early_printk() and thus we can end up writing to early console several
messages in parallel from different CPUs. Not sure what's going to happen
in that case...

								Honza
Andy Lutomirski April 4, 2016, 3:32 p.m. UTC | #6
On Apr 4, 2016 4:51 AM, "Jan Kara" <jack@suse.cz> wrote:
>
> On Sat 02-04-16 13:58:19, Andy Lutomirski wrote:
> > [cc Jan Kara]
> >
> > On Sat, Apr 2, 2016 at 1:47 PM, Borislav Petkov <bp@alien8.de> wrote:
> > > On Sat, Apr 02, 2016 at 01:13:37PM -0700, Andy Lutomirski wrote:
> > >> Given that I this isn't really a regression with my patches (it
> > >> probably never worked much better on 32-bit and the regs never would
> > >> have shown at all on 64-bit),
> > >
> > > You're right. That thing calls printk *and* early_printk, WTF:
> > >
> > > #ifdef CONFIG_EARLY_PRINTK
> > >
> > >         call early_printk
> > >         ...
> > >
> > >         call dump_stack
> > >
> > >         ...
> > >
> > >         call __print_symbol
> > >
> > > those last two call printk. Great.
> > >
> > >> I propose a different approach: make
> > >> printk work earlier.  Something like:
> > >>
> > >> if (early) {
> > >>     early_printk(args);
> > >> }
> > >>
> > >> or early_vprintk or whatever.
> > >>
> > >> If the cost of a branch mattered, this could be alternative-patched
> > >> out later on, but that seems silly.  I also bet that a more sensible
> > >> fallback could be created in which printk would try to use an early
> > >> console if there's no real console.
> > >
> > > So how about this:
> > >
> > > printk() does
> > >
> > >         vprintk_func = this_cpu_read(printk_func);
> > >
> > > and that's
> > >
> > > DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default
> > >
> > > I guess we can make that function be early_printk-something and once
> > > printk is initialized, we overwrite it with vprintk_default.
> > >
> > > Elegant and no need for if branches and alternatives.
> > >
> > > Hmmm.
> >
> > Jan, IIRC you were looking at printk recently-ish.  Any thoughts here?
>
> Sounds like a good idea to me. I've also consulted this with Petr Mladek
> (added to CC) who is using printk_func per-cpu variable in his
> printk-from-NMI patches and he also doesn't see a problem with this.
>
> I was just wondering about one thing - this way we add more early printks
> if I understand your intention right. Are we guaranteed that they happen
> only from a single CPU? Because currently there is no locking in
> early_printk() and thus we can end up writing to early console several
> messages in parallel from different CPUs. Not sure what's going to happen
> in that case...

Adding locking would be easy enough, wouldn't it?

But do any platforms really boot a second CPU before switching to real
printk?  Given that I see all the smpboot stuff in dmesg, I guess real
printk happens first.  I admit I haven't actually checked.

--Andy

>
>                                                                 Honza
> --
> Jan Kara <jack@suse.com>
> SUSE Labs, CR
Arjan van de Ven April 4, 2016, 3:36 p.m. UTC | #7
On 4/4/2016 8:32 AM, Andy Lutomirski wrote:
>
> Adding locking would be easy enough, wouldn't it?
>
> But do any platforms really boot a second CPU before switching to real
> printk?  Given that I see all the smpboot stuff in dmesg, I guess real
> printk happens first.  I admit I haven't actually checked.

Adding locking also makes things more fragile in terms of getting the last thing out
before you go down in flaming death...

Until it's a proven problem, this early, getting the message out at all is sometimes
more important than getting it out perfectly.
Peter Zijlstra April 4, 2016, 4 p.m. UTC | #8
On Mon, Apr 04, 2016 at 08:32:21AM -0700, Andy Lutomirski wrote:

> Adding locking would be easy enough, wouldn't it?

See patch in this thread..

> But do any platforms really boot a second CPU before switching to real
> printk? 

I _only_ use early_printk() as printk() is a quagmire of fail :-)
diff mbox

Patch

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a3afb7259751..83fd2cf187d2 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -111,7 +111,7 @@  struct exception_table_entry {
 
 extern int fixup_exception(struct pt_regs *regs, int trapnr);
 extern bool ex_has_fault_handler(unsigned long ip);
-extern int early_fixup_exception(struct pt_regs *regs, int trapnr);
+extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
 
 /*
  * These are the main single-value transfer routines.  They automatically
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 5e6ce845813a..411dce93fee9 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -555,8 +555,6 @@  early_idt_handler_common:
 	 */
 	cld
 
-	cmpl $2,%ss:early_recursion_flag
-	je hlt_loop
 	incl %ss:early_recursion_flag
 
 	/* The vector number is in pt_regs->gs */
@@ -588,13 +586,8 @@  early_idt_handler_common:
 	movw	%gs, PT_GS(%esp)
 	movw	$0, PT_GS+2(%esp)
 
-	cmpl $(__KERNEL_CS),PT_CS(%esp)
-	jne 10f
-
 	movl	%esp, %eax	/* args are pt_regs (EAX), trapnr (EDX) */
 	call	early_fixup_exception
-	andl	%eax,%eax
-	jz	10f		/* Exception wasn't fixed up */
 
 	popl	%ebx		/* pt_regs->bx */
 	popl	%ecx		/* pt_regs->cx */
@@ -610,29 +603,6 @@  early_idt_handler_common:
 	decl	%ss:early_recursion_flag
 	addl	$4, %esp	/* pop pt_regs->orig_ax */
 	iret
-
-10:
-#ifdef CONFIG_PRINTK
-	xorl %eax,%eax
-	movw %ax,PT_FS+2(%esp)	/* clean up the segment values on some cpus */
-	movw %ax,PT_DS+2(%esp)
-	movw %ax,PT_ES+2(%esp)
-	leal  40(%esp),%eax
-	pushl %eax		/* %esp before the exception */
-	pushl %ebx
-	pushl %ebp
-	pushl %esi
-	pushl %edi
-	movl %cr2,%eax
-	pushl %eax
-	pushl (20+6*4)(%esp)	/* trapno */
-	pushl $fault_msg
-	call printk
-#endif
-	call dump_stack
-hlt_loop:
-	hlt
-	jmp hlt_loop
 ENDPROC(early_idt_handler_common)
 
 /* This is the default interrupt "handler" :-) */
@@ -668,10 +638,14 @@  ignore_int:
 	popl %eax
 #endif
 	iret
+
+hlt_loop:
+	hlt
+	jmp hlt_loop
 ENDPROC(ignore_int)
 __INITDATA
 	.align 4
-early_recursion_flag:
+GLOBAL(early_recursion_flag)
 	.long 0
 
 __REFDATA
@@ -736,19 +710,6 @@  __INITRODATA
 int_msg:
 	.asciz "Unknown interrupt or fault at: %p %p %p\n"
 
-fault_msg:
-/* fault info: */
-	.ascii "BUG: Int %d: CR2 %p\n"
-/* regs pushed in early_idt_handler: */
-	.ascii "     EDI %p  ESI %p  EBP %p  EBX %p\n"
-	.ascii "     ESP %p   ES %p   DS %p\n"
-	.ascii "     EDX %p  ECX %p  EAX %p\n"
-/* fault frame: */
-	.ascii "     vec %p  err %p  EIP %p   CS %p  flg %p\n"
-	.ascii "Stack: %p %p %p %p %p %p %p %p\n"
-	.ascii "       %p %p %p %p %p %p %p %p\n"
-	.asciz "       %p %p %p %p %p %p %p %p\n"
-
 #include "../../x86/xen/xen-head.S"
 
 /*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index af87896b6a23..c39b6437cf03 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -354,8 +354,6 @@  early_idt_handler_common:
 	 */
 	cld
 
-	cmpl $2,early_recursion_flag(%rip)
-	jz  1f
 	incl early_recursion_flag(%rip)
 
 	/* The vector number is currently in the pt_regs->di slot. */
@@ -376,9 +374,6 @@  early_idt_handler_common:
 	pushq %r14				/* pt_regs->r14 */
 	pushq %r15				/* pt_regs->r15 */
 
-	cmpl $__KERNEL_CS,CS(%rsp)
-	jne 11f
-
 	cmpq $14,%rsi		/* Page fault? */
 	jnz 10f
 	GET_CR2_INTO(%rdi)	/* Can clobber any volatile register if pv */
@@ -389,37 +384,8 @@  early_idt_handler_common:
 10:
 	movq %rsp,%rdi		/* RDI = pt_regs; RSI is already trapnr */
 	call early_fixup_exception
-	andl %eax,%eax
-	jnz 20f			# Found an exception entry
-
-11:
-#ifdef CONFIG_EARLY_PRINTK
-	/*
-	 * On paravirt kernels, GET_CR2_INTO clobbers callee-clobbered regs.
-	 * We only care about RSI, so we need to save it.
-	 */
-	movq %rsi,%rbx		/* Save vector number */
-	GET_CR2_INTO(%r9)
-	movq ORIG_RAX(%rsp),%r8	/* error code */
-	movq %rbx,%rsi		/* vector number */
-	movq CS(%rsp),%rdx
-	movq RIP(%rsp),%rcx
-	xorl %eax,%eax
-	leaq early_idt_msg(%rip),%rdi
-	call early_printk
-	cmpl $2,early_recursion_flag(%rip)
-	jz  1f
-	call dump_stack
-#ifdef CONFIG_KALLSYMS	
-	leaq early_idt_ripmsg(%rip),%rdi
-	movq RIP(%rsp),%rsi	# %rip again
-	call __print_symbol
-#endif
-#endif /* EARLY_PRINTK */
-1:	hlt
-	jmp 1b
 
-20:	/* Exception table entry found or page table generated */
+20:
 	decl early_recursion_flag(%rip)
 	jmp restore_regs_and_iret
 ENDPROC(early_idt_handler_common)
@@ -427,16 +393,9 @@  ENDPROC(early_idt_handler_common)
 	__INITDATA
 
 	.balign 4
-early_recursion_flag:
+GLOBAL(early_recursion_flag)
 	.long 0
 
-#ifdef CONFIG_EARLY_PRINTK
-early_idt_msg:
-	.asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
-early_idt_ripmsg:
-	.asciz "RIP %s\n"
-#endif /* CONFIG_EARLY_PRINTK */
-
 #define NEXT_PAGE(name) \
 	.balign	PAGE_SIZE; \
 GLOBAL(name)
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index d6e4e6fb4002..8997022abebc 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -90,8 +90,10 @@  int fixup_exception(struct pt_regs *regs, int trapnr)
 	return handler(e, regs, trapnr);
 }
 
+extern unsigned int early_recursion_flag;
+
 /* Restricted version used during very early boot */
-int __init early_fixup_exception(struct pt_regs *regs, int trapnr)
+void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
 {
 	const struct exception_table_entry *e;
 	unsigned long new_ip;
@@ -99,21 +101,38 @@  int __init early_fixup_exception(struct pt_regs *regs, int trapnr)
 
 	/* Ignore early NMIs. */
 	if (trapnr == X86_TRAP_NMI)
-		return 1;
+		return;
+
+	if (early_recursion_flag > 2)
+		goto halt_loop;
+
+	if (regs->cs != __KERNEL_CS)
+		goto fail;
 
 	e = search_exception_tables(regs->ip);
 	if (!e)
-		return 0;
+		goto fail;
 
 	new_ip  = ex_fixup_addr(e);
 	handler = ex_fixup_handler(e);
 
 	/* special handling not supported during early boot */
 	if (handler != ex_handler_default)
-		return 0;
+		goto fail;
 
 	regs->ip = new_ip;
-	return 1;
+	return;
+
+fail:
+	early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
+		     (unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
+		     regs->orig_ax, read_cr2());
+
+	show_regs(regs);
+
+halt_loop:
+	while (true)
+		halt();
 }
 
 /*