diff mbox series

[v9,04/11] x86/entry/64: Adapt assembly for PIE support

Message ID 20190730191303.206365-5-thgarnie@chromium.org (mailing list archive)
State New, archived
Headers show
Series x86: PIE support to extend KASLR randomization | expand

Commit Message

Thomas Garnier July 30, 2019, 7:12 p.m. UTC
Change the assembly code to use only relative references of symbols for the
kernel to be PIE compatible.

Position Independent Executable (PIE) support will allow to extend the
KASLR randomization range below 0xffffffff80000000.

Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
---
 arch/x86/entry/entry_64.S | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

Comments

Borislav Petkov Aug. 5, 2019, 5:28 p.m. UTC | #1
On Tue, Jul 30, 2019 at 12:12:48PM -0700, Thomas Garnier wrote:
> Change the assembly code to use only relative references of symbols for the
> kernel to be PIE compatible.
> 
> Position Independent Executable (PIE) support will allow to extend the
> KASLR randomization range below 0xffffffff80000000.
> 
> Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
> Reviewed-by: Kees Cook <keescook@chromium.org>
> ---
>  arch/x86/entry/entry_64.S | 16 +++++++++++-----
>  1 file changed, 11 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index 3f5a978a02a7..4b588a902009 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -1317,7 +1317,8 @@ ENTRY(error_entry)
>  	movl	%ecx, %eax			/* zero extend */
>  	cmpq	%rax, RIP+8(%rsp)
>  	je	.Lbstep_iret
> -	cmpq	$.Lgs_change, RIP+8(%rsp)
> +	leaq	.Lgs_change(%rip), %rcx
> +	cmpq	%rcx, RIP+8(%rsp)
>  	jne	.Lerror_entry_done
>  
>  	/*
> @@ -1514,10 +1515,10 @@ ENTRY(nmi)
>  	 * resume the outer NMI.
>  	 */
>  
> -	movq	$repeat_nmi, %rdx
> +	leaq	repeat_nmi(%rip), %rdx
>  	cmpq	8(%rsp), %rdx
>  	ja	1f
> -	movq	$end_repeat_nmi, %rdx
> +	leaq	end_repeat_nmi(%rip), %rdx
>  	cmpq	8(%rsp), %rdx
>  	ja	nested_nmi_out
>  1:
> @@ -1571,7 +1572,8 @@ nested_nmi:
>  	pushq	%rdx
>  	pushfq
>  	pushq	$__KERNEL_CS
> -	pushq	$repeat_nmi
> +	leaq	repeat_nmi(%rip), %rdx
> +	pushq	%rdx
>  
>  	/* Put stack back */
>  	addq	$(6*8), %rsp
> @@ -1610,7 +1612,11 @@ first_nmi:
>  	addq	$8, (%rsp)	/* Fix up RSP */
>  	pushfq			/* RFLAGS */
>  	pushq	$__KERNEL_CS	/* CS */
> -	pushq	$1f		/* RIP */
> +	pushq	$0		/* Future return address */
> +	pushq	%rax		/* Save RAX */
> +	leaq	1f(%rip), %rax	/* RIP */
> +	movq    %rax, 8(%rsp)   /* Put 1f on return address */
> +	popq	%rax		/* Restore RAX */

Can't you just use a callee-clobbered reg here instead of preserving
%rax?
Thomas Garnier Aug. 5, 2019, 5:50 p.m. UTC | #2
On Mon, Aug 5, 2019 at 10:28 AM Borislav Petkov <bp@alien8.de> wrote:
>
> On Tue, Jul 30, 2019 at 12:12:48PM -0700, Thomas Garnier wrote:
> > Change the assembly code to use only relative references of symbols for the
> > kernel to be PIE compatible.
> >
> > Position Independent Executable (PIE) support will allow to extend the
> > KASLR randomization range below 0xffffffff80000000.
> >
> > Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
> > Reviewed-by: Kees Cook <keescook@chromium.org>
> > ---
> >  arch/x86/entry/entry_64.S | 16 +++++++++++-----
> >  1 file changed, 11 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> > index 3f5a978a02a7..4b588a902009 100644
> > --- a/arch/x86/entry/entry_64.S
> > +++ b/arch/x86/entry/entry_64.S
> > @@ -1317,7 +1317,8 @@ ENTRY(error_entry)
> >       movl    %ecx, %eax                      /* zero extend */
> >       cmpq    %rax, RIP+8(%rsp)
> >       je      .Lbstep_iret
> > -     cmpq    $.Lgs_change, RIP+8(%rsp)
> > +     leaq    .Lgs_change(%rip), %rcx
> > +     cmpq    %rcx, RIP+8(%rsp)
> >       jne     .Lerror_entry_done
> >
> >       /*
> > @@ -1514,10 +1515,10 @@ ENTRY(nmi)
> >        * resume the outer NMI.
> >        */
> >
> > -     movq    $repeat_nmi, %rdx
> > +     leaq    repeat_nmi(%rip), %rdx
> >       cmpq    8(%rsp), %rdx
> >       ja      1f
> > -     movq    $end_repeat_nmi, %rdx
> > +     leaq    end_repeat_nmi(%rip), %rdx
> >       cmpq    8(%rsp), %rdx
> >       ja      nested_nmi_out
> >  1:
> > @@ -1571,7 +1572,8 @@ nested_nmi:
> >       pushq   %rdx
> >       pushfq
> >       pushq   $__KERNEL_CS
> > -     pushq   $repeat_nmi
> > +     leaq    repeat_nmi(%rip), %rdx
> > +     pushq   %rdx
> >
> >       /* Put stack back */
> >       addq    $(6*8), %rsp
> > @@ -1610,7 +1612,11 @@ first_nmi:
> >       addq    $8, (%rsp)      /* Fix up RSP */
> >       pushfq                  /* RFLAGS */
> >       pushq   $__KERNEL_CS    /* CS */
> > -     pushq   $1f             /* RIP */
> > +     pushq   $0              /* Future return address */
> > +     pushq   %rax            /* Save RAX */
> > +     leaq    1f(%rip), %rax  /* RIP */
> > +     movq    %rax, 8(%rsp)   /* Put 1f on return address */
> > +     popq    %rax            /* Restore RAX */
>
> Can't you just use a callee-clobbered reg here instead of preserving
> %rax?

I saw that %rdx was used for temporary usage and restored before the
end so I assumed that it was not an option.

>
> --
> Regards/Gruss,
>     Boris.
>
> Good mailing practices for 400: avoid top-posting and trim the reply.
Borislav Petkov Aug. 6, 2019, 5:08 a.m. UTC | #3
On Mon, Aug 05, 2019 at 10:50:30AM -0700, Thomas Garnier wrote:
> I saw that %rdx was used for temporary usage and restored before the
> end so I assumed that it was not an option.

PUSH_AND_CLEAR_REGS saves all regs earlier so I think you should be
able to use others. Like SAVE_AND_SWITCH_TO_KERNEL_CR3/RESTORE_CR3, for
example, uses %r15 and %r14.
Peter Zijlstra Aug. 6, 2019, 8:30 a.m. UTC | #4
On Tue, Aug 06, 2019 at 07:08:51AM +0200, Borislav Petkov wrote:
> On Mon, Aug 05, 2019 at 10:50:30AM -0700, Thomas Garnier wrote:
> > I saw that %rdx was used for temporary usage and restored before the
> > end so I assumed that it was not an option.
> 
> PUSH_AND_CLEAR_REGS saves all regs earlier so I think you should be
> able to use others. Like SAVE_AND_SWITCH_TO_KERNEL_CR3/RESTORE_CR3, for
> example, uses %r15 and %r14.

AFAICT the CONFIG_DEBUG_ENTRY thing he's changing is before we setup
pt_regs.

Also consider the UNWIND hint that's in there, it states we only have
the IRET frame on stack, not a full regs set.
Borislav Petkov Aug. 6, 2019, 12:35 p.m. UTC | #5
+ rostedt.

Steve, pls have a look at the patch at the beginning of this thread as
it touches the reentrant NMI magic. :)

Thx.

On Tue, Aug 06, 2019 at 10:30:32AM +0200, Peter Zijlstra wrote:
> On Tue, Aug 06, 2019 at 07:08:51AM +0200, Borislav Petkov wrote:
> > On Mon, Aug 05, 2019 at 10:50:30AM -0700, Thomas Garnier wrote:
> > > I saw that %rdx was used for temporary usage and restored before the
> > > end so I assumed that it was not an option.
> > 
> > PUSH_AND_CLEAR_REGS saves all regs earlier so I think you should be
> > able to use others. Like SAVE_AND_SWITCH_TO_KERNEL_CR3/RESTORE_CR3, for
> > example, uses %r15 and %r14.
> 
> AFAICT the CONFIG_DEBUG_ENTRY thing he's changing is before we setup
> pt_regs.
> 
> Also consider the UNWIND hint that's in there, it states we only have
> the IRET frame on stack, not a full regs set.

Ok, after discussing it on IRC, I guess let's leave it like that. I
guess little ugly is better than a lot more ugly if we're wanting to
attempt to free up some regs here to save us the rax preservation.
Probably not worth the effort by a long shot.

Thx.
Steven Rostedt Aug. 6, 2019, 1:59 p.m. UTC | #6
On Mon, Aug 05, 2019 at 07:28:54PM +0200, Borislav Petkov wrote:
> >  1:
> > @@ -1571,7 +1572,8 @@ nested_nmi:
> >  	pushq	%rdx
> >  	pushfq
> >  	pushq	$__KERNEL_CS
> > -	pushq	$repeat_nmi
> > +	leaq	repeat_nmi(%rip), %rdx
> > +	pushq	%rdx
> >  
> >  	/* Put stack back */
> >  	addq	$(6*8), %rsp
> > @@ -1610,7 +1612,11 @@ first_nmi:
> >  	addq	$8, (%rsp)	/* Fix up RSP */
> >  	pushfq			/* RFLAGS */
> >  	pushq	$__KERNEL_CS	/* CS */
> > -	pushq	$1f		/* RIP */
> > +	pushq	$0		/* Future return address */
> > +	pushq	%rax		/* Save RAX */
> > +	leaq	1f(%rip), %rax	/* RIP */
> > +	movq    %rax, 8(%rsp)   /* Put 1f on return address */
> > +	popq	%rax		/* Restore RAX */
> 
> Can't you just use a callee-clobbered reg here instead of preserving
> %rax?

As Peter stated later in this thread, we only have the IRQ stack frame saved
here, because we just took an NMI, and this is the logic to determine if it
was a nested NMI or not (where we have to be *very* careful about touching the
stack!)

That said, the code modified here is to test the NMI nesting logic (only
enabled with CONFIG_DEBUG_ENTRY), and what it is doing is re-enabling NMIs
before calling the first NMI handler, to help trigger nested NMIs without the
need of a break point or page fault (iret enables NMIs again).

This code is in the path of the "first nmi" (we confirmed that this is not
nested), which means that it should be safe to push onto the stack.

Yes, we need to save and restore whatever reg we used. The only comment I
would make is to use %rdx instead of %rax as that has been our "scratch"
register used before saving pt_regs. Just to be consistent.

-- Steve
Borislav Petkov Aug. 6, 2019, 3:35 p.m. UTC | #7
On Tue, Aug 06, 2019 at 09:59:42AM -0400, Steven Rostedt wrote:
> As Peter stated later in this thread, we only have the IRQ stack frame saved
> here, because we just took an NMI, and this is the logic to determine if it
> was a nested NMI or not (where we have to be *very* careful about touching the
> stack!)
> 
> That said, the code modified here is to test the NMI nesting logic (only
> enabled with CONFIG_DEBUG_ENTRY), and what it is doing is re-enabling NMIs
> before calling the first NMI handler, to help trigger nested NMIs without the
> need of a break point or page fault (iret enables NMIs again).
> 
> This code is in the path of the "first nmi" (we confirmed that this is not
> nested), which means that it should be safe to push onto the stack.

Thanks for the explanation!

> Yes, we need to save and restore whatever reg we used. The only comment I
> would make is to use %rdx instead of %rax as that has been our "scratch"
> register used before saving pt_regs. Just to be consistent.

Yap, makes sense.

Thx.
diff mbox series

Patch

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3f5a978a02a7..4b588a902009 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1317,7 +1317,8 @@  ENTRY(error_entry)
 	movl	%ecx, %eax			/* zero extend */
 	cmpq	%rax, RIP+8(%rsp)
 	je	.Lbstep_iret
-	cmpq	$.Lgs_change, RIP+8(%rsp)
+	leaq	.Lgs_change(%rip), %rcx
+	cmpq	%rcx, RIP+8(%rsp)
 	jne	.Lerror_entry_done
 
 	/*
@@ -1514,10 +1515,10 @@  ENTRY(nmi)
 	 * resume the outer NMI.
 	 */
 
-	movq	$repeat_nmi, %rdx
+	leaq	repeat_nmi(%rip), %rdx
 	cmpq	8(%rsp), %rdx
 	ja	1f
-	movq	$end_repeat_nmi, %rdx
+	leaq	end_repeat_nmi(%rip), %rdx
 	cmpq	8(%rsp), %rdx
 	ja	nested_nmi_out
 1:
@@ -1571,7 +1572,8 @@  nested_nmi:
 	pushq	%rdx
 	pushfq
 	pushq	$__KERNEL_CS
-	pushq	$repeat_nmi
+	leaq	repeat_nmi(%rip), %rdx
+	pushq	%rdx
 
 	/* Put stack back */
 	addq	$(6*8), %rsp
@@ -1610,7 +1612,11 @@  first_nmi:
 	addq	$8, (%rsp)	/* Fix up RSP */
 	pushfq			/* RFLAGS */
 	pushq	$__KERNEL_CS	/* CS */
-	pushq	$1f		/* RIP */
+	pushq	$0		/* Future return address */
+	pushq	%rax		/* Save RAX */
+	leaq	1f(%rip), %rax	/* RIP */
+	movq    %rax, 8(%rsp)   /* Put 1f on return address */
+	popq	%rax		/* Restore RAX */
 	iretq			/* continues at repeat_nmi below */
 	UNWIND_HINT_IRET_REGS
 1: