diff mbox

[v2] x86/xen/64: Rearrange the SYSCALL entries

Message ID 7c88ed36805d36841ab03ec3b48b4122c4418d71.1502164668.git.luto@kernel.org (mailing list archive)
State New, archived
Headers show

Commit Message

Andy Lutomirski Aug. 8, 2017, 3:59 a.m. UTC
Xen's raw SYSCALL entries are much less weird than native.  Rather
than fudging them to look like native entries, use the Xen-provided
stack frame directly.

This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
the SWAPGS_UNSAFE_STACK paravirt hook.  The SYSENTER code would
benefit from similar treatment.

This makes one change to the native code path: the compat
instruction that clears the high 32 bits of %rax is moved slightly
later.  I'd be surprised if this affects performance at all.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---

Changes from v1 (which I never actually emailed):
 - Fix zero-extension in the compat case.

 arch/x86/entry/entry_64.S        |  9 ++-------
 arch/x86/entry/entry_64_compat.S |  7 +++----
 arch/x86/xen/xen-asm_64.S        | 23 +++++++++--------------
 3 files changed, 14 insertions(+), 25 deletions(-)

Comments

Jürgen Groß Aug. 9, 2017, 3:39 p.m. UTC | #1
On 08/08/17 05:59, Andy Lutomirski wrote:
> Xen's raw SYSCALL entries are much less weird than native.  Rather
> than fudging them to look like native entries, use the Xen-provided
> stack frame directly.
> 
> This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
> the SWAPGS_UNSAFE_STACK paravirt hook.  The SYSENTER code would
> benefit from similar treatment.
> 
> This makes one change to the native code path: the compat
> instruction that clears the high 32 bits of %rax is moved slightly
> later.  I'd be surprised if this affects performance at all.
> 
> Signed-off-by: Andy Lutomirski <luto@kernel.org>

Reviewed-by: Juergen Gross <jgross@suse.com>
Tested-by: Juergen Gross <jgross@suse.com>


Thanks,

Juergen
Brian Gerst Aug. 14, 2017, 2:44 a.m. UTC | #2
On Mon, Aug 7, 2017 at 11:59 PM, Andy Lutomirski <luto@kernel.org> wrote:
> Xen's raw SYSCALL entries are much less weird than native.  Rather
> than fudging them to look like native entries, use the Xen-provided
> stack frame directly.
>
> This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
> the SWAPGS_UNSAFE_STACK paravirt hook.  The SYSENTER code would
> benefit from similar treatment.
>
> This makes one change to the native code path: the compat
> instruction that clears the high 32 bits of %rax is moved slightly
> later.  I'd be surprised if this affects performance at all.
>
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>
> Changes from v1 (which I never actually emailed):
>  - Fix zero-extension in the compat case.
>
>  arch/x86/entry/entry_64.S        |  9 ++-------
>  arch/x86/entry/entry_64_compat.S |  7 +++----
>  arch/x86/xen/xen-asm_64.S        | 23 +++++++++--------------
>  3 files changed, 14 insertions(+), 25 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index aa58155187c5..7cee92cf807f 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -142,14 +142,8 @@ ENTRY(entry_SYSCALL_64)
>          * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
>          * it is too small to ever cause noticeable irq latency.
>          */
> -       SWAPGS_UNSAFE_STACK
> -       /*
> -        * A hypervisor implementation might want to use a label
> -        * after the swapgs, so that it can do the swapgs
> -        * for the guest and jump here on syscall.
> -        */
> -GLOBAL(entry_SYSCALL_64_after_swapgs)
>
> +       swapgs
>         movq    %rsp, PER_CPU_VAR(rsp_scratch)
>         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>
> @@ -161,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
>         pushq   %r11                            /* pt_regs->flags */
>         pushq   $__USER_CS                      /* pt_regs->cs */
>         pushq   %rcx                            /* pt_regs->ip */
> +GLOBAL(entry_SYSCALL_64_after_hwframe)
>         pushq   %rax                            /* pt_regs->orig_ax */
>         pushq   %rdi                            /* pt_regs->di */
>         pushq   %rsi                            /* pt_regs->si */
> diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
> index e1721dafbcb1..5314d7b8e5ad 100644
> --- a/arch/x86/entry/entry_64_compat.S
> +++ b/arch/x86/entry/entry_64_compat.S
> @@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat)
>   */
>  ENTRY(entry_SYSCALL_compat)
>         /* Interrupts are off on entry. */
> -       SWAPGS_UNSAFE_STACK
> +       swapgs
>
>         /* Stash user ESP and switch to the kernel stack. */
>         movl    %esp, %r8d
>         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>
> -       /* Zero-extending 32-bit regs, do not remove */
> -       movl    %eax, %eax
> -
>         /* Construct struct pt_regs on stack */
>         pushq   $__USER32_DS            /* pt_regs->ss */
>         pushq   %r8                     /* pt_regs->sp */
>         pushq   %r11                    /* pt_regs->flags */
>         pushq   $__USER32_CS            /* pt_regs->cs */
>         pushq   %rcx                    /* pt_regs->ip */
> +GLOBAL(entry_SYSCALL_compat_after_hwframe)
> +       movl    %eax, %eax              /* discard orig_ax high bits */
>         pushq   %rax                    /* pt_regs->orig_ax */
>         pushq   %rdi                    /* pt_regs->di */
>         pushq   %rsi                    /* pt_regs->si */
> diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
> index c3df43141e70..a8a4f4c460a6 100644
> --- a/arch/x86/xen/xen-asm_64.S
> +++ b/arch/x86/xen/xen-asm_64.S
> @@ -82,34 +82,29 @@ RELOC(xen_sysret64, 1b+1)
>   *     rip
>   *     r11
>   * rsp->rcx
> - *
> - * In all the entrypoints, we undo all that to make it look like a
> - * CPU-generated syscall/sysenter and jump to the normal entrypoint.
>   */
>
> -.macro undo_xen_syscall
> -       mov 0*8(%rsp), %rcx
> -       mov 1*8(%rsp), %r11
> -       mov 5*8(%rsp), %rsp
> -.endm
> -
>  /* Normal 64-bit system call target */
>  ENTRY(xen_syscall_target)
> -       undo_xen_syscall
> -       jmp entry_SYSCALL_64_after_swapgs
> +       popq %rcx
> +       popq %r11
> +       jmp entry_SYSCALL_64_after_hwframe
>  ENDPROC(xen_syscall_target)
>
>  #ifdef CONFIG_IA32_EMULATION
>
>  /* 32-bit compat syscall target */
>  ENTRY(xen_syscall32_target)
> -       undo_xen_syscall
> -       jmp entry_SYSCALL_compat
> +       popq %rcx
> +       popq %r11
> +       jmp entry_SYSCALL_compat_after_hwframe
>  ENDPROC(xen_syscall32_target)
>
>  /* 32-bit compat sysenter target */
>  ENTRY(xen_sysenter_target)
> -       undo_xen_syscall
> +       mov 0*8(%rsp), %rcx
> +       mov 1*8(%rsp), %r11
> +       mov 5*8(%rsp), %rsp
>         jmp entry_SYSENTER_compat
>  ENDPROC(xen_sysenter_target)

This patch causes the iopl_32 and ioperm_32 self-tests to fail on a
64-bit PV kernel.  The 64-bit versions pass. It gets a seg fault after
"parent: write to 0x80 (should fail)", and the fault isn't caught by
the signal handler.  It just dumps back to the shell.  The tests pass
after reverting this.

--
Brian Gerst
diff mbox

Patch

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index aa58155187c5..7cee92cf807f 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -142,14 +142,8 @@  ENTRY(entry_SYSCALL_64)
 	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
 	 * it is too small to ever cause noticeable irq latency.
 	 */
-	SWAPGS_UNSAFE_STACK
-	/*
-	 * A hypervisor implementation might want to use a label
-	 * after the swapgs, so that it can do the swapgs
-	 * for the guest and jump here on syscall.
-	 */
-GLOBAL(entry_SYSCALL_64_after_swapgs)
 
+	swapgs
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
@@ -161,6 +155,7 @@  GLOBAL(entry_SYSCALL_64_after_swapgs)
 	pushq	%r11				/* pt_regs->flags */
 	pushq	$__USER_CS			/* pt_regs->cs */
 	pushq	%rcx				/* pt_regs->ip */
+GLOBAL(entry_SYSCALL_64_after_hwframe)
 	pushq	%rax				/* pt_regs->orig_ax */
 	pushq	%rdi				/* pt_regs->di */
 	pushq	%rsi				/* pt_regs->si */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e1721dafbcb1..5314d7b8e5ad 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -183,21 +183,20 @@  ENDPROC(entry_SYSENTER_compat)
  */
 ENTRY(entry_SYSCALL_compat)
 	/* Interrupts are off on entry. */
-	SWAPGS_UNSAFE_STACK
+	swapgs
 
 	/* Stash user ESP and switch to the kernel stack. */
 	movl	%esp, %r8d
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
-	/* Zero-extending 32-bit regs, do not remove */
-	movl	%eax, %eax
-
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER32_DS		/* pt_regs->ss */
 	pushq	%r8			/* pt_regs->sp */
 	pushq	%r11			/* pt_regs->flags */
 	pushq	$__USER32_CS		/* pt_regs->cs */
 	pushq	%rcx			/* pt_regs->ip */
+GLOBAL(entry_SYSCALL_compat_after_hwframe)
+	movl	%eax, %eax		/* discard orig_ax high bits */
 	pushq	%rax			/* pt_regs->orig_ax */
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c3df43141e70..a8a4f4c460a6 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -82,34 +82,29 @@  RELOC(xen_sysret64, 1b+1)
  *	rip
  *	r11
  * rsp->rcx
- *
- * In all the entrypoints, we undo all that to make it look like a
- * CPU-generated syscall/sysenter and jump to the normal entrypoint.
  */
 
-.macro undo_xen_syscall
-	mov 0*8(%rsp), %rcx
-	mov 1*8(%rsp), %r11
-	mov 5*8(%rsp), %rsp
-.endm
-
 /* Normal 64-bit system call target */
 ENTRY(xen_syscall_target)
-	undo_xen_syscall
-	jmp entry_SYSCALL_64_after_swapgs
+	popq %rcx
+	popq %r11
+	jmp entry_SYSCALL_64_after_hwframe
 ENDPROC(xen_syscall_target)
 
 #ifdef CONFIG_IA32_EMULATION
 
 /* 32-bit compat syscall target */
 ENTRY(xen_syscall32_target)
-	undo_xen_syscall
-	jmp entry_SYSCALL_compat
+	popq %rcx
+	popq %r11
+	jmp entry_SYSCALL_compat_after_hwframe
 ENDPROC(xen_syscall32_target)
 
 /* 32-bit compat sysenter target */
 ENTRY(xen_sysenter_target)
-	undo_xen_syscall
+	mov 0*8(%rsp), %rcx
+	mov 1*8(%rsp), %r11
+	mov 5*8(%rsp), %rsp
 	jmp entry_SYSENTER_compat
 ENDPROC(xen_sysenter_target)