Message ID | 7c88ed36805d36841ab03ec3b48b4122c4418d71.1502164668.git.luto@kernel.org (mailing list archive) |
---|---|
State | New, archived |
On 08/08/17 05:59, Andy Lutomirski wrote:
> Xen's raw SYSCALL entries are much less weird than native. Rather
> than fudging them to look like native entries, use the Xen-provided
> stack frame directly.
>
> This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
> the SWAPGS_UNSAFE_STACK paravirt hook. The SYSENTER code would
> benefit from similar treatment.
>
> This makes one change to the native code path: the compat
> instruction that clears the high 32 bits of %rax is moved slightly
> later. I'd be surprised if this affects performance at all.
>
> Signed-off-by: Andy Lutomirski <luto@kernel.org>

Reviewed-by: Juergen Gross <jgross@suse.com>
Tested-by: Juergen Gross <jgross@suse.com>


Thanks,

Juergen
On Mon, Aug 7, 2017 at 11:59 PM, Andy Lutomirski <luto@kernel.org> wrote:
> Xen's raw SYSCALL entries are much less weird than native. Rather
> than fudging them to look like native entries, use the Xen-provided
> stack frame directly.
>
> This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
> the SWAPGS_UNSAFE_STACK paravirt hook. The SYSENTER code would
> benefit from similar treatment.
>
> This makes one change to the native code path: the compat
> instruction that clears the high 32 bits of %rax is moved slightly
> later. I'd be surprised if this affects performance at all.
>
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
>
> Changes from v1 (which I never actually emailed):
>  - Fix zero-extension in the compat case.
>
>  arch/x86/entry/entry_64.S        |  9 ++-------
>  arch/x86/entry/entry_64_compat.S |  7 +++----
>  arch/x86/xen/xen-asm_64.S        | 23 +++++++++--------------
>  3 files changed, 14 insertions(+), 25 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index aa58155187c5..7cee92cf807f 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -142,14 +142,8 @@ ENTRY(entry_SYSCALL_64)
>           * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
>           * it is too small to ever cause noticeable irq latency.
>           */
> -        SWAPGS_UNSAFE_STACK
> -        /*
> -         * A hypervisor implementation might want to use a label
> -         * after the swapgs, so that it can do the swapgs
> -         * for the guest and jump here on syscall.
> -         */
> -GLOBAL(entry_SYSCALL_64_after_swapgs)
>
> +        swapgs
>          movq    %rsp, PER_CPU_VAR(rsp_scratch)
>          movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>
> @@ -161,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
>          pushq   %r11                    /* pt_regs->flags */
>          pushq   $__USER_CS              /* pt_regs->cs */
>          pushq   %rcx                    /* pt_regs->ip */
> +GLOBAL(entry_SYSCALL_64_after_hwframe)
>          pushq   %rax                    /* pt_regs->orig_ax */
>          pushq   %rdi                    /* pt_regs->di */
>          pushq   %rsi                    /* pt_regs->si */
> diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
> index e1721dafbcb1..5314d7b8e5ad 100644
> --- a/arch/x86/entry/entry_64_compat.S
> +++ b/arch/x86/entry/entry_64_compat.S
> @@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat)
>   */
>  ENTRY(entry_SYSCALL_compat)
>          /* Interrupts are off on entry. */
> -        SWAPGS_UNSAFE_STACK
> +        swapgs
>
>          /* Stash user ESP and switch to the kernel stack. */
>          movl    %esp, %r8d
>          movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>
> -        /* Zero-extending 32-bit regs, do not remove */
> -        movl    %eax, %eax
> -
>          /* Construct struct pt_regs on stack */
>          pushq   $__USER32_DS            /* pt_regs->ss */
>          pushq   %r8                     /* pt_regs->sp */
>          pushq   %r11                    /* pt_regs->flags */
>          pushq   $__USER32_CS            /* pt_regs->cs */
>          pushq   %rcx                    /* pt_regs->ip */
> +GLOBAL(entry_SYSCALL_compat_after_hwframe)
> +        movl    %eax, %eax              /* discard orig_ax high bits */
>          pushq   %rax                    /* pt_regs->orig_ax */
>          pushq   %rdi                    /* pt_regs->di */
>          pushq   %rsi                    /* pt_regs->si */
> diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
> index c3df43141e70..a8a4f4c460a6 100644
> --- a/arch/x86/xen/xen-asm_64.S
> +++ b/arch/x86/xen/xen-asm_64.S
> @@ -82,34 +82,29 @@ RELOC(xen_sysret64, 1b+1)
>   *      rip
>   *      r11
>   * rsp->rcx
> - *
> - * In all the entrypoints, we undo all that to make it look like a
> - * CPU-generated syscall/sysenter and jump to the normal entrypoint.
>   */
>
> -.macro undo_xen_syscall
> -        mov 0*8(%rsp), %rcx
> -        mov 1*8(%rsp), %r11
> -        mov 5*8(%rsp), %rsp
> -.endm
> -
>  /* Normal 64-bit system call target */
>  ENTRY(xen_syscall_target)
> -        undo_xen_syscall
> -        jmp entry_SYSCALL_64_after_swapgs
> +        popq %rcx
> +        popq %r11
> +        jmp entry_SYSCALL_64_after_hwframe
>  ENDPROC(xen_syscall_target)
>
>  #ifdef CONFIG_IA32_EMULATION
>
>  /* 32-bit compat syscall target */
>  ENTRY(xen_syscall32_target)
> -        undo_xen_syscall
> -        jmp entry_SYSCALL_compat
> +        popq %rcx
> +        popq %r11
> +        jmp entry_SYSCALL_compat_after_hwframe
>  ENDPROC(xen_syscall32_target)
>
>  /* 32-bit compat sysenter target */
>  ENTRY(xen_sysenter_target)
> -        undo_xen_syscall
> +        mov 0*8(%rsp), %rcx
> +        mov 1*8(%rsp), %r11
> +        mov 5*8(%rsp), %rsp
>          jmp entry_SYSENTER_compat
>  ENDPROC(xen_sysenter_target)

This patch causes the iopl_32 and ioperm_32 self-tests to fail on a
64-bit PV kernel. The 64-bit versions pass. It gets a seg fault after
"parent: write to 0x80 (should fail)", and the fault isn't caught by
the signal handler. It just dumps back to the shell.

The tests pass after reverting this.

--
Brian Gerst
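[Editor's note] The tests Brian refers to are presumably the 32-bit builds of the x86
selftests (tools/testing/selftests/x86, ioperm.c and iopl.c compiled with -m32). The
sketch below is only a simplified approximation of the step he quotes, not the in-tree
test code: the real tests also fork and check the parent/child permission behaviour
(hence the "parent: ..." message), which this sketch omits. After I/O access to port
0x80 is revoked, the outb should raise #GP, which the kernel delivers as SIGSEGV, and
the handler is expected to catch it. Build with something like
"gcc -m32 -O2 -o ioperm_sketch ioperm_sketch.c" and run as root.

/* Simplified sketch (not the in-tree selftest): verify that a write to
 * port 0x80 faults after ioperm() revokes access, and that the SIGSEGV
 * is caught by our handler instead of killing the process. */
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <sys/io.h>

static sigjmp_buf jb;

static void on_sigsegv(int sig)
{
        siglongjmp(jb, 1);              /* resume in main() */
}

int main(void)
{
        signal(SIGSEGV, on_sigsegv);

        if (ioperm(0x80, 1, 1)) {       /* needs root / CAP_SYS_RAWIO */
                perror("ioperm");
                return 1;
        }
        outb(0, 0x80);                  /* permitted: must not fault */

        ioperm(0x80, 1, 0);             /* revoke access again */

        printf("write to 0x80 (should fail)\n");
        if (sigsetjmp(jb, 1) == 0) {
                outb(0, 0x80);          /* #GP -> SIGSEGV expected here */
                printf("FAIL: write was allowed\n");
                return 1;
        }
        printf("OK: faulted and the handler caught it\n");
        return 0;
}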
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index aa58155187c5..7cee92cf807f 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -142,14 +142,8 @@ ENTRY(entry_SYSCALL_64)
          * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
          * it is too small to ever cause noticeable irq latency.
          */
-        SWAPGS_UNSAFE_STACK
-        /*
-         * A hypervisor implementation might want to use a label
-         * after the swapgs, so that it can do the swapgs
-         * for the guest and jump here on syscall.
-         */
-GLOBAL(entry_SYSCALL_64_after_swapgs)

+        swapgs
         movq    %rsp, PER_CPU_VAR(rsp_scratch)
         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp

@@ -161,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
         pushq   %r11                    /* pt_regs->flags */
         pushq   $__USER_CS              /* pt_regs->cs */
         pushq   %rcx                    /* pt_regs->ip */
+GLOBAL(entry_SYSCALL_64_after_hwframe)
         pushq   %rax                    /* pt_regs->orig_ax */
         pushq   %rdi                    /* pt_regs->di */
         pushq   %rsi                    /* pt_regs->si */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e1721dafbcb1..5314d7b8e5ad 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat)
  */
 ENTRY(entry_SYSCALL_compat)
         /* Interrupts are off on entry. */
-        SWAPGS_UNSAFE_STACK
+        swapgs

         /* Stash user ESP and switch to the kernel stack. */
         movl    %esp, %r8d
         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp

-        /* Zero-extending 32-bit regs, do not remove */
-        movl    %eax, %eax
-
         /* Construct struct pt_regs on stack */
         pushq   $__USER32_DS            /* pt_regs->ss */
         pushq   %r8                     /* pt_regs->sp */
         pushq   %r11                    /* pt_regs->flags */
         pushq   $__USER32_CS            /* pt_regs->cs */
         pushq   %rcx                    /* pt_regs->ip */
+GLOBAL(entry_SYSCALL_compat_after_hwframe)
+        movl    %eax, %eax              /* discard orig_ax high bits */
         pushq   %rax                    /* pt_regs->orig_ax */
         pushq   %rdi                    /* pt_regs->di */
         pushq   %rsi                    /* pt_regs->si */
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c3df43141e70..a8a4f4c460a6 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -82,34 +82,29 @@ RELOC(xen_sysret64, 1b+1)
  *      rip
  *      r11
  * rsp->rcx
- *
- * In all the entrypoints, we undo all that to make it look like a
- * CPU-generated syscall/sysenter and jump to the normal entrypoint.
  */

-.macro undo_xen_syscall
-        mov 0*8(%rsp), %rcx
-        mov 1*8(%rsp), %r11
-        mov 5*8(%rsp), %rsp
-.endm
-
 /* Normal 64-bit system call target */
 ENTRY(xen_syscall_target)
-        undo_xen_syscall
-        jmp entry_SYSCALL_64_after_swapgs
+        popq %rcx
+        popq %r11
+        jmp entry_SYSCALL_64_after_hwframe
 ENDPROC(xen_syscall_target)

 #ifdef CONFIG_IA32_EMULATION

 /* 32-bit compat syscall target */
 ENTRY(xen_syscall32_target)
-        undo_xen_syscall
-        jmp entry_SYSCALL_compat
+        popq %rcx
+        popq %r11
+        jmp entry_SYSCALL_compat_after_hwframe
 ENDPROC(xen_syscall32_target)

 /* 32-bit compat sysenter target */
 ENTRY(xen_sysenter_target)
-        undo_xen_syscall
+        mov 0*8(%rsp), %rcx
+        mov 1*8(%rsp), %r11
+        mov 5*8(%rsp), %rsp
         jmp entry_SYSENTER_compat
 ENDPROC(xen_sysenter_target)
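[Editor's note] Why the two popq instructions in the Xen stubs are enough: per the
comment kept in xen-asm_64.S, Xen enters with rcx and r11 sitting on top of an
iret-style frame, so popping them leaves exactly the five-slot hardware frame
(ip, cs, flags, sp, ss) that the native path has already pushed by the time it
reaches the new ..._after_hwframe labels. The C program below is illustration only,
not kernel code; the struct simply mirrors those five slots in stack order, lowest
address (the value of %rsp at the label) first.

/* Illustration only: the five-slot hardware frame left on the stack at
 * entry_SYSCALL_64_after_hwframe, both by the native pushq sequence and
 * by the Xen stubs after "popq %rcx; popq %r11". */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct hwframe {
        uint64_t ip;            /* user RIP, %rsp points here at the label */
        uint64_t cs;            /* user CS */
        uint64_t flags;         /* user RFLAGS */
        uint64_t sp;            /* user RSP */
        uint64_t ss;            /* user SS */
};

int main(void)
{
        printf("ip    at %%rsp+%zu\n", offsetof(struct hwframe, ip));
        printf("cs    at %%rsp+%zu\n", offsetof(struct hwframe, cs));
        printf("flags at %%rsp+%zu\n", offsetof(struct hwframe, flags));
        printf("sp    at %%rsp+%zu\n", offsetof(struct hwframe, sp));
        printf("ss    at %%rsp+%zu\n", offsetof(struct hwframe, ss));
        return 0;
}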
Xen's raw SYSCALL entries are much less weird than native. Rather
than fudging them to look like native entries, use the Xen-provided
stack frame directly.

This lets us eliminate entry_SYSCALL_64_after_swapgs and two uses of
the SWAPGS_UNSAFE_STACK paravirt hook. The SYSENTER code would
benefit from similar treatment.

This makes one change to the native code path: the compat
instruction that clears the high 32 bits of %rax is moved slightly
later. I'd be surprised if this affects performance at all.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---

Changes from v1 (which I never actually emailed):
 - Fix zero-extension in the compat case.

 arch/x86/entry/entry_64.S        |  9 ++-------
 arch/x86/entry/entry_64_compat.S |  7 +++----
 arch/x86/xen/xen-asm_64.S        | 23 +++++++++--------------
 3 files changed, 14 insertions(+), 25 deletions(-)
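[Editor's note] The "instruction that clears the high 32 bits of %rax" relies on the
x86-64 rule that writing any 32-bit register zero-extends the result into the full
64-bit register. Placing it just after the new entry_SYSCALL_compat_after_hwframe
label means the Xen compat stub also gets that zero-extension before %rax is pushed
as orig_ax, which is the v1->v2 fix noted above. The program below is a small
user-space demonstration of the architectural rule only (requires gcc or clang on
x86-64), not part of the patch.

/* Illustration only: a 32-bit register write ("movl %eax, %eax" style)
 * clears the upper 32 bits of the corresponding 64-bit register. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t rax = 0xdeadbeef00000065ULL;   /* junk in the high bits */

        /* "%k0" asks for the 32-bit name of the chosen register, so this
         * emits a movl from the register to itself, zero-extending it. */
        asm("movl %k0, %k0" : "+r"(rax));

        printf("0x%llx\n", (unsigned long long)rax);    /* prints 0x65 */
        return 0;
}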