[v3,25/33] KVM: VMX: Move vCPU-run code to a proper assembly routine

Message ID 20190125154120.19385-26-sean.j.christopherson@intel.com (mailing list archive)
State New, archived
Series KVM: VMX: Move vCPU-run to proper asm sub-routine

Commit Message

Sean Christopherson Jan. 25, 2019, 3:41 p.m. UTC
As evidenced by the myriad patches leading up to this moment, using
an inline asm blob for vCPU-run is nothing short of horrific.  It's also
been called "unholy", "an abomination" and likely a whole host of other
names that would violate the Code of Conduct if recorded here and now.

The code is relocated nearly verbatim, e.g. quotes, newlines, tabs and
__stringify need to be dropped, but other than those cosmetic changes
the only functional changes are to add the "call" and replace the final
"jmp" with a "ret".

Note that STACK_FRAME_NON_STANDARD is also dropped from __vmx_vcpu_run().

Suggested-by: Andi Kleen <ak@linux.intel.com>
Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/vmx/vmenter.S | 147 +++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.c     | 138 +---------------------------------
 2 files changed, 148 insertions(+), 137 deletions(-)

Comments

Paolo Bonzini Jan. 31, 2019, 10:33 a.m. UTC | #1
On 25/01/19 16:41, Sean Christopherson wrote:
> +#ifdef CONFIG_X86_64
> +#define WORD_SIZE 8
> +#else
> +#define WORD_SIZE 4
> +#endif

Making a small change here:

diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index db223cfe9812..e64617f3b196 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -1,13 +1,10 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/linkage.h>
 #include <asm/asm.h>
+#include <asm/bitsperlong.h>
 #include <asm/kvm_vcpu_regs.h>

-#ifdef CONFIG_X86_64
-#define WORD_SIZE 8
-#else
-#define WORD_SIZE 4
-#endif
+#define WORD_SIZE (BITS_PER_LONG / 8)

 #define VCPU_RAX	__VCPU_REGS_RAX * WORD_SIZE
 #define VCPU_RCX	__VCPU_REGS_RCX * WORD_SIZE
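
For what it's worth, WORD_SIZE still evaluates to 8 on CONFIG_X86_64 and 4 on
32-bit: the kernel-side BITS_PER_LONG pulled in via asm/bitsperlong.h is
defined roughly as (sketch, not verbatim):

	/* include/asm-generic/bitsperlong.h, as seen by kernel code */
	#ifdef CONFIG_64BIT
	#define BITS_PER_LONG 64
	#else
	#define BITS_PER_LONG 32
	#endif

so (BITS_PER_LONG / 8) matches the open-coded #ifdef it replaces.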

Thanks,

Paolo

> +#define VCPU_RAX	__VCPU_REGS_RAX * WORD_SIZE
> +#define VCPU_RCX	__VCPU_REGS_RCX * WORD_SIZE
> +#define VCPU_RDX	__VCPU_REGS_RDX * WORD_SIZE
> +#define VCPU_RBX	__VCPU_REGS_RBX * WORD_SIZE
> +/* Intentionally omit RSP as it's context switched by hardware */
> +#define VCPU_RBP	__VCPU_REGS_RBP * WORD_SIZE
> +#define VCPU_RSI	__VCPU_REGS_RSI * WORD_SIZE
> +#define VCPU_RDI	__VCPU_REGS_RDI * WORD_SIZE
> +
> +#ifdef CONFIG_X86_64

Jiri Slaby Feb. 25, 2019, 7:57 a.m. UTC | #2
Hi,

On 25. 01. 19, 16:41, Sean Christopherson wrote:
> As evidenced by the myriad patches leading up to this moment, using
> an inline asm blob for vCPU-run is nothing short of horrific.  It's also
> been called "unholy", "an abomination" and likely a whole host of other
> names that would violate the Code of Conduct if recorded here and now.
> 
> The code is relocated nearly verbatim, e.g. quotes, newlines, tabs and
> __stringify need to be dropped, but other than those cosmetic changes
> the only functional changes are to add the "call" and replace the final
> "jmp" with a "ret".
> 
> Note that STACK_FRAME_NON_STANDARD is also dropped from __vmx_vcpu_run().
> 
> Suggested-by: Andi Kleen <ak@linux.intel.com>
> Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> ---
>  arch/x86/kvm/vmx/vmenter.S | 147 +++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/vmx/vmx.c     | 138 +---------------------------------
>  2 files changed, 148 insertions(+), 137 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
> index bcef2c7e9bc4..db223cfe9812 100644
> --- a/arch/x86/kvm/vmx/vmenter.S
> +++ b/arch/x86/kvm/vmx/vmenter.S
...
> @@ -55,3 +82,123 @@ ENDPROC(vmx_vmenter)
>  ENTRY(vmx_vmexit)
>  	ret
>  ENDPROC(vmx_vmexit)
> +
> +/**
> + * ____vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
> + * @vmx:	struct vcpu_vmx *
> + * @regs:	unsigned long * (to guest registers)
> + * %RBX:	VMCS launched status (non-zero indicates already launched)
> + *
> + * Returns:
> + *	%RBX is 0 on VM-Exit, 1 on VM-Fail
> + */
> +ENTRY(____vmx_vcpu_run)
> +	push %_ASM_BP
> +	mov  %_ASM_SP, %_ASM_BP

Was there any particular reason not to use FRAME_BEGIN (and FRAME_END
below)? It would compile to a nop on !CONFIG_FRAME_POINTER configs.

I understand this patch is only a move of the code from .c to .S. So I
would send a cleanup patch, but I just wonder if there is anything
blocking it?
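
For reference, FRAME_BEGIN/FRAME_END come from asm/frame.h and, as I read it,
expand roughly to the following on x86 (sketch, not verbatim):

	.macro FRAME_BEGIN	/* only when CONFIG_FRAME_POINTER=y */
		push %_ASM_BP
		_ASM_MOV %_ASM_SP, %_ASM_BP
	.endm
	.macro FRAME_END
		pop %_ASM_BP
	.endm
	/* with CONFIG_FRAME_POINTER=n, both expand to nothing */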

> +
> +	/*
> +	 * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
> +	 * @regs is needed after VM-Exit to save the guest's register values.
> +	 */
> +	push %_ASM_ARG2
...
> +1:
> +#ifdef CONFIG_X86_64
> +	xor %r8d,  %r8d
> +	xor %r9d,  %r9d
> +	xor %r10d, %r10d
> +	xor %r11d, %r11d
> +	xor %r12d, %r12d
> +	xor %r13d, %r13d
> +	xor %r14d, %r14d
> +	xor %r15d, %r15d
> +#endif
> +	xor %eax, %eax
> +	xor %ecx, %ecx
> +	xor %edx, %edx
> +	xor %esi, %esi
> +	xor %edi, %edi
> +	xor %ebp, %ebp
> +
> +	/* "POP" @regs. */
> +	add $WORD_SIZE, %_ASM_SP
> +	pop %_ASM_BP
> +	ret
> +
> +	/* VM-Fail.  Out-of-line to avoid a taken Jcc after VM-Exit. */
> +2:	mov $1, %ebx
> +	jmp 1b
> +ENDPROC(____vmx_vcpu_run)

thanks,

Sean Christopherson Feb. 25, 2019, 3:23 p.m. UTC | #3
On Mon, Feb 25, 2019 at 08:57:08AM +0100, Jiri Slaby wrote:
> Hi,
> 
> On 25. 01. 19, 16:41, Sean Christopherson wrote:
> > As evidenced by the myriad patches leading up to this moment, using
> > an inline asm blob for vCPU-run is nothing short of horrific.  It's also
> > been called "unholy", "an abomination" and likely a whole host of other
> > names that would violate the Code of Conduct if recorded here and now.
> > 
> > The code is relocated nearly verbatim, e.g. quotes, newlines, tabs and
> > __stringify need to be dropped, but other than those cosmetic changes
> > the only functional changes are to add the "call" and replace the final
> > "jmp" with a "ret".
> > 
> > Note that STACK_FRAME_NON_STANDARD is also dropped from __vmx_vcpu_run().
> > 
> > Suggested-by: Andi Kleen <ak@linux.intel.com>
> > Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
> > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> > ---
> >  arch/x86/kvm/vmx/vmenter.S | 147 +++++++++++++++++++++++++++++++++++++
> >  arch/x86/kvm/vmx/vmx.c     | 138 +---------------------------------
> >  2 files changed, 148 insertions(+), 137 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
> > index bcef2c7e9bc4..db223cfe9812 100644
> > --- a/arch/x86/kvm/vmx/vmenter.S
> > +++ b/arch/x86/kvm/vmx/vmenter.S
> ...
> > @@ -55,3 +82,123 @@ ENDPROC(vmx_vmenter)
> >  ENTRY(vmx_vmexit)
> >  	ret
> >  ENDPROC(vmx_vmexit)
> > +
> > +/**
> > + * ____vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
> > + * @vmx:	struct vcpu_vmx *
> > + * @regs:	unsigned long * (to guest registers)
> > + * %RBX:	VMCS launched status (non-zero indicates already launched)
> > + *
> > + * Returns:
> > + *	%RBX is 0 on VM-Exit, 1 on VM-Fail
> > + */
> > +ENTRY(____vmx_vcpu_run)
> > +	push %_ASM_BP
> > +	mov  %_ASM_SP, %_ASM_BP
> 
> Was there any particular reason not to use FRAME_BEGIN (and FRAME_END
> below)? It would compile to a nop on !CONFIG_FRAME_POINTER configs.
> 
> I understand this patch is only a move of the code from .c to .S. So I
> would send a cleanup patch, but I just wonder if there is anything
> blocking it?

RBP needs to be saved/restored unconditionally as it will be crushed
by VM-Enter.

commit 63c73aa07fcabc090661a586f7ae5200a0fc5cb4
Author: Sean Christopherson <sean.j.christopherson@intel.com>
Date:   Fri Jan 25 07:41:11 2019 -0800

    KVM: VMX: Create a stack frame in vCPU-run

    ...in preparation for moving to a proper assembly sub-routine.
    vCPU-run isn't a leaf function since it calls vmx_update_host_rsp()
    and vmx_vmenter().  And since we need to save/restore RBP anyways,
    unconditionally creating the frame costs a single MOV, i.e. don't
    bother keying off CONFIG_FRAME_POINTER or using FRAME_BEGIN, etc...

    Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
    Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
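
In other words (a minimal sketch stitched from the relevant lines of the
patch): the sequence

	push %_ASM_BP
	mov  %_ASM_SP, %_ASM_BP
	...
	mov VCPU_RBP(%_ASM_CX), %_ASM_BP	/* RBP gets the guest's value */
	...
	pop %_ASM_BP				/* restore the host's RBP */

must run even with CONFIG_FRAME_POINTER=n, whereas FRAME_BEGIN/FRAME_END would
expand to nothing in that configuration and leave the host's RBP clobbered.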

Patch

diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index bcef2c7e9bc4..db223cfe9812 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -1,6 +1,33 @@ 
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/linkage.h>
 #include <asm/asm.h>
+#include <asm/kvm_vcpu_regs.h>
+
+#ifdef CONFIG_X86_64
+#define WORD_SIZE 8
+#else
+#define WORD_SIZE 4
+#endif
+
+#define VCPU_RAX	__VCPU_REGS_RAX * WORD_SIZE
+#define VCPU_RCX	__VCPU_REGS_RCX * WORD_SIZE
+#define VCPU_RDX	__VCPU_REGS_RDX * WORD_SIZE
+#define VCPU_RBX	__VCPU_REGS_RBX * WORD_SIZE
+/* Intentionally omit RSP as it's context switched by hardware */
+#define VCPU_RBP	__VCPU_REGS_RBP * WORD_SIZE
+#define VCPU_RSI	__VCPU_REGS_RSI * WORD_SIZE
+#define VCPU_RDI	__VCPU_REGS_RDI * WORD_SIZE
+
+#ifdef CONFIG_X86_64
+#define VCPU_R8		__VCPU_REGS_R8  * WORD_SIZE
+#define VCPU_R9		__VCPU_REGS_R9  * WORD_SIZE
+#define VCPU_R10	__VCPU_REGS_R10 * WORD_SIZE
+#define VCPU_R11	__VCPU_REGS_R11 * WORD_SIZE
+#define VCPU_R12	__VCPU_REGS_R12 * WORD_SIZE
+#define VCPU_R13	__VCPU_REGS_R13 * WORD_SIZE
+#define VCPU_R14	__VCPU_REGS_R14 * WORD_SIZE
+#define VCPU_R15	__VCPU_REGS_R15 * WORD_SIZE
+#endif
 
 	.text
 
@@ -55,3 +82,123 @@  ENDPROC(vmx_vmenter)
 ENTRY(vmx_vmexit)
 	ret
 ENDPROC(vmx_vmexit)
+
+/**
+ * ____vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
+ * @vmx:	struct vcpu_vmx *
+ * @regs:	unsigned long * (to guest registers)
+ * %RBX:	VMCS launched status (non-zero indicates already launched)
+ *
+ * Returns:
+ *	%RBX is 0 on VM-Exit, 1 on VM-Fail
+ */
+ENTRY(____vmx_vcpu_run)
+	push %_ASM_BP
+	mov  %_ASM_SP, %_ASM_BP
+
+	/*
+	 * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
+	 * @regs is needed after VM-Exit to save the guest's register values.
+	 */
+	push %_ASM_ARG2
+
+	/* Adjust RSP to account for the CALL to vmx_vmenter(). */
+	lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2
+	call vmx_update_host_rsp
+
+	/* Load @regs to RCX. */
+	mov (%_ASM_SP), %_ASM_CX
+
+	/* Check if vmlaunch or vmresume is needed */
+	cmpb $0, %bl
+
+	/* Load guest registers.  Don't clobber flags. */
+	mov VCPU_RAX(%_ASM_CX), %_ASM_AX
+	mov VCPU_RBX(%_ASM_CX), %_ASM_BX
+	mov VCPU_RDX(%_ASM_CX), %_ASM_DX
+	mov VCPU_RSI(%_ASM_CX), %_ASM_SI
+	mov VCPU_RDI(%_ASM_CX), %_ASM_DI
+	mov VCPU_RBP(%_ASM_CX), %_ASM_BP
+#ifdef CONFIG_X86_64
+	mov VCPU_R8 (%_ASM_CX),  %r8
+	mov VCPU_R9 (%_ASM_CX),  %r9
+	mov VCPU_R10(%_ASM_CX), %r10
+	mov VCPU_R11(%_ASM_CX), %r11
+	mov VCPU_R12(%_ASM_CX), %r12
+	mov VCPU_R13(%_ASM_CX), %r13
+	mov VCPU_R14(%_ASM_CX), %r14
+	mov VCPU_R15(%_ASM_CX), %r15
+#endif
+	/* Load guest RCX.  This kills the vmx_vcpu pointer! */
+	mov VCPU_RCX(%_ASM_CX), %_ASM_CX
+
+	/* Enter guest mode */
+	call vmx_vmenter
+
+	/* Jump on VM-Fail. */
+	jbe 2f
+
+	/* Temporarily save guest's RCX. */
+	push %_ASM_CX
+
+	/* Reload @regs to RCX. */
+	mov WORD_SIZE(%_ASM_SP), %_ASM_CX
+
+	/* Save all guest registers, including RCX from the stack */
+	mov %_ASM_AX,   VCPU_RAX(%_ASM_CX)
+	mov %_ASM_BX,   VCPU_RBX(%_ASM_CX)
+	__ASM_SIZE(pop) VCPU_RCX(%_ASM_CX)
+	mov %_ASM_DX,   VCPU_RDX(%_ASM_CX)
+	mov %_ASM_SI,   VCPU_RSI(%_ASM_CX)
+	mov %_ASM_DI,   VCPU_RDI(%_ASM_CX)
+	mov %_ASM_BP,   VCPU_RBP(%_ASM_CX)
+#ifdef CONFIG_X86_64
+	mov %r8,  VCPU_R8 (%_ASM_CX)
+	mov %r9,  VCPU_R9 (%_ASM_CX)
+	mov %r10, VCPU_R10(%_ASM_CX)
+	mov %r11, VCPU_R11(%_ASM_CX)
+	mov %r12, VCPU_R12(%_ASM_CX)
+	mov %r13, VCPU_R13(%_ASM_CX)
+	mov %r14, VCPU_R14(%_ASM_CX)
+	mov %r15, VCPU_R15(%_ASM_CX)
+#endif
+
+	/* Clear EBX to indicate VM-Exit (as opposed to VM-Fail). */
+	xor %ebx, %ebx
+
+	/*
+	 * Clear all general purpose registers except RSP and RBX to prevent
+	 * speculative use of the guest's values, even those that are reloaded
+	 * via the stack.  In theory, an L1 cache miss when restoring registers
+	 * could lead to speculative execution with the guest's values.
+	 * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
+	 * free.  RSP and RBX are exempt as RSP is restored by hardware during
+	 * VM-Exit and RBX is explicitly loaded with 0 or 1 to "return" VM-Fail.
+	 */
+1:
+#ifdef CONFIG_X86_64
+	xor %r8d,  %r8d
+	xor %r9d,  %r9d
+	xor %r10d, %r10d
+	xor %r11d, %r11d
+	xor %r12d, %r12d
+	xor %r13d, %r13d
+	xor %r14d, %r14d
+	xor %r15d, %r15d
+#endif
+	xor %eax, %eax
+	xor %ecx, %ecx
+	xor %edx, %edx
+	xor %esi, %esi
+	xor %edi, %edi
+	xor %ebp, %ebp
+
+	/* "POP" @regs. */
+	add $WORD_SIZE, %_ASM_SP
+	pop %_ASM_BP
+	ret
+
+	/* VM-Fail.  Out-of-line to avoid a taken Jcc after VM-Exit. */
+2:	mov $1, %ebx
+	jmp 1b
+ENDPROC(____vmx_vcpu_run)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 206da8e49b04..76b68492e077 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6370,33 +6370,6 @@  void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
 	}
 }
 
-#ifdef CONFIG_X86_64
-#define WORD_SIZE	8
-#else
-#define WORD_SIZE	4
-#endif
-
-#define _WORD_SIZE	__stringify(WORD_SIZE)
-
-#define VCPU_RAX	__stringify(__VCPU_REGS_RAX * WORD_SIZE)
-#define VCPU_RCX	__stringify(__VCPU_REGS_RCX * WORD_SIZE)
-#define VCPU_RDX	__stringify(__VCPU_REGS_RDX * WORD_SIZE)
-#define VCPU_RBX	__stringify(__VCPU_REGS_RBX * WORD_SIZE)
-/* Intentionally omit %RSP as it's context switched by hardware */
-#define VCPU_RBP	__stringify(__VCPU_REGS_RBP * WORD_SIZE)
-#define VCPU_RSI	__stringify(__VCPU_REGS_RSI * WORD_SIZE)
-#define VCPU_RDI	__stringify(__VCPU_REGS_RDI * WORD_SIZE)
-#ifdef CONFIG_X86_64
-#define VCPU_R8		__stringify(__VCPU_REGS_R8  * WORD_SIZE)
-#define VCPU_R9		__stringify(__VCPU_REGS_R9  * WORD_SIZE)
-#define VCPU_R10	__stringify(__VCPU_REGS_R10 * WORD_SIZE)
-#define VCPU_R11	__stringify(__VCPU_REGS_R11 * WORD_SIZE)
-#define VCPU_R12	__stringify(__VCPU_REGS_R12 * WORD_SIZE)
-#define VCPU_R13	__stringify(__VCPU_REGS_R13 * WORD_SIZE)
-#define VCPU_R14	__stringify(__VCPU_REGS_R14 * WORD_SIZE)
-#define VCPU_R15	__stringify(__VCPU_REGS_R15 * WORD_SIZE)
-#endif
-
 static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
 {
 	if (static_branch_unlikely(&vmx_l1d_should_flush))
@@ -6406,115 +6379,7 @@  static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
 		write_cr2(vcpu->arch.cr2);
 
 	asm(
-		"push %%" _ASM_BP " \n\t"
-		"mov  %%" _ASM_SP ", %%" _ASM_BP " \n\t"
-
-		/*
-		 * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
-		 * @regs is needed after VM-Exit to save the guest's register values.
-		 */
-		"push %%" _ASM_ARG2 " \n\t"
-
-		/* Adjust RSP to account for the CALL to vmx_vmenter(). */
-		"lea -" _WORD_SIZE "(%%" _ASM_SP "), %%" _ASM_ARG2 " \n\t"
-		"call vmx_update_host_rsp \n\t"
-
-		/* Load RCX with @regs. */
-		"mov (%%" _ASM_SP "), %%" _ASM_CX " \n\t"
-
-		/* Check if vmlaunch or vmresume is needed */
-		"cmpb $0, %%bl \n\t"
-
-		/* Load guest registers.  Don't clobber flags. */
-		"mov " VCPU_RAX "(%%" _ASM_CX "), %%" _ASM_AX " \n\t"
-		"mov " VCPU_RBX "(%%" _ASM_CX "), %%" _ASM_BX " \n\t"
-		"mov " VCPU_RDX "(%%" _ASM_CX "), %%" _ASM_DX " \n\t"
-		"mov " VCPU_RSI "(%%" _ASM_CX "), %%" _ASM_SI " \n\t"
-		"mov " VCPU_RDI "(%%" _ASM_CX "), %%" _ASM_DI " \n\t"
-		"mov " VCPU_RBP "(%%" _ASM_CX "), %%" _ASM_BP " \n\t"
-#ifdef CONFIG_X86_64
-		"mov " VCPU_R8  "(%%" _ASM_CX "),  %%r8  \n\t"
-		"mov " VCPU_R9  "(%%" _ASM_CX "),  %%r9  \n\t"
-		"mov " VCPU_R10 "(%%" _ASM_CX "), %%r10 \n\t"
-		"mov " VCPU_R11 "(%%" _ASM_CX "), %%r11 \n\t"
-		"mov " VCPU_R12 "(%%" _ASM_CX "), %%r12 \n\t"
-		"mov " VCPU_R13 "(%%" _ASM_CX "), %%r13 \n\t"
-		"mov " VCPU_R14 "(%%" _ASM_CX "), %%r14 \n\t"
-		"mov " VCPU_R15 "(%%" _ASM_CX "), %%r15 \n\t"
-#endif
-		/* Load guest RCX.  This kills the vmx_vcpu pointer! */
-		"mov " VCPU_RCX"(%%" _ASM_CX "), %%" _ASM_CX " \n\t"
-
-		/* Enter guest mode */
-		"call vmx_vmenter\n\t"
-		"jbe 2f \n\t"
-
-		/* Temporarily save guest's RCX. */
-		"push %%" _ASM_CX " \n\t"
-
-		/* Reload RCX with @regs. */
-		"mov " _WORD_SIZE "(%%" _ASM_SP "), %%" _ASM_CX " \n\t"
-
-		/* Save all guest registers, including RCX from the stack */
-		"mov %%" _ASM_AX ", " VCPU_RAX "(%%" _ASM_CX ") \n\t"
-		"mov %%" _ASM_BX ", " VCPU_RBX "(%%" _ASM_CX ") \n\t"
-		__ASM_SIZE(pop) "   " VCPU_RCX "(%%" _ASM_CX ") \n\t"
-		"mov %%" _ASM_DX ", " VCPU_RDX "(%%" _ASM_CX ") \n\t"
-		"mov %%" _ASM_SI ", " VCPU_RSI "(%%" _ASM_CX ") \n\t"
-		"mov %%" _ASM_DI ", " VCPU_RDI "(%%" _ASM_CX ") \n\t"
-		"mov %%" _ASM_BP ", " VCPU_RBP "(%%" _ASM_CX ") \n\t"
-#ifdef CONFIG_X86_64
-		"mov %%r8,  " VCPU_R8  "(%%" _ASM_CX ") \n\t"
-		"mov %%r9,  " VCPU_R9  "(%%" _ASM_CX ") \n\t"
-		"mov %%r10, " VCPU_R10 "(%%" _ASM_CX ") \n\t"
-		"mov %%r11, " VCPU_R11 "(%%" _ASM_CX ") \n\t"
-		"mov %%r12, " VCPU_R12 "(%%" _ASM_CX ") \n\t"
-		"mov %%r13, " VCPU_R13 "(%%" _ASM_CX ") \n\t"
-		"mov %%r14, " VCPU_R14 "(%%" _ASM_CX ") \n\t"
-		"mov %%r15, " VCPU_R15 "(%%" _ASM_CX ") \n\t"
-#endif
-
-		/* Clear EBX to indicate VM-Exit (as opposed to VM-Fail). */
-		"xor %%ebx, %%ebx \n\t"
-
-		/*
-		 * Clear all general purpose registers except RSP and RBX to prevent
-		 * speculative use of the guest's values, even those that are reloaded
-		 * via the stack.  In theory, an L1 cache miss when restoring registers
-		 * could lead to speculative execution with the guest's values.
-		 * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
-		 * free.  RSP and RBX are exempt as RSP is restored by hardware during
-		 * VM-Exit and RBX is explicitly loaded with 0 or 1 to "return" VM-Fail.
-		 */
-		"1: \n\t"
-#ifdef CONFIG_X86_64
-		"xor %%r8d,  %%r8d \n\t"
-		"xor %%r9d,  %%r9d \n\t"
-		"xor %%r10d, %%r10d \n\t"
-		"xor %%r11d, %%r11d \n\t"
-		"xor %%r12d, %%r12d \n\t"
-		"xor %%r13d, %%r13d \n\t"
-		"xor %%r14d, %%r14d \n\t"
-		"xor %%r15d, %%r15d \n\t"
-#endif
-		"xor %%eax, %%eax \n\t"
-		"xor %%ecx, %%ecx \n\t"
-		"xor %%edx, %%edx \n\t"
-		"xor %%esi, %%esi \n\t"
-		"xor %%edi, %%edi \n\t"
-		"xor %%ebp, %%ebp \n\t"
-
-		/* "POP" the vcpu_vmx pointer. */
-		"add $" _WORD_SIZE ", %%" _ASM_SP " \n\t"
-		"pop  %%" _ASM_BP " \n\t"
-		"jmp 3f \n\t"
-
-		/* VM-Fail.  Out-of-line to avoid a taken Jcc after VM-Exit. */
-		"2: \n\t"
-		"mov $1, %%ebx \n\t"
-		"jmp 1b \n\t"
-		"3: \n\t"
-
+		"call ____vmx_vcpu_run \n\t"
 	      : ASM_CALL_CONSTRAINT, "=b"(vmx->fail),
 #ifdef CONFIG_X86_64
 		"=D"((int){0}), "=S"((int){0})
@@ -6535,7 +6400,6 @@  static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
 
 	vcpu->arch.cr2 = read_cr2();
 }
-STACK_FRAME_NON_STANDARD(__vmx_vcpu_run);
 
 static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {