diff mbox series

[v3,15/33] KVM: nVMX: Cache host_rsp on a per-VMCS basis

Message ID 20190125154120.19385-16-sean.j.christopherson@intel.com (mailing list archive)
State New, archived
Headers show
Series KVM: VMX: Move vCPU-run to proper asm sub-routine | expand

Commit Message

Sean Christopherson Jan. 25, 2019, 3:41 p.m. UTC
Currently, host_rsp is cached on a per-vCPU basis, i.e. it's stored in
struct vcpu_vmx.  In non-nested usage the caching is for all intents
and purposes 100% effective, e.g. only the first VMLAUNCH needs to
synchronize VMCS.HOST_RSP since the call stack to vmx_vcpu_run() is
identical each and every time.  But when running a nested guest, KVM
must invalidate the cache when switching the current VMCS as it can't
guarantee the new VMCS has the same HOST_RSP as the previous VMCS.  In
other words, the cache loses almost all of its efficacy when running a
nested VM.

Move host_rsp to struct vmcs_host_state, which is per-VMCS, so that it
is cached on a per-VMCS basis and restores its 100% hit rate when
nested VMs are in play.

Note that the host_rsp cache for vmcs02 essentially "breaks" when
nested early checks are enabled as nested_vmx_check_vmentry_hw() will
see a different RSP at the time of its VM-Enter.  While it's possible
to avoid even that VMCS.HOST_RSP synchronization, e.g. by employing a
dedicated VM-Exit stack, there is little motivation for doing so as
the overhead of two VMWRITEs (~55 cycles) is dwarfed by the overhead
of the extra VMX transition (600+ cycles) and is a proverbial drop in
the ocean relative to the total cost of a nested transition (10s of
thousands of cycles).

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/vmx/nested.c | 24 ++++++------------------
 arch/x86/kvm/vmx/vmcs.h   |  1 +
 arch/x86/kvm/vmx/vmx.c    | 13 ++++++-------
 arch/x86/kvm/vmx/vmx.h    |  1 -
 4 files changed, 13 insertions(+), 26 deletions(-)

Comments

Jim Mattson Jan. 25, 2019, 10:08 p.m. UTC | #1
On Fri, Jan 25, 2019 at 7:42 AM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> Currently, host_rsp is cached on a per-vCPU basis, i.e. it's stored in
> struct vcpu_vmx.  In non-nested usage the caching is for all intents
> and purposes 100% effective, e.g. only the first VMLAUNCH needs to
> synchronize VMCS.HOST_RSP since the call stack to vmx_vcpu_run() is
> identical each and every time.  But when running a nested guest, KVM
> must invalidate the cache when switching the current VMCS as it can't
> guarantee the new VMCS has the same HOST_RSP as the previous VMCS.  In
> other words, the cache loses almost all of its efficacy when running a
> nested VM.
>
> Move host_rsp to struct vmcs_host_state, which is per-VMCS, so that it
> is cached on a per-VMCS basis and restores its 100% hit rate when
> nested VMs are in play.
>
> Note that the host_rsp cache for vmcs02 essentially "breaks" when
> nested early checks are enabled as nested_vmx_check_vmentry_hw() will
> see a different RSP at the time of its VM-Enter.  While it's possible
> to avoid even that VMCS.HOST_RSP synchronization, e.g. by employing a
> dedicated VM-Exit stack, there is little motivation for doing so as
> the overhead of two VMWRITEs (~55 cycles) is dwarfed by the overhead
> of the extra VMX transition (600+ cycles) and is a proverbial drop in
> the ocean relative to the total cost of a nested transition (10s of
> thousands of cycles).
>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 48281b0684ca..5d5218a14fb3 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1978,17 +1978,6 @@  static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 	if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
 		prepare_vmcs02_early_full(vmx, vmcs12);
 
-	/*
-	 * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
-	 * entry, but only if the current (host) sp changed from the value
-	 * we wrote last (vmx->host_rsp).  This cache is no longer relevant
-	 * if we switch vmcs, and rather than hold a separate cache per vmcs,
-	 * here we just force the write to happen on entry.  host_rsp will
-	 * also be written unconditionally by nested_vmx_check_vmentry_hw()
-	 * if we are doing early consistency checks via hardware.
-	 */
-	vmx->host_rsp = 0;
-
 	/*
 	 * PIN CONTROLS
 	 */
@@ -2753,8 +2742,11 @@  static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 
 	asm(
 		"sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */
+		"cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
+		"je 1f \n\t"
 		__ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t"
-		"mov %%" _ASM_SP ", %c[host_rsp](%% " _ASM_CX ")\n\t"
+		"mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
+		"1: \n\t"
 		"add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */
 
 		/* Check if vmlaunch or vmresume is needed */
@@ -2770,11 +2762,10 @@  static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 
 		CC_SET(be)
 	      : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail)
-	      : "c"(vmx),
-		[HOST_RSP]"r"((unsigned long)HOST_RSP),
+	      :	[HOST_RSP]"r"((unsigned long)HOST_RSP),
 		[loaded_vmcs]"r"(vmx->loaded_vmcs),
 		[launched]"i"(offsetof(struct loaded_vmcs, launched)),
-		[host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
+		[host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)),
 		[wordsize]"i"(sizeof(ulong))
 	      : "cc", "memory"
 	);
@@ -3911,9 +3902,6 @@  void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 		vmx_flush_tlb(vcpu, true);
 	}
 
-	/* This is needed for same reason as it was needed in prepare_vmcs02 */
-	vmx->host_rsp = 0;
-
 	/* Unpin physical memory we referred to in vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		kvm_release_page_dirty(vmx->nested.apic_access_page);
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index 6def3ba88e3b..cb6079f8a227 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -34,6 +34,7 @@  struct vmcs_host_state {
 	unsigned long cr4;	/* May not match real cr4 */
 	unsigned long gs_base;
 	unsigned long fs_base;
+	unsigned long rsp;
 
 	u16           fs_sel, gs_sel, ldt_sel;
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e3b06fecdfb5..fff1e5b5febe 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6380,9 +6380,9 @@  static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
 		"sub $%c[wordsize], %%" _ASM_SP "\n\t" /* placeholder for guest RCX */
 		"push %%" _ASM_CX " \n\t"
 		"sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */
-		"cmp %%" _ASM_SP ", %c[host_rsp](%%" _ASM_CX ") \n\t"
+		"cmp %%" _ASM_SP ", (%%" _ASM_DI ") \n\t"
 		"je 1f \n\t"
-		"mov %%" _ASM_SP ", %c[host_rsp](%%" _ASM_CX ") \n\t"
+		"mov %%" _ASM_SP ", (%%" _ASM_DI ") \n\t"
 		/* Avoid VMWRITE when Enlightened VMCS is in use */
 		"test %%" _ASM_SI ", %%" _ASM_SI " \n\t"
 		"jz 2f \n\t"
@@ -6481,11 +6481,10 @@  static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
 		"xor %%edi, %%edi \n\t"
 		"xor %%ebp, %%ebp \n\t"
 		"pop  %%" _ASM_BP " \n\t"
-	      : ASM_CALL_CONSTRAINT, "=S"((int){0})
-	      : "c"(vmx), "S"(evmcs_rsp),
+	      : ASM_CALL_CONSTRAINT, "=D"((int){0}), "=S"((int){0})
+	      : "c"(vmx), "D"(&vmx->loaded_vmcs->host_state.rsp), "S"(evmcs_rsp),
 		[launched]"i"(offsetof(struct vcpu_vmx, __launched)),
 		[fail]"i"(offsetof(struct vcpu_vmx, fail)),
-		[host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
 		[HOST_RSP]"i"(HOST_RSP),
 		[rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
 		[rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
@@ -6508,10 +6507,10 @@  static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
 		[wordsize]"i"(sizeof(ulong))
 	      : "cc", "memory"
 #ifdef CONFIG_X86_64
-		, "rax", "rbx", "rdx", "rdi"
+		, "rax", "rbx", "rdx"
 		, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
 #else
-		, "eax", "ebx", "edx", "edi"
+		, "eax", "ebx", "edx"
 #endif
 	      );
 }
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 99328954c2fc..8e203b725928 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -175,7 +175,6 @@  struct nested_vmx {
 
 struct vcpu_vmx {
 	struct kvm_vcpu       vcpu;
-	unsigned long         host_rsp;
 	u8                    fail;
 	u8		      msr_bitmap_mode;
 	u32                   exit_intr_info;