[06/15] KVM: nVMX: Don't "put" vCPU or host state when switching VMCS
diff mbox series

Message ID 20190507160640.4812-7-sean.j.christopherson@intel.com
State New
Headers show
Series
  • KVM: nVMX: Optimize nested VM-Entry
Related show

Commit Message

Sean Christopherson May 7, 2019, 4:06 p.m. UTC
When switching between vmcs01 and vmcs02, KVM isn't actually switching
between guest and host.  If guest state is already loaded (the likely,
if not guaranteed, case), keep the guest state loaded and manually swap
the loaded_cpu_state pointer after propagating saved host state to the
new vmcs0{1,2}.

Avoiding the switch between guest and host reduces the latency of
switching between vmcs01 and vmcs02 by several hundred cycles, and
reduces the roundtrip time of a nested VM by upwards of 1000 cycles.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/vmx/nested.c | 18 +++++++++++++-
 arch/x86/kvm/vmx/vmx.c    | 52 ++++++++++++++++++++++-----------------
 arch/x86/kvm/vmx/vmx.h    |  3 ++-
 3 files changed, 48 insertions(+), 25 deletions(-)

Comments

Paolo Bonzini June 6, 2019, 4:24 p.m. UTC | #1
On 07/05/19 18:06, Sean Christopherson wrote:
> When switching between vmcs01 and vmcs02, KVM isn't actually switching
> between guest and host.  If guest state is already loaded (the likely,
> if not guaranteed, case), keep the guest state loaded and manually swap
> the loaded_cpu_state pointer after propagating saved host state to the
> new vmcs0{1,2}.
> 
> Avoiding the switch between guest and host reduces the latency of
> switching between vmcs01 and vmcs02 by several hundred cycles, and
> reduces the roundtrip time of a nested VM by upwards of 1000 cycles.
> 
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> ---
>  arch/x86/kvm/vmx/nested.c | 18 +++++++++++++-
>  arch/x86/kvm/vmx/vmx.c    | 52 ++++++++++++++++++++++-----------------
>  arch/x86/kvm/vmx/vmx.h    |  3 ++-
>  3 files changed, 48 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index a30d53823b2e..4651d3462df4 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -241,15 +241,31 @@ static void free_nested(struct kvm_vcpu *vcpu)
>  static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	struct vmcs_host_state *src;
> +	struct loaded_vmcs *prev;
>  	int cpu;
>  
>  	if (vmx->loaded_vmcs == vmcs)
>  		return;
>  
>  	cpu = get_cpu();
> -	vmx_vcpu_put(vcpu);
> +	prev = vmx->loaded_cpu_state;
>  	vmx->loaded_vmcs = vmcs;
>  	vmx_vcpu_load(vcpu, cpu);
> +
> +	if (likely(prev)) {
> +		src = &prev->host_state;
> +
> +		vmx_set_host_fs_gs(&vmcs->host_state, src->fs_sel, src->gs_sel,
> +				   src->fs_base, src->gs_base);
> +
> +		vmcs->host_state.ldt_sel = src->ldt_sel;
> +#ifdef CONFIG_X86_64
> +		vmcs->host_state.ds_sel = src->ds_sel;
> +		vmcs->host_state.es_sel = src->es_sel;
> +#endif
> +		vmx->loaded_cpu_state = vmcs;
> +	}
>  	put_cpu();

I'd like to extract this into a separate function:

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 438fae1fef2a..83e436f201bf 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -248,34 +248,40 @@ static void free_nested(struct kvm_vcpu *vcpu)
 	free_loaded_vmcs(&vmx->nested.vmcs02);
 }
 
+static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx)
+{
+	struct loaded_vmcs *prev = vmx->loaded_cpu_state;
+	struct loaded_vmcs *cur;
+	struct vmcs_host_state *dest, *src;
+
+	if (unlikely(!prev))
+		return;
+
+	cur = vmx->loaded_vmcs;
+	src = &prev->host_state;
+	dest = &cur->host_state;
+
+	vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
+	dest->ldt_sel = src->ldt_sel;
+#ifdef CONFIG_X86_64
+	dest->ds_sel = src->ds_sel;
+	dest->es_sel = src->es_sel;
+#endif
+	vmx->loaded_cpu_state = cur;
+}
+
 static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct vmcs_host_state *src;
-	struct loaded_vmcs *prev;
 	int cpu;
 
 	if (vmx->loaded_vmcs == vmcs)
 		return;
 
 	cpu = get_cpu();
-	prev = vmx->loaded_cpu_state;
 	vmx->loaded_vmcs = vmcs;
 	vmx_vcpu_load(vcpu, cpu);
-
-	if (likely(prev)) {
-		src = &prev->host_state;
-
-		vmx_set_host_fs_gs(&vmcs->host_state, src->fs_sel, src->gs_sel,
-				   src->fs_base, src->gs_base);
-
-		vmcs->host_state.ldt_sel = src->ldt_sel;
-#ifdef CONFIG_X86_64
-		vmcs->host_state.ds_sel = src->ds_sel;
-		vmcs->host_state.es_sel = src->es_sel;
-#endif
-		vmx->loaded_cpu_state = vmcs;
-	}
+	vmx_sync_vmcs_host_state(vmx);
 	put_cpu();
 
 	vm_entry_controls_reset_shadow(vmx);

Paolo
Sean Christopherson June 6, 2019, 6:57 p.m. UTC | #2
On Thu, Jun 06, 2019 at 06:24:43PM +0200, Paolo Bonzini wrote:
> On 07/05/19 18:06, Sean Christopherson wrote:
> > When switching between vmcs01 and vmcs02, KVM isn't actually switching
> > between guest and host.  If guest state is already loaded (the likely,
> > if not guaranteed, case), keep the guest state loaded and manually swap
> > the loaded_cpu_state pointer after propagating saved host state to the
> > new vmcs0{1,2}.
> > 
> > Avoiding the switch between guest and host reduces the latency of
> > switching between vmcs01 and vmcs02 by several hundred cycles, and
> > reduces the roundtrip time of a nested VM by upwards of 1000 cycles.
> > 
> > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> > ---
> >  arch/x86/kvm/vmx/nested.c | 18 +++++++++++++-
> >  arch/x86/kvm/vmx/vmx.c    | 52 ++++++++++++++++++++++-----------------
> >  arch/x86/kvm/vmx/vmx.h    |  3 ++-
> >  3 files changed, 48 insertions(+), 25 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> > index a30d53823b2e..4651d3462df4 100644
> > --- a/arch/x86/kvm/vmx/nested.c
> > +++ b/arch/x86/kvm/vmx/nested.c
> > @@ -241,15 +241,31 @@ static void free_nested(struct kvm_vcpu *vcpu)
> >  static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
> >  {
> >  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> > +	struct vmcs_host_state *src;
> > +	struct loaded_vmcs *prev;
> >  	int cpu;
> >  
> >  	if (vmx->loaded_vmcs == vmcs)
> >  		return;
> >  
> >  	cpu = get_cpu();
> > -	vmx_vcpu_put(vcpu);
> > +	prev = vmx->loaded_cpu_state;
> >  	vmx->loaded_vmcs = vmcs;
> >  	vmx_vcpu_load(vcpu, cpu);
> > +
> > +	if (likely(prev)) {
> > +		src = &prev->host_state;
> > +
> > +		vmx_set_host_fs_gs(&vmcs->host_state, src->fs_sel, src->gs_sel,
> > +				   src->fs_base, src->gs_base);
> > +
> > +		vmcs->host_state.ldt_sel = src->ldt_sel;
> > +#ifdef CONFIG_X86_64
> > +		vmcs->host_state.ds_sel = src->ds_sel;
> > +		vmcs->host_state.es_sel = src->es_sel;
> > +#endif
> > +		vmx->loaded_cpu_state = vmcs;
> > +	}
> >  	put_cpu();
> 
> I'd like to extract this into a separate function:
> 
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 438fae1fef2a..83e436f201bf 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -248,34 +248,40 @@ static void free_nested(struct kvm_vcpu *vcpu)
>  	free_loaded_vmcs(&vmx->nested.vmcs02);
>  }
>  
> +static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx)

What about taking the vmcs pointers, and using old/new instead of
prev/cur?  Calling it prev is wonky since it's pulled from the current
value of loaded_cpu_state, especially since cur is the same type.
That oddity is also why I grabbed prev before setting loaded_vmcs,
it just felt wrong even though they really are two separate things.

static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
				     struct loaded_vmcs *old,
				     struct loaded_vmcs *new)
{
	...
}


{
	vmx_sync_vmcs_host_state(vmx, vmx->loaded_cpu_state, vmcs);
}

> +{
> +	struct loaded_vmcs *prev = vmx->loaded_cpu_state;
> +	struct loaded_vmcs *cur;
> +	struct vmcs_host_state *dest, *src;
> +
> +	if (unlikely(!prev))
> +		return;
> +
> +	cur = vmx->loaded_vmcs;
> +	src = &prev->host_state;
> +	dest = &cur->host_state;
> +
> +	vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
> +	dest->ldt_sel = src->ldt_sel;
> +#ifdef CONFIG_X86_64
> +	dest->ds_sel = src->ds_sel;
> +	dest->es_sel = src->es_sel;
> +#endif
> +	vmx->loaded_cpu_state = cur;
> +}
> +
>  static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> -	struct vmcs_host_state *src;
> -	struct loaded_vmcs *prev;
>  	int cpu;
>  
>  	if (vmx->loaded_vmcs == vmcs)
>  		return;
>  
>  	cpu = get_cpu();
> -	prev = vmx->loaded_cpu_state;
>  	vmx->loaded_vmcs = vmcs;
>  	vmx_vcpu_load(vcpu, cpu);
> -
> -	if (likely(prev)) {
> -		src = &prev->host_state;
> -
> -		vmx_set_host_fs_gs(&vmcs->host_state, src->fs_sel, src->gs_sel,
> -				   src->fs_base, src->gs_base);
> -
> -		vmcs->host_state.ldt_sel = src->ldt_sel;
> -#ifdef CONFIG_X86_64
> -		vmcs->host_state.ds_sel = src->ds_sel;
> -		vmcs->host_state.es_sel = src->es_sel;
> -#endif
> -		vmx->loaded_cpu_state = vmcs;
> -	}
> +	vmx_sync_vmcs_host_state(vmx);
>  	put_cpu();
>  
>  	vm_entry_controls_reset_shadow(vmx);
> 
> Paolo
Paolo Bonzini June 7, 2019, 5 p.m. UTC | #3
On 06/06/19 20:57, Sean Christopherson wrote:
> What about taking the vmcs pointers, and using old/new instead of
> prev/cur?  Calling it prev is wonky since it's pulled from the current
> value of loaded_cpu_state, especially since cur is the same type.
> That oddity is also why I grabbed prev before setting loaded_vmcs,
> it just felt wrong even though they really are two separate things.
> 
> static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
> 				     struct loaded_vmcs *old,
> 				     struct loaded_vmcs *new)

I had it like that in the beginning actually.  But the idea of this
function is that because we're switching vmcs's, the host register
fields have to be moved to the VMCS that will be used next.  I don't see
how it would be used with old and new being anything other than
vmx->loaded_cpu_state and vmx->loaded_vmcs and, because we're switching
VMCS, those are the "previously" active VMCS and the "currently" active
VMCS.

What would also make sense, is to change loaded_cpu_state to a bool (it
must always be equal to loaded_vmcs anyway) and make the prototype
something like this:

static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
				     struct loaded_vmcs *prev)


I'll send a patch.

Paolo
Sean Christopherson June 7, 2019, 5:08 p.m. UTC | #4
On Fri, Jun 07, 2019 at 07:00:06PM +0200, Paolo Bonzini wrote:
> On 06/06/19 20:57, Sean Christopherson wrote:
> > What about taking the vmcs pointers, and using old/new instead of
> > prev/cur?  Calling it prev is wonky since it's pulled from the current
> > value of loaded_cpu_state, especially since cur is the same type.
> > That oddity is also why I grabbed prev before setting loaded_vmcs,
> > it just felt wrong even though they really are two separate things.
> > 
> > static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
> > 				     struct loaded_vmcs *old,
> > 				     struct loaded_vmcs *new)
> 
> I had it like that in the beginning actually.  But the idea of this
> function is that because we're switching vmcs's, the host register
> fields have to be moved to the VMCS that will be used next.  I don't see
> how it would be used with old and new being anything other than
> vmx->loaded_cpu_state and vmx->loaded_vmcs and, because we're switching
> VMCS, those are the "previously" active VMCS and the "currently" active
> VMCS.
> 
> What would also make sense, is to change loaded_cpu_state to a bool (it
> must always be equal to loaded_vmcs anyway) and make the prototype
> something like this:
> 
> static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
> 				     struct loaded_vmcs *prev)
> 
> 
> I'll send a patch.

Works for me.  The only reason I made loaded_cpu_state was so that
vmx_prepare_switch_to_host() could WARN on it diverging from loaded_vmcs.
Seeing as how that WARN has never fired, I'm comfortable making it a bool.

Patch
diff mbox series

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index a30d53823b2e..4651d3462df4 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -241,15 +241,31 @@  static void free_nested(struct kvm_vcpu *vcpu)
 static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs_host_state *src;
+	struct loaded_vmcs *prev;
 	int cpu;
 
 	if (vmx->loaded_vmcs == vmcs)
 		return;
 
 	cpu = get_cpu();
-	vmx_vcpu_put(vcpu);
+	prev = vmx->loaded_cpu_state;
 	vmx->loaded_vmcs = vmcs;
 	vmx_vcpu_load(vcpu, cpu);
+
+	if (likely(prev)) {
+		src = &prev->host_state;
+
+		vmx_set_host_fs_gs(&vmcs->host_state, src->fs_sel, src->gs_sel,
+				   src->fs_base, src->gs_base);
+
+		vmcs->host_state.ldt_sel = src->ldt_sel;
+#ifdef CONFIG_X86_64
+		vmcs->host_state.ds_sel = src->ds_sel;
+		vmcs->host_state.es_sel = src->es_sel;
+#endif
+		vmx->loaded_cpu_state = vmcs;
+	}
 	put_cpu();
 
 	vm_entry_controls_reset_shadow(vmx);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f3b0f4445af7..b97666731425 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1035,6 +1035,33 @@  static void pt_guest_exit(struct vcpu_vmx *vmx)
 	wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
 }
 
+void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
+			unsigned long fs_base, unsigned long gs_base)
+{
+	if (unlikely(fs_sel != host->fs_sel)) {
+		if (!(fs_sel & 7))
+			vmcs_write16(HOST_FS_SELECTOR, fs_sel);
+		else
+			vmcs_write16(HOST_FS_SELECTOR, 0);
+		host->fs_sel = fs_sel;
+	}
+	if (unlikely(gs_sel != host->gs_sel)) {
+		if (!(gs_sel & 7))
+			vmcs_write16(HOST_GS_SELECTOR, gs_sel);
+		else
+			vmcs_write16(HOST_GS_SELECTOR, 0);
+		host->gs_sel = gs_sel;
+	}
+	if (unlikely(fs_base != host->fs_base)) {
+		vmcs_writel(HOST_FS_BASE, fs_base);
+		host->fs_base = fs_base;
+	}
+	if (unlikely(gs_base != host->gs_base)) {
+		vmcs_writel(HOST_GS_BASE, gs_base);
+		host->gs_base = gs_base;
+	}
+}
+
 void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1100,28 +1127,7 @@  void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 	gs_base = segment_base(gs_sel);
 #endif
 
-	if (unlikely(fs_sel != host_state->fs_sel)) {
-		if (!(fs_sel & 7))
-			vmcs_write16(HOST_FS_SELECTOR, fs_sel);
-		else
-			vmcs_write16(HOST_FS_SELECTOR, 0);
-		host_state->fs_sel = fs_sel;
-	}
-	if (unlikely(gs_sel != host_state->gs_sel)) {
-		if (!(gs_sel & 7))
-			vmcs_write16(HOST_GS_SELECTOR, gs_sel);
-		else
-			vmcs_write16(HOST_GS_SELECTOR, 0);
-		host_state->gs_sel = gs_sel;
-	}
-	if (unlikely(fs_base != host_state->fs_base)) {
-		vmcs_writel(HOST_FS_BASE, fs_base);
-		host_state->fs_base = fs_base;
-	}
-	if (unlikely(gs_base != host_state->gs_base)) {
-		vmcs_writel(HOST_GS_BASE, gs_base);
-		host_state->gs_base = gs_base;
-	}
+	vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
 }
 
 static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
@@ -1310,7 +1316,7 @@  static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
 		pi_set_sn(pi_desc);
 }
 
-void vmx_vcpu_put(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	vmx_vcpu_pi_put(vcpu);
 
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 63d37ccce3dc..f81b32ae1822 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -293,11 +293,12 @@  struct kvm_vmx {
 
 bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
 void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
-void vmx_vcpu_put(struct kvm_vcpu *vcpu);
 int allocate_vpid(void);
 void free_vpid(int vpid);
 void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
 void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
+void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
+			unsigned long fs_base, unsigned long gs_base);
 int vmx_get_cpl(struct kvm_vcpu *vcpu);
 unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
 void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);