diff mbox series

[v2,4/9] x86/kvm/mmu: introduce guest_mmu

Message ID 20180925175844.20277-5-vkuznets@redhat.com (mailing list archive)
State New, archived
Headers show
Series x86/kvm/nVMX: optimize MMU switch between L1 and L2 | expand

Commit Message

Vitaly Kuznetsov Sept. 25, 2018, 5:58 p.m. UTC
When EPT is used for a nested guest, we need to re-init the MMU as a shadow
EPT MMU (nested_ept_init_mmu_context() does that). When we return
from L2 to L1, kvm_mmu_reset_context() in nested_vmx_load_cr3() resets the
MMU back to normal TDP mode. Add a special 'guest_mmu' so we can use
separate root caches; the improved hit rate is not very important for
single vCPU performance, but it avoids contention on the mmu_lock for
many vCPUs.

On the nested CPUID benchmark, with 16 vCPUs, an L2->L1->L2 vmexit
goes from 42k to 26k cycles.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
Changes since v1:
- drop now-unneeded local vmx variable in vmx_free_vcpu_nested
  [Sean Christopherson]
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/mmu.c              | 15 +++++++++++----
 arch/x86/kvm/vmx.c              | 27 ++++++++++++++++++---------
 3 files changed, 32 insertions(+), 13 deletions(-)

Comments

Sean Christopherson Sept. 26, 2018, 2:02 p.m. UTC | #1
On Tue, Sep 25, 2018 at 07:58:39PM +0200, Vitaly Kuznetsov wrote:
> When EPT is used for nested guest we need to re-init MMU as shadow
> EPT MMU (nested_ept_init_mmu_context() does that). When we return back
> from L2 to L1 kvm_mmu_reset_context() in nested_vmx_load_cr3() resets
> MMU back to normal TDP mode. Add a special 'guest_mmu' so we can use
> separate root caches; the improved hit rate is not very important for
> single vCPU performance, but it avoids contention on the mmu_lock for
> many vCPUs.
> 
> On the nested CPUID benchmark, with 16 vCPUs, an L2->L1->L2 vmexit
> goes from 42k to 26k cycles.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> Changes since v1:
> - drop now-unneeded local vmx variable in vmx_free_vcpu_nested
>   [Sean Christopherson]
> ---
>  arch/x86/include/asm/kvm_host.h |  3 +++
>  arch/x86/kvm/mmu.c              | 15 +++++++++++----
>  arch/x86/kvm/vmx.c              | 27 ++++++++++++++++++---------
>  3 files changed, 32 insertions(+), 13 deletions(-)

...

> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 2d55adab52de..93ff08136fc1 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -8468,8 +8468,10 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
>   * Free whatever needs to be freed from vmx->nested when L1 goes down, or
>   * just stops using VMX.
>   */
> -static void free_nested(struct vcpu_vmx *vmx)
> +static void free_nested(struct kvm_vcpu *vcpu)
>  {
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
>  	if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
>  		return;
>  
> @@ -8502,6 +8504,8 @@ static void free_nested(struct vcpu_vmx *vmx)
>  		vmx->nested.pi_desc = NULL;
>  	}
>  
> +	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
> +
>  	free_loaded_vmcs(&vmx->nested.vmcs02);
>  }
>  
> @@ -8510,7 +8514,7 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
>  {
>  	if (!nested_vmx_check_permission(vcpu))
>  		return 1;
> -	free_nested(to_vmx(vcpu));
> +	free_nested(vcpu);
>  	nested_vmx_succeed(vcpu);
>  	return kvm_skip_emulated_instruction(vcpu);
>  }
> @@ -8541,6 +8545,8 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
>  	if (vmptr == vmx->nested.current_vmptr)
>  		nested_release_vmcs12(vmx);
>  
> +	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);

Shouldn't we only free guest_mmu if VMCLEAR is targeting current_vmptr?
Assuming that's the case, we could put the call to kvm_mmu_free_roots()
in nested_release_vmcs12() instead of calling it from handle_vmclear()
and handle_vmptrld().

> +
>  	kvm_vcpu_write_guest(vcpu,
>  			vmptr + offsetof(struct vmcs12, launch_state),
>  			&zero, sizeof(zero));
> @@ -8924,6 +8930,9 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
>  		}
>  
>  		nested_release_vmcs12(vmx);
> +
> +		kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu,
> +				   KVM_MMU_ROOTS_ALL);
>  		/*
>  		 * Load VMCS12 from guest memory since it is not already
>  		 * cached.
> @@ -10976,12 +10985,10 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
>   */
>  static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
>  {
> -       struct vcpu_vmx *vmx = to_vmx(vcpu);
> -
> -       vcpu_load(vcpu);
> -       vmx_switch_vmcs(vcpu, &vmx->vmcs01);
> -       free_nested(vmx);
> -       vcpu_put(vcpu);
> +	vcpu_load(vcpu);
> +	vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01);
> +	free_nested(vcpu);
> +	vcpu_put(vcpu);
>  }
>  
>  static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
> @@ -11331,6 +11338,7 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
>  	if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu)))
>  		return 1;
>  
> +	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
>  	kvm_init_shadow_ept_mmu(vcpu,
>  			to_vmx(vcpu)->nested.msrs.ept_caps &
>  			VMX_EPT_EXECUTE_ONLY_BIT,
> @@ -11346,6 +11354,7 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
>  
>  static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
>  {
> +	vcpu->arch.mmu = &vcpu->arch.root_mmu;
>  	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
>  }
>  
> @@ -13421,7 +13430,7 @@ static void vmx_leave_nested(struct kvm_vcpu *vcpu)
>  		to_vmx(vcpu)->nested.nested_run_pending = 0;
>  		nested_vmx_vmexit(vcpu, -1, 0, 0);
>  	}
> -	free_nested(to_vmx(vcpu));
> +	free_nested(vcpu);
>  }
>  
>  /*
> -- 
> 2.17.1
>
Vitaly Kuznetsov Sept. 26, 2018, 5:18 p.m. UTC | #2
Sean Christopherson <sean.j.christopherson@intel.com> writes:

> On Tue, Sep 25, 2018 at 07:58:39PM +0200, Vitaly Kuznetsov wrote:
>> When EPT is used for nested guest we need to re-init MMU as shadow
>> EPT MMU (nested_ept_init_mmu_context() does that). When we return back
>> from L2 to L1 kvm_mmu_reset_context() in nested_vmx_load_cr3() resets
>> MMU back to normal TDP mode. Add a special 'guest_mmu' so we can use
>> separate root caches; the improved hit rate is not very important for
>> single vCPU performance, but it avoids contention on the mmu_lock for
>> many vCPUs.
>> 
>> On the nested CPUID benchmark, with 16 vCPUs, an L2->L1->L2 vmexit
>> goes from 42k to 26k cycles.
>> 
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>> Changes since v1:
>> - drop now-unneeded local vmx variable in vmx_free_vcpu_nested
>>   [Sean Christopherson]
>> ---
>>  arch/x86/include/asm/kvm_host.h |  3 +++
>>  arch/x86/kvm/mmu.c              | 15 +++++++++++----
>>  arch/x86/kvm/vmx.c              | 27 ++++++++++++++++++---------
>>  3 files changed, 32 insertions(+), 13 deletions(-)
>
> ...
>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 2d55adab52de..93ff08136fc1 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -8468,8 +8468,10 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
>>   * Free whatever needs to be freed from vmx->nested when L1 goes down, or
>>   * just stops using VMX.
>>   */
>> -static void free_nested(struct vcpu_vmx *vmx)
>> +static void free_nested(struct kvm_vcpu *vcpu)
>>  {
>> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +
>>  	if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
>>  		return;
>>  
>> @@ -8502,6 +8504,8 @@ static void free_nested(struct vcpu_vmx *vmx)
>>  		vmx->nested.pi_desc = NULL;
>>  	}
>>  
>> +	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
>> +
>>  	free_loaded_vmcs(&vmx->nested.vmcs02);
>>  }
>>  
>> @@ -8510,7 +8514,7 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
>>  {
>>  	if (!nested_vmx_check_permission(vcpu))
>>  		return 1;
>> -	free_nested(to_vmx(vcpu));
>> +	free_nested(vcpu);
>>  	nested_vmx_succeed(vcpu);
>>  	return kvm_skip_emulated_instruction(vcpu);
>>  }
>> @@ -8541,6 +8545,8 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
>>  	if (vmptr == vmx->nested.current_vmptr)
>>  		nested_release_vmcs12(vmx);
>>  
>> +	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
>
> Shouldn't we only free guest_mmu if VMCLEAR is targeting
> current_vmptr?

Right you are, this was definitely overlooked; there is no need for
kvm_mmu_free_roots() when we VMCLEAR some other vmptr.

> Assuming that's the case, we could put the call to kvm_mmu_free_roots()
> in nested_release_vmcs12() instead of calling it from handle_vmclear()
> and handle_vmptrld().

Yep, will do in v3.

>
>> +
>>  	kvm_vcpu_write_guest(vcpu,
>>  			vmptr + offsetof(struct vmcs12, launch_state),
>>  			&zero, sizeof(zero));
>> @@ -8924,6 +8930,9 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
>>  		}
>>  
>>  		nested_release_vmcs12(vmx);
>> +
>> +		kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu,
>> +				   KVM_MMU_ROOTS_ALL);
>>  		/*
>>  		 * Load VMCS12 from guest memory since it is not already
>>  		 * cached.
>> @@ -10976,12 +10985,10 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
>>   */
>>  static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
>>  {
>> -       struct vcpu_vmx *vmx = to_vmx(vcpu);
>> -
>> -       vcpu_load(vcpu);
>> -       vmx_switch_vmcs(vcpu, &vmx->vmcs01);
>> -       free_nested(vmx);
>> -       vcpu_put(vcpu);
>> +	vcpu_load(vcpu);
>> +	vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01);
>> +	free_nested(vcpu);
>> +	vcpu_put(vcpu);
>>  }
>>  
>>  static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
>> @@ -11331,6 +11338,7 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
>>  	if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu)))
>>  		return 1;
>>  
>> +	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
>>  	kvm_init_shadow_ept_mmu(vcpu,
>>  			to_vmx(vcpu)->nested.msrs.ept_caps &
>>  			VMX_EPT_EXECUTE_ONLY_BIT,
>> @@ -11346,6 +11354,7 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
>>  
>>  static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
>>  {
>> +	vcpu->arch.mmu = &vcpu->arch.root_mmu;
>>  	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
>>  }
>>  
>> @@ -13421,7 +13430,7 @@ static void vmx_leave_nested(struct kvm_vcpu *vcpu)
>>  		to_vmx(vcpu)->nested.nested_run_pending = 0;
>>  		nested_vmx_vmexit(vcpu, -1, 0, 0);
>>  	}
>> -	free_nested(to_vmx(vcpu));
>> +	free_nested(vcpu);
>>  }
>>  
>>  /*
>> -- 
>> 2.17.1
>>
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 404c3438827b..a3829869353b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -539,6 +539,9 @@  struct kvm_vcpu_arch {
 	/* Non-nested MMU for L1 */
 	struct kvm_mmu root_mmu;
 
+	/* L1 MMU when running nested */
+	struct kvm_mmu guest_mmu;
+
 	/*
 	 * Paging state of an L2 guest (used for nested npt)
 	 *
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4491b8894337..96c2a0b3eb53 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4967,8 +4967,10 @@  EXPORT_SYMBOL_GPL(kvm_mmu_load);
 
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
-	kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, KVM_MMU_ROOTS_ALL);
-	WARN_ON(VALID_PAGE(vcpu->arch.mmu->root_hpa));
+	kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL);
+	WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root_hpa));
+	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
+	WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root_hpa));
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unload);
 
@@ -5407,13 +5409,18 @@  int kvm_mmu_create(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.mmu = &vcpu->arch.root_mmu;
 	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
+
 	vcpu->arch.root_mmu.root_hpa = INVALID_PAGE;
 	vcpu->arch.root_mmu.translate_gpa = translate_gpa;
-	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
-
 	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
 		vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
 
+	vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE;
+	vcpu->arch.guest_mmu.translate_gpa = translate_gpa;
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+		vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
+
+	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
 	return alloc_mmu_pages(vcpu);
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2d55adab52de..93ff08136fc1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8468,8 +8468,10 @@  static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
  * Free whatever needs to be freed from vmx->nested when L1 goes down, or
  * just stops using VMX.
  */
-static void free_nested(struct vcpu_vmx *vmx)
+static void free_nested(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
 	if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
 		return;
 
@@ -8502,6 +8504,8 @@  static void free_nested(struct vcpu_vmx *vmx)
 		vmx->nested.pi_desc = NULL;
 	}
 
+	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
+
 	free_loaded_vmcs(&vmx->nested.vmcs02);
 }
 
@@ -8510,7 +8514,7 @@  static int handle_vmoff(struct kvm_vcpu *vcpu)
 {
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
-	free_nested(to_vmx(vcpu));
+	free_nested(vcpu);
 	nested_vmx_succeed(vcpu);
 	return kvm_skip_emulated_instruction(vcpu);
 }
@@ -8541,6 +8545,8 @@  static int handle_vmclear(struct kvm_vcpu *vcpu)
 	if (vmptr == vmx->nested.current_vmptr)
 		nested_release_vmcs12(vmx);
 
+	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
+
 	kvm_vcpu_write_guest(vcpu,
 			vmptr + offsetof(struct vmcs12, launch_state),
 			&zero, sizeof(zero));
@@ -8924,6 +8930,9 @@  static int handle_vmptrld(struct kvm_vcpu *vcpu)
 		}
 
 		nested_release_vmcs12(vmx);
+
+		kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu,
+				   KVM_MMU_ROOTS_ALL);
 		/*
 		 * Load VMCS12 from guest memory since it is not already
 		 * cached.
@@ -10976,12 +10985,10 @@  static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
  */
 static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
 {
-       struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-       vcpu_load(vcpu);
-       vmx_switch_vmcs(vcpu, &vmx->vmcs01);
-       free_nested(vmx);
-       vcpu_put(vcpu);
+	vcpu_load(vcpu);
+	vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01);
+	free_nested(vcpu);
+	vcpu_put(vcpu);
 }
 
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
@@ -11331,6 +11338,7 @@  static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 	if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu)))
 		return 1;
 
+	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
 	kvm_init_shadow_ept_mmu(vcpu,
 			to_vmx(vcpu)->nested.msrs.ept_caps &
 			VMX_EPT_EXECUTE_ONLY_BIT,
@@ -11346,6 +11354,7 @@  static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 
 static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.mmu = &vcpu->arch.root_mmu;
 	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
 }
 
@@ -13421,7 +13430,7 @@  static void vmx_leave_nested(struct kvm_vcpu *vcpu)
 		to_vmx(vcpu)->nested.nested_run_pending = 0;
 		nested_vmx_vmexit(vcpu, -1, 0, 0);
 	}
-	free_nested(to_vmx(vcpu));
+	free_nested(vcpu);
 }
 
 /*