[1/3] KVM: x86: introduce kvm_mmu_invalidate_gva

Message ID 20200326093516.24215-2-pbonzini@redhat.com (mailing list archive)
State New, archived
Series KVM: x86: sync SPTEs on page/EPT fault injection

Commit Message

Paolo Bonzini March 26, 2020, 9:35 a.m. UTC
Wrap the combination of mmu->invlpg and kvm_x86_ops->tlb_flush_gva
into a new function.  This function also lets us specify the host PGD to
invalidate, as well as the MMU, both of which will be useful in fixing and
simplifying kvm_inject_emulated_page_fault.

A nested guest's MMU however has g_context->invlpg == NULL.  Instead of
setting it to nonpaging_invlpg, make kvm_mmu_invalidate_gva the only
entry point to mmu->invlpg and make a NULL invlpg pointer equivalent
to nonpaging_invlpg, saving a retpoline.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +
 arch/x86/kvm/mmu/mmu.c          | 71 +++++++++++++++++++++------------
 2 files changed, 47 insertions(+), 26 deletions(-)
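
To make the retpoline point concrete, here is a minimal sketch of the
before/after dispatch (the call site is paraphrased from the patch below,
not new code):

	/* Before: nonpaging and TDP MMUs installed an empty stub ... */
	static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root)
	{
	}

	/* ... so every invocation paid for an indirect call, which under
	 * CONFIG_RETPOLINE compiles to a retpoline thunk even for a no-op. */
	mmu->invlpg(vcpu, gva, mmu->root_hpa);

	/* After: a NULL pointer means "no SPTEs to sync", so a plain NULL
	 * test replaces the indirect call for MMUs that shadow nothing. */
	if (mmu->invlpg)
		mmu->invlpg(vcpu, gva, mmu->root_hpa);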

Comments

Sean Christopherson March 28, 2020, 6:26 p.m. UTC | #1
On Thu, Mar 26, 2020 at 05:35:14AM -0400, Paolo Bonzini wrote:
> Wrap the combination of mmu->invlpg and kvm_x86_ops->tlb_flush_gva
> into a new function.  This function also lets us specify the host PGD to
> invalidate, as well as the MMU, both of which will be useful in fixing and
> simplifying kvm_inject_emulated_page_fault.
> 
> A nested guest's MMU however has g_context->invlpg == NULL.  Instead of
> setting it to nonpaging_invlpg, make kvm_mmu_invalidate_gva the only
> entry point to mmu->invlpg and make a NULL invlpg pointer equivalent
> to nonpaging_invlpg, saving a retpoline.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 +
>  arch/x86/kvm/mmu/mmu.c          | 71 +++++++++++++++++++++------------
>  2 files changed, 47 insertions(+), 26 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 328b1765ff76..f6a1ece1bb4a 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1506,6 +1506,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
>  int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
>  		       void *insn, int insn_len);
>  void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
> +void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> +		            gva_t gva, unsigned long root_hpa);
>  void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
>  void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
>  
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 560e85ebdf22..e26c9a583e75 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -2153,10 +2153,6 @@ static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
>  	return 0;
>  }
>  
> -static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root)
> -{
> -}
> -
>  static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
>  				 struct kvm_mmu_page *sp, u64 *spte,
>  				 const void *pte)
> @@ -4237,7 +4233,7 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
>  	context->page_fault = nonpaging_page_fault;
>  	context->gva_to_gpa = nonpaging_gva_to_gpa;
>  	context->sync_page = nonpaging_sync_page;
> -	context->invlpg = nonpaging_invlpg;
> +	context->invlpg = NULL;
>  	context->update_pte = nonpaging_update_pte;
>  	context->root_level = 0;
>  	context->shadow_root_level = PT32E_ROOT_LEVEL;
> @@ -4928,7 +4924,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
>  	context->mmu_role.as_u64 = new_role.as_u64;
>  	context->page_fault = kvm_tdp_page_fault;
>  	context->sync_page = nonpaging_sync_page;
> -	context->invlpg = nonpaging_invlpg;
> +	context->invlpg = NULL;
>  	context->update_pte = nonpaging_update_pte;
>  	context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
>  	context->direct_map = true;
> @@ -5096,6 +5092,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
>  	g_context->get_pdptr         = kvm_pdptr_read;
>  	g_context->inject_page_fault = kvm_inject_page_fault;
>  
> +	/*
> +	 * L2 page tables are never shadowed, so there is no need to sync
> +	 * SPTEs.
> +	 */
> +	g_context->invlpg            = NULL;
> +
>  	/*
>  	 * Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using
>  	 * L1's nested page tables (e.g. EPT12). The nested translation
> @@ -5497,37 +5499,54 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
>  }
>  EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
>  
> -void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
> +void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> +		            gva_t gva, unsigned long root_hpa)

As pointed out by the build bot, @root_hpa needs to be hpa_t.
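
The fixed-up declaration would presumably read:

	void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
				    gva_t gva, hpa_t root_hpa);

(hpa_t is a u64, while unsigned long is only 32 bits on 32-bit builds, so
the narrower type would truncate root HPAs there.)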

>  {
> -	struct kvm_mmu *mmu = vcpu->arch.mmu;
>  	int i;
>  
> -	/* INVLPG on a * non-canonical address is a NOP according to the SDM.  */
> -	if (is_noncanonical_address(gva, vcpu))
> +	/* It's actually a GPA for vcpu->arch.guest_mmu.  */
> +	if (mmu != &vcpu->arch.guest_mmu) {

Doesn't need to be addressed here, but this is not the first time in this
series (the large TLB flushing series) that I've struggled to parse
"guest_mmu".  Would it make sense to rename it something like nested_tdp_mmu
or l2_tdp_mmu?

A bit ugly, but it'd be nice to avoid the mental challenge of remembering
that guest_mmu is in play if and only if nested TDP is enabled.

> +		/* INVLPG on a non-canonical address is a NOP according to the SDM.  */
> +		if (is_noncanonical_address(gva, vcpu))
> +			return;
> +
> +		kvm_x86_ops->tlb_flush_gva(vcpu, gva);
> +	}
> +
> +	if (!mmu->invlpg)
>  		return;
>  
> -	mmu->invlpg(vcpu, gva, mmu->root_hpa);
> +	if (root_hpa == INVALID_PAGE) {
> +		mmu->invlpg(vcpu, gva, mmu->root_hpa);
>  
> -	/*
> -	 * INVLPG is required to invalidate any global mappings for the VA,
> -	 * irrespective of PCID. Since it would take us roughly similar amount
> -	 * of work to determine whether any of the prev_root mappings of the VA
> -	 * is marked global, or to just sync it blindly, so we might as well
> -	 * just always sync it.
> -	 *
> -	 * Mappings not reachable via the current cr3 or the prev_roots will be
> -	 * synced when switching to that cr3, so nothing needs to be done here
> -	 * for them.
> -	 */
> -	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
> -		if (VALID_PAGE(mmu->prev_roots[i].hpa))
> -			mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
> +		/*
> +		 * INVLPG is required to invalidate any global mappings for the VA,
> +		 * irrespective of PCID. Since it would take us roughly similar amount
> +		 * of work to determine whether any of the prev_root mappings of the VA
> +		 * is marked global, or to just sync it blindly, so we might as well
> +		 * just always sync it.
> +		 *
> +		 * Mappings not reachable via the current cr3 or the prev_roots will be
> +		 * synced when switching to that cr3, so nothing needs to be done here
> +		 * for them.
> +		 */
> +		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
> +			if (VALID_PAGE(mmu->prev_roots[i].hpa))
> +				mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
> +	} else {
> +		mmu->invlpg(vcpu, gva, root_hpa);
> +	}
> +}
> +EXPORT_SYMBOL_GPL(kvm_mmu_invalidate_gva);
>  
> -	kvm_x86_ops->tlb_flush_gva(vcpu, gva);
> +void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
> +{
> +	kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE);
>  	++vcpu->stat.invlpg;
>  }
>  EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
>  
> +
>  void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
>  {
>  	struct kvm_mmu *mmu = vcpu->arch.mmu;
> -- 
> 2.18.2
> 
>
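
As a usage note on the new @root_hpa parameter, a sketch grounded in the
patch above (the second call site is hypothetical, for illustration only):

	/* INVALID_PAGE: sync the GVA out of the current root and every
	 * valid prev_root, plus the hardware TLB flush; this is exactly
	 * what kvm_mmu_invlpg does. */
	kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE);

	/* Specific root: the GVA is synced in that single root only (the
	 * TLB flush still happens for vcpu->arch.mmu). */
	kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva,
			       vcpu->arch.mmu->prev_roots[i].hpa);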
Paolo Bonzini March 30, 2020, 10:45 a.m. UTC | #2
On 28/03/20 19:26, Sean Christopherson wrote:
>> +	if (mmu != &vcpu->arch.guest_mmu) {
> Doesn't need to be addressed here, but this is not the first time in this
> series (the large TLB flushing series) that I've struggled to parse
> "guest_mmu".  Would it make sense to rename it something like nested_tdp_mmu
> or l2_tdp_mmu?
> 
> A bit ugly, but it'd be nice to avoid the mental challenge of remembering
> that guest_mmu is in play if and only if nested TDP is enabled.

No, it's not ugly at all.  My vote would be for shadow_tdp_mmu.

Paolo
Sean Christopherson March 30, 2020, 6:47 p.m. UTC | #3
On Mon, Mar 30, 2020 at 12:45:34PM +0200, Paolo Bonzini wrote:
> On 28/03/20 19:26, Sean Christopherson wrote:
> >> +	if (mmu != &vcpu->arch.guest_mmu) {
> > Doesn't need to be addressed here, but this is not the first time in this
> > series (the large TLB flushing series) that I've struggled to parse
> > "guest_mmu".  Would it make sense to rename it something like nested_tdp_mmu
> > or l2_tdp_mmu?
> > 
> > A bit ugly, but it'd be nice to avoid the mental challenge of remembering
> > that guest_mmu is in play if and only if nested TDP is enabled.
> 
> No, it's not ugly at all.  My vote would be for shadow_tdp_mmu.

Works for me.  My vote is for anything other than guest_mmu :-)
Vitaly Kuznetsov March 31, 2020, 10:33 a.m. UTC | #4
Sean Christopherson <sean.j.christopherson@intel.com> writes:

> On Mon, Mar 30, 2020 at 12:45:34PM +0200, Paolo Bonzini wrote:
>> On 28/03/20 19:26, Sean Christopherson wrote:
>> >> +	if (mmu != &vcpu->arch.guest_mmu) {
>> > Doesn't need to be addressed here, but this is not the first time in this
>> > series (the large TLB flushing series) that I've struggled to parse
>> > "guest_mmu".  Would it make sense to rename it something like nested_tdp_mmu
>> > or l2_tdp_mmu?
>> > 
>> > A bit ugly, but it'd be nice to avoid the mental challenge of remembering
>> > that guest_mmu is in play if and only if nested TDP is enabled.
>> 
>> No, it's not ugly at all.  My vote would be for shadow_tdp_mmu.
>
> Works for me.  My vote is for anything other than guest_mmu :-)
>

Oh come on guys, nobody protested when I called it this way :-)

Personally, I don't quite like 'shadow_tdp_mmu' because it doesn't have
any particular reference to the fact that it is a nested/L2-related
thing (maybe it's just a shadow MMU?).  Also, we already have a thing
called 'nested_mmu'... Maybe let's be bold and rename all three things,
like

root_mmu -> l1_mmu
guest_mmu -> l1_nested_mmu
nested_mmu -> l2_mmu (l2_walk_mmu)

or something like that?
Paolo Bonzini March 31, 2020, 12:16 p.m. UTC | #5
On 31/03/20 12:33, Vitaly Kuznetsov wrote:
>> Works for me.  My vote is for anything other than guest_mmu :-)
>
> Oh come on guys, nobody protested when I called it this way :-)

Sure I take full responsibility for that. :)

> Personally, I don't quite like 'shadow_tdp_mmu' because it doesn't have
> any particular reference to the fact that it is a nested/L2-related
> thing (maybe it's just a shadow MMU?).

Well, nested virt is the only case in which you shadow TDP.  Both
interpretations work:

* "shadow tdp_mmu": an MMU for two-dimensional page tables that employs
shadowing

* "shadow_tdp MMU": the MMU for two-dimensional page tables.

> Also, we already have a thing
> called 'nested_mmu'... Maybe let's be bold and rename all three things,
> like
>
> root_mmu -> l1_mmu
> guest_mmu -> l1_nested_mmu
> nested_mmu -> l2_mmu (l2_walk_mmu)

I am not particularly fond of using l1/l2 outside code that specifically
deals with nested virt.  Also, l1_nested_mmu is too confusing with
respect to the current nested_mmu (likewise for root_mmu: I would rename
it to guest_mmu, but it would be an awful source of mental confusion as
well as semantic source code conflicts).

That said, I wouldn't mind renaming nested_mmu to something else, for
example nested_walk_mmu.

Paolo
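
For reference, the three MMUs under discussion are fields of struct
kvm_vcpu_arch; a simplified sketch follows, with comments that summarize
this thread rather than quoting the kernel's own:

	struct kvm_vcpu_arch {
		...
		/* MMU for non-nested operation, and for L1 when nested
		 * TDP is not in use. */
		struct kvm_mmu root_mmu;

		/* Shadows L1's TDP page tables (EPT/NPT) while L2 runs;
		 * in play if and only if nested TDP is enabled, which is
		 * the naming complaint above. */
		struct kvm_mmu guest_mmu;

		/* Used only to walk L2's page tables for gva_to_gpa
		 * translations; it never shadows anything itself. */
		struct kvm_mmu nested_mmu;
		...
	};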

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 328b1765ff76..f6a1ece1bb4a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1506,6 +1506,8 @@  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
 		       void *insn, int insn_len);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+		            gva_t gva, unsigned long root_hpa);
 void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
 void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 560e85ebdf22..e26c9a583e75 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2153,10 +2153,6 @@  static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root)
-{
-}
-
 static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
 				 struct kvm_mmu_page *sp, u64 *spte,
 				 const void *pte)
@@ -4237,7 +4233,7 @@  static void nonpaging_init_context(struct kvm_vcpu *vcpu,
 	context->page_fault = nonpaging_page_fault;
 	context->gva_to_gpa = nonpaging_gva_to_gpa;
 	context->sync_page = nonpaging_sync_page;
-	context->invlpg = nonpaging_invlpg;
+	context->invlpg = NULL;
 	context->update_pte = nonpaging_update_pte;
 	context->root_level = 0;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
@@ -4928,7 +4924,7 @@  static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	context->mmu_role.as_u64 = new_role.as_u64;
 	context->page_fault = kvm_tdp_page_fault;
 	context->sync_page = nonpaging_sync_page;
-	context->invlpg = nonpaging_invlpg;
+	context->invlpg = NULL;
 	context->update_pte = nonpaging_update_pte;
 	context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
 	context->direct_map = true;
@@ -5096,6 +5092,12 @@  static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	g_context->get_pdptr         = kvm_pdptr_read;
 	g_context->inject_page_fault = kvm_inject_page_fault;
 
+	/*
+	 * L2 page tables are never shadowed, so there is no need to sync
+	 * SPTEs.
+	 */
+	g_context->invlpg            = NULL;
+
 	/*
 	 * Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using
 	 * L1's nested page tables (e.g. EPT12). The nested translation
@@ -5497,37 +5499,54 @@  int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 
-void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
+void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+		            gva_t gva, unsigned long root_hpa)
 {
-	struct kvm_mmu *mmu = vcpu->arch.mmu;
 	int i;
 
-	/* INVLPG on a * non-canonical address is a NOP according to the SDM.  */
-	if (is_noncanonical_address(gva, vcpu))
+	/* It's actually a GPA for vcpu->arch.guest_mmu.  */
+	if (mmu != &vcpu->arch.guest_mmu) {
+		/* INVLPG on a non-canonical address is a NOP according to the SDM.  */
+		if (is_noncanonical_address(gva, vcpu))
+			return;
+
+		kvm_x86_ops->tlb_flush_gva(vcpu, gva);
+	}
+
+	if (!mmu->invlpg)
 		return;
 
-	mmu->invlpg(vcpu, gva, mmu->root_hpa);
+	if (root_hpa == INVALID_PAGE) {
+		mmu->invlpg(vcpu, gva, mmu->root_hpa);
 
-	/*
-	 * INVLPG is required to invalidate any global mappings for the VA,
-	 * irrespective of PCID. Since it would take us roughly similar amount
-	 * of work to determine whether any of the prev_root mappings of the VA
-	 * is marked global, or to just sync it blindly, so we might as well
-	 * just always sync it.
-	 *
-	 * Mappings not reachable via the current cr3 or the prev_roots will be
-	 * synced when switching to that cr3, so nothing needs to be done here
-	 * for them.
-	 */
-	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
-		if (VALID_PAGE(mmu->prev_roots[i].hpa))
-			mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+		/*
+		 * INVLPG is required to invalidate any global mappings for the VA,
+		 * irrespective of PCID. Since it would take us roughly similar amount
+		 * of work to determine whether any of the prev_root mappings of the VA
+		 * is marked global, or to just sync it blindly, so we might as well
+		 * just always sync it.
+		 *
+		 * Mappings not reachable via the current cr3 or the prev_roots will be
+		 * synced when switching to that cr3, so nothing needs to be done here
+		 * for them.
+		 */
+		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+			if (VALID_PAGE(mmu->prev_roots[i].hpa))
+				mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+	} else {
+		mmu->invlpg(vcpu, gva, root_hpa);
+	}
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_invalidate_gva);
 
-	kvm_x86_ops->tlb_flush_gva(vcpu, gva);
+void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
+{
+	kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE);
 	++vcpu->stat.invlpg;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
 
+
 void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
 {
 	struct kvm_mmu *mmu = vcpu->arch.mmu;