
[v3,08/10] KVM: x86/mmu: Split out TDP MMU page fault handling

Message ID 20220921173546.2674386-9-dmatlack@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86/mmu: Make tdp_mmu read-only and clean up TDP MMU fault handler

Commit Message

David Matlack Sept. 21, 2022, 5:35 p.m. UTC
Split out the page fault handling for the TDP MMU to a separate
function.  This creates some duplicate code, but makes the TDP MMU fault
handler simpler to read by eliminating branches and will enable future
cleanups by allowing the TDP MMU and non-TDP MMU fault paths to diverge.

Only compile in the TDP MMU fault handler for 64-bit builds since
kvm_tdp_mmu_map() does not exist in 32-bit builds.

No functional change intended.

Signed-off-by: David Matlack <dmatlack@google.com>
---
 arch/x86/kvm/mmu/mmu.c | 62 ++++++++++++++++++++++++++++++++----------
 1 file changed, 48 insertions(+), 14 deletions(-)

Comments

Isaku Yamahata Oct. 3, 2022, 7:28 p.m. UTC | #1
On Wed, Sep 21, 2022 at 10:35:44AM -0700,
David Matlack <dmatlack@google.com> wrote:

> Split out the page fault handling for the TDP MMU to a separate
> function.  This creates some duplicate code, but makes the TDP MMU fault
> handler simpler to read by eliminating branches and will enable future
> cleanups by allowing the TDP MMU and non-TDP MMU fault paths to diverge.
> 
> Only compile in the TDP MMU fault handler for 64-bit builds since
> kvm_tdp_mmu_map() does not exist in 32-bit builds.
> 
> No functional change intended.
> 
> Signed-off-by: David Matlack <dmatlack@google.com>
> ---
>  arch/x86/kvm/mmu/mmu.c | 62 ++++++++++++++++++++++++++++++++----------
>  1 file changed, 48 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index dc203973de83..b36f351138f7 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4238,7 +4238,6 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
>  
>  static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  {
> -	bool is_tdp_mmu_fault = is_tdp_mmu(vcpu->arch.mmu);
>  	int r;
>  
>  	if (page_fault_handle_page_track(vcpu, fault))
> @@ -4257,11 +4256,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>  		return r;
>  
>  	r = RET_PF_RETRY;
> -
> -	if (is_tdp_mmu_fault)
> -		read_lock(&vcpu->kvm->mmu_lock);
> -	else
> -		write_lock(&vcpu->kvm->mmu_lock);
> +	write_lock(&vcpu->kvm->mmu_lock);
>  
>  	if (is_page_fault_stale(vcpu, fault))
>  		goto out_unlock;
> @@ -4270,16 +4265,10 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>  	if (r)
>  		goto out_unlock;
>  
> -	if (is_tdp_mmu_fault)
> -		r = kvm_tdp_mmu_map(vcpu, fault);
> -	else
> -		r = __direct_map(vcpu, fault);
> +	r = __direct_map(vcpu, fault);
>  
>  out_unlock:
> -	if (is_tdp_mmu_fault)
> -		read_unlock(&vcpu->kvm->mmu_lock);
> -	else
> -		write_unlock(&vcpu->kvm->mmu_lock);
> +	write_unlock(&vcpu->kvm->mmu_lock);
>  	kvm_release_pfn_clean(fault->pfn);
>  	return r;
>  }
> @@ -4327,6 +4316,46 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
>  }
>  EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
>  
> +#ifdef CONFIG_X86_64
> +static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
> +				  struct kvm_page_fault *fault)
> +{
> +	int r;
> +
> +	if (page_fault_handle_page_track(vcpu, fault))
> +		return RET_PF_EMULATE;
> +
> +	r = fast_page_fault(vcpu, fault);
> +	if (r != RET_PF_INVALID)
> +		return r;
> +
> +	r = mmu_topup_memory_caches(vcpu, false);
> +	if (r)
> +		return r;
> +
> +	r = kvm_faultin_pfn(vcpu, fault, ACC_ALL);
> +	if (r != RET_PF_CONTINUE)
> +		return r;
> +
> +	r = RET_PF_RETRY;
> +	read_lock(&vcpu->kvm->mmu_lock);
> +
> +	if (is_page_fault_stale(vcpu, fault))
> +		goto out_unlock;
> +
> +	r = make_mmu_pages_available(vcpu);
> +	if (r)
> +		goto out_unlock;
> +
> +	r = kvm_tdp_mmu_map(vcpu, fault);
> +
> +out_unlock:
> +	read_unlock(&vcpu->kvm->mmu_lock);
> +	kvm_release_pfn_clean(fault->pfn);
> +	return r;
> +}
> +#endif
> +
>  int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  {
>  	/*
> @@ -4351,6 +4380,11 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>  		}
>  	}
>  
> +#ifdef CONFIG_X86_64
> +	if (tdp_mmu_enabled)
> +		return kvm_tdp_mmu_page_fault(vcpu, fault);
> +#endif
> +
>  	return direct_page_fault(vcpu, fault);
>  }
>  
> -- 
> 2.37.3.998.g577e59143f-goog
> 

Reviewed-by: Isaku Yamahata <isaku.yamahata@intel.com>
Sean Christopherson Oct. 11, 2022, 9:56 p.m. UTC | #2
On Wed, Sep 21, 2022, David Matlack wrote:
> Split out the page fault handling for the TDP MMU to a separate
> function.  This creates some duplicate code, but makes the TDP MMU fault
> handler simpler to read by eliminating branches and will enable future
> cleanups by allowing the TDP MMU and non-TDP MMU fault paths to diverge.

IMO, the code duplication isn't worth the relatively minor simplification.  And
some of the "cleanups" to reduce the duplication arguably add complexity, e.g.
moving code around in "Initialize fault.{gfn,slot} earlier for direct MMUs" isn't
a net positive IMO.

With the TDP MMU being all-or-nothing, the need to check if the MMU is a TDP MMU
goes away.  If the TDP MMU is enabled, direct_page_fault() will be reached only
for non-nested TDP page faults.  Paging is required for NPT, and EPT faults will
always go through ept_page_fault(), i.e. nonpaging_page_fault() is unused.

In other words, this code

	bool is_tdp_mmu_fault = is_tdp_mmu(vcpu->arch.mmu);

gets replaced with direct checks on is_tdp_mmu_enabled().  And because fast page
faults are used only for direct MMUs, that code can also simply query whether or
not the TDP MMU is enabled.
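
For example (a sketch only, not code from this series, and assuming the existing
helpers keep their current signatures), the TDP MMU check in fast_page_fault()
could collapse into the global query:

	/*
	 * Sketch: fast page faults only run for direct MMUs, so the per-MMU
	 * is_tdp_mmu() check can become a plain "is the TDP MMU enabled" query.
	 */
	if (is_tdp_mmu_enabled())
		sptep = kvm_tdp_mmu_fast_pf_get_last_sptep(vcpu, fault->addr, &spte);
	else
		sptep = fast_pf_get_last_sptep(vcpu, fault->addr, &spte);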

Avoiding make_mmu_pages_available() for the TDP MMU is easy enough and doesn't
require any new conditionals.

	if (is_tdp_mmu_enabled()) {
		r = kvm_tdp_mmu_map(vcpu, fault);
	} else {
		r = make_mmu_pages_available(vcpu);
		if (r)
			goto out_unlock;

		r = __direct_map(vcpu, fault);
	}


That leaves the read vs. write lock as the only code that is borderline difficult
to read.  I agree that's a little ugly, but IMO it's not worth duplicating the
entire function.  If we really wanted to hide that ugliness, we could add helpers
to do the lock+unlock, but even that seems somewhat superfluous.
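
For reference, such lock helpers might look like the below (a minimal sketch; the
kvm_fault_mmu_lock()/kvm_fault_mmu_unlock() names are invented for illustration
and assume the is_tdp_mmu_enabled() wrapper):

static void kvm_fault_mmu_lock(struct kvm *kvm)
{
	/* TDP MMU faults are handled with mmu_lock held for read. */
	if (is_tdp_mmu_enabled())
		read_lock(&kvm->mmu_lock);
	else
		write_lock(&kvm->mmu_lock);
}

static void kvm_fault_mmu_unlock(struct kvm *kvm)
{
	if (is_tdp_mmu_enabled())
		read_unlock(&kvm->mmu_lock);
	else
		write_unlock(&kvm->mmu_lock);
}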

From a performance perspective, with the flag being read-only after the vendor
module is loaded, we can add a static key to track TDP MMU enabling, biased to the
TDP MMU being enabled.  That turns all of the if-statements into NOPs for the TDP MMU
(this is why I suggested keeping the is_tdp_mmu_enabled() wrapper in patch 1).
Static branches actually shrink the code size too (because that matters), e.g. a
5-byte nop instead of 8 bytes for TEST+Jcc.

I'll speculatively post a v4 with the static key idea, minus patches 7, 8, and 10.
I think that'll make it easier to discuss the static key/branch implementation.
If we decide we still want to split out the TDP MMU page fault handler, then it
should be relatively simple to fold back in those patches.

E.g. the generated code looks like this:

4282		if (is_tdp_mmu_enabled())
4283			read_lock(&vcpu->kvm->mmu_lock);
   0x000000000005ff7e <+494>:   nopl   0x0(%rax,%rax,1)
   0x000000000005ff83 <+499>:	mov    (%rbx),%rdi
   0x000000000005ff86 <+502>:	call   0x5ff8b <direct_page_fault+507>

4286	
4287		if (is_page_fault_stale(vcpu, fault))
   0x000000000005ff8b <+507>:	mov    %r15,%rsi
   0x000000000005ff8e <+510>:	mov    %rbx,%rdi
   0x000000000005ff91 <+513>:	call   0x558b0 <is_page_fault_stale>
   0x000000000005ff96 <+518>:	test   %al,%al
   0x000000000005ff98 <+520>:	jne    0x603e1 <direct_page_fault+1617>

4288			goto out_unlock;
4289	
4290		if (is_tdp_mmu_enabled()) {
4291			r = kvm_tdp_mmu_map(vcpu, fault);
   0x000000000005ff9e <+526>:   nopl   0x0(%rax,%rax,1)
   0x000000000005ffa3 <+531>:	mov    %rbx,%rdi
   0x000000000005ffa6 <+534>:	mov    %r15,%rsi
   0x000000000005ffa9 <+537>:	call   0x5ffae <direct_page_fault+542>
   0x000000000005ffae <+542>:	mov    (%rbx),%rdi
   0x000000000005ffb1 <+545>:	mov    %eax,%ebp


#ifdef CONFIG_X86_64
DECLARE_STATIC_KEY_TRUE(tdp_mmu_enabled);
#endif
                                     
static inline bool is_tdp_mmu_enabled(void)
{
#ifdef CONFIG_X86_64
        return static_branch_likely(&tdp_mmu_enabled);
#else
        return false;
#endif
}
                              
static inline bool is_tdp_mmu(struct kvm_mmu *mmu)
{
        return is_tdp_mmu_enabled() && mmu->root_role.direct;
}
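
A rough sketch of how the key could be defined and flipped when the vendor module
decides whether to use the TDP MMU (the kvm_mmu_set_tdp_mmu() helper name and its
call site are assumptions for illustration, not code from this series):

#ifdef CONFIG_X86_64
/* Biased true, i.e. code is optimized for the TDP MMU being enabled. */
DEFINE_STATIC_KEY_TRUE(tdp_mmu_enabled);

void kvm_mmu_set_tdp_mmu(bool enable)
{
	/* The key defaults to enabled, so only patch the branches when disabling. */
	if (!enable)
		static_branch_disable(&tdp_mmu_enabled);
}
#endif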

Patch

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index dc203973de83..b36f351138f7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4238,7 +4238,6 @@  static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
 
 static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
-	bool is_tdp_mmu_fault = is_tdp_mmu(vcpu->arch.mmu);
 	int r;
 
 	if (page_fault_handle_page_track(vcpu, fault))
@@ -4257,11 +4256,7 @@  static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 		return r;
 
 	r = RET_PF_RETRY;
-
-	if (is_tdp_mmu_fault)
-		read_lock(&vcpu->kvm->mmu_lock);
-	else
-		write_lock(&vcpu->kvm->mmu_lock);
+	write_lock(&vcpu->kvm->mmu_lock);
 
 	if (is_page_fault_stale(vcpu, fault))
 		goto out_unlock;
@@ -4270,16 +4265,10 @@  static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	if (r)
 		goto out_unlock;
 
-	if (is_tdp_mmu_fault)
-		r = kvm_tdp_mmu_map(vcpu, fault);
-	else
-		r = __direct_map(vcpu, fault);
+	r = __direct_map(vcpu, fault);
 
 out_unlock:
-	if (is_tdp_mmu_fault)
-		read_unlock(&vcpu->kvm->mmu_lock);
-	else
-		write_unlock(&vcpu->kvm->mmu_lock);
+	write_unlock(&vcpu->kvm->mmu_lock);
 	kvm_release_pfn_clean(fault->pfn);
 	return r;
 }
@@ -4327,6 +4316,46 @@  int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
 }
 EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
 
+#ifdef CONFIG_X86_64
+static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
+				  struct kvm_page_fault *fault)
+{
+	int r;
+
+	if (page_fault_handle_page_track(vcpu, fault))
+		return RET_PF_EMULATE;
+
+	r = fast_page_fault(vcpu, fault);
+	if (r != RET_PF_INVALID)
+		return r;
+
+	r = mmu_topup_memory_caches(vcpu, false);
+	if (r)
+		return r;
+
+	r = kvm_faultin_pfn(vcpu, fault, ACC_ALL);
+	if (r != RET_PF_CONTINUE)
+		return r;
+
+	r = RET_PF_RETRY;
+	read_lock(&vcpu->kvm->mmu_lock);
+
+	if (is_page_fault_stale(vcpu, fault))
+		goto out_unlock;
+
+	r = make_mmu_pages_available(vcpu);
+	if (r)
+		goto out_unlock;
+
+	r = kvm_tdp_mmu_map(vcpu, fault);
+
+out_unlock:
+	read_unlock(&vcpu->kvm->mmu_lock);
+	kvm_release_pfn_clean(fault->pfn);
+	return r;
+}
+#endif
+
 int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
 	/*
@@ -4351,6 +4380,11 @@  int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		}
 	}
 
+#ifdef CONFIG_X86_64
+	if (tdp_mmu_enabled)
+		return kvm_tdp_mmu_page_fault(vcpu, fault);
+#endif
+
 	return direct_page_fault(vcpu, fault);
 }