diff mbox series

[v2,3/6] KVM: x86/mmu: Refactor TDP MMU iter need resched check

Message ID 20240823235648.3236880-4-dmatlack@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86/mmu: Optimize TDP MMU huge page recovery during disable-dirty-log | expand

Commit Message

David Matlack Aug. 23, 2024, 11:56 p.m. UTC
Refactor the TDP MMU iterator "need resched" checks into a helper
function so they can be called from a different code path in a
subsequent commit.

No functional change intended.

Signed-off-by: David Matlack <dmatlack@google.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

Comments

Sean Christopherson Oct. 31, 2024, 12:04 a.m. UTC | #1
On Fri, Aug 23, 2024, David Matlack wrote:
> Refactor the TDP MMU iterator "need resched" checks into a helper
> function so they can be called from a different code path in a
> subsequent commit.
> 
> No functional change intended.
> 
> Signed-off-by: David Matlack <dmatlack@google.com>
> ---
>  arch/x86/kvm/mmu/tdp_mmu.c | 16 +++++++++++-----
>  1 file changed, 11 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 27adbb3ecb02..9b8299ee4abb 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -646,6 +646,16 @@ static inline void tdp_mmu_iter_set_spte(struct kvm *kvm, struct tdp_iter *iter,
>  #define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end)		\
>  	for_each_tdp_pte(_iter, root_to_sp(_mmu->root.hpa), _start, _end)
>  
> +static inline bool __must_check tdp_mmu_iter_need_resched(struct kvm *kvm,
> +							  struct tdp_iter *iter)
> +{
> +	/* Ensure forward progress has been made before yielding. */
> +	if (iter->next_last_level_gfn == iter->yielded_gfn)
> +		return false;
> +
> +	return need_resched() || rwlock_needbreak(&kvm->mmu_lock);
> +}
> +
>  /*
>   * Yield if the MMU lock is contended or this thread needs to return control
>   * to the scheduler.
> @@ -666,11 +676,7 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
>  {
>  	WARN_ON_ONCE(iter->yielded);
>  
> -	/* Ensure forward progress has been made before yielding. */
> -	if (iter->next_last_level_gfn == iter->yielded_gfn)
> -		return false;
> -
> -	if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
> +	if (tdp_mmu_iter_need_resched(kvm, iter)) {

Huh.  There's a subtle behavioral change here, that I would not have noticed had
I not _just_ looked at this code[*].  Falling through means the "don't yield to
ensure forward progress" case would return iter->yielded, not %false.  Per the
WARN above, iter->yielded _should_ be false, but if KVM had a bug that caused it
to get stuck, then that bug would escalate into an even worse bug of putting KVM
into a potentially unbreakable infinite loop.

Which is extremely unlikely, but it's a good excuse to clean this up :-)  I'll
test and post the below, and plan on slotting it in before this patch (you might
even see it show up in kvm-x86 before it gets posted).

[*] https://lore.kernel.org/all/Zx-_cmV8ps7Y2fTe@google.com


From: Sean Christopherson <seanjc@google.com>
Date: Wed, 30 Oct 2024 16:28:31 -0700
Subject: [PATCH] KVM: x86/mmu: Check yielded_gfn for forward progress iff
 resched is needed

Swap the order of the checks in tdp_mmu_iter_cond_resched() so that KVM
checks to see if a resched is needed _before_ checking to see if yielding
must be disallowed to guarantee forward progress.  Iterating over TDP MMU
SPTEs is a hot path, e.g. tearing down a root can touch millions of SPTEs,
and not needing to reschedule is by far the common case.  On the other
hand, disallowing yielding because forward progress has not been made is
a very rare case.

Returning early for the common case (no resched) effectively reduces the
number of checks from 2 to 1 for the common case, and should make the code
slightly more predictable for the CPU.

To resolve a weird conundrum where the forward progress check currently
returns false, but the need resched check subtly returns iter->yielded,
which _should_ be false (enforced by a WARN), return false unconditionally
(which might also help make the sequence more predictable).  If KVM has a
bug where iter->yielded is left dangling, continuing to yield is neither
right nor wrong, it was simply an artifact of how the original code was
written.

Unconditionally returning false when yielding is unnecessary or unwanted
will also allow extracting the "should resched" logic to a separate helper
in a future patch.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 076343c3c8a7..8170b16b91c3 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -658,29 +658,29 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
 {
 	WARN_ON_ONCE(iter->yielded);
 
+	if (!need_resched() && !rwlock_needbreak(&kvm->mmu_lock))
+		return false;
+
 	/* Ensure forward progress has been made before yielding. */
 	if (iter->next_last_level_gfn == iter->yielded_gfn)
 		return false;
 
-	if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
-		if (flush)
-			kvm_flush_remote_tlbs(kvm);
+	if (flush)
+		kvm_flush_remote_tlbs(kvm);
 
-		rcu_read_unlock();
+	rcu_read_unlock();
 
-		if (shared)
-			cond_resched_rwlock_read(&kvm->mmu_lock);
-		else
-			cond_resched_rwlock_write(&kvm->mmu_lock);
+	if (shared)
+		cond_resched_rwlock_read(&kvm->mmu_lock);
+	else
+		cond_resched_rwlock_write(&kvm->mmu_lock);
 
-		rcu_read_lock();
+	rcu_read_lock();
 
-		WARN_ON_ONCE(iter->gfn > iter->next_last_level_gfn);
+	WARN_ON_ONCE(iter->gfn > iter->next_last_level_gfn);
 
-		iter->yielded = true;
-	}
-
-	return iter->yielded;
+	iter->yielded = true;
+	return true;
 }
 
 static inline gfn_t tdp_mmu_max_gfn_exclusive(void)

base-commit: 35ef80eb29ab5f7b7c7264c7f21a64b3aa046921
--
diff mbox series

Patch

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 27adbb3ecb02..9b8299ee4abb 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -646,6 +646,16 @@  static inline void tdp_mmu_iter_set_spte(struct kvm *kvm, struct tdp_iter *iter,
 #define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end)		\
 	for_each_tdp_pte(_iter, root_to_sp(_mmu->root.hpa), _start, _end)
 
+static inline bool __must_check tdp_mmu_iter_need_resched(struct kvm *kvm,
+							  struct tdp_iter *iter)
+{
+	/* Ensure forward progress has been made before yielding. */
+	if (iter->next_last_level_gfn == iter->yielded_gfn)
+		return false;
+
+	return need_resched() || rwlock_needbreak(&kvm->mmu_lock);
+}
+
 /*
  * Yield if the MMU lock is contended or this thread needs to return control
  * to the scheduler.
@@ -666,11 +676,7 @@  static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
 {
 	WARN_ON_ONCE(iter->yielded);
 
-	/* Ensure forward progress has been made before yielding. */
-	if (iter->next_last_level_gfn == iter->yielded_gfn)
-		return false;
-
-	if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
+	if (tdp_mmu_iter_need_resched(kvm, iter)) {
 		if (flush)
 			kvm_flush_remote_tlbs(kvm);