diff mbox series

[3/7] KVM: x86/mmu: Batch TLB flushes when zapping collapsible TDP MMU SPTEs

Message ID 20240805233114.4060019-4-dmatlack@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86/mmu: Optimize TDP MMU huge page recovery during disable-dirty-log | expand

Commit Message

David Matlack Aug. 5, 2024, 11:31 p.m. UTC
Set SPTEs directly to SHADOW_NONPRESENT_VALUE and batch up TLB flushes
when zapping collapsible SPTEs, rather than freezing them first.

Freezing the SPTE first is not required. It is fine for another thread
holding mmu_lock for read to immediately install a present entry before
TLBs are flushed because the underlying mapping is not changing. vCPUs
that translate through the stale 4K mappings or a new huge page mapping
will still observe the same GPA->HPA translations.

KVM must only flush TLBs before dropping RCU (to avoid use-after-free of
the zapped page tables) and before dropping mmu_lock (to synchronize
with mmu_notifiers invalidating mappings).

In VMs backed with 2MiB pages, batching TLB flushes improves the time it
takes to zap collapsible SPTEs to disable dirty logging:

 $ ./dirty_log_perf_test -s anonymous_hugetlb_2mb -v 64 -e -b 4g

 Before: Disabling dirty logging time: 14.334453428s (131072 flushes)
 After:  Disabling dirty logging time: 4.794969689s  (76 flushes)

Skipping freezing SPTEs also avoids stalling vCPU threads on the frozen
SPTE for the time it takes to perform a remote TLB flush. vCPUs faulting
on the zapped mapping can now immediately install a new huge mapping and
proceed with guest execution.

Signed-off-by: David Matlack <dmatlack@google.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 54 +++++++-------------------------------
 1 file changed, 9 insertions(+), 45 deletions(-)
diff mbox series

Patch

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index f881e79243b3..fad2912d3d4c 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -591,48 +591,6 @@  static inline int __must_check tdp_mmu_set_spte_atomic(struct kvm *kvm,
 	return 0;
 }
 
-static inline int __must_check tdp_mmu_zap_spte_atomic(struct kvm *kvm,
-						       struct tdp_iter *iter)
-{
-	int ret;
-
-	lockdep_assert_held_read(&kvm->mmu_lock);
-
-	/*
-	 * Freeze the SPTE by setting it to a special, non-present value. This
-	 * will stop other threads from immediately installing a present entry
-	 * in its place before the TLBs are flushed.
-	 *
-	 * Delay processing of the zapped SPTE until after TLBs are flushed and
-	 * the FROZEN_SPTE is replaced (see below).
-	 */
-	ret = __tdp_mmu_set_spte_atomic(iter, FROZEN_SPTE);
-	if (ret)
-		return ret;
-
-	kvm_flush_remote_tlbs_gfn(kvm, iter->gfn, iter->level);
-
-	/*
-	 * No other thread can overwrite the frozen SPTE as they must either
-	 * wait on the MMU lock or use tdp_mmu_set_spte_atomic() which will not
-	 * overwrite the special frozen SPTE value. Use the raw write helper to
-	 * avoid an unnecessary check on volatile bits.
-	 */
-	__kvm_tdp_mmu_write_spte(iter->sptep, SHADOW_NONPRESENT_VALUE);
-
-	/*
-	 * Process the zapped SPTE after flushing TLBs, and after replacing
-	 * FROZEN_SPTE with 0. This minimizes the amount of time vCPUs are
-	 * blocked by the FROZEN_SPTE and reduces contention on the child
-	 * SPTEs.
-	 */
-	handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
-			    SHADOW_NONPRESENT_VALUE, iter->level, true);
-
-	return 0;
-}
-
-
 /*
  * tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping
  * @kvm:	      KVM instance
@@ -1625,12 +1583,15 @@  static void zap_collapsible_spte_range(struct kvm *kvm,
 	gfn_t end = start + slot->npages;
 	struct tdp_iter iter;
 	int max_mapping_level;
+	bool flush = false;
 
 	rcu_read_lock();
 
 	tdp_root_for_each_pte(iter, root, start, end) {
-		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
+		if (tdp_mmu_iter_cond_resched(kvm, &iter, flush, true)) {
+			flush = false;
 			continue;
+		}
 
 		if (!is_shadow_present_pte(iter.old_spte) ||
 		    !is_last_spte(iter.old_spte, iter.level))
@@ -1653,8 +1614,8 @@  static void zap_collapsible_spte_range(struct kvm *kvm,
 		while (max_mapping_level > iter.level)
 			tdp_iter_step_up(&iter);
 
-		/* Note, a successful atomic zap also does a remote TLB flush. */
-		(void)tdp_mmu_zap_spte_atomic(kvm, &iter);
+		if (!tdp_mmu_set_spte_atomic(kvm, &iter, SHADOW_NONPRESENT_VALUE))
+			flush = true;
 
 		/*
 		 * If the atomic zap fails, the iter will recurse back into
@@ -1662,6 +1623,9 @@  static void zap_collapsible_spte_range(struct kvm *kvm,
 		 */
 	}
 
+	if (flush)
+		kvm_flush_remote_tlbs_memslot(kvm, slot);
+
 	rcu_read_unlock();
 }