@@ -822,14 +822,36 @@ static inline gfn_t tdp_mmu_max_gfn_host(void)
return 1ULL << (shadow_phys_bits - PAGE_SHIFT);
}
-static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
- bool shared)
+static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
+ bool shared, int zap_level)
{
struct tdp_iter iter;
gfn_t end = tdp_mmu_max_gfn_host();
gfn_t start = 0;
+ for_each_tdp_pte_min_level(iter, root, zap_level, start, end) {
+retry:
+ if (tdp_mmu_iter_cond_resched(kvm, &iter, false, shared))
+ continue;
+
+ if (!is_shadow_present_pte(iter.old_spte))
+ continue;
+
+ if (iter.level > zap_level)
+ continue;
+
+ if (!shared)
+ tdp_mmu_set_spte(kvm, &iter, 0);
+ else if (tdp_mmu_set_spte_atomic(kvm, &iter, 0))
+ goto retry;
+ }
+}
+
+static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
+ bool shared)
+{
+
/*
* The root must have an elevated refcount so that it's reachable via
* mmu_notifier callbacks, which allows this path to yield and drop
@@ -847,22 +869,17 @@ static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
rcu_read_lock();
/*
- * No need to try to step down in the iterator when zapping an entire
- * root, zapping an upper-level SPTE will recurse on its children.
+ * To avoid RCU stalls due to recursively removing huge swaths of SPs,
+ * split the zap into two passes. On the first pass, zap at the 1gb
+ * level, and then zap top-level SPs on the second pass. "1gb" is not
+ * arbitrary, as KVM must be able to zap a 1gb shadow page without
+ * inducing a stall to allow in-place replacement with a 1gb hugepage.
+ *
+ * Because zapping a SP recurses on its children, stepping down to
+ * PG_LEVEL_4K in the iterator itself is unnecessary.
*/
- for_each_tdp_pte_min_level(iter, root, root->role.level, start, end) {
-retry:
- if (tdp_mmu_iter_cond_resched(kvm, &iter, false, shared))
- continue;
-
- if (!is_shadow_present_pte(iter.old_spte))
- continue;
-
- if (!shared)
- tdp_mmu_set_spte(kvm, &iter, 0);
- else if (tdp_mmu_set_spte_atomic(kvm, &iter, 0))
- goto retry;
- }
+ __tdp_mmu_zap_root(kvm, root, shared, PG_LEVEL_1G);
+ __tdp_mmu_zap_root(kvm, root, shared, root->role.level);
rcu_read_unlock();
}