@@ -6629,9 +6629,10 @@ static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_e
/*
* Invalidate (zap) SPTEs that cover GFNs from gfn_start and up to gfn_end
- * (not including it)
+ * (not including it) for *VALID* roots specified by attr_filter
*/
-void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
+static void kvm_zap_gfn_range_filtered(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end,
+ enum kvm_gfn_range_filter attr_filter)
{
bool flush;
@@ -6647,7 +6648,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);
if (tdp_mmu_enabled)
- flush = kvm_tdp_mmu_zap_leafs(kvm, gfn_start, gfn_end, flush);
+ flush = kvm_tdp_mmu_zap_leafs(kvm, gfn_start, gfn_end,
+ attr_filter, flush);
if (flush)
kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end - gfn_start);
@@ -6657,6 +6659,17 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
write_unlock(&kvm->mmu_lock);
}
+/*
+ * Invalidate (zap) SPTEs that cover GFNs from gfn_start and up to gfn_end
+ * (not including it) for all *VALID* non-mirror roots.
+ */
+void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
+{
+ kvm_zap_gfn_range_filtered(kvm, gfn_start, gfn_end,
+ KVM_FILTER_SHARED);
+}
+EXPORT_SYMBOL_GPL(kvm_zap_gfn_range);
+
static bool slot_rmap_write_protect(struct kvm *kvm,
struct kvm_rmap_head *rmap_head,
const struct kvm_memory_slot *slot)
@@ -6998,7 +7011,6 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
return need_tlb_flush;
}
-EXPORT_SYMBOL_GPL(kvm_zap_gfn_range);
static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *slot)
@@ -1016,16 +1016,21 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
}
/*
- * Zap leaf SPTEs for the range of gfns, [start, end), for all *VALID** roots.
+ * Zap leaf SPTEs for the range of gfns, [start, end), for *VALID* roots
+ * specified by attr_filter.
* Returns true if a TLB flush is needed before releasing the MMU lock, i.e. if
* one or more SPTEs were zapped since the MMU lock was last acquired.
*/
-bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush)
+bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end,
+ enum kvm_gfn_range_filter attr_filter, bool flush)
{
+ enum kvm_tdp_mmu_root_types types;
struct kvm_mmu_page *root;
lockdep_assert_held_write(&kvm->mmu_lock);
- for_each_valid_tdp_mmu_root_yield_safe(kvm, root, -1)
+
+ types = kvm_gfn_range_filter_to_root_types(kvm, attr_filter);
+ __for_each_tdp_mmu_root_yield_safe(kvm, root, -1, types)
flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush);
return flush;
@@ -63,7 +63,8 @@ static inline struct kvm_mmu_page *tdp_mmu_get_root(struct kvm_vcpu *vcpu,
return root_to_sp(vcpu->arch.mmu->root.hpa);
}
-bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush);
+bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end,
+ enum kvm_gfn_range_filter attr_filter, bool flush);
bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp);
void kvm_tdp_mmu_zap_all(struct kvm *kvm);
void kvm_tdp_mmu_invalidate_roots(struct kvm *kvm,
Only zap valid, non-mirror roots in kvm_zap_gfn_range().

There are three callers of kvm_zap_gfn_range() in KVM:
(1) __kvm_set_or_clear_apicv_inhibit(),
(2) sev_handle_rmp_fault(),
(3) kvm_noncoherent_dma_assignment_start_or_stop().

TDX inhibits APICv as soon as TD initialization occurs, and mirror roots
do not apply to SEV, so kvm_zap_gfn_range() does not need to zap mirror
roots in cases (1) and (2).

Currently, TDX does not support the assignment of noncoherent DMA
devices, even to shared memory (there's no corresponding WBINVD
emulation). Even if TDX supports the assignment of noncoherent DMA
devices in the future, the private EPT (underlying the mirror roots) in
TDX always forces the EPT memory type to WB. Thus, kvm_zap_gfn_range()
does not need to zap mirror roots in case (3) either.

Zapping only valid, non-mirror roots in kvm_zap_gfn_range() allows TDX
to avoid depending on the self-snoop feature when reusing VMX's op
get_mt_mask for TDX shared EPT.

Introduce a static helper kvm_zap_gfn_range_filtered() and have
kvm_zap_gfn_range() invoke it with the filter KVM_FILTER_SHARED.

Opportunistically, move the EXPORT_SYMBOL_GPL() of kvm_zap_gfn_range()
closer to the function itself.

Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
---
The code base is kvm-coco-queue.

Previously, Paolo suggested dropping TDX's specific implementation of
the op get_mt_mask and having TDX reuse VMX's implementation [1]. Sean
also suggested making the self-snoop feature a hard dependency for
enabling TDX [2]. That is because
- TDX shared EPT is able to reuse the memory type specified in VMX's
  code as long as guest MTRRs are not referenced.
- KVM does not call kvm_zap_gfn_range() when attaching/detaching
  non-coherent DMA devices when the CPU has the self-snoop feature [3].

However, [3] cannot be guaranteed after commit 9d70f3fec144 ("Revert
"KVM: VMX: Always honor guest PAT on CPUs that support self-snoop""),
which was due to a regression with the bochsdrm driver. Although [3]
may be added back in the future, rather than relying on or waiting for
that, a better approach is to avoid zapping the private EPT in
kvm_zap_gfn_range().

[1] https://lore.kernel.org/kvm/b791a3f6-a5ab-4f7e-bb2a-d277b26ec2c4@redhat.com/
[2] https://lore.kernel.org/kvm/ZuBSNS33_ck-w6-9@google.com
[3] https://lore.kernel.org/all/20240309010929.1403984-6-seanjc@google.com
---
 arch/x86/kvm/mmu/mmu.c     | 20 ++++++++++++++++----
 arch/x86/kvm/mmu/tdp_mmu.c | 11 ++++++++---
 arch/x86/kvm/mmu/tdp_mmu.h |  3 ++-
 3 files changed, 26 insertions(+), 8 deletions(-)

base-commit: 2893e1e10283b33c1412b83949ea346aa75eaf18
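
Not part of the patch: as a rough sketch of why KVM_FILTER_SHARED keeps
kvm_zap_gfn_range() away from the mirror roots, the filter-to-root-types
translation used in kvm_tdp_mmu_zap_leafs() is assumed to behave roughly
as below. The helper name filter_to_root_types_sketch() is hypothetical;
kvm_has_mirrored_tdp(), KVM_DIRECT_ROOTS and KVM_MIRROR_ROOTS refer to
the kvm-coco-queue definitions and are assumptions here, not guaranteed
by this patch.

/*
 * Rough sketch only, not kvm-coco-queue code: assumes kvm-coco-queue's
 * kvm_has_mirrored_tdp(), KVM_DIRECT_ROOTS and KVM_MIRROR_ROOTS.
 */
static enum kvm_tdp_mmu_root_types
filter_to_root_types_sketch(struct kvm *kvm, enum kvm_gfn_range_filter filter)
{
	enum kvm_tdp_mmu_root_types types = 0;

	/* VMs without a mirror EPT only ever have direct (shared) roots. */
	if (!kvm_has_mirrored_tdp(kvm))
		return KVM_DIRECT_ROOTS;

	/* KVM_FILTER_SHARED selects direct roots; mirror roots are skipped. */
	if (filter & KVM_FILTER_SHARED)
		types |= KVM_DIRECT_ROOTS;
	if (filter & KVM_FILTER_PRIVATE)
		types |= KVM_MIRROR_ROOTS;

	return types;
}

Under that assumption, with KVM_FILTER_SHARED the
__for_each_tdp_mmu_root_yield_safe() walk only visits valid direct
roots, so the private EPT backing the mirror roots is never zapped on
the three kvm_zap_gfn_range() paths listed above.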