@@ -2120,7 +2120,6 @@ static void direct_walk_iterator_reset_traversal(
* range, so the last gfn to be iterated over would be the largest possible
* GFN, in this scenario.)
*/
-__attribute__((unused))
static void direct_walk_iterator_setup_walk(struct direct_walk_iterator *iter,
struct kvm *kvm, int as_id, gfn_t start, gfn_t end,
enum mmu_lock_mode lock_mode)
@@ -2151,7 +2150,6 @@ static void direct_walk_iterator_setup_walk(struct direct_walk_iterator *iter,
direct_walk_iterator_start_traversal(iter);
}
-__attribute__((unused))
static void direct_walk_iterator_retry_pte(struct direct_walk_iterator *iter)
{
BUG_ON(!iter->walk_in_progress);
@@ -2397,7 +2395,6 @@ static bool cmpxchg_pte(u64 *ptep, u64 old_pte, u64 new_pte, int level, u64 gfn)
return r == old_pte;
}
-__attribute__((unused))
static bool direct_walk_iterator_set_pte(struct direct_walk_iterator *iter,
u64 new_pte)
{
@@ -2725,6 +2722,44 @@ static int kvm_handle_hva_range(struct kvm *kvm,
return ret;
}
+/*
+ * Marks the range of gfns, [start, end), non-present.
+ */
+static bool zap_direct_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
+ gfn_t end, enum mmu_lock_mode lock_mode)
+{
+ struct direct_walk_iterator iter;
+
+ direct_walk_iterator_setup_walk(&iter, kvm, as_id, start, end,
+ lock_mode);
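+ /* Visit each present PTE that maps a GFN in [start, end). */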
+ while (direct_walk_iterator_next_present_pte(&iter)) {
+ /*
+ * The gfn range should be handled at the largest granularity
+ * possible; however, since the functions which handle changed
+ * PTEs (and free child PTs) do not yield, zapping an entry with
+ * too many child PTEs can cause scheduler problems. To avoid
+ * that, only zap PTEs at PDPE level and lower. The root level
+ * entries will be zapped and the high level page table pages
+ * freed on VM teardown.
+ */
+ if ((iter.pte_gfn_start < start ||
+ iter.pte_gfn_end > end ||
+ iter.level > PT_PDPE_LEVEL) &&
+ !is_last_spte(iter.old_pte, iter.level))
+ continue;
+
+ /*
+ * If the compare/exchange succeeds, we will continue to the
+ * next PTE. If it fails, the next iteration will retry the
+ * current PTE. Both cases are handled the same way, so there is
+ * no need to check the result here.
+ */
+ direct_walk_iterator_set_pte(&iter, 0);
+ }
+ return direct_walk_iterator_end_traversal(&iter);
+}
+
static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
unsigned long data,
int (*handler)(struct kvm *kvm,
@@ -6645,11 +6680,26 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
*/
static void kvm_mmu_zap_all_fast(struct kvm *kvm)
{
+ int i;
+
lockdep_assert_held(&kvm->slots_lock);
write_lock(&kvm->mmu_lock);
trace_kvm_mmu_zap_all_fast(kvm);
+ /* Zap all direct MMU PTEs slowly */
+ if (kvm->arch.direct_mmu_enabled) {
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+ zap_direct_gfn_range(kvm, i, 0, ~0ULL,
+ MMU_WRITE_LOCK | MMU_LOCK_MAY_RESCHED);
+ }
+
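+ /* With a pure direct MMU there are no shadow MMU pages left to invalidate. */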
+ if (kvm->arch.pure_direct_mmu) {
+ kvm_flush_remote_tlbs(kvm);
+ write_unlock(&kvm->mmu_lock);
+ return;
+ }
+
/*
* Toggle mmu_valid_gen between '0' and '1'. Because slots_lock is
* held for the entire duration of zapping obsolete pages, it's
@@ -6888,8 +6938,21 @@ void kvm_mmu_zap_all(struct kvm *kvm)
struct kvm_mmu_page *sp, *node;
LIST_HEAD(invalid_list);
int ign;
+ int i;
write_lock(&kvm->mmu_lock);
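+ /* Zap the direct MMU for each address space and flush TLBs first. */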
+ if (kvm->arch.direct_mmu_enabled) {
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+ zap_direct_gfn_range(kvm, i, 0, ~0ULL,
+ MMU_WRITE_LOCK | MMU_LOCK_MAY_RESCHED);
+ kvm_flush_remote_tlbs(kvm);
+ }
+
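+ /* A pure direct MMU has no shadow MMU pages left to zap. */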
+ if (kvm->arch.pure_direct_mmu) {
+ write_unlock(&kvm->mmu_lock);
+ return;
+ }
+
restart:
list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
if (sp->role.invalid && sp->root_count)
Add a function for zapping ranges of GFNs in an address space using the
paging structure iterator, and use it to support invalidate_zap_all_pages
for the direct MMU.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/kvm/mmu.c | 69 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 66 insertions(+), 3 deletions(-)