@@ -2120,7 +2120,6 @@ static void direct_walk_iterator_reset_traversal(
* range, so the last gfn to be iterated over would be the largest possible
* GFN, in this scenario.)
*/
-__attribute__((unused))
static void direct_walk_iterator_setup_walk(struct direct_walk_iterator *iter,
struct kvm *kvm, int as_id, gfn_t start, gfn_t end,
enum mmu_lock_mode lock_mode)
@@ -2151,7 +2150,6 @@ static void direct_walk_iterator_setup_walk(struct direct_walk_iterator *iter,
direct_walk_iterator_start_traversal(iter);
}
-__attribute__((unused))
static void direct_walk_iterator_retry_pte(struct direct_walk_iterator *iter)
{
BUG_ON(!iter->walk_in_progress);
@@ -2397,7 +2395,6 @@ static bool cmpxchg_pte(u64 *ptep, u64 old_pte, u64 new_pte, int level, u64 gfn)
return r == old_pte;
}
-__attribute__((unused))
static bool direct_walk_iterator_set_pte(struct direct_walk_iterator *iter,
u64 new_pte)
{
@@ -2725,6 +2722,44 @@ static int kvm_handle_hva_range(struct kvm *kvm,
return ret;
}
+/*
+ * Marks the range of gfns, [start, end), non-present.
+ */
+static bool zap_direct_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
+ gfn_t end, enum mmu_lock_mode lock_mode)
+{
+ struct direct_walk_iterator iter;
+
+ direct_walk_iterator_setup_walk(&iter, kvm, as_id, start, end,
+ lock_mode);
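+ /* Visit each present PTE that maps a GFN in [start, end). */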
+ while (direct_walk_iterator_next_present_pte(&iter)) {
+ /*
+ * The gfn range should be handled at the largest granularity
+ * possible; however, since the functions which handle changed
+ * PTEs (and free child PTs) do not yield, zapping an entry with
+ * too many child PTEs can cause scheduler problems. To avoid
+ * that, only zap PTEs at PDPE level and lower. The root level
+ * entries will be zapped and the high level page table pages
+ * freed on VM teardown.
+ */
+ if ((iter.pte_gfn_start < start ||
+ iter.pte_gfn_end > end ||
+ iter.level > PT_PDPE_LEVEL) &&
+ !is_last_spte(iter.old_pte, iter.level))
+ continue;
+
+ /*
+ * If the compare/exchange succeeds, we will continue to the
+ * next PTE. If it fails, the next iteration will retry the
+ * current PTE. Both cases are handled the same way, so there is
+ * no need to check the result here.
+ */
+ direct_walk_iterator_set_pte(&iter, 0);
+ }
+ return direct_walk_iterator_end_traversal(&iter);
+}
+
static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
unsigned long data,
int (*handler)(struct kvm *kvm,
@@ -6645,11 +6680,26 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm)
*/
static void kvm_mmu_zap_all_fast(struct kvm *kvm)
{
+ int i;
+
lockdep_assert_held(&kvm->slots_lock);
write_lock(&kvm->mmu_lock);
trace_kvm_mmu_zap_all_fast(kvm);
+ /* Zap all direct MMU PTEs slowly */
+ if (kvm->arch.direct_mmu_enabled) {
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+ zap_direct_gfn_range(kvm, i, 0, ~0ULL,
+ MMU_WRITE_LOCK | MMU_LOCK_MAY_RESCHED);
+ }
+
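+ /* With a pure direct MMU there are no shadow MMU pages left to invalidate. */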
+ if (kvm->arch.pure_direct_mmu) {
+ kvm_flush_remote_tlbs(kvm);
+ write_unlock(&kvm->mmu_lock);
+ return;
+ }
+
/*
* Toggle mmu_valid_gen between '0' and '1'. Because slots_lock is
* held for the entire duration of zapping obsolete pages, it's
@@ -6888,8 +6938,21 @@ void kvm_mmu_zap_all(struct kvm *kvm)
struct kvm_mmu_page *sp, *node;
LIST_HEAD(invalid_list);
int ign;
+ int i;
write_lock(&kvm->mmu_lock);
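+ /* Zap the direct MMU for each address space and flush TLBs first. */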
+ if (kvm->arch.direct_mmu_enabled) {
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+ zap_direct_gfn_range(kvm, i, 0, ~0ULL,
+ MMU_WRITE_LOCK | MMU_LOCK_MAY_RESCHED);
+ kvm_flush_remote_tlbs(kvm);
+ }
+
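+ /* A pure direct MMU has no shadow MMU pages left to zap. */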
+ if (kvm->arch.pure_direct_mmu) {
+ write_unlock(&kvm->mmu_lock);
+ return;
+ }
+
restart:
list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
if (sp->role.invalid && sp->root_count)
Add a function for zapping ranges of GFNs in an address space using the
paging structure iterator, and use it to support invalidate_zap_all_pages
for the direct MMU.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/kvm/mmu.c | 69 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 66 insertions(+), 3 deletions(-)