
[09/10] kvm: x86: mmu: Ability to switch dirty logging mode dynamically

Message ID: 20181020031543.124399-10-junaids@google.com
State: New, archived
Series: [01/10] kvm: mmu: spte_write_protect optimization

Commit Message

Junaid Shahid Oct. 20, 2018, 3:15 a.m. UTC
Add a mechanism to switch between write-protection (WrProt) based and
D-bit based dirty logging while the VM is running, even when dirty
logging is already enabled. Switching to or from PML is not currently
supported, but support can be added later.
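
For illustration only, a minimal sketch of how an in-kernel caller might
drive the interface added by this patch; the actual entry point that
triggers the switch (e.g. an ioctl or capability) is not shown here, and
the helper name below is hypothetical:

	/*
	 * Illustrative sketch only: switch a running VM to D-bit based
	 * dirty logging via kvm_mmu_switch_dirty_log_mode(), falling back
	 * to the default mode if D-bit logging is not supported.
	 */
	static int example_switch_to_dbit_logging(struct kvm *kvm)
	{
		u8 mode = (kvm_supported_dirty_log_modes & KVM_DIRTY_LOG_MODE_DBIT)
			  ? KVM_DIRTY_LOG_MODE_DBIT
			  : KVM_DIRTY_LOG_MODE_DEFAULT;

		return kvm_mmu_switch_dirty_log_mode(kvm, mode);
	}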

Signed-off-by: Junaid Shahid <junaids@google.com>
---
 arch/x86/include/asm/kvm_host.h |   4 +
 arch/x86/include/uapi/asm/kvm.h |   1 +
 arch/x86/kvm/mmu.c              | 146 +++++++++++++++++++++++++++++++-
 arch/x86/kvm/mmu.h              |   1 +
 arch/x86/kvm/vmx.c              |  10 ++-
 5 files changed, 158 insertions(+), 4 deletions(-)

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3da22c92a5d6..bdbc87a26662 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1131,6 +1131,8 @@  struct kvm_x86_ops {
 	 *      function can also add any un-flushed dirty state maintained by
 	 *      the hardware to the mask (e.g. if flush_log_dirty is not
 	 *      implemented.)
+	 *  - switch_dirty_log_mode:
+	 *      Switch to the given dirty log mode.
 	 */
 	void (*slot_enable_log_dirty)(struct kvm *kvm,
 				      struct kvm_memory_slot *slot);
@@ -1141,6 +1143,7 @@  struct kvm_x86_ops {
 					struct kvm_memory_slot *slot,
 					gfn_t offset, unsigned long *mask);
 	int (*write_log_dirty)(struct kvm_vcpu *vcpu);
+	int (*switch_dirty_log_mode)(struct kvm *kvm, u8 mode);
 
 	/* pmu operations of sub-arch */
 	const struct kvm_pmu_ops *pmu_ops;
@@ -1202,6 +1205,7 @@  struct kvm_arch_async_pf {
 extern struct kvm_x86_ops *kvm_x86_ops;
 
 extern u8 kvm_default_dirty_log_mode;
+extern u8 kvm_supported_dirty_log_modes;
 
 #define __KVM_HAVE_ARCH_VM_ALLOC
 static inline struct kvm *kvm_arch_alloc_vm(void)
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 2b1c442bffe6..ff2ed65be75c 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -420,6 +420,7 @@  struct kvm_nested_state {
 	__u8 data[0];
 };
 
+#define KVM_DIRTY_LOG_MODE_DEFAULT	0
 #define KVM_DIRTY_LOG_MODE_WRPROT	1
 #define KVM_DIRTY_LOG_MODE_DBIT		2
 #define KVM_DIRTY_LOG_MODE_PML		4
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0839b8cfdf66..4abc75c97593 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -265,6 +265,9 @@  static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
 u8 __read_mostly kvm_default_dirty_log_mode;
 EXPORT_SYMBOL_GPL(kvm_default_dirty_log_mode);
 
+u8 __read_mostly kvm_supported_dirty_log_modes;
+EXPORT_SYMBOL_GPL(kvm_supported_dirty_log_modes);
+
 static void mmu_spte_set(u64 *sptep, u64 spte);
 static union kvm_mmu_page_role
 kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
@@ -436,6 +439,7 @@  void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 
 	if (shadow_dirty_mask == 0) {
 		enable_d_bit_logging = false;
+		kvm_supported_dirty_log_modes &= ~KVM_DIRTY_LOG_MODE_DBIT;
 
 		if (kvm_default_dirty_log_mode == KVM_DIRTY_LOG_MODE_DBIT)
 			kvm_default_dirty_log_mode = KVM_DIRTY_LOG_MODE_WRPROT;
@@ -1704,6 +1708,30 @@  kvm_mmu_shadow_dirty_mask_test_and_clear(struct kvm *kvm,
 	return mask;
 }
 
+/*
+ * Test the D bit in the SPTE(s) corresponding to the GFN and return true if any
+ * SPTE has the D bit set.
+ *
+ * The MMU lock should be held before calling this function.
+ */
+bool kvm_mmu_test_shadow_dirty_mask(struct kvm *kvm,
+				    struct kvm_memory_slot *slot,
+				    gfn_t gfn_offset)
+{
+	struct kvm_rmap_head *rmap_head;
+	u64 *sptep;
+	struct rmap_iterator iter;
+	u64 pte_bits = 0;
+
+	rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset,
+			      PT_PAGE_TABLE_LEVEL, slot);
+
+	for_each_rmap_spte(rmap_head, &iter, sptep)
+		pte_bits |= *sptep;
+
+	return pte_bits & shadow_dirty_mask;
+}
+
 /**
  * Gets the dirty state (if any) for selected PT level pages from the hardware
  * MMU structures and resets the hardware state to track those pages again.
@@ -6081,9 +6109,13 @@  int kvm_mmu_module_init(void)
 	BUILD_BUG_ON(sizeof(union kvm_mmu_role) != sizeof(u64));
 
 	kvm_mmu_reset_all_pte_masks();
-	kvm_default_dirty_log_mode = enable_d_bit_logging
-				     ? KVM_DIRTY_LOG_MODE_DBIT
-				     : KVM_DIRTY_LOG_MODE_WRPROT;
+	kvm_default_dirty_log_mode = KVM_DIRTY_LOG_MODE_WRPROT;
+	kvm_supported_dirty_log_modes = KVM_DIRTY_LOG_MODE_WRPROT;
+
+	if (enable_d_bit_logging) {
+		kvm_supported_dirty_log_modes |= KVM_DIRTY_LOG_MODE_DBIT;
+		kvm_default_dirty_log_mode = KVM_DIRTY_LOG_MODE_DBIT;
+	}
 
 	pte_list_desc_cache = kmem_cache_create("pte_list_desc",
 					    sizeof(struct pte_list_desc),
@@ -6150,3 +6182,111 @@  void kvm_mmu_module_exit(void)
 	unregister_shrinker(&mmu_shrinker);
 	mmu_audit_disable();
 }
+
+static void switch_dirty_log_mode_dbit_to_wrprot(struct kvm *kvm)
+{
+	ulong i;
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	struct kvm_memory_slot *memslot;
+	bool flush = false;
+
+	kvm_for_each_memslot(memslot, slots)
+		if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
+			flush |= kvm_mmu_slot_leaf_remove_write_access(kvm,
+								       memslot);
+	/*
+	 * We need to ensure that the write-protection gets propagated to all
+	 * CPUs before we transfer the dirty bits to the dirty bitmap.
+	 * Otherwise, it would be possible for some other CPU to write to a
+	 * page some time after we have gone over that page in the loop below
+	 * and then the page wouldn't get marked in the dirty bitmap.
+	 */
+	if (flush)
+		kvm_flush_remote_tlbs(kvm);
+
+	spin_lock(&kvm->mmu_lock);
+
+	kvm_for_each_memslot(memslot, slots) {
+		if (!(memslot->flags & KVM_MEM_LOG_DIRTY_PAGES))
+			continue;
+
+		for (i = 0; i < memslot->npages; i++)
+			if (!test_bit(i, memslot->dirty_bitmap) &&
+			    kvm_mmu_test_shadow_dirty_mask(kvm, memslot, i))
+				set_bit(i, memslot->dirty_bitmap);
+	}
+	spin_unlock(&kvm->mmu_lock);
+
+	kvm->arch.dirty_logging_mode = KVM_DIRTY_LOG_MODE_WRPROT;
+}
+
+static void switch_dirty_log_mode_wrprot_to_dbit(struct kvm *kvm)
+{
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	struct kvm_memory_slot *memslot;
+
+	kvm_for_each_memslot(memslot, slots)
+		if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
+			kvm_mmu_slot_leaf_clear_dirty(kvm, memslot);
+
+	/*
+	 * No need to initiate a TLB flush here, since any page for which we
+	 * cleared the dirty bit above would already be marked in the dirty
+	 * bitmap. It isn't until the next get_dirty_log or enable_log_dirty
+	 * that the clearing of the dirty bits needs to be propagated
+	 * everywhere.
+	 */
+
+	kvm->arch.dirty_logging_mode = KVM_DIRTY_LOG_MODE_DBIT;
+
+	/*
+	 * As an optimization, we could also remove the write-protection from
+	 * all SPTEs here, rather than incurring faults as writes happen.
+	 */
+}
+
+int kvm_mmu_switch_dirty_log_mode(struct kvm *kvm, u8 mode)
+{
+	int err = 0;
+	u8 old_mode;
+
+	if (mode == KVM_DIRTY_LOG_MODE_DEFAULT)
+		mode = kvm_default_dirty_log_mode;
+
+	if (hweight8(mode) != 1)
+		return -EINVAL;
+
+	if (!(mode & kvm_supported_dirty_log_modes)) {
+		kvm_err("Dirty logging mode %u is not supported.\n", mode);
+		return -ENOTSUPP;
+	}
+
+	kvm_debug("Switching dirty logging mode from %u to %u.\n",
+		  kvm->arch.dirty_logging_mode, mode);
+
+	mutex_lock(&kvm->slots_lock);
+
+	old_mode = kvm->arch.dirty_logging_mode;
+
+	if (mode != old_mode) {
+		if (mode == KVM_DIRTY_LOG_MODE_WRPROT &&
+		    old_mode == KVM_DIRTY_LOG_MODE_DBIT)
+			switch_dirty_log_mode_dbit_to_wrprot(kvm);
+		else if (mode == KVM_DIRTY_LOG_MODE_DBIT &&
+			 old_mode == KVM_DIRTY_LOG_MODE_WRPROT)
+			switch_dirty_log_mode_wrprot_to_dbit(kvm);
+		else if (kvm_x86_ops->switch_dirty_log_mode)
+			err = kvm_x86_ops->switch_dirty_log_mode(kvm, mode);
+		else
+			err = -ENOTSUPP;
+	}
+
+	mutex_unlock(&kvm->slots_lock);
+
+	if (err)
+		kvm_err("Trying to switch dirty logging mode from "
+			"%u to %u failed with error %d.\n",
+			old_mode, mode, err);
+
+	return err;
+}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 6d39802a666d..b27dde010ec1 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -213,4 +213,5 @@  void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 				    struct kvm_memory_slot *slot, u64 gfn);
 int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
+int kvm_mmu_switch_dirty_log_mode(struct kvm *kvm, u8 mode);
 #endif
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c6b0477c855e..232115b84fbb 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7992,9 +7992,17 @@  static __init int hardware_setup(void)
 		kvm_x86_ops->slot_disable_log_dirty = NULL;
 		kvm_x86_ops->flush_log_dirty = NULL;
 		kvm_x86_ops->get_and_reset_log_dirty = NULL;
-	} else
+	} else {
 		kvm_default_dirty_log_mode = KVM_DIRTY_LOG_MODE_PML;
 
+		/*
+		 * Currently, switching between PML and other modes is not
+		 * supported, so if PML is enabled, it is the only available
+		 * mode.
+		 */
+		kvm_supported_dirty_log_modes = KVM_DIRTY_LOG_MODE_PML;
+	}
+
 	if (!cpu_has_vmx_preemption_timer())
 		kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;