
[RFC,4/4] KVM: selective write protection using dirty bitmap

Message ID 20101122144518.867f692c.yoshikawa.takuya@oss.ntt.co.jp

Commit Message

Takuya Yoshikawa Nov. 22, 2010, 5:45 a.m. UTC

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b04c0fa..bc72c0d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -617,6 +617,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
+void kvm_mmu_slot_remove_write_access_mask(struct kvm *kvm,
+		struct kvm_memory_slot *slot, unsigned long *dirty_bitmap);
 void kvm_mmu_zap_all(struct kvm *kvm);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2139309..978e806 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3456,6 +3456,48 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 	kvm_flush_remote_tlbs(kvm);
 }
 
+static void remove_write_access_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+	u64 *spte = rmap_next(kvm, rmapp, NULL);
+
+	while (spte) {
+		update_spte(spte, *spte & ~PT_WRITABLE_MASK);
+		spte = rmap_next(kvm, rmapp, spte);
+	}
+}
+
+/*
+ * Write protect the pages marked dirty in a given bitmap.
+ */
+void kvm_mmu_slot_remove_write_access_mask(struct kvm *kvm,
+					   struct kvm_memory_slot *slot,
+					   unsigned long *dirty_bitmap)
+{
+	int i;
+	gfn_t gfn_offset;
+	unsigned long idx;
+	long last_idx[KVM_NR_PAGE_SIZES - 1];
+
+	for (i = 0; i < (KVM_NR_PAGE_SIZES - 1); ++i)
+		last_idx[i] = -1;
+
+	for_each_set_bit(gfn_offset, dirty_bitmap, slot->npages) {
+		remove_write_access_rmapp(kvm, &slot->rmap[gfn_offset]);
+
+		for (i = 0; i < (KVM_NR_PAGE_SIZES - 1); ++i) {
+			idx = lpage_idx(slot->base_gfn + gfn_offset,
+					slot->base_gfn, PT_DIRECTORY_LEVEL + i);
+			if (idx == last_idx[i])
+				continue;
+
+			remove_write_access_rmapp(kvm,
+					&slot->lpage_info[i][idx].rmap_pde);
+			last_idx[i] = idx;
+		}
+	}
+	kvm_flush_remote_tlbs(kvm);
+}
+
 void kvm_mmu_zap_all(struct kvm *kvm)
 {
 	struct kvm_mmu_page *sp, *node;
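
The mmu.c side above is the heart of the patch: instead of sweeping every shadow page in the slot, it visits only the rmap chains of the pages whose dirty bit is set, plus the enclosing large-page rmaps. last_idx[] remembers the most recently visited index per large-page level, so a run of adjacent dirty pages touches each rmap_pde only once; after a gap the same index can be visited again, which at worst write protects an already protected chain and is harmless. The following stand-alone user-space sketch illustrates that traversal with made-up sizes, one large-page level instead of KVM_NR_PAGE_SIZES - 1, and a plain loop in place of for_each_set_bit; it is an illustration, not code from the patch:

#include <stdio.h>

#define NPAGES		64	/* 4K pages in the slot (made-up size) */
#define LPAGE_SHIFT	4	/* 16 pages per "large page" here; a real x86
				 * 2M page holds 512, kept small for display */

/* Simplified stand-in for lpage_idx(): which large page contains
 * this page offset.
 */
static long lpage_idx(long gfn_offset)
{
	return gfn_offset >> LPAGE_SHIFT;
}

int main(void)
{
	/* Pretend pages 3, 4 and 40 are dirty. */
	unsigned long dirty_bitmap = (1UL << 3) | (1UL << 4) | (1UL << 40);
	long last_idx = -1;
	long gfn_offset;

	for (gfn_offset = 0; gfn_offset < NPAGES; ++gfn_offset) {
		long idx;

		if (!(dirty_bitmap & (1UL << gfn_offset)))
			continue;

		printf("write protect 4K rmap of page %ld\n", gfn_offset);

		idx = lpage_idx(gfn_offset);
		if (idx == last_idx)
			continue;	/* large-page rmap already done */

		printf("write protect 2M rmap %ld\n", idx);
		last_idx = idx;
	}
	return 0;
}

For the bitmap above this visits the 4K rmaps of pages 3, 4 and 40, but the 2M rmaps only for indexes 0 and 2: pages 3 and 4 share a large page, so the dedup via last_idx skips the second lookup.
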
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e9cf381..222af5e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3193,6 +3193,32 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 	return 0;
 }
 
+enum kvm_dirty_level {
+	KVM_DIRTY_LEVEL_NOTHING,
+	KVM_DIRTY_LEVEL_LIGHT,
+	KVM_DIRTY_LEVEL_HEAVY
+};
+
+/*
+ * Decide which write protection functions we should use.
+ *
+ * Key factors:
+ *  - number of dirty pages
+ *  - number of shadow pages
+ *  - direct mode or shadow mode
+ */
+static enum kvm_dirty_level dirty_level_memslot(struct kvm_memory_slot *memslot)
+{
+	if (!memslot->nr_dirty_pages)
+		return KVM_DIRTY_LEVEL_NOTHING;
+
+	if ((memslot->nr_dirty_pages < 2048) ||
+	    (memslot->nr_dirty_pages < memslot->npages / 64))
+		return KVM_DIRTY_LEVEL_LIGHT;
+
+	return KVM_DIRTY_LEVEL_HEAVY;
+}
+
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
@@ -3202,6 +3228,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	int r;
 	struct kvm_memory_slot *memslot;
 	unsigned long n;
+	enum kvm_dirty_level dirty_level;
 
 	mutex_lock(&kvm->slots_lock);
 
@@ -3217,7 +3244,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	n = kvm_dirty_bitmap_bytes(memslot);
 
 	/* If nothing is dirty, don't bother messing with page tables. */
-	if (memslot->nr_dirty_pages) {
+	dirty_level = dirty_level_memslot(memslot);
+	if (dirty_level != KVM_DIRTY_LEVEL_NOTHING) {
 		struct kvm_memslots *slots, *old_slots;
 		unsigned long *dirty_bitmap;
 
@@ -3242,7 +3270,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 		kfree(old_slots);
 
 		spin_lock(&kvm->mmu_lock);
-		kvm_mmu_slot_remove_write_access(kvm, log->slot);
+		if (dirty_level == KVM_DIRTY_LEVEL_HEAVY)
+			kvm_mmu_slot_remove_write_access(kvm, log->slot);
+		else
+			kvm_mmu_slot_remove_write_access_mask(kvm,
+						&slots->memslots[log->slot],
+						dirty_bitmap);
 		spin_unlock(&kvm->mmu_lock);
 
 		r = -EFAULT;
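
With the thresholds in dirty_level_memslot(), the bitmap-driven path is chosen when dirtying is sparse in either absolute terms (fewer than 2048 dirty pages) or relative terms (less than 1/64 of the slot); dense dirtying falls back to kvm_mmu_slot_remove_write_access(), where one sweep over the slot's shadow pages beats thousands of individual rmap walks. A stand-alone sketch of how those thresholds classify a few made-up slots (a 4GB slot is 1048576 4K pages); the slot sizes are invented for illustration, only the cutoffs come from the patch:

#include <stdio.h>

enum kvm_dirty_level {
	KVM_DIRTY_LEVEL_NOTHING,
	KVM_DIRTY_LEVEL_LIGHT,
	KVM_DIRTY_LEVEL_HEAVY
};

/* Same thresholds as dirty_level_memslot() in the patch. */
static enum kvm_dirty_level dirty_level(unsigned long nr_dirty_pages,
					unsigned long npages)
{
	if (!nr_dirty_pages)
		return KVM_DIRTY_LEVEL_NOTHING;

	if (nr_dirty_pages < 2048 || nr_dirty_pages < npages / 64)
		return KVM_DIRTY_LEVEL_LIGHT;

	return KVM_DIRTY_LEVEL_HEAVY;
}

int main(void)
{
	static const char * const name[] = { "nothing", "light", "heavy" };
	/* { npages, nr_dirty_pages } pairs, all made up. */
	static const unsigned long cases[][2] = {
		{ 1048576,      0 },	/* clean slot */
		{ 1048576,   1000 },	/* under the absolute cut of 2048 */
		{ 1048576,  10000 },	/* over 2048, but under npages / 64 */
		{ 1048576, 100000 },	/* dense dirtying */
		{    4096,   4000 },	/* small slot, almost fully dirty */
	};
	unsigned int i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); ++i)
		printf("npages=%7lu dirty=%6lu -> %s\n", cases[i][0],
		       cases[i][1],
		       name[dirty_level(cases[i][1], cases[i][0])]);

	return 0;
}

This prints nothing, light, light, heavy, heavy for the five cases. Note the OR in the condition: a huge slot still takes the selective path with tens of thousands of dirty pages, while a small slot that is almost fully dirty goes heavy, since sweeping its few shadow pages in one pass is cheaper than walking an rmap chain per dirty page.
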