
[4/6] KVM: Dirty memory tracking for performant checkpointing and improved live migration

Message ID BL2PR08MB481D079AE15F43243BF5F94F0630@BL2PR08MB481.namprd08.prod.outlook.com (mailing list archive)
State New, archived

Commit Message

Cao, Lei April 26, 2016, 7:25 p.m. UTC
Implement a dirty page threshold which, when crossed, forces vcpus to
exit.

Because buffering on the host is limited, checkpoint state must be
captured before too many pages have been dirtied. Exhausting the buffer
space would effectively force the two sides apart and require them to be
resynchronized from scratch; this "divergence" event is costly to
repair. An "emergency" stop is therefore needed: once a critical
threshold of dirty pages has been reached and VM execution has not yet
been stopped, the vcpus exit to userspace with a new exit reason,
KVM_EXIT_DIRTY_LOG_FULL, indicating that the dirty log is full. The
threshold and the buffer sizes are chosen so that this emergency stop
remains a rare event.

Signed-off-by: Lei Cao <lei.cao@stratus.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c              | 11 ++++++++++
 include/linux/kvm_host.h        |  1 +
 include/uapi/linux/kvm.h        |  1 +
 virt/kvm/kvm_main.c             | 34 ++++++++++++++++++++++++++++++-
 5 files changed, 47 insertions(+), 1 deletion(-)

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 52bff2b..2b43660 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -71,6 +71,7 @@ 
 #define KVM_REQ_HV_RESET          28
 #define KVM_REQ_HV_EXIT           29
 #define KVM_REQ_HV_STIMER         30
+#define KVM_REQ_EXIT_DIRTY_LOG_FULL 31
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9b7798c..deede71 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6489,6 +6489,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			r = 1;
 			goto out;
 		}
+		if (kvm_check_request(KVM_REQ_EXIT_DIRTY_LOG_FULL, vcpu)) {
+			vcpu->run->exit_reason = KVM_EXIT_DIRTY_LOG_FULL;
+			r = 0;
+			goto out;
+		}
 		if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
 			record_steal_time(vcpu);
 		if (kvm_check_request(KVM_REQ_SMI, vcpu))
@@ -6687,6 +6692,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 
+	if (vcpu->need_exit) {
+		vcpu->need_exit = false;
+		kvm_make_all_cpus_request(vcpu->kvm,
+					  KVM_REQ_EXIT_DIRTY_LOG_FULL);
+	}
+
 	/*
 	 * Profile KVM exit RIPs:
 	 */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5793ecf..08bda35 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -257,6 +257,7 @@ struct kvm_vcpu {
 	} spin_loop;
 #endif
 	bool preempted;
+	bool need_exit;
 	struct kvm_vcpu_arch arch;
 };
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 736668d..97520c4 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -205,6 +205,7 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_S390_STSI        25
 #define KVM_EXIT_IOAPIC_EOI       26
 #define KVM_EXIT_HYPERV           27
+#define KVM_EXIT_DIRTY_LOG_FULL   28
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ba99cbc6..e22d7f4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2006,6 +2006,25 @@ static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot,
 	}
 }
 
+static void check_dirty_trigger(struct kvm *kvm, struct kvm_vcpu *vcpu,
+				int count)
+{
+	if (count > kvm->mt.dirty_trigger) {
+		/*
+		 * Request vcpu exits, but if interrupts are disabled, we have
+		 * to defer the requests because smp_call_function_* may
+		 * deadlock when called that way.
+		 */
+		if (vcpu && irqs_disabled()) {
+			vcpu->need_exit = true;
+		} else {
+			WARN_ON(irqs_disabled());
+			kvm_make_all_cpus_request(kvm,
+						  KVM_REQ_EXIT_DIRTY_LOG_FULL);
+		}
+	}
+}
+
 /*
  * We have some new dirty pages for our sublist waiter.  Enough to merit
  * waking it up?
@@ -2079,6 +2098,7 @@ static void mt_mark_page_dirty(struct kvm *kvm, struct kvm_memory_slot *slot,
 		if ((gfnlist->dirty_index % DIRTY_GFN_ADD_GRANULARITY) == 0) {
 			spin_lock(&kvm->mt.lock);
 			kvm->mt.tot_pages += DIRTY_GFN_ADD_GRANULARITY;
+			check_dirty_trigger(kvm, vcpu, kvm->mt.tot_pages);
 			mt_sw_add_pages(kvm);
 			spin_unlock(&kvm->mt.lock);
 		}
@@ -2433,6 +2453,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	if (IS_ERR(vcpu))
 		return PTR_ERR(vcpu);
 
+	vcpu->need_exit = false;
+
 	preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
 
 	r = kvm_arch_vcpu_setup(vcpu);
@@ -3627,7 +3649,17 @@ static int kvm_vm_ioctl_mt_sublist_fetch(struct kvm *kvm,
 
 static int kvm_vm_ioctl_mt_dirty_trigger(struct kvm *kvm, int dirty_trigger)
 {
-	return -EINVAL;
+	if (!kvm->mt.gfn_list.dirty_gfns)
+		return -EINVAL;
+
+	if (kvm->mt.gfn_list.max_dirty < dirty_trigger)
+		return -EINVAL;
+
+	kvm->mt.dirty_trigger = dirty_trigger;
+
+	check_dirty_trigger(kvm, NULL, kvm->mt.tot_pages);
+
+	return 0;
 }
 
 static long kvm_vm_ioctl(struct file *filp,
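
For completeness, arming the trigger from userspace would look roughly
like the sketch below. The ioctl name KVM_MT_DIRTY_TRIGGER and the
plain-int argument are assumptions inferred from the handler
kvm_vm_ioctl_mt_dirty_trigger() above; the actual ioctl is defined
elsewhere in this series.

    /* vm_fd: fd from KVM_CREATE_VM; max_dirty_gfns mirrors the
     * kvm->mt.gfn_list.max_dirty capacity that the handler checks
     * against. Both names are hypothetical. */
    int trigger = max_dirty_gfns * 9 / 10;	/* keep ~10% headroom */

    if (ioctl(vm_fd, KVM_MT_DIRTY_TRIGGER, trigger) < 0)
    	perror("KVM_MT_DIRTY_TRIGGER");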