@@ -71,6 +71,7 @@
#define KVM_REQ_HV_RESET 28
#define KVM_REQ_HV_EXIT 29
#define KVM_REQ_HV_STIMER 30
+#define KVM_REQ_EXIT_DIRTY_LOG_FULL 31
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -6489,6 +6489,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
r = 1;
goto out;
}
+ if (kvm_check_request(KVM_REQ_EXIT_DIRTY_LOG_FULL, vcpu)) {
+ vcpu->run->exit_reason = KVM_EXIT_DIRTY_LOG_FULL;
+ r = 0;
+ goto out;
+ }
if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
record_steal_time(vcpu);
if (kvm_check_request(KVM_REQ_SMI, vcpu))
@@ -6687,6 +6692,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ if (vcpu->need_exit) {
+ vcpu->need_exit = false;
+ kvm_make_all_cpus_request(vcpu->kvm,
+ KVM_REQ_EXIT_DIRTY_LOG_FULL);
+ }
+
/*
* Profile KVM exit RIPs:
*/
@@ -257,6 +257,7 @@ struct kvm_vcpu {
} spin_loop;
#endif
bool preempted;
+ bool need_exit;
struct kvm_vcpu_arch arch;
};
@@ -205,6 +205,7 @@ struct kvm_hyperv_exit {
#define KVM_EXIT_S390_STSI 25
#define KVM_EXIT_IOAPIC_EOI 26
#define KVM_EXIT_HYPERV 27
+#define KVM_EXIT_DIRTY_LOG_FULL 28
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -2006,6 +2006,25 @@ static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot,
}
}
+static void check_dirty_trigger(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ int count)
+{
+	if (kvm->mt.dirty_trigger && count > kvm->mt.dirty_trigger) {
+ /*
+ * Request vcpu exits, but if interrupts are disabled, we have
+ * to defer the requests because smp_call_xxx may deadlock when
+ * called that way.
+ */
+ if (vcpu && irqs_disabled()) {
+			vcpu->need_exit = true;
+ } else {
+ WARN_ON(irqs_disabled());
+ kvm_make_all_cpus_request(kvm,
+ KVM_REQ_EXIT_DIRTY_LOG_FULL);
+ }
+ }
+}
+
/*
* We have some new dirty pages for our sublist waiter. Enough to merit
* waking it up?
@@ -2079,6 +2098,7 @@ static void mt_mark_page_dirty(struct kvm *kvm, struct kvm_memory_slot *slot,
if ((gfnlist->dirty_index % DIRTY_GFN_ADD_GRANULARITY) == 0) {
spin_lock(&kvm->mt.lock);
kvm->mt.tot_pages += DIRTY_GFN_ADD_GRANULARITY;
+ check_dirty_trigger(kvm, vcpu, kvm->mt.tot_pages);
mt_sw_add_pages(kvm);
spin_unlock(&kvm->mt.lock);
}
@@ -2433,6 +2453,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
if (IS_ERR(vcpu))
return PTR_ERR(vcpu);
+ vcpu->need_exit = false;
+
preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
r = kvm_arch_vcpu_setup(vcpu);
@@ -3627,7 +3649,17 @@ static int kvm_vm_ioctl_mt_sublist_fetch(struct kvm *kvm,
static int kvm_vm_ioctl_mt_dirty_trigger(struct kvm *kvm, int dirty_trigger)
{
- return -EINVAL;
+ if (!kvm->mt.gfn_list.dirty_gfns)
+ return -EINVAL;
+
+ if (kvm->mt.gfn_list.max_dirty < dirty_trigger)
+ return -EINVAL;
+
+ kvm->mt.dirty_trigger = dirty_trigger;
+
+ check_dirty_trigger(kvm, NULL, kvm->mt.tot_pages);
+
+ return 0;
}
static long kvm_vm_ioctl(struct file *filp,
Implement a dirty page threshold which, when triggered, forces vcpus to exit. Due to limited buffering on the host, we must ensure that checkpoint state is captured before too many pages have been dirtied. Exceeding buffer space would effectively force the two sides to be broken apart and resynchronized from scratch. This "divergence" event is costly to repair. Given that, an "emergency" stop is needed: once a critical threshold of dirty pages has been reached and VM execution has not yet been stopped, the VM exits with a new exit reason indicating that the dirty log is full. This only kicks in after a predefined threshold of dirty pages has been reached; the threshold and buffer sizes are selected to make use of this "emergency stop" a rare event. Signed-off-by: Lei Cao <lei.cao@stratus.com> --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 11 ++++++++++ include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 1 + virt/kvm/kvm_main.c | 34 ++++++++++++++++++++++++++++++- 5 files changed, 47 insertions(+), 1 deletion(-)