
[1/6] KVM: Dirty memory tracking for performant checkpointing and improved live migration

Message ID BL2PR08MB4811F817BCFF76BD3A9E445F0630@BL2PR08MB481.namprd08.prod.outlook.com (mailing list archive)
State New, archived

Commit Message

Cao, Lei April 26, 2016, 7:21 p.m. UTC
Introduce new memory tracking ioctls to support performant 
checkpoint/restore implementations.

See patch 6 for details on the new ioctls.
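
An illustrative userspace sketch (not part of the patch) of how a
checkpointer might drive the new ioctls follows. It assumes a kernel
with this series applied so that <linux/kvm.h> carries the new
definitions; vm_fd, pause_vm(), resume_vm(), save_page() and the
MAX_DIRTY value are placeholders for the checkpointer's own plumbing,
and the exact in/out semantics of mt_sublist_fetch_info are those
described in patch 6.

/*
 * Sketch only: pause_vm()/resume_vm()/save_page() stand in for the
 * checkpointer's own vcpu and memory handling.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

#define MAX_DIRTY	16384	/* illustrative per-cycle limit */

extern void pause_vm(void);		/* placeholder: stop all vcpus */
extern void resume_vm(void);		/* placeholder: let vcpus run */
extern void save_page(__u64 gfn);	/* placeholder: copy page out */

static int mt_start(int vm_fd)
{
	struct mt_setup setup = {
		.version	= KVM_MT_VERSION,
		.op		= KVM_MT_OP_INIT,
		.flags		= KVM_MT_OPTION_NO_DUPS,
		.max_dirty	= MAX_DIRTY,
	};
	struct mt_dirty_trigger trig = { .dirty_trigger = MAX_DIRTY / 2 };
	struct mt_enable on = { .flags = 1 };	/* 1 -> on */

	if (ioctl(vm_fd, KVM_INIT_MT, &setup) < 0)
		return -1;
	/* force vcpu exits once half the list is used (arbitrary choice) */
	if (ioctl(vm_fd, KVM_MT_DIRTY_TRIGGER, &trig) < 0)
		return -1;
	return ioctl(vm_fd, KVM_ENABLE_MT, &on);
}

static int mt_checkpoint_cycle(int vm_fd, __s64 cpid)
{
	__u64 gfns[MAX_DIRTY];
	struct mt_prepare_cp prep = { .cpid = cpid };
	struct mt_sublist_fetch_info fetch = {
		.gfn_info = {
			.count		= MAX_DIRTY,	/* pages requested */
			.max_dirty	= MAX_DIRTY,	/* buffer capacity */
			.gfnlist	= gfns,
		},
		.flags	= MT_FETCH_REARM,	/* re-arm traps on returned pages */
	};
	__s32 i;

	/* new cycle: the VM is about to be unpaused */
	if (ioctl(vm_fd, KVM_PREPARE_MT_CP, &prep) < 0)
		return -1;
	resume_vm();

	/* ... guest runs; vcpus exit once the dirty trigger is reached ... */

	pause_vm();
	/* no more pages will be dirtied this cycle; wakes any waiting fetch */
	if (ioctl(vm_fd, KVM_MT_VM_QUIESCED) < 0)
		return -1;

	/* drain the dirty gfn list; count may come back smaller than asked */
	if (ioctl(vm_fd, KVM_MT_SUBLIST_FETCH, &fetch) < 0)
		return -1;
	for (i = 0; i < fetch.gfn_info.count; i++)
		save_page(gfns[i]);

	return 0;
}

Teardown would disable tracking with KVM_ENABLE_MT (flags = 0) and then
issue KVM_INIT_MT with op = KVM_MT_OP_CLEANUP to free the list buffers.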

Signed-off-by: Lei Cao <lei.cao@stratus.com>
---
 include/uapi/linux/kvm.h | 94 +++++++++++++++++++++++++++++++++++++
 virt/kvm/kvm_main.c      | 96 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 190 insertions(+)

Patch

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a7f1f80..2bce4db 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1312,4 +1312,98 @@  struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
+#define KVM_MT_VERSION			1
+struct mt_setup {
+	__u32 version;
+
+	/* which operation to perform? */
+#define KVM_MT_OP_INIT           1
+#define KVM_MT_OP_CLEANUP        2
+	__u32 op;
+
+	/*
+	 * flags bit defs:
+	 */
+
+	/*
+	 * Features.
+	 * 1. Avoid logging duplicate entries
+	 */
+#define KVM_MT_OPTION_NO_DUPS		(1 << 2)
+
+	__u32 flags;
+
+	/* max number of dirty pages per checkpoint cycle */
+	__u32 max_dirty;
+};
+
+struct mt_enable {
+	__u32 flags;		/* 1 -> on, 0 -> off */
+};
+
+#define MT_OFFSET_MASK		(0x0000ffffffffffffUL)
+
+#define MT_MAKE_SLOT_OFFSET(slot, offset)			\
+	({							\
+		__u64 slot_off = (offset) & MT_OFFSET_MASK;	\
+		slot_off |= ((__u64)(slot) << 48);		\
+		slot_off;					\
+	})
+
+#define MT_OFFSET_FROM_SLOT_OFFSET(slot_off)		\
+	((slot_off) & MT_OFFSET_MASK)
+
+#define MT_SLOT_FROM_SLOT_OFFSET(slot_off)		\
+	((slot_off) >> 48)
+
+struct mt_gfn_list {
+	__s32	count;
+	__u32	max_dirty;
+	__u64	*gfnlist;
+};
+
+struct mt_prepare_cp {
+	__s64	cpid;
+};
+
+struct mt_sublist_fetch_info {
+	struct mt_gfn_list  gfn_info;
+
+	/*
+	 * flags bit defs:
+	 */
+
+	/* caller sleeps until dirty count is reached */
+#define MT_FETCH_WAIT		(1 << 0)
+	/* dirty tracking is re-armed for each page in returned list */
+#define MT_FETCH_REARM		(1 << 1)
+
+	__u32 flags;
+};
+
+struct mt_dirty_trigger {
+	/* force vcpus to exit when trigger is reached */
+	__u32 dirty_trigger;
+};
+
+/* Initialize/Cleanup MT data structures, allocate/free list buffers, etc. */
+#define KVM_INIT_MT		 _IOW(KVMIO, 0xf0, struct mt_setup)
+/* Activate/Deactivate Memory Tracking */
+#define KVM_ENABLE_MT		 _IOW(KVMIO, 0xf1, struct mt_enable)
+/* Notify MT subsystem that the VM is about to be unpaused */
+#define KVM_PREPARE_MT_CP	 _IOW(KVMIO, 0xf2, struct mt_prepare_cp)
+/* Rearm dirty traps for specified pages */
+#define KVM_REARM_DIRTY_PAGES	 _IO(KVMIO, 0xf3)
+/* Notify MT subsystem that no more pages will be dirtied this cycle */
+#define KVM_MT_VM_QUIESCED	  _IO(KVMIO, 0xf4)
+/*
+ * Return specified number of dirty pages.  May return fewer than requested.
+ * Optionally, caller can request to sleep until desired number is reached.
+ * The KVM_MT_VM_QUIESCED call above will wake this sleeper even if the
+ * number of dirty pages is not yet the requested amount.
+ */
+#define KVM_MT_SUBLIST_FETCH	_IOWR(KVMIO, 0xf5, struct mt_sublist_fetch_info)
+/* Set VM exit trigger point based on dirty page count */
+#define KVM_MT_DIRTY_TRIGGER	 _IOW(KVMIO, 0xf6, struct mt_dirty_trigger)
+
 #endif /* __LINUX_KVM_H */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4fd482f..8a582e5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2752,6 +2752,43 @@  static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 	return kvm_vm_ioctl_check_extension(kvm, arg);
 }
 
+static int kvm_vm_ioctl_mt_init(struct kvm *kvm, struct mt_setup *mts)
+{
+	return -EINVAL;
+}
+
+static int kvm_vm_ioctl_mt_enable(struct kvm *kvm, struct mt_enable *mte)
+{
+	return -EINVAL;
+}
+
+static int kvm_vm_ioctl_mt_prepare_cp(struct kvm *kvm,
+				      struct mt_prepare_cp *mtpcp)
+{
+	return -EINVAL;
+}
+
+static int kvm_vm_ioctl_mt_rearm_gfns(struct kvm *kvm)
+{
+	return -EINVAL;
+}
+
+static int kvm_vm_ioctl_mt_quiesced(struct kvm *kvm)
+{
+	return -EINVAL;
+}
+
+static int kvm_vm_ioctl_mt_sublist_fetch(struct kvm *kvm,
+					 struct mt_sublist_fetch_info *mtsfi)
+{
+	return -EINVAL;
+}
+
+static int kvm_vm_ioctl_mt_dirty_trigger(struct kvm *kvm, int dirty_trigger)
+{
+	return -EINVAL;
+}
+
 static long kvm_vm_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -2909,6 +2946,65 @@  out_free_irq_routing:
 	case KVM_CHECK_EXTENSION:
 		r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
 		break;
+	case KVM_INIT_MT: {
+		struct mt_setup mts;
+
+		r = -EFAULT;
+		if (copy_from_user(&mts, (void __user *)arg, sizeof(mts)))
+			goto out;
+		r = kvm_vm_ioctl_mt_init(kvm, &mts);
+		break;
+	}
+	case KVM_ENABLE_MT: {
+		struct mt_enable mte;
+
+		r = -EFAULT;
+		if (copy_from_user(&mte, (void __user *)arg, sizeof(mte)))
+			goto out;
+		r = kvm_vm_ioctl_mt_enable(kvm, &mte);
+		break;
+	}
+	case KVM_PREPARE_MT_CP: {
+		struct mt_prepare_cp mtpcp;
+
+		r = -EFAULT;
+		if (copy_from_user(&mtpcp, (void __user *)arg, sizeof(mtpcp)))
+			goto out;
+		r = kvm_vm_ioctl_mt_prepare_cp(kvm, &mtpcp);
+		break;
+	}
+	case KVM_REARM_DIRTY_PAGES: {
+		r = kvm_vm_ioctl_mt_rearm_gfns(kvm);
+		break;
+	}
+	case KVM_MT_VM_QUIESCED: {
+		r = kvm_vm_ioctl_mt_quiesced(kvm);
+		break;
+	}
+	case KVM_MT_SUBLIST_FETCH: {
+		struct mt_sublist_fetch_info mtsfi;
+
+		r = -EFAULT;
+		if (copy_from_user(&mtsfi, (void __user *)arg, sizeof(mtsfi)))
+			goto out;
+		r = kvm_vm_ioctl_mt_sublist_fetch(kvm, &mtsfi);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user((void __user *)arg, &mtsfi, sizeof(mtsfi)))
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_MT_DIRTY_TRIGGER: {
+		struct mt_dirty_trigger mtdt;
+
+		r = -EFAULT;
+		if (copy_from_user(&mtdt, (void __user *)arg, sizeof(mtdt)))
+			goto out;
+		r = kvm_vm_ioctl_mt_dirty_trigger(kvm, mtdt.dirty_trigger);
+		break;
+	}
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}