@@ -1312,4 +1312,98 @@ struct kvm_assigned_msix_entry {
__u16 padding[3];
};
+#define KVM_MT_VERSION 1 /* ABI version of struct mt_setup */
+struct mt_setup {
+ __u32 version; /* set to KVM_MT_VERSION (presumably validated by KVM_INIT_MT) */
+
+ /* which operation to perform? */
+#define KVM_MT_OP_INIT 1
+#define KVM_MT_OP_CLEANUP 2
+ __u32 op;
+
+ /*
+ * flags bit defs:
+ */
+
+ /*
+ * Features.
+ * 1. Avoid logging duplicate entries
+ */
+#define KVM_MT_OPTION_NO_DUPS (1 << 2) /* NOTE(review): bits 0-1 unused — confirm they are reserved */
+
+ __u32 flags;
+
+ /* max number of dirty pages per checkpoint cycle */
+ __u32 max_dirty;
+};
+
+struct mt_enable { /* argument for KVM_ENABLE_MT */
+ __u32 flags; /* 1 -> on, 0 -> off */
+};
+
+#define MT_OFFSET_MASK (0x0000ffffffffffffUL)
+
+/*
+ * Pack a memslot id (bits 63:48) and offset (bits 47:0) into one __u64.
+ * Must be an expression; a do/while(0) block cannot yield a value.
+ */
+#define MT_MAKE_SLOT_OFFSET(slot, offset) \
+ ((((__u64)(slot)) << 48) | ((__u64)(offset) & MT_OFFSET_MASK))
+
+#define MT_OFFSET_FROM_SLOT_OFFSET(slot_off) \
+ ((slot_off) & MT_OFFSET_MASK)
+
+#define MT_SLOT_FROM_SLOT_OFFSET(slot_off) \
+ ((slot_off) >> 48)
+
+struct mt_gfn_list {
+ __s32 count; /* number of entries in gfnlist — presumably in/out; confirm */
+ __u32 max_dirty;
+ __u64 *gfnlist; /* NOTE(review): bare pointer in a uapi struct breaks 32-bit compat; prefer __u64 */
+};
+
+struct mt_prepare_cp { /* argument for KVM_PREPARE_MT_CP */
+ __s64 cpid; /* checkpoint cycle id — presumably caller-chosen; confirm */
+};
+
+struct mt_sublist_fetch_info {
+ struct mt_gfn_list gfn_info; /* in/out: KVM_MT_SUBLIST_FETCH is _IOWR and copies this back */
+
+ /*
+ * flags bit defs:
+ */
+
+ /* caller sleeps until dirty count is reached */
+#define MT_FETCH_WAIT (1 << 0)
+ /* dirty tracking is re-armed for each page in returned list */
+#define MT_FETCH_REARM (1 << 1)
+
+ __u32 flags;
+};
+
+struct mt_dirty_trigger { /* argument for KVM_MT_DIRTY_TRIGGER */
+ /* force vcpus to exit when trigger is reached */
+ __u32 dirty_trigger;
+};
+
+/* Initialize/Cleanup MT data structures, allocate/free list buffers, etc. */
+#define KVM_INIT_MT _IOW(KVMIO, 0xf0, struct mt_setup)
+/* Activate/Deactivate Memory Tracking */
+#define KVM_ENABLE_MT _IOW(KVMIO, 0xf1, struct mt_enable)
+/* notify MT subsystem that VM is about to be unpaused */
+#define KVM_PREPARE_MT_CP _IOW(KVMIO, 0xf2, struct mt_prepare_cp)
+/* Rearm dirty traps for specified pages */
+#define KVM_REARM_DIRTY_PAGES _IO(KVMIO, 0xf3)
+/* notify MT subsystem no more pages will be dirtied this cycle */
+#define KVM_MT_VM_QUIESCED _IO(KVMIO, 0xf4)
+/*
+ * Return specified number of dirty pages. May return fewer than requested.
+ * Optionally, caller can request to sleep until desired number is reached.
+ * The KVM_MT_VM_QUIESCED call above will wake this sleeper even if the
+ * number of dirty pages is not yet the requested amount.
+ */
+#define KVM_MT_SUBLIST_FETCH _IOWR(KVMIO, 0xf5, struct mt_sublist_fetch_info)
+/* Set VM exit trigger point based on dirty page count */
+#define KVM_MT_DIRTY_TRIGGER _IOW(KVMIO, 0xf6, struct mt_dirty_trigger)
+
#endif /* __LINUX_KVM_H */
@@ -2752,6 +2752,43 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
return kvm_vm_ioctl_check_extension(kvm, arg);
}
+static int kvm_vm_ioctl_mt_init(struct kvm *kvm, struct mt_setup *mts)
+{
+ return -EINVAL; /* stub: implemented by a later patch in this series */
+}
+
+static int kvm_vm_ioctl_mt_enable(struct kvm *kvm, struct mt_enable *mte)
+{
+ return -EINVAL; /* stub: implemented by a later patch in this series */
+}
+
+static int kvm_vm_ioctl_mt_prepare_cp(struct kvm *kvm,
+ struct mt_prepare_cp *mtpcp)
+{
+ return -EINVAL; /* stub: implemented by a later patch in this series */
+}
+
+static int kvm_vm_ioctl_mt_rearm_gfns(struct kvm *kvm)
+{
+ return -EINVAL; /* stub: implemented by a later patch in this series */
+}
+
+static int kvm_vm_ioctl_mt_quiesced(struct kvm *kvm)
+{
+ return -EINVAL; /* stub: implemented by a later patch in this series */
+}
+
+static int kvm_vm_ioctl_mt_sublist_fetch(struct kvm *kvm,
+ struct mt_sublist_fetch_info *mtsfi)
+{
+ return -EINVAL; /* stub: implemented by a later patch in this series */
+}
+
+static int kvm_vm_ioctl_mt_dirty_trigger(struct kvm *kvm, int dirty_trigger)
+{
+ return -EINVAL; /* stub: implemented by a later patch in this series */
+}
+
static long kvm_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2909,6 +2946,65 @@ out_free_irq_routing:
case KVM_CHECK_EXTENSION:
r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
break;
+ case KVM_INIT_MT: {
+ struct mt_setup mts;
+
+ r = -EFAULT;
+ if (copy_from_user(&mts, (void __user *)arg, sizeof(mts)))
+ goto out;
+ r = kvm_vm_ioctl_mt_init(kvm, &mts);
+ break;
+ }
+ case KVM_ENABLE_MT: {
+ struct mt_enable mte;
+
+ r = -EFAULT;
+ if (copy_from_user(&mte, (void __user *)arg, sizeof(mte)))
+ goto out;
+ r = kvm_vm_ioctl_mt_enable(kvm, &mte);
+ break;
+ }
+ case KVM_PREPARE_MT_CP: {
+ struct mt_prepare_cp mtpcp;
+
+ r = -EFAULT;
+ if (copy_from_user(&mtpcp, (void __user *)arg, sizeof(mtpcp)))
+ goto out;
+ r = kvm_vm_ioctl_mt_prepare_cp(kvm, &mtpcp);
+ break;
+ }
+ case KVM_REARM_DIRTY_PAGES: {
+ r = kvm_vm_ioctl_mt_rearm_gfns(kvm);
+ break;
+ }
+ case KVM_MT_VM_QUIESCED: {
+ r = kvm_vm_ioctl_mt_quiesced(kvm);
+ break;
+ }
+ case KVM_MT_SUBLIST_FETCH: {
+ struct mt_sublist_fetch_info mtsfi;
+
+ r = -EFAULT;
+ if (copy_from_user(&mtsfi, (void __user *)arg, sizeof(mtsfi)))
+ goto out;
+ r = kvm_vm_ioctl_mt_sublist_fetch(kvm, &mtsfi);
+ if (r)
+ goto out;
+ r = -EFAULT;
+ if (copy_to_user((void __user *)arg, &mtsfi, sizeof(mtsfi)))
+ goto out;
+ r = 0;
+ break;
+ }
+ case KVM_MT_DIRTY_TRIGGER: {
+ struct mt_dirty_trigger mtdt;
+
+ r = -EFAULT;
+ if (copy_from_user(&mtdt, (void __user *)arg, sizeof(mtdt)))
+ goto out;
+ r = kvm_vm_ioctl_mt_dirty_trigger(kvm, mtdt.dirty_trigger);
+ break;
+ }
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
}
Introduce new memory tracking ioctls to support performant checkpoint/restore implementations. See patch 6 for details on the new ioctls. Signed-off-by: Lei Cao <lei.cao@stratus.com> --- include/uapi/linux/kvm.h | 94 +++++++++++++++++++++++++++++++++++++ virt/kvm/kvm_main.c | 96 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 190 insertions(+)