[v2,2/4] KVM: Dirty memory tracking for performant checkpointing solutions

Message ID CY1PR08MB1992BC26A6EF9A21BA2AE175F0610@CY1PR08MB1992.namprd08.prod.outlook.com (mailing list archive)
State New, archived

Commit Message

Cao, Lei Jan. 4, 2017, 8:43 p.m. UTC
Introduce memory tracking data structures and implement the new ioctls
and support functions.

Signed-off-by: Lei Cao <lei.cao@stratus.com>
---
 include/linux/kvm_host.h |  25 ++++++
 virt/kvm/kvm_main.c      | 220 +++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 239 insertions(+), 6 deletions(-)

Comments

Paolo Bonzini Jan. 5, 2017, 10:15 a.m. UTC | #1
On 04/01/2017 21:43, Cao, Lei wrote:
> Introduce memory tracking data structures and implement the new ioctls
> and support functions.
> 
> Signed-off-by: Lei Cao <lei.cao@stratus.com>
> ---
>  include/linux/kvm_host.h |  25 ++++++
>  virt/kvm/kvm_main.c      | 220 +++++++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 239 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 1c5190d..7a85b30 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -204,6 +204,22 @@ struct kvm_mmio_fragment {
>  	unsigned len;
>  };
>  
> +#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
> +struct dirty_gfn_t {
> +	__u32 slot; /* as_id | slot_id */
> +	__u32 pad;
> +	__u64 offset;
> +};
> +
> +struct gfn_list_t {
> +	__u32 dirty_index; /* where to put the next dirty GFN */
> +	__u32 avail_index; /* GFNs before this can be harvested */
> +	__u32 fetch_index; /* the next GFN to be harvested */
> +	__u32 pad;
> +	struct dirty_gfn_t dirty_gfns[0];
> +};

These are part of the userspace API, so they go in
include/uapi/linux/kvm.h and they should be included unconditionally.
Also, as mentioned (indirectly) in my reply to the cover letter, the
types for userspace API should start with "kvm_".

Please document in include/linux/kvm.h or virt/kvm/kvm_main.c the
requirements for enabling KVM_DIRTY_LOG_PAGE_OFFSET.  As far as I can
see there is at least the following:

- any memory accesses done by KVM should use kvm_vcpu_write_* instead of
kvm_write_* if possible, otherwise the per-VM log will fill too fast

- you should provide kvm_arch_mmu_enable_log_dirty_pt_masked and you
should not have a separate step to synchronize a hardware dirty bitmap
with KVM's

The required size of the global log probably correlates to the number of
VCPUs, so what is a good rule of thumb to size the logs?  Could it be
something like "M times the number of VCPUs, plus N" (and document the M
and N of course)?
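
For reference, a minimal sketch of what the uapi definitions might look
like after such a move (the "kvm_" names below are illustrative, not taken
from the patch):

struct kvm_dirty_gfn {
	__u32 slot; /* as_id | slot_id */
	__u32 pad;
	__u64 offset;
};

struct kvm_dirty_gfn_list {
	__u32 dirty_index; /* where to put the next dirty GFN */
	__u32 avail_index; /* GFNs before this can be harvested */
	__u32 fetch_index; /* the next GFN to be harvested */
	__u32 pad;
	struct kvm_dirty_gfn dirty_gfns[0];
};

Userspace would harvest entries from the mmap'ed list by loading
avail_index, consuming dirty_gfns[fetch_index..avail_index), and then
advancing fetch_index.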

> @@ -1996,6 +2038,9 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
>  	r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
>  	if (r)
>  		return -EFAULT;
> +#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
> +	kvm_mt_mark_page_dirty(kvm, ghc->memslot, NULL, gpa >> PAGE_SHIFT);
> +#endif

Please add the kvm and vcpu arguments to mark_page_dirty_in_slot, since
you're modifying all four calls to mark_page_dirty_in_slot itself.

Furthermore, mark_page_dirty_in_slot already has a lot of ingredients
that remove duplicated code:

- it checks for memslot != NULL

- it checks for memslot->dirty_bitmap

- the check for memslot->dirty_bitmap makes it unnecessary to check
memslot->id against KVM_USER_MEM_SLOTS.
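
Concretely, the reshaped helper could look something like this (a sketch
based on the existing mark_page_dirty_in_slot, not the patch itself):

static void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_vcpu *vcpu,
				    struct kvm_memory_slot *memslot,
				    gfn_t gfn)
{
	if (memslot && memslot->dirty_bitmap) {
		unsigned long rel_gfn = gfn - memslot->base_gfn;

#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
		/* log to the per-VCPU list if vcpu != NULL, else per-VM */
		kvm_mt_mark_page_dirty(kvm, memslot, vcpu, gfn);
#endif
		set_bit_le(rel_gfn, memslot->dirty_bitmap);
	}
}

so that the callers no longer need their own NULL and slot-id checks.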

>  	mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT);
>  
>  	return 0;
> @@ -2076,6 +2121,12 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
>  	struct kvm_memory_slot *memslot;
>  
>  	memslot = gfn_to_memslot(kvm, gfn);
> +#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
> +	if (memslot) {
> +		if (memslot->id >= 0 && memslot->id < KVM_USER_MEM_SLOTS)
> +			kvm_mt_mark_page_dirty(kvm, memslot, NULL, gfn);
> +	}
> +#endif
>  	mark_page_dirty_in_slot(memslot, gfn);
>  }
>  EXPORT_SYMBOL_GPL(mark_page_dirty);
> @@ -2085,6 +2136,13 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
>  	struct kvm_memory_slot *memslot;
>  
>  	memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
> +#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
> +	if (memslot) {
> +		if (memslot->id >= 0 && memslot->id < KVM_USER_MEM_SLOTS)
> +			kvm_mt_mark_page_dirty(vcpu->kvm, memslot,
> +					       vcpu, gfn);
> +	}
> +#endif
>  	mark_page_dirty_in_slot(memslot, gfn);
>  }
>  EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
> @@ -2377,6 +2435,32 @@ static const struct vm_operations_struct kvm_vcpu_vm_ops = {
>  
>  static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
>  {
> +#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
> +	struct kvm_vcpu *vcpu = file->private_data;
> +	unsigned long start, pfn, size;
> +	int ret;
> +	struct gfn_list_t *log;
> +
> +	if (vcpu->kvm->dirty_log_size) {
> +		size = vcpu->kvm->dirty_log_size;
> +		start = vma->vm_start +
> +			KVM_DIRTY_LOG_PAGE_OFFSET*PAGE_SIZE;
> +		log = vcpu->kvm->dirty_logs;
> +		pfn = page_to_pfn(virt_to_page((unsigned long)log));
> +		ret = remap_pfn_range(vma, start, pfn, size,
> +				      vma->vm_page_prot);
> +		if (ret)
> +			return ret;
> +		start = vma->vm_start +
> +			KVM_DIRTY_LOG_PAGE_OFFSET*PAGE_SIZE + size;
> +		log = vcpu->dirty_logs;
> +		pfn = page_to_pfn(virt_to_page((unsigned long)log));
> +		ret = remap_pfn_range(vma, start, pfn, size,
> +				      vma->vm_page_prot);
> +		if (ret)
> +			return ret;
> +	}

Please don't use remap_pfn_range; instead extend kvm_vcpu_fault.  For
the VM-wide log, let's mmap the KVM file descriptor instead.  Please add
a .mmap callback to kvm_vm_fops and define a new vm_operations_struct
kvm_vm_vm_ops.
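
A rough sketch of that direction (assuming the per-VM log remains
physically contiguous so virt_to_page() works; a vmalloc'ed log would need
vmalloc_to_page() instead):

static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct kvm *kvm = vma->vm_file->private_data;
	unsigned long off = vmf->pgoff - KVM_DIRTY_LOG_PAGE_OFFSET;
	struct page *page;

	if (!kvm->dirty_log_size ||
	    off >= (kvm->dirty_log_size >> PAGE_SHIFT))
		return VM_FAULT_SIGBUS;

	page = virt_to_page((char *)kvm->dirty_logs + (off << PAGE_SHIFT));
	get_page(page);
	vmf->page = page;
	return 0;
}

static const struct vm_operations_struct kvm_vm_vm_ops = {
	.fault = kvm_vm_fault,
};

static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &kvm_vm_vm_ops;
	return 0;
}

together with a ".mmap = kvm_vm_mmap" entry in kvm_vm_fops and a matching
case in kvm_vcpu_fault for the per-VCPU log.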

> +#endif
>  	vma->vm_ops = &kvm_vcpu_vm_ops;
>  	return 0;
>  }
> @@ -2946,19 +3030,143 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
>  }
>  
>  #ifdef KVM_DIRTY_LOG_PAGE_OFFSET
> +static void kvm_mt_mark_page_dirty(struct kvm *kvm, struct kvm_memory_slot *slot,
> +	struct kvm_vcpu *vcpu, gfn_t gfn)
> +{
> +	struct gfn_list_t *gfnlist;
> +	int slot_id;
> +	u32 as_id = 0;
> +	u64 offset;
> +
> +	if (!slot || !slot->dirty_bitmap || !kvm->dirty_log_size)
> +		return;
> +
> +	offset = gfn - slot->base_gfn;
> +
> +	if (test_bit_le(offset, slot->dirty_bitmap))
> +		return;
> +
> +	slot_id = slot->id;
> +
> +	if (vcpu)
> +		as_id = kvm_arch_vcpu_memslots_id(vcpu);
> +
> +	if (vcpu)
> +		gfnlist = vcpu->dirty_logs;
> +	else
> +		gfnlist = kvm->dirty_logs;
> +
> +	if (gfnlist->dirty_index == kvm->max_dirty_logs) {
> +		printk(KERN_ERR "dirty log overflow\n");

This should be at least ratelimited, but maybe it should even trigger a
WARN_ONCE.
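
For example:

	if (gfnlist->dirty_index == kvm->max_dirty_logs) {
		WARN_ONCE(1, "kvm: dirty GFN log overflow\n");
		return;
	}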

> +		return;
> +	}
> +
> +	if (!vcpu)
> +		spin_lock(&kvm->dirty_log_lock);
> +	gfnlist->dirty_gfns[gfnlist->dirty_index].slot =
> +		(as_id << 16) | slot_id;
> +	gfnlist->dirty_gfns[gfnlist->dirty_index].offset = offset;
> +	smp_wmb();
> +	gfnlist->dirty_index++;
> +	if (!vcpu)
> +		spin_unlock(&kvm->dirty_log_lock);
> +}
> +
>  static int kvm_mt_set_dirty_log_size(struct kvm *kvm, u32 size)
>  {
> -	return -EINVAL;
> +	struct page *page;
> +
> +	if (!size)
> +		return -EINVAL;
> +
> +	kvm->dirty_log_size = PAGE_SIZE << get_order(size);
> +	kvm->max_dirty_logs = (kvm->dirty_log_size -
> +			       sizeof(struct gfn_list_t)) /
> +			      sizeof(struct dirty_gfn_t);
> +	page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));

Please use vmalloc.  We don't want userspace to trigger large-order
allocations.
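
For instance (sketch; the mmap path then has to translate pages with
vmalloc_to_page() rather than virt_to_page()):

	kvm->dirty_logs = vzalloc(kvm->dirty_log_size);
	if (!kvm->dirty_logs) {
		kvm_put_kvm(kvm);
		return -ENOMEM;
	}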

> +	if (!page) {
> +		kvm_put_kvm(kvm);
> +		return -ENOMEM;
> +	}
> +	kvm->dirty_logs = page_address(page);
> +	spin_lock_init(&kvm->dirty_log_lock);
> +	return 0;
> +}
> +
> +static void kvm_mt_reset_gfn(struct kvm *kvm,
> +			     struct dirty_gfn_t *slot_offset)
> +{
> +	struct kvm_memory_slot *slot;
> +	int as_id, id;
> +
> +	as_id = slot_offset->slot >> 16;
> +	id = (u16)slot_offset->slot;
> +	slot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
> +
> +	clear_bit_le(slot_offset->offset, slot->dirty_bitmap);
> +	kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, slot,
> +						slot_offset->offset, 1);

In case the guest has locality, would it be beneficial to check for
consecutive dirtied pages, and only call
kvm_arch_mmu_enable_log_dirty_pt_masked once?
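
As an illustration, one way to do that is to coalesce runs of entries that
fall into the same BITS_PER_LONG-aligned block of the same slot (sketch
only, with a hypothetical helper name):

static u32 kvm_mt_reset_gfn_run(struct kvm *kvm, struct gfn_list_t *log,
				u32 i, u32 count)
{
	u32 slot_word = log->dirty_gfns[i].slot;
	u64 base = log->dirty_gfns[i].offset & ~(u64)(BITS_PER_LONG - 1);
	unsigned long mask = 0;
	struct kvm_memory_slot *slot;
	unsigned int b;

	/* gather all consecutive entries hitting the same aligned block */
	while (i < count && log->dirty_gfns[i].slot == slot_word &&
	       (log->dirty_gfns[i].offset & ~(u64)(BITS_PER_LONG - 1)) == base) {
		mask |= 1UL << (log->dirty_gfns[i].offset & (BITS_PER_LONG - 1));
		i++;
	}

	slot = id_to_memslot(__kvm_memslots(kvm, slot_word >> 16),
			     (u16)slot_word);
	for_each_set_bit(b, &mask, BITS_PER_LONG)
		clear_bit_le(base + b, slot->dirty_bitmap);
	kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, slot, base, mask);

	return i; /* index of the first entry not consumed */
}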

Paolo

>  }
>  
>  static int kvm_mt_reset_all_gfns(struct kvm *kvm)
>  {
> -	return -EINVAL;
> +	int i, j;
> +	struct kvm_vcpu *vcpu;
> +	int cleared = 0;
> +
> +	if (!kvm->dirty_log_size)
> +		return -EINVAL;
> +
> +	spin_lock(&kvm->mmu_lock);
> +
> +	kvm_for_each_vcpu(i, vcpu, kvm) {
> +		for (j = 0;
> +		     j < vcpu->dirty_logs->dirty_index;
> +		     j++, cleared++)
> +			kvm_mt_reset_gfn(kvm,
> +					 &vcpu->dirty_logs->dirty_gfns[j]);
> +		vcpu->dirty_logs->dirty_index = 0;
> +		vcpu->dirty_logs->avail_index = 0;
> +		vcpu->dirty_logs->fetch_index = 0;
> +	}
> +
> +	for (j = 0; j < kvm->dirty_logs->dirty_index; j++, cleared++)
> +		kvm_mt_reset_gfn(kvm, &kvm->dirty_logs->dirty_gfns[j]);
> +	kvm->dirty_logs->dirty_index = 0;
> +	kvm->dirty_logs->avail_index = 0;
> +	kvm->dirty_logs->fetch_index = 0;
> +
> +	spin_unlock(&kvm->mmu_lock);
> +
> +	if (cleared)
> +		kvm_flush_remote_tlbs(kvm);
> +
> +	return 0;
>  }
>  
>  static int kvm_mt_get_dirty_count(struct kvm *kvm, u32 *count)
>  {
> -	return -EINVAL;
> +	int i, avail = 0;
> +	struct kvm_vcpu *vcpu;
> +
> +	if (!kvm->dirty_log_size)
> +		return -EINVAL;
> +
> +	kvm_for_each_vcpu(i, vcpu, kvm) {
> +		vcpu->dirty_logs->avail_index =
> +			READ_ONCE(vcpu->dirty_logs->dirty_index);
> +		avail += vcpu->dirty_logs->avail_index -
> +			 vcpu->dirty_logs->fetch_index;
> +	}
> +
> +	kvm->dirty_logs->avail_index =
> +		READ_ONCE(kvm->dirty_logs->dirty_index);
> +	avail += kvm->dirty_logs->avail_index -
> +		 kvm->dirty_logs->fetch_index;
> +
> +	*count = avail;
> +
> +	return 0;
>  }
>  #endif /* KVM_DIRTY_LOG_PAGE_OFFSET */
>  
> 
Paolo Bonzini Jan. 5, 2017, 10:49 a.m. UTC | #2
On 04/01/2017 21:43, Cao, Lei wrote:
> +static void kvm_mt_reset_gfn(struct kvm *kvm,
> +			     struct dirty_gfn_t *slot_offset)
> +{
> +	struct kvm_memory_slot *slot;
> +	int as_id, id;
> +
> +	as_id = slot_offset->slot >> 16;
> +	id = (u16)slot_offset->slot;
> +	slot = id_to_memslot(__kvm_memslots(kvm, as_id), id);

Forgot one: userspace can be evil here, so you need to validate slot and
offset here (and copy them into local variables with READ_ONCE before
you validate them).
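
Roughly (sketch):

	u32 slot_word = READ_ONCE(slot_offset->slot);
	u64 offset = READ_ONCE(slot_offset->offset);
	int as_id = slot_word >> 16;
	u16 id = (u16)slot_word;

	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
		return;
	slot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
	if (!slot || !slot->dirty_bitmap || offset >= slot->npages)
		return;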

Paolo

> +	clear_bit_le(slot_offset->offset, slot->dirty_bitmap);
> +	kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, slot,
> +						slot_offset->offset, 1);
>  }
>  

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1c5190d..7a85b30 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -204,6 +204,22 @@  struct kvm_mmio_fragment {
 	unsigned len;
 };
 
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+struct dirty_gfn_t {
+	__u32 slot; /* as_id | slot_id */
+	__u32 pad;
+	__u64 offset;
+};
+
+struct gfn_list_t {
+	__u32 dirty_index; /* where to put the next dirty GFN */
+	__u32 avail_index; /* GFNs before this can be harvested */
+	__u32 fetch_index; /* the next GFN to be harvested */
+	__u32 pad;
+	struct dirty_gfn_t dirty_gfns[0];
+};
+#endif
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 #ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -265,6 +281,9 @@  struct kvm_vcpu {
 	bool preempted;
 	struct kvm_vcpu_arch arch;
 	struct dentry *debugfs_dentry;
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	struct gfn_list_t *dirty_logs;
+#endif
 };
 
 static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
@@ -430,6 +449,12 @@  struct kvm {
 	struct list_head devices;
 	struct dentry *debugfs_dentry;
 	struct kvm_stat_data **debugfs_stat_data;
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	u32 dirty_log_size;
+	u32 max_dirty_logs;
+	struct gfn_list_t *dirty_logs;
+	spinlock_t dirty_log_lock;
+#endif
 };
 
 #define kvm_err(fmt, ...) \
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8e6f2b1..bff980c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -122,6 +122,12 @@  static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
 
 static void kvm_release_pfn_dirty(kvm_pfn_t pfn);
 static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+static void kvm_mt_mark_page_dirty(struct kvm *kvm,
+				   struct kvm_memory_slot *slot,
+				   struct kvm_vcpu *vcpu,
+				   gfn_t gfn);
+#endif
 
 __visible bool kvm_rebooting;
 EXPORT_SYMBOL_GPL(kvm_rebooting);
@@ -254,15 +260,37 @@  int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	}
 	vcpu->run = page_address(page);
 
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	if (kvm->dirty_log_size) {
+		page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
+				   get_order(kvm->dirty_log_size));
+		if (!page) {
+			r = -ENOMEM;
+			goto fail_free_run;
+		}
+		vcpu->dirty_logs = page_address(page);
+	}
+#endif
+
 	kvm_vcpu_set_in_spin_loop(vcpu, false);
 	kvm_vcpu_set_dy_eligible(vcpu, false);
 	vcpu->preempted = false;
 
 	r = kvm_arch_vcpu_init(vcpu);
 	if (r < 0)
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+		goto fail_free_logs;
+#else
 		goto fail_free_run;
+#endif
 	return 0;
 
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+fail_free_logs:
+	if (kvm->dirty_log_size)
+		free_pages((unsigned long)vcpu->dirty_logs,
+			   get_order(kvm->dirty_log_size));
+#endif
 fail_free_run:
 	free_page((unsigned long)vcpu->run);
 fail:
@@ -275,6 +303,11 @@  void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
 	put_pid(vcpu->pid);
 	kvm_arch_vcpu_uninit(vcpu);
 	free_page((unsigned long)vcpu->run);
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	if (vcpu->kvm->dirty_log_size)
+		free_pages((unsigned long)vcpu->dirty_logs,
+			   get_order(vcpu->kvm->dirty_log_size));
+#endif
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
 
@@ -726,6 +759,11 @@  static void kvm_destroy_vm(struct kvm *kvm)
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kvm_io_bus_destroy(kvm->buses[i]);
 	kvm_coalesced_mmio_free(kvm);
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	if (kvm->dirty_log_size)
+		free_pages((unsigned long)kvm->dirty_logs,
+			  get_order(kvm->dirty_log_size));
+#endif
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
 #else
@@ -1862,7 +1900,8 @@  int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
 
-static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
+static int __kvm_write_guest_page(struct kvm *kvm,
+				  struct kvm_memory_slot *memslot, gfn_t gfn,
 			          const void *data, int offset, int len)
 {
 	int r;
@@ -1874,6 +1913,9 @@  static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
 	r = __copy_to_user((void __user *)addr + offset, data, len);
 	if (r)
 		return -EFAULT;
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	kvm_mt_mark_page_dirty(kvm, memslot, NULL, gfn);
+#endif
 	mark_page_dirty_in_slot(memslot, gfn);
 	return 0;
 }
@@ -1883,7 +1925,7 @@  int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
 {
 	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
 
-	return __kvm_write_guest_page(slot, gfn, data, offset, len);
+	return __kvm_write_guest_page(kvm, slot, gfn, data, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_page);
 
@@ -1892,7 +1934,7 @@  int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
 {
 	struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 
-	return __kvm_write_guest_page(slot, gfn, data, offset, len);
+	return __kvm_write_guest_page(vcpu->kvm, slot, gfn, data, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
 
@@ -1996,6 +2038,9 @@  int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 	r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
 	if (r)
 		return -EFAULT;
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	kvm_mt_mark_page_dirty(kvm, ghc->memslot, NULL, gpa >> PAGE_SHIFT);
+#endif
 	mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT);
 
 	return 0;
@@ -2076,6 +2121,12 @@  void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 	struct kvm_memory_slot *memslot;
 
 	memslot = gfn_to_memslot(kvm, gfn);
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	if (memslot) {
+		if (memslot->id >= 0 && memslot->id < KVM_USER_MEM_SLOTS)
+			kvm_mt_mark_page_dirty(kvm, memslot, NULL, gfn);
+	}
+#endif
 	mark_page_dirty_in_slot(memslot, gfn);
 }
 EXPORT_SYMBOL_GPL(mark_page_dirty);
@@ -2085,6 +2136,13 @@  void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
 	struct kvm_memory_slot *memslot;
 
 	memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	if (memslot) {
+		if (memslot->id >= 0 && memslot->id < KVM_USER_MEM_SLOTS)
+			kvm_mt_mark_page_dirty(vcpu->kvm, memslot,
+					       vcpu, gfn);
+	}
+#endif
 	mark_page_dirty_in_slot(memslot, gfn);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
@@ -2377,6 +2435,32 @@  static const struct vm_operations_struct kvm_vcpu_vm_ops = {
 
 static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
 {
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	struct kvm_vcpu *vcpu = file->private_data;
+	unsigned long start, pfn, size;
+	int ret;
+	struct gfn_list_t *log;
+
+	if (vcpu->kvm->dirty_log_size) {
+		size = vcpu->kvm->dirty_log_size;
+		start = vma->vm_start +
+			KVM_DIRTY_LOG_PAGE_OFFSET*PAGE_SIZE;
+		log = vcpu->kvm->dirty_logs;
+		pfn = page_to_pfn(virt_to_page((unsigned long)log));
+		ret = remap_pfn_range(vma, start, pfn, size,
+				      vma->vm_page_prot);
+		if (ret)
+			return ret;
+		start = vma->vm_start +
+			KVM_DIRTY_LOG_PAGE_OFFSET*PAGE_SIZE + size;
+		log = vcpu->dirty_logs;
+		pfn = page_to_pfn(virt_to_page((unsigned long)log));
+		ret = remap_pfn_range(vma, start, pfn, size,
+				      vma->vm_page_prot);
+		if (ret)
+			return ret;
+	}
+#endif
 	vma->vm_ops = &kvm_vcpu_vm_ops;
 	return 0;
 }
@@ -2946,19 +3030,143 @@  static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 }
 
 #ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+static void kvm_mt_mark_page_dirty(struct kvm *kvm, struct kvm_memory_slot *slot,
+	struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	struct gfn_list_t *gfnlist;
+	int slot_id;
+	u32 as_id = 0;
+	u64 offset;
+
+	if (!slot || !slot->dirty_bitmap || !kvm->dirty_log_size)
+		return;
+
+	offset = gfn - slot->base_gfn;
+
+	if (test_bit_le(offset, slot->dirty_bitmap))
+		return;
+
+	slot_id = slot->id;
+
+	if (vcpu)
+		as_id = kvm_arch_vcpu_memslots_id(vcpu);
+
+	if (vcpu)
+		gfnlist = vcpu->dirty_logs;
+	else
+		gfnlist = kvm->dirty_logs;
+
+	if (gfnlist->dirty_index == kvm->max_dirty_logs) {
+		printk(KERN_ERR "dirty log overflow\n");
+		return;
+	}
+
+	if (!vcpu)
+		spin_lock(&kvm->dirty_log_lock);
+	gfnlist->dirty_gfns[gfnlist->dirty_index].slot =
+		(as_id << 16) | slot_id;
+	gfnlist->dirty_gfns[gfnlist->dirty_index].offset = offset;
+	smp_wmb();
+	gfnlist->dirty_index++;
+	if (!vcpu)
+		spin_unlock(&kvm->dirty_log_lock);
+}
+
 static int kvm_mt_set_dirty_log_size(struct kvm *kvm, u32 size)
 {
-	return -EINVAL;
+	struct page *page;
+
+	if (!size)
+		return -EINVAL;
+
+	kvm->dirty_log_size = PAGE_SIZE << get_order(size);
+	kvm->max_dirty_logs = (kvm->dirty_log_size -
+			       sizeof(struct gfn_list_t)) /
+			      sizeof(struct dirty_gfn_t);
+	page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
+	if (!page) {
+		kvm_put_kvm(kvm);
+		return -ENOMEM;
+	}
+	kvm->dirty_logs = page_address(page);
+	spin_lock_init(&kvm->dirty_log_lock);
+	return 0;
+}
+
+static void kvm_mt_reset_gfn(struct kvm *kvm,
+			     struct dirty_gfn_t *slot_offset)
+{
+	struct kvm_memory_slot *slot;
+	int as_id, id;
+
+	as_id = slot_offset->slot >> 16;
+	id = (u16)slot_offset->slot;
+	slot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
+
+	clear_bit_le(slot_offset->offset, slot->dirty_bitmap);
+	kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, slot,
+						slot_offset->offset, 1);
 }
 
 static int kvm_mt_reset_all_gfns(struct kvm *kvm)
 {
-	return -EINVAL;
+	int i, j;
+	struct kvm_vcpu *vcpu;
+	int cleared = 0;
+
+	if (!kvm->dirty_log_size)
+		return -EINVAL;
+
+	spin_lock(&kvm->mmu_lock);
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		for (j = 0;
+		     j < vcpu->dirty_logs->dirty_index;
+		     j++, cleared++)
+			kvm_mt_reset_gfn(kvm,
+					 &vcpu->dirty_logs->dirty_gfns[j]);
+		vcpu->dirty_logs->dirty_index = 0;
+		vcpu->dirty_logs->avail_index = 0;
+		vcpu->dirty_logs->fetch_index = 0;
+	}
+
+	for (j = 0; j < kvm->dirty_logs->dirty_index; j++, cleared++)
+		kvm_mt_reset_gfn(kvm, &kvm->dirty_logs->dirty_gfns[j]);
+	kvm->dirty_logs->dirty_index = 0;
+	kvm->dirty_logs->avail_index = 0;
+	kvm->dirty_logs->fetch_index = 0;
+
+	spin_unlock(&kvm->mmu_lock);
+
+	if (cleared)
+		kvm_flush_remote_tlbs(kvm);
+
+	return 0;
 }
 
 static int kvm_mt_get_dirty_count(struct kvm *kvm, u32 *count)
 {
-	return -EINVAL;
+	int i, avail = 0;
+	struct kvm_vcpu *vcpu;
+
+	if (!kvm->dirty_log_size)
+		return -EINVAL;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		vcpu->dirty_logs->avail_index =
+			READ_ONCE(vcpu->dirty_logs->dirty_index);
+		avail += vcpu->dirty_logs->avail_index -
+			 vcpu->dirty_logs->fetch_index;
+	}
+
+	kvm->dirty_logs->avail_index =
+		READ_ONCE(kvm->dirty_logs->dirty_index);
+	avail += kvm->dirty_logs->avail_index -
+		 kvm->dirty_logs->fetch_index;
+
+	*count = avail;
+
+	return 0;
 }
 #endif /* KVM_DIRTY_LOG_PAGE_OFFSET */