
[v4,4/4] KVM: Implement ring-based dirty memory tracking

Message ID CY1PR08MB1992FCC3BF99F11328C8C4CFF05B0@CY1PR08MB1992.namprd08.prod.outlook.com (mailing list archive)
State New, archived

Commit Message

Cao, Lei Feb. 15, 2017, 6:28 p.m. UTC
Implement ring-based dirty memory tracking.

Signed-off-by: Lei Cao <lei.cao@stratus.com>
---
 arch/x86/kvm/Makefile    |   3 +-
 include/linux/kvm_host.h |  12 +++
 virt/kvm/gfn_ring.c      | 135 +++++++++++++++++++++++++++++
 virt/kvm/kvm_main.c      | 220 ++++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 357 insertions(+), 13 deletions(-)
 create mode 100644 virt/kvm/gfn_ring.c

Comments

Paolo Bonzini Feb. 15, 2017, 9:54 p.m. UTC | #1
On 15/02/2017 19:28, Cao, Lei wrote:
> +	spin_lock(&kvm->mmu_lock);
> +	kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask);
> +	spin_unlock(&kvm->mmu_lock);
> +
> +	while (mask) {
> +		clear_bit_le(offset + __ffs(mask), memslot->dirty_bitmap);
> +		mask &= mask - 1;
> +	}

These two steps should be done in the opposite order.  So far, though,
nothing I cannot fix up on commit (and this is going to be 4.12
material anyway).

Paolo
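
The order matters because mark_page_dirty_in_ring() in this patch skips any
gfn whose dirty_bitmap bit is already set: with the patch's sequence, a write
fault landing after kvm_arch_mmu_enable_log_dirty_pt_masked() has re-protected
a page but before its bit is cleared will see the stale bit, skip the ring
push, and then lose the page when the bit is cleared anyway. A minimal sketch
of the swapped sequence, reusing the patch's helpers:

	u64 tmp = mask;

	/* Clear the bitmap bits first, so a fault taken once the pages
	 * are re-protected sees them clear and pushes the gfn to the
	 * ring; the loop consumes its operand, so work on a copy.
	 */
	while (tmp) {
		clear_bit_le(offset + __ffs(tmp), memslot->dirty_bitmap);
		tmp &= tmp - 1;
	}

	spin_lock(&kvm->mmu_lock);
	kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask);
	spin_unlock(&kvm->mmu_lock);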
kernel test robot Feb. 15, 2017, 10:07 p.m. UTC | #2
Hi Lei,

[auto build test ERROR on kvms390/next]
[also build test ERROR on v4.10-rc8]
[cannot apply to kvm/linux-next kvm-ppc/kvm-ppc-next next-20170215]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Cao-Lei/KVM-Add-new-generic-capability-for-ring-based-dirty-memory-logging/20170216-033010
base:   https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git next
config: arm64-defconfig (attached as .config)
compiler: aarch64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=arm64 

All errors (new ones prefixed by >>):

   In file included from include/linux/kvm_host.h:36:0,
                    from arch/arm64/kvm/../../../virt/kvm/kvm_main.c:21:
   arch/arm64/include/asm/kvm_host.h:35:0: warning: "KVM_COALESCED_MMIO_PAGE_OFFSET" redefined
    #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
    
   In file included from include/linux/kvm_host.h:31:0,
                    from arch/arm64/kvm/../../../virt/kvm/kvm_main.c:21:
   include/uapi/linux/kvm.h:219:0: note: this is the location of the previous definition
    #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
    
   arch/arm64/kvm/../../../virt/kvm/kvm_main.c: In function 'kvm_vcpu_init':
>> arch/arm64/kvm/../../../virt/kvm/kvm_main.c:282:8: error: implicit declaration of function 'kvm_cpu_dirty_log_size' [-Werror=implicit-function-declaration]
           kvm_cpu_dirty_log_size();
           ^~~~~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors

vim +/kvm_cpu_dirty_log_size +282 arch/arm64/kvm/../../../virt/kvm/kvm_main.c

   276	
   277	#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
   278		if (kvm->dirty_ring_size) {
   279			u32 limit = (kvm->dirty_ring_size /
   280				     sizeof(struct kvm_dirty_gfn)) -
   281				    DIRTY_RING_BUFFER_ENTRY_NUM -
 > 282				    kvm_cpu_dirty_log_size();
   283			r = kvm_gfn_ring_alloc(&vcpu->dirty_ring,
   284					       kvm->dirty_ring_size,
   285					       limit);

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot Feb. 15, 2017, 10:19 p.m. UTC | #3
Hi Lei,

[auto build test ERROR on kvms390/next]
[also build test ERROR on v4.10-rc8]
[cannot apply to kvm/linux-next kvm-ppc/kvm-ppc-next next-20170215]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Cao-Lei/KVM-Add-new-generic-capability-for-ring-based-dirty-memory-logging/20170216-033010
base:   https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git next
config: mips-malta_kvm_defconfig (attached as .config)
compiler: mipsel-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=mips 

All errors (new ones prefixed by >>):

   In file included from include/linux/kvm_host.h:36:0,
                    from arch/mips/kvm/../../../virt/kvm/kvm_main.c:21:
   arch/mips/include/asm/kvm_host.h:72:0: error: "KVM_COALESCED_MMIO_PAGE_OFFSET" redefined [-Werror]
    #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
    
   In file included from include/linux/kvm_host.h:31:0,
                    from arch/mips/kvm/../../../virt/kvm/kvm_main.c:21:
   include/uapi/linux/kvm.h:219:0: note: this is the location of the previous definition
    #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
    
   arch/mips/kvm/../../../virt/kvm/kvm_main.c: In function 'kvm_vcpu_init':
>> arch/mips/kvm/../../../virt/kvm/kvm_main.c:282:8: error: implicit declaration of function 'kvm_cpu_dirty_log_size' [-Werror=implicit-function-declaration]
           kvm_cpu_dirty_log_size();
           ^~~~~~~~~~~~~~~~~~~~~~
   cc1: all warnings being treated as errors

vim +/kvm_cpu_dirty_log_size +282 arch/mips/kvm/../../../virt/kvm/kvm_main.c

   276	
   277	#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
   278		if (kvm->dirty_ring_size) {
   279			u32 limit = (kvm->dirty_ring_size /
   280				     sizeof(struct kvm_dirty_gfn)) -
   281				    DIRTY_RING_BUFFER_ENTRY_NUM -
 > 282				    kvm_cpu_dirty_log_size();
   283			r = kvm_gfn_ring_alloc(&vcpu->dirty_ring,
   284					       kvm->dirty_ring_size,
   285					       limit);

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
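
Both robot reports reduce to the same root cause: kvm_vcpu_init() in common
code now calls kvm_cpu_dirty_log_size(), which this series implements only for
x86, while the uapi change makes KVM_DIRTY_LOG_PAGE_OFFSET (and the relocated
KVM_COALESCED_MMIO_PAGE_OFFSET, hence the redefinition warnings) visible on
every architecture. One conventional way to unbreak the other architectures,
sketched here as an assumption rather than as part of the series, is a weak
default in generic code:

	/* Assumed fix, not part of this patch: a __weak default lets all
	 * architectures link, while x86 overrides it to reserve room for
	 * entries the CPU can log on its own (e.g. the VMX
	 * page-modification log).
	 */
	int __weak kvm_cpu_dirty_log_size(void)
	{
		return 0;
	}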
Cao, Lei Feb. 16, 2017, 2:56 p.m. UTC | #4
On 2/15/2017 4:54 PM, Paolo Bonzini wrote:
> 
> 
> On 15/02/2017 19:28, Cao, Lei wrote:
>> +	spin_lock(&kvm->mmu_lock);
>> +	kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask);
>> +	spin_unlock(&kvm->mmu_lock);
>> +
>> +	while (mask) {
>> +		clear_bit_le(offset + __ffs(mask), memslot->dirty_bitmap);
>> +		mask &= mask - 1;
>> +	}
> 
> These two steps should be done in the opposite order.  So far, though,
> nothing I cannot fix up on commit (and this is going to be 4.12
> material anyway).
> 
> Paolo
> 
> 

Thanks for all your help!

Lei
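
A note on the reset path in the patch below: kvm_gfn_ring_reset() batches
consecutive harvested gfns from the same slot into a single (offset, mask)
pair before each kvm_reset_dirty_gfn() call, and can even slide the base
offset backwards as long as no set bit would fall off the mask; anything it
cannot merge forces a flush and a fresh pair. A small self-contained
demonstration of that arithmetic (plain userspace C, not code from the patch):

	#include <stdio.h>
	#include <stdint.h>

	#define BITS_PER_LONG 64

	int main(void)
	{
		uint64_t offsets[] = { 5, 6, 7, 4 };	/* gfn offsets, same slot */
		uint64_t cur = offsets[0], mask = 1;

		for (int i = 1; i < 4; i++) {
			int64_t delta = (int64_t)(offsets[i] - cur);

			if (delta >= 0 && delta < BITS_PER_LONG) {
				mask |= 1ull << delta;	/* forward visit */
			} else if (delta > -BITS_PER_LONG && delta < 0 &&
				   (mask << -delta >> -delta) == mask) {
				cur = offsets[i];	/* backward visit */
				mask = (mask << -delta) | 1;
			}
			/* otherwise the kernel would flush and start over */
		}
		/* prints base=4 mask=0xf: one reset covers offsets 4..7 */
		printf("base=%llu mask=%#llx\n",
		       (unsigned long long)cur, (unsigned long long)mask);
		return 0;
	}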

Patch

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 3bff207..d832622 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -8,7 +8,8 @@  CFLAGS_vmx.o := -I.
 KVM := ../../../virt/kvm
 
 kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
-				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
+				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o \
+				$(KVM)/gfn_ring.o
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b0783da..082a2b2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -34,6 +34,7 @@ 
 #include <linux/kvm_types.h>
 
 #include <asm/kvm_host.h>
+#include <linux/kvm_gfn_ring.h>
 
 #ifndef KVM_MAX_VCPU_ID
 #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
@@ -266,6 +267,10 @@  struct kvm_vcpu {
 	bool preempted;
 	struct kvm_vcpu_arch arch;
 	struct dentry *debugfs_dentry;
+
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	struct kvm_gfn_ring dirty_ring;
+#endif
 };
 
 static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
@@ -431,6 +436,11 @@  struct kvm {
 	struct list_head devices;
 	struct dentry *debugfs_dentry;
 	struct kvm_stat_data **debugfs_stat_data;
+
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	u32 dirty_ring_size;
+	struct kvm_gfn_ring dirty_ring;
+#endif
 };
 
 #define kvm_err(fmt, ...) \
@@ -714,6 +724,8 @@  void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 					gfn_t gfn_offset,
 					unsigned long mask);
 
+void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask);
+
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 				struct kvm_dirty_log *log);
 
diff --git a/virt/kvm/gfn_ring.c b/virt/kvm/gfn_ring.c
new file mode 100644
index 0000000..cb0f455
--- /dev/null
+++ b/virt/kvm/gfn_ring.c
@@ -0,0 +1,135 @@ 
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/vmalloc.h>
+#include <linux/kvm_gfn_ring.h>
+
+int kvm_gfn_ring_alloc(struct kvm_gfn_ring *gfnring, u32 size, u32 limit)
+{
+	gfnring->dirty_ring = vmalloc(size);
+	if (!gfnring->dirty_ring)
+		return -ENOMEM;
+	memset(gfnring->dirty_ring, 0, size);
+
+	gfnring->size = size/sizeof(struct kvm_dirty_gfn);
+	gfnring->soft_limit = limit;
+	gfnring->dirty_index = 0;
+	gfnring->reset_index = 0;
+	spin_lock_init(&gfnring->lock);
+
+	return 0;
+}
+
+int kvm_gfn_ring_reset(struct kvm *kvm, struct kvm_gfn_ring *gfnring)
+{
+	u32 cur_slot, next_slot;
+	u64 cur_offset, next_offset;
+	unsigned long mask;
+	u32 fetch;
+	int count = 0;
+	struct kvm_dirty_gfn *entry;
+	struct kvm_dirty_ring *ring = gfnring->dirty_ring;
+
+	fetch = READ_ONCE(ring->indices.fetch_index);
+	if (fetch == gfnring->reset_index)
+		return 0;
+
+	entry = &ring->dirty_gfns[gfnring->reset_index &
+			(gfnring->size - 1)];
+	/*
+	 * The ring buffer is shared with userspace, which might mmap
+	 * it and concurrently modify slot and offset.  Userspace must
+	 * not be trusted!  READ_ONCE prevents the compiler from changing
+	 * the values after they've been range-checked (the checks are
+	 * in kvm_reset_dirty_gfn).
+	 */
+	smp_read_barrier_depends();
+	cur_slot = READ_ONCE(entry->slot);
+	cur_offset = READ_ONCE(entry->offset);
+	mask = 1;
+	count++;
+	gfnring->reset_index++;
+	while (gfnring->reset_index != fetch) {
+		entry = &ring->dirty_gfns[gfnring->reset_index &
+			(gfnring->size - 1)];
+		smp_read_barrier_depends();
+		next_slot = READ_ONCE(entry->slot);
+		next_offset = READ_ONCE(entry->offset);
+		gfnring->reset_index++;
+		count++;
+		/*
+		 * Try to coalesce the reset operations when the guest is
+		 * scanning pages in the same slot.
+		 */
+		if (next_slot == cur_slot) {
+			int delta = next_offset - cur_offset;
+
+			if (delta >= 0 && delta < BITS_PER_LONG) {
+				mask |= 1ull << delta;
+				continue;
+			}
+
+			/* Backwards visit, careful about overflows!  */
+			if (delta > -BITS_PER_LONG && delta < 0 &&
+			    (mask << -delta >> -delta) == mask) {
+				cur_offset = next_offset;
+				mask = (mask << -delta) | 1;
+				continue;
+			}
+		}
+		kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+		cur_slot = next_slot;
+		cur_offset = next_offset;
+		mask = 1;
+	}
+	kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+
+	return count;
+}
+
+int kvm_gfn_ring_push(struct kvm_gfn_ring *gfnring,
+		      u32 slot,
+		      u64 offset,
+		      bool locked)
+{
+	int ret;
+	u16 num;
+	struct kvm_dirty_gfn *entry;
+
+	if (locked)
+		spin_lock(&gfnring->lock);
+
+	num = (u16)(gfnring->dirty_index - gfnring->reset_index);
+	if (num >= gfnring->size) {
+		WARN_ON_ONCE(num > gfnring->size);
+		ret = -EBUSY;
+		goto out;
+	}
+
+	entry = &gfnring->dirty_ring->dirty_gfns[gfnring->dirty_index &
+			(gfnring->size - 1)];
+	entry->slot = slot;
+	entry->offset = offset;
+	smp_wmb();
+	gfnring->dirty_index++;
+	num = gfnring->dirty_index - gfnring->reset_index;
+	gfnring->dirty_ring->indices.avail_index = gfnring->dirty_index;
+	ret = num >= gfnring->soft_limit;
+
+out:
+	if (locked)
+		spin_unlock(&gfnring->lock);
+
+	return ret;
+}
+
+struct page *kvm_gfn_ring_get_page(struct kvm_gfn_ring *ring, u32 i)
+{
+	return vmalloc_to_page((void *)ring->dirty_ring+i*PAGE_SIZE);
+
+}
+
+void kvm_gfn_ring_free(struct kvm_gfn_ring *gfnring)
+{
+	if (gfnring->dirty_ring)
+		vfree(gfnring->dirty_ring);
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 016be4d..9cf4a5e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -63,9 +63,16 @@ 
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
 
+#include <linux/kvm_gfn_ring.h>
+
 /* Worst case buffer size needed for holding an integer. */
 #define ITOA_MAX_LEN 12
 
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+/* some buffer space for the dirty log ring for ring full situations */
+#define DIRTY_RING_BUFFER_ENTRY_NUM 16
+#endif
+
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -121,7 +128,16 @@  static void hardware_disable_all(void);
 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
 
 static void kvm_release_pfn_dirty(kvm_pfn_t pfn);
-static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);
+static void mark_page_dirty_in_slot(struct kvm *kvm,
+				    struct kvm_vcpu *vcpu,
+				    struct kvm_memory_slot *memslot,
+				    gfn_t gfn);
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+static void mark_page_dirty_in_ring(struct kvm *kvm,
+				    struct kvm_vcpu *vcpu,
+				    struct kvm_memory_slot *slot,
+				    gfn_t gfn);
+#endif
 
 __visible bool kvm_rebooting;
 EXPORT_SYMBOL_GPL(kvm_rebooting);
@@ -258,11 +274,36 @@  int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	kvm_vcpu_set_dy_eligible(vcpu, false);
 	vcpu->preempted = false;
 
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	if (kvm->dirty_ring_size) {
+		u32 limit = (kvm->dirty_ring_size /
+			     sizeof(struct kvm_dirty_gfn)) -
+			    DIRTY_RING_BUFFER_ENTRY_NUM -
+			    kvm_cpu_dirty_log_size();
+		r = kvm_gfn_ring_alloc(&vcpu->dirty_ring,
+				       kvm->dirty_ring_size,
+				       limit);
+		if (r) {
+			kvm->dirty_ring_size = 0;
+			goto fail_free_run;
+		}
+	}
+#endif
+
 	r = kvm_arch_vcpu_init(vcpu);
 	if (r < 0)
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+		goto fail_free_ring;
+#else
 		goto fail_free_run;
+#endif
 	return 0;
 
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+fail_free_ring:
+	if (kvm->dirty_ring_size)
+		kvm_gfn_ring_free(&vcpu->dirty_ring);
+#endif
 fail_free_run:
 	free_page((unsigned long)vcpu->run);
 fail:
@@ -275,6 +316,10 @@  void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
 	put_pid(vcpu->pid);
 	kvm_arch_vcpu_uninit(vcpu);
 	free_page((unsigned long)vcpu->run);
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	if (vcpu->kvm->dirty_ring_size)
+		kvm_gfn_ring_free(&vcpu->dirty_ring);
+#endif
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
 
@@ -726,6 +771,10 @@  static void kvm_destroy_vm(struct kvm *kvm)
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kvm_io_bus_destroy(kvm->buses[i]);
 	kvm_coalesced_mmio_free(kvm);
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	if (kvm->dirty_ring_size)
+		kvm_gfn_ring_free(&kvm->dirty_ring);
+#endif
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
 #else
@@ -1861,7 +1910,8 @@  int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
 
-static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
+static int __kvm_write_guest_page(struct kvm *kvm, struct kvm_vcpu *vcpu,
+				  struct kvm_memory_slot *memslot, gfn_t gfn,
 			          const void *data, int offset, int len)
 {
 	int r;
@@ -1873,7 +1923,7 @@  static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
 	r = __copy_to_user((void __user *)addr + offset, data, len);
 	if (r)
 		return -EFAULT;
-	mark_page_dirty_in_slot(memslot, gfn);
+	mark_page_dirty_in_slot(kvm, vcpu, memslot, gfn);
 	return 0;
 }
 
@@ -1882,7 +1932,8 @@  int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
 {
 	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
 
-	return __kvm_write_guest_page(slot, gfn, data, offset, len);
+	return __kvm_write_guest_page(kvm, NULL, slot, gfn, data,
+				      offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_page);
 
@@ -1891,7 +1942,8 @@  int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
 {
 	struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 
-	return __kvm_write_guest_page(slot, gfn, data, offset, len);
+	return __kvm_write_guest_page(vcpu->kvm, vcpu, slot, gfn, data,
+				      offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
 
@@ -1995,7 +2047,7 @@  int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 	r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
 	if (r)
 		return -EFAULT;
-	mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT);
+	mark_page_dirty_in_slot(kvm, NULL, ghc->memslot, gpa >> PAGE_SHIFT);
 
 	return 0;
 }
@@ -2060,12 +2112,17 @@  int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest);
 
-static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot,
+static void mark_page_dirty_in_slot(struct kvm *kvm,
+				    struct kvm_vcpu *vcpu,
+				    struct kvm_memory_slot *memslot,
 				    gfn_t gfn)
 {
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
 
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+		mark_page_dirty_in_ring(kvm, vcpu, memslot, gfn);
+#endif
 		set_bit_le(rel_gfn, memslot->dirty_bitmap);
 	}
 }
@@ -2075,7 +2132,7 @@  void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 	struct kvm_memory_slot *memslot;
 
 	memslot = gfn_to_memslot(kvm, gfn);
-	mark_page_dirty_in_slot(memslot, gfn);
+	mark_page_dirty_in_slot(kvm, NULL, memslot, gfn);
 }
 EXPORT_SYMBOL_GPL(mark_page_dirty);
 
@@ -2084,7 +2141,7 @@  void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
 	struct kvm_memory_slot *memslot;
 
 	memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
-	mark_page_dirty_in_slot(memslot, gfn);
+	mark_page_dirty_in_slot(vcpu->kvm, vcpu, memslot, gfn);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
 
@@ -2363,6 +2420,13 @@  static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
 		page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
 #endif
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	else if ((vmf->pgoff >= KVM_DIRTY_LOG_PAGE_OFFSET) &&
+		 (vmf->pgoff < KVM_DIRTY_LOG_PAGE_OFFSET +
+		  vcpu->kvm->dirty_ring_size / PAGE_SIZE))
+		page = kvm_gfn_ring_get_page(&vcpu->dirty_ring,
+				vmf->pgoff - KVM_DIRTY_LOG_PAGE_OFFSET);
+#endif
 	else
 		return kvm_arch_vcpu_fault(vcpu, vmf);
 	get_page(page);
@@ -2946,14 +3010,120 @@  static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 }
 
 #ifdef KVM_DIRTY_LOG_PAGE_OFFSET
-static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, __u32 size)
+static void mark_page_dirty_in_ring(struct kvm *kvm,
+				    struct kvm_vcpu *vcpu,
+				    struct kvm_memory_slot *slot,
+				    gfn_t gfn)
 {
-	return -EINVAL;
+	struct kvm_gfn_ring *gfnlist;
+	u32 as_id = 0;
+	u64 offset;
+	struct kvm_vcpu *exit_vcpu = vcpu;
+	int ret;
+	bool locked;
+
+	if (!kvm->dirty_ring_size)
+		return;
+
+	offset = gfn - slot->base_gfn;
+
+	if (test_bit_le(offset, slot->dirty_bitmap))
+		return;
+
+	if (vcpu)
+		as_id = kvm_arch_vcpu_memslots_id(vcpu);
+
+	locked = (vcpu == NULL);
+
+	if (vcpu)
+		gfnlist = &vcpu->dirty_ring;
+	else
+		gfnlist = &kvm->dirty_ring;
+
+	ret = kvm_gfn_ring_push(gfnlist, (as_id << 16)|slot->id,
+		offset, locked);
+	if (ret < 0) {
+		if (vcpu)
+			WARN_ONCE(1, "vcpu %d dirty log overflow\n",
+				vcpu->vcpu_id);
+		else
+			WARN_ONCE(1, "global dirty log overflow\n");
+		return;
+	}
+
+	if (ret) {
+		if (!exit_vcpu)
+			exit_vcpu = kvm->vcpus[0];
+		kvm_make_request(KVM_REQ_EXIT_DIRTY_LOG_FULL, exit_vcpu);
+	}
+}
+
+void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
+{
+	struct kvm_memory_slot *memslot;
+	int as_id, id;
+
+	as_id = slot >> 16;
+	id = (u16)slot;
+	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
+		return;
+
+	memslot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
+	if (offset >= memslot->npages)
+		return;
+
+	spin_lock(&kvm->mmu_lock);
+	kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask);
+	spin_unlock(&kvm->mmu_lock);
+
+	while (mask) {
+		clear_bit_le(offset + __ffs(mask), memslot->dirty_bitmap);
+		mask &= mask - 1;
+	}
+}
+
+static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, u32 size)
+{
+	int r;
+	u32 limit;
+
+	/* the size should be power of 2 */
+	if (!size || (size & (size - 1)))
+		return -EINVAL;
+
+	kvm->dirty_ring_size = size;
+	limit = (size/sizeof(struct kvm_dirty_gfn)) -
+		DIRTY_RING_BUFFER_ENTRY_NUM;
+	r = kvm_gfn_ring_alloc(&kvm->dirty_ring, size, limit);
+	if (r) {
+		kvm_put_kvm(kvm);
+		return r;
+	}
+	return 0;
 }
 
 static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm)
 {
-	return -EINVAL;
+	int i;
+	struct kvm_vcpu *vcpu;
+	int cleared = 0;
+
+	if (!kvm->dirty_ring_size)
+		return -EINVAL;
+
+	mutex_lock(&kvm->slots_lock);
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		cleared += kvm_gfn_ring_reset(kvm, &vcpu->dirty_ring);
+
+	cleared += kvm_gfn_ring_reset(kvm, &kvm->dirty_ring);
+
+	mutex_unlock(&kvm->slots_lock);
+
+	if (cleared)
+		kvm_flush_remote_tlbs(kvm);
+
+	return cleared;
 }
 #endif
 
@@ -3202,6 +3372,29 @@  static long kvm_vm_compat_ioctl(struct file *filp,
 }
 #endif
 
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct kvm *kvm = vma->vm_file->private_data;
+	struct page *page;
+
+	page = kvm_gfn_ring_get_page(&kvm->dirty_ring, vmf->pgoff);
+	get_page(page);
+	vmf->page = page;
+	return 0;
+}
+
+static const struct vm_operations_struct kvm_vm_vm_ops = {
+	.fault = kvm_vm_fault,
+};
+
+static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	vma->vm_ops = &kvm_vm_vm_ops;
+	return 0;
+}
+#endif
+
 static struct file_operations kvm_vm_fops = {
 	.release        = kvm_vm_release,
 	.unlocked_ioctl = kvm_vm_ioctl,
@@ -3209,6 +3402,9 @@  static struct file_operations kvm_vm_fops = {
 	.compat_ioctl   = kvm_vm_compat_ioctl,
 #endif
 	.llseek		= noop_llseek,
+#ifdef KVM_DIRTY_LOG_PAGE_OFFSET
+	.mmap           = kvm_vm_mmap,
+#endif
 };
 
 static int kvm_dev_ioctl_create_vm(unsigned long type)
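
Pieced together from the patch above, the userspace side of the protocol looks
roughly like this: mmap each vCPU's ring from the vCPU fd at
KVM_DIRTY_LOG_PAGE_OFFSET pages (served by kvm_vcpu_fault()), read entries
between fetch_index and avail_index, publish the new fetch_index, then issue
the reset ioctl so the kernel re-protects the harvested pages; when a ring
crosses its soft limit, kvm_make_request(KVM_REQ_EXIT_DIRTY_LOG_FULL) forces a
vcpu exit so userspace can do exactly that. The sketch below is an
approximation: the struct layouts and the ioctl name come from earlier patches
in the series and are assumptions here, not taken verbatim.

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <linux/kvm.h>	/* as extended by patches 1-3 of this series */

	/* Hypothetical mirrors of the series' uapi structures; the real
	 * layouts are defined in the earlier patches and may differ.
	 */
	struct dirty_indices {
		uint32_t avail_index;	/* bumped by kvm_gfn_ring_push() */
		uint32_t fetch_index;	/* advanced by userspace */
	};

	struct dirty_gfn {
		uint32_t pad;
		uint32_t slot;		/* (as_id << 16) | slot id */
		uint64_t offset;	/* gfn offset within the slot */
	};

	/* Harvest one vCPU ring, then ask KVM to re-protect the pages.
	 * entries == ring_bytes / sizeof(struct dirty_gfn), a power of 2.
	 */
	static void harvest(int vm_fd, int vcpu_fd, long page_size,
			    size_t ring_bytes, uint32_t entries)
	{
		void *map = mmap(NULL, ring_bytes, PROT_READ | PROT_WRITE,
				 MAP_SHARED, vcpu_fd,
				 KVM_DIRTY_LOG_PAGE_OFFSET * page_size);
		struct dirty_indices *ix;
		struct dirty_gfn *gfns;
		uint32_t fetch;

		if (map == MAP_FAILED)
			return;

		ix = map;
		gfns = (struct dirty_gfn *)(ix + 1);
		fetch = ix->fetch_index;

		while (fetch != ix->avail_index) {	/* real code needs a read barrier */
			struct dirty_gfn *e = &gfns[fetch++ & (entries - 1)];

			(void)e;	/* ... copy the page named by (e->slot, e->offset) ... */
		}
		ix->fetch_index = fetch;	/* consumed by kvm_gfn_ring_reset() */

		/* Ioctl name assumed from kvm_vm_ioctl_reset_dirty_pages(). */
		ioctl(vm_fd, KVM_RESET_DIRTY_PAGES, 0);
	}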