diff mbox series

[v8,1/3] KVM: Implement dirty quota-based throttling of vcpus

Message ID 20230225204758.17726-2-shivam.kumar1@nutanix.com (mailing list archive)
State New, archived
Headers show
Series KVM: Dirty quota-based throttling | expand

Commit Message

Shivam Kumar Feb. 25, 2023, 8:47 p.m. UTC
Define dirty_quota_bytes variable to track and throttle memory
dirtying for every vcpu. This variable stores the number of bytes the
vcpu is allowed to dirty. To dirty more, the vcpu needs to request
more quota by exiting to userspace.

Implement update_dirty_quota function which

i) Decreases dirty_quota_bytes by arch-specific page size whenever a
page is dirtied.
ii) Raises a KVM request KVM_REQ_DIRTY_QUOTA_EXIT whenever the dirty
quota is exhausted (i.e. dirty_quota_bytes <= 0).

Suggested-by: Shaju Abraham <shaju.abraham@nutanix.com>
Suggested-by: Manish Mishra <manish.mishra@nutanix.com>
Co-developed-by: Anurag Madnawat <anurag.madnawat@nutanix.com>
Signed-off-by: Anurag Madnawat <anurag.madnawat@nutanix.com>
Signed-off-by: Shivam Kumar <shivam.kumar1@nutanix.com>
---
 Documentation/virt/kvm/api.rst | 17 +++++++++++++++++
 include/linux/kvm_host.h       |  5 +++++
 include/uapi/linux/kvm.h       |  8 ++++++++
 tools/include/uapi/linux/kvm.h |  1 +
 virt/kvm/Kconfig               |  3 +++
 virt/kvm/kvm_main.c            | 31 +++++++++++++++++++++++++++++++
 6 files changed, 65 insertions(+)

Comments

Marc Zyngier Feb. 27, 2023, 1:49 a.m. UTC | #1
On Sat, 25 Feb 2023 20:47:57 +0000,
Shivam Kumar <shivam.kumar1@nutanix.com> wrote:
> 
> Define dirty_quota_bytes variable to track and throttle memory
> dirtying for every vcpu. This variable stores the number of bytes the
> vcpu is allowed to dirty. To dirty more, the vcpu needs to request
> more quota by exiting to userspace.
> 
> Implement update_dirty_quota function which
> 
> i) Decreases dirty_quota_bytes by arch-specific page size whenever a
> page is dirtied.
> ii) Raises a KVM request KVM_REQ_DIRTY_QUOTA_EXIT whenever the dirty
> quota is exhausted (i.e. dirty_quota_bytes <= 0).
> 
> Suggested-by: Shaju Abraham <shaju.abraham@nutanix.com>
> Suggested-by: Manish Mishra <manish.mishra@nutanix.com>
> Co-developed-by: Anurag Madnawat <anurag.madnawat@nutanix.com>
> Signed-off-by: Anurag Madnawat <anurag.madnawat@nutanix.com>
> Signed-off-by: Shivam Kumar <shivam.kumar1@nutanix.com>
> ---
>  Documentation/virt/kvm/api.rst | 17 +++++++++++++++++
>  include/linux/kvm_host.h       |  5 +++++
>  include/uapi/linux/kvm.h       |  8 ++++++++
>  tools/include/uapi/linux/kvm.h |  1 +
>  virt/kvm/Kconfig               |  3 +++
>  virt/kvm/kvm_main.c            | 31 +++++++++++++++++++++++++++++++
>  6 files changed, 65 insertions(+)
> 
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 62de0768d6aa..3a283fe212d8 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -6688,6 +6688,23 @@ Please note that the kernel is allowed to use the kvm_run structure as the
>  primary storage for certain register types. Therefore, the kernel may use the
>  values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
>  
> +::
> +
> +	/*
> +	 * Number of bytes the vCPU is allowed to dirty if KVM_CAP_DIRTY_QUOTA is
> +	 * enabled. KVM_RUN exits with KVM_EXIT_DIRTY_QUOTA_EXHAUSTED if this quota
> +	 * is exhausted, i.e. dirty_quota_bytes <= 0.
> +	 */
> +	long dirty_quota_bytes;
> +
> +Please note that enforcing the quota is best effort. Dirty quota is reduced by
> +arch-specific page size when any guest page is dirtied. Also, the guest may dirty
> +multiple pages before KVM can recheck the quota.

What are the events that trigger such quota reduction?

> +
> +::
> +  };
> +
> +
>  
>  6. Capabilities that can be enabled on vCPUs
>  ============================================
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 8ada23756b0e..f5ce343c64f2 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -167,6 +167,7 @@ static inline bool is_error_page(struct page *page)
>  #define KVM_REQ_VM_DEAD			(1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
>  #define KVM_REQ_UNBLOCK			2
>  #define KVM_REQ_DIRTY_RING_SOFT_FULL	3
> +#define KVM_REQ_DIRTY_QUOTA_EXIT	4
>  #define KVM_REQUEST_ARCH_BASE		8
>  
>  /*
> @@ -800,6 +801,9 @@ struct kvm {
>  	bool dirty_ring_with_bitmap;
>  	bool vm_bugged;
>  	bool vm_dead;
> +#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
> +	bool dirty_quota_enabled;
> +#endif
>  
>  #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
>  	struct notifier_block pm_notifier;
> @@ -1235,6 +1239,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
>  bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
>  bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
>  unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
> +void update_dirty_quota(struct kvm *kvm, unsigned long page_size_bytes);
>  void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn);
>  void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
>  
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index d77aef872a0a..ddb9d3d797c4 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -264,6 +264,7 @@ struct kvm_xen_exit {
>  #define KVM_EXIT_RISCV_SBI        35
>  #define KVM_EXIT_RISCV_CSR        36
>  #define KVM_EXIT_NOTIFY           37
> +#define KVM_EXIT_DIRTY_QUOTA_EXHAUSTED 38
>  
>  /* For KVM_EXIT_INTERNAL_ERROR */
>  /* Emulate instruction failed. */
> @@ -526,6 +527,12 @@ struct kvm_run {
>  		struct kvm_sync_regs regs;
>  		char padding[SYNC_REGS_SIZE_BYTES];
>  	} s;
> +	/*
> +	 * Number of bytes the vCPU is allowed to dirty if KVM_CAP_DIRTY_QUOTA is
> +	 * enabled. KVM_RUN exits with KVM_EXIT_DIRTY_QUOTA_EXHAUSTED if this quota
> +	 * is exhausted, i.e. dirty_quota_bytes <= 0.
> +	 */
> +	long dirty_quota_bytes;
>  };
>  
>  /* for KVM_REGISTER_COALESCED_MMIO / KVM_UNREGISTER_COALESCED_MMIO */
> @@ -1184,6 +1191,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
>  #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
>  #define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
> +#define KVM_CAP_DIRTY_QUOTA 227
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
> index 55155e262646..48f236e2b836 100644
> --- a/tools/include/uapi/linux/kvm.h
> +++ b/tools/include/uapi/linux/kvm.h
> @@ -1175,6 +1175,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
>  #define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
>  #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
> +#define KVM_CAP_DIRTY_QUOTA 227
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> index b74916de5183..ccaa332d88f9 100644
> --- a/virt/kvm/Kconfig
> +++ b/virt/kvm/Kconfig
> @@ -19,6 +19,9 @@ config HAVE_KVM_IRQ_ROUTING
>  config HAVE_KVM_DIRTY_RING
>         bool
>  
> +config HAVE_KVM_DIRTY_QUOTA
> +       bool
> +
>  # Only strongly ordered architectures can select this, as it doesn't
>  # put any explicit constraint on userspace ordering. They can also
>  # select the _ACQ_REL version.
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index d255964ec331..744b955514ce 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -3096,6 +3096,9 @@ static int __kvm_write_guest_page(struct kvm *kvm,
>  	r = __copy_to_user((void __user *)addr + offset, data, len);
>  	if (r)
>  		return -EFAULT;
> +#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
> +	update_dirty_quota(kvm, PAGE_SIZE);
> +#endif

Why PAGE_SIZE? Why not 'len'? Why if the page was already dirtied? Why
should it be accounted for multiple times? In most cases, this is the
*hypervisor* writing to the guest, not the vcpu. Why should this be
accounted to the vcpu quota?

	M.
Shivam Kumar March 4, 2023, 9:58 a.m. UTC | #2
On 27/02/23 7:19 am, Marc Zyngier wrote:
> On Sat, 25 Feb 2023 20:47:57 +0000,
> Shivam Kumar <shivam.kumar1@nutanix.com> wrote:
>>
>> Define dirty_quota_bytes variable to track and throttle memory
>> dirtying for every vcpu. This variable stores the number of bytes the
>> vcpu is allowed to dirty. To dirty more, the vcpu needs to request
>> more quota by exiting to userspace.
>>
>> Implement update_dirty_quota function which
>>
>> i) Decreases dirty_quota_bytes by arch-specific page size whenever a
>> page is dirtied.
>> ii) Raises a KVM request KVM_REQ_DIRTY_QUOTA_EXIT whenever the dirty
>> quota is exhausted (i.e. dirty_quota_bytes <= 0).
>>
>> Suggested-by: Shaju Abraham <shaju.abraham@nutanix.com>
>> Suggested-by: Manish Mishra <manish.mishra@nutanix.com>
>> Co-developed-by: Anurag Madnawat <anurag.madnawat@nutanix.com>
>> Signed-off-by: Anurag Madnawat <anurag.madnawat@nutanix.com>
>> Signed-off-by: Shivam Kumar <shivam.kumar1@nutanix.com>
>> ---
>>   Documentation/virt/kvm/api.rst | 17 +++++++++++++++++
>>   include/linux/kvm_host.h       |  5 +++++
>>   include/uapi/linux/kvm.h       |  8 ++++++++
>>   tools/include/uapi/linux/kvm.h |  1 +
>>   virt/kvm/Kconfig               |  3 +++
>>   virt/kvm/kvm_main.c            | 31 +++++++++++++++++++++++++++++++
>>   6 files changed, 65 insertions(+)
>>
>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
>> index 62de0768d6aa..3a283fe212d8 100644
>> --- a/Documentation/virt/kvm/api.rst
>> +++ b/Documentation/virt/kvm/api.rst
>> @@ -6688,6 +6688,23 @@ Please note that the kernel is allowed to use the kvm_run structure as the
>>   primary storage for certain register types. Therefore, the kernel may use the
>>   values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
>>   
>> +::
>> +
>> +	/*
>> +	 * Number of bytes the vCPU is allowed to dirty if KVM_CAP_DIRTY_QUOTA is
>> +	 * enabled. KVM_RUN exits with KVM_EXIT_DIRTY_QUOTA_EXHAUSTED if this quota
>> +	 * is exhausted, i.e. dirty_quota_bytes <= 0.
>> +	 */
>> +	long dirty_quota_bytes;
>> +
>> +Please note that enforcing the quota is best effort. Dirty quota is reduced by
>> +arch-specific page size when any guest page is dirtied. Also, the guest may dirty
>> +multiple pages before KVM can recheck the quota.
> 
> What are the events that trigger such quota reduction?

When PML is enabled, or when functions like nested_mark_vmcs12_pages_dirty
are called, multiple pages can be marked dirty in a single exit.

Thanks.

> 
>> +
>> +::
>> +  };
>> +
>> +
>>   
>>   6. Capabilities that can be enabled on vCPUs
>>   ============================================
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 8ada23756b0e..f5ce343c64f2 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -167,6 +167,7 @@ static inline bool is_error_page(struct page *page)
>>   #define KVM_REQ_VM_DEAD			(1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
>>   #define KVM_REQ_UNBLOCK			2
>>   #define KVM_REQ_DIRTY_RING_SOFT_FULL	3
>> +#define KVM_REQ_DIRTY_QUOTA_EXIT	4
>>   #define KVM_REQUEST_ARCH_BASE		8
>>   
>>   /*
>> @@ -800,6 +801,9 @@ struct kvm {
>>   	bool dirty_ring_with_bitmap;
>>   	bool vm_bugged;
>>   	bool vm_dead;
>> +#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
>> +	bool dirty_quota_enabled;
>> +#endif
>>   
>>   #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
>>   	struct notifier_block pm_notifier;
>> @@ -1235,6 +1239,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
>>   bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
>>   bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
>>   unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
>> +void update_dirty_quota(struct kvm *kvm, unsigned long page_size_bytes);
>>   void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn);
>>   void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
>>   
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index d77aef872a0a..ddb9d3d797c4 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -264,6 +264,7 @@ struct kvm_xen_exit {
>>   #define KVM_EXIT_RISCV_SBI        35
>>   #define KVM_EXIT_RISCV_CSR        36
>>   #define KVM_EXIT_NOTIFY           37
>> +#define KVM_EXIT_DIRTY_QUOTA_EXHAUSTED 38
>>   
>>   /* For KVM_EXIT_INTERNAL_ERROR */
>>   /* Emulate instruction failed. */
>> @@ -526,6 +527,12 @@ struct kvm_run {
>>   		struct kvm_sync_regs regs;
>>   		char padding[SYNC_REGS_SIZE_BYTES];
>>   	} s;
>> +	/*
>> +	 * Number of bytes the vCPU is allowed to dirty if KVM_CAP_DIRTY_QUOTA is
>> +	 * enabled. KVM_RUN exits with KVM_EXIT_DIRTY_QUOTA_EXHAUSTED if this quota
>> +	 * is exhausted, i.e. dirty_quota_bytes <= 0.
>> +	 */
>> +	long dirty_quota_bytes;
>>   };
>>   
>>   /* for KVM_REGISTER_COALESCED_MMIO / KVM_UNREGISTER_COALESCED_MMIO */
>> @@ -1184,6 +1191,7 @@ struct kvm_ppc_resize_hpt {
>>   #define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
>>   #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
>>   #define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
>> +#define KVM_CAP_DIRTY_QUOTA 227
>>   
>>   #ifdef KVM_CAP_IRQ_ROUTING
>>   
>> diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
>> index 55155e262646..48f236e2b836 100644
>> --- a/tools/include/uapi/linux/kvm.h
>> +++ b/tools/include/uapi/linux/kvm.h
>> @@ -1175,6 +1175,7 @@ struct kvm_ppc_resize_hpt {
>>   #define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
>>   #define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
>>   #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
>> +#define KVM_CAP_DIRTY_QUOTA 227
>>   
>>   #ifdef KVM_CAP_IRQ_ROUTING
>>   
>> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
>> index b74916de5183..ccaa332d88f9 100644
>> --- a/virt/kvm/Kconfig
>> +++ b/virt/kvm/Kconfig
>> @@ -19,6 +19,9 @@ config HAVE_KVM_IRQ_ROUTING
>>   config HAVE_KVM_DIRTY_RING
>>          bool
>>   
>> +config HAVE_KVM_DIRTY_QUOTA
>> +       bool
>> +
>>   # Only strongly ordered architectures can select this, as it doesn't
>>   # put any explicit constraint on userspace ordering. They can also
>>   # select the _ACQ_REL version.
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index d255964ec331..744b955514ce 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -3096,6 +3096,9 @@ static int __kvm_write_guest_page(struct kvm *kvm,
>>   	r = __copy_to_user((void __user *)addr + offset, data, len);
>>   	if (r)
>>   		return -EFAULT;
>> +#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
>> +	update_dirty_quota(kvm, PAGE_SIZE);
>> +#endif
> 
> Why PAGE_SIZE? Why not 'len'? Why if the page was already dirtied? Why
> should it be accounted for multiple times? In most cases, this is the
> *hypervisor* writing to the guest, not the vcpu. Why should this be
> accounted to the vcpu quota?

Agreed, update doesn't make much sense here. Thanks.

Thanks,
Shivam
diff mbox series

Patch

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 62de0768d6aa..3a283fe212d8 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6688,6 +6688,23 @@  Please note that the kernel is allowed to use the kvm_run structure as the
 primary storage for certain register types. Therefore, the kernel may use the
 values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
 
+::
+
+	/*
+	 * Number of bytes the vCPU is allowed to dirty if KVM_CAP_DIRTY_QUOTA is
+	 * enabled. KVM_RUN exits with KVM_EXIT_DIRTY_QUOTA_EXHAUSTED if this quota
+	 * is exhausted, i.e. dirty_quota_bytes <= 0.
+	 */
+	long dirty_quota_bytes;
+
+Please note that enforcing the quota is best effort. Dirty quota is reduced by
+arch-specific page size when any guest page is dirtied. Also, the guest may dirty
+multiple pages before KVM can recheck the quota.
+
+::
+  };
+
+
 
 6. Capabilities that can be enabled on vCPUs
 ============================================
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8ada23756b0e..f5ce343c64f2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -167,6 +167,7 @@  static inline bool is_error_page(struct page *page)
 #define KVM_REQ_VM_DEAD			(1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_UNBLOCK			2
 #define KVM_REQ_DIRTY_RING_SOFT_FULL	3
+#define KVM_REQ_DIRTY_QUOTA_EXIT	4
 #define KVM_REQUEST_ARCH_BASE		8
 
 /*
@@ -800,6 +801,9 @@  struct kvm {
 	bool dirty_ring_with_bitmap;
 	bool vm_bugged;
 	bool vm_dead;
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+	bool dirty_quota_enabled;
+#endif
 
 #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
 	struct notifier_block pm_notifier;
@@ -1235,6 +1239,7 @@  struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
 bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
+void update_dirty_quota(struct kvm *kvm, unsigned long page_size_bytes);
 void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d77aef872a0a..ddb9d3d797c4 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -264,6 +264,7 @@  struct kvm_xen_exit {
 #define KVM_EXIT_RISCV_SBI        35
 #define KVM_EXIT_RISCV_CSR        36
 #define KVM_EXIT_NOTIFY           37
+#define KVM_EXIT_DIRTY_QUOTA_EXHAUSTED 38
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -526,6 +527,12 @@  struct kvm_run {
 		struct kvm_sync_regs regs;
 		char padding[SYNC_REGS_SIZE_BYTES];
 	} s;
+	/*
+	 * Number of bytes the vCPU is allowed to dirty if KVM_CAP_DIRTY_QUOTA is
+	 * enabled. KVM_RUN exits with KVM_EXIT_DIRTY_QUOTA_EXHAUSTED if this quota
+	 * is exhausted, i.e. dirty_quota_bytes <= 0.
+	 */
+	long dirty_quota_bytes;
 };
 
 /* for KVM_REGISTER_COALESCED_MMIO / KVM_UNREGISTER_COALESCED_MMIO */
@@ -1184,6 +1191,7 @@  struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
 #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
 #define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
+#define KVM_CAP_DIRTY_QUOTA 227
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 55155e262646..48f236e2b836 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1175,6 +1175,7 @@  struct kvm_ppc_resize_hpt {
 #define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
 #define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
 #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
+#define KVM_CAP_DIRTY_QUOTA 227
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index b74916de5183..ccaa332d88f9 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -19,6 +19,9 @@  config HAVE_KVM_IRQ_ROUTING
 config HAVE_KVM_DIRTY_RING
        bool
 
+config HAVE_KVM_DIRTY_QUOTA
+       bool
+
 # Only strongly ordered architectures can select this, as it doesn't
 # put any explicit constraint on userspace ordering. They can also
 # select the _ACQ_REL version.
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d255964ec331..744b955514ce 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3096,6 +3096,9 @@  static int __kvm_write_guest_page(struct kvm *kvm,
 	r = __copy_to_user((void __user *)addr + offset, data, len);
 	if (r)
 		return -EFAULT;
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+	update_dirty_quota(kvm, PAGE_SIZE);
+#endif
 	mark_page_dirty_in_slot(kvm, memslot, gfn);
 	return 0;
 }
@@ -3234,6 +3237,9 @@  int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 	r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
 	if (r)
 		return -EFAULT;
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+	update_dirty_quota(kvm, PAGE_SIZE);
+#endif
 	mark_page_dirty_in_slot(kvm, ghc->memslot, gpa >> PAGE_SHIFT);
 
 	return 0;
@@ -3304,6 +3310,18 @@  int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest);
 
+void update_dirty_quota(struct kvm *kvm, unsigned long page_size_bytes)
+{
+	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
+
+	if (!vcpu || (vcpu->kvm != kvm) || !READ_ONCE(kvm->dirty_quota_enabled))
+		return;
+
+	vcpu->run->dirty_quota_bytes -= page_size_bytes;
+	if (vcpu->run->dirty_quota_bytes <= 0)
+		kvm_make_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu);
+}
+
 void mark_page_dirty_in_slot(struct kvm *kvm,
 			     const struct kvm_memory_slot *memslot,
 		 	     gfn_t gfn)
@@ -3334,6 +3352,9 @@  void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 	struct kvm_memory_slot *memslot;
 
 	memslot = gfn_to_memslot(kvm, gfn);
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+	update_dirty_quota(kvm, PAGE_SIZE);
+#endif
 	mark_page_dirty_in_slot(kvm, memslot, gfn);
 }
 EXPORT_SYMBOL_GPL(mark_page_dirty);
@@ -3343,6 +3364,9 @@  void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
 	struct kvm_memory_slot *memslot;
 
 	memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+	update_dirty_quota(vcpu->kvm, PAGE_SIZE);
+#endif
 	mark_page_dirty_in_slot(vcpu->kvm, memslot, gfn);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
@@ -4524,6 +4548,8 @@  static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 	case KVM_CAP_BINARY_STATS_FD:
 	case KVM_CAP_SYSTEM_EVENT_DATA:
 		return 1;
+	case KVM_CAP_DIRTY_QUOTA:
+		return !!IS_ENABLED(CONFIG_HAVE_KVM_DIRTY_QUOTA);
 	default:
 		break;
 	}
@@ -4673,6 +4699,11 @@  static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
 
 		return r;
 	}
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+	case KVM_CAP_DIRTY_QUOTA:
+		WRITE_ONCE(kvm->dirty_quota_enabled, cap->args[0]);
+		return 0;
+#endif
 	default:
 		return kvm_vm_ioctl_enable_cap(kvm, cap);
 	}