@@ -6752,6 +6752,23 @@ Please note that the kernel is allowed to use the kvm_run structure as the
primary storage for certain register types. Therefore, the kernel may use the
values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
+::
+
+ /*
+ * Number of bytes the vCPU is allowed to dirty if KVM_CAP_DIRTY_QUOTA is
+ * enabled. KVM_RUN exits with KVM_EXIT_DIRTY_QUOTA_EXHAUSTED if this quota
+ * is exhausted, i.e. dirty_quota_bytes <= 0.
+ */
+ long dirty_quota_bytes;
+
+Please note that enforcing the quota is best effort. The quota is reduced by
+the arch-specific page size whenever a guest page is dirtied, and the guest may
+dirty multiple pages before KVM can recheck the quota, e.g. when PML is enabled.
+
+::
+ };
+
+
6. Capabilities that can be enabled on vCPUs
============================================
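
For context, a minimal userspace sketch of how a VMM might drive this
interface. The refill value and the loop structure are illustrative
assumptions, not part of the ABI; only KVM_CAP_DIRTY_QUOTA,
KVM_EXIT_DIRTY_QUOTA_EXHAUSTED and dirty_quota_bytes come from this series::

	#include <linux/kvm.h>
	#include <string.h>
	#include <sys/ioctl.h>

	/* Illustrative per-iteration allowance; the real value is VMM policy. */
	#define DIRTY_QUOTA_REFILL	(128 * 1024)

	static int enable_dirty_quota(int vm_fd)
	{
		struct kvm_enable_cap cap;

		memset(&cap, 0, sizeof(cap));
		cap.cap = KVM_CAP_DIRTY_QUOTA;
		cap.args[0] = 1;	/* non-zero enables, zero disables */
		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}

	/* run points at the vCPU's mmap'd struct kvm_run. */
	static void run_vcpu(int vcpu_fd, struct kvm_run *run)
	{
		run->dirty_quota_bytes = DIRTY_QUOTA_REFILL;

		for (;;) {
			if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
				break;

			if (run->exit_reason == KVM_EXIT_DIRTY_QUOTA_EXHAUSTED) {
				/*
				 * Throttle the vCPU and/or harvest dirty state
				 * here, then top the quota back up before
				 * re-entering the guest.
				 */
				run->dirty_quota_bytes = DIRTY_QUOTA_REFILL;
				continue;
			}
			/* handle other exit reasons ... */
		}
	}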
@@ -167,6 +167,7 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UNBLOCK 2
#define KVM_REQ_DIRTY_RING_SOFT_FULL 3
+#define KVM_REQ_DIRTY_QUOTA_EXIT 4
#define KVM_REQUEST_ARCH_BASE 8
/*
@@ -801,6 +802,7 @@ struct kvm {
bool dirty_ring_with_bitmap;
bool vm_bugged;
bool vm_dead;
+ bool dirty_quota_enabled;
#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
struct notifier_block pm_notifier;
@@ -1236,6 +1238,9 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+void update_dirty_quota(struct kvm *kvm, unsigned long page_size_bytes);
+#endif
void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
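
The helper takes the dirtied size in bytes rather than assuming PAGE_SIZE so
that arch code can charge the quota at whatever granularity it actually
dirties. A hypothetical sketch of such a caller; KVM_HPAGE_SIZE() and the
surrounding variables stand in for the arch's own helpers and are not part of
this patch::

	/*
	 * Hypothetical arch-side caller: when a writable huge mapping is
	 * installed, charge the full mapping size, since the guest can dirty
	 * the whole range without taking further faults.
	 */
	if (writable)
		update_dirty_quota(vcpu->kvm, KVM_HPAGE_SIZE(level));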
@@ -264,6 +264,7 @@ struct kvm_xen_exit {
#define KVM_EXIT_RISCV_SBI 35
#define KVM_EXIT_RISCV_CSR 36
#define KVM_EXIT_NOTIFY 37
+#define KVM_EXIT_DIRTY_QUOTA_EXHAUSTED 38
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -531,6 +532,12 @@ struct kvm_run {
struct kvm_sync_regs regs;
char padding[SYNC_REGS_SIZE_BYTES];
} s;
+ /*
+ * Number of bytes the vCPU is allowed to dirty if KVM_CAP_DIRTY_QUOTA is
+ * enabled. KVM_RUN exits with KVM_EXIT_DIRTY_QUOTA_EXHAUSTED if this quota
+ * is exhausted, i.e. dirty_quota_bytes <= 0.
+ */
+ long dirty_quota_bytes;
};
/* for KVM_REGISTER_COALESCED_MMIO / KVM_UNREGISTER_COALESCED_MMIO */
@@ -1190,6 +1197,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
#define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
#define KVM_CAP_COUNTER_OFFSET 227
+#define KVM_CAP_DIRTY_QUOTA 228
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1184,6 +1184,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
#define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
+#define KVM_CAP_DIRTY_QUOTA 228
#ifdef KVM_CAP_IRQ_ROUTING
@@ -19,6 +19,9 @@ config HAVE_KVM_IRQ_ROUTING
config HAVE_KVM_DIRTY_RING
bool
+config HAVE_KVM_DIRTY_QUOTA
+ bool
+
# Only strongly ordered architectures can select this, as it doesn't
# put any explicit constraint on userspace ordering. They can also
# select the _ACQ_REL version.
@@ -3307,6 +3307,20 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
}
EXPORT_SYMBOL_GPL(kvm_clear_guest);
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+void update_dirty_quota(struct kvm *kvm, unsigned long page_size_bytes)
+{
+ struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
+
+ if (!vcpu || (vcpu->kvm != kvm) || !READ_ONCE(kvm->dirty_quota_enabled))
+ return;
+
+ vcpu->run->dirty_quota_bytes -= page_size_bytes;
+ if (vcpu->run->dirty_quota_bytes <= 0)
+ kvm_make_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu);
+}
+#endif
+
void mark_page_dirty_in_slot(struct kvm *kvm,
const struct kvm_memory_slot *memslot,
gfn_t gfn)
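
KVM_REQ_DIRTY_QUOTA_EXIT is only requested here; turning it into the actual
userspace exit is left to the arch request-processing path, which is not part
of this hunk. A minimal sketch, assuming x86-style handling where returning 0
from the run loop bounces out to userspace::

	/* In the arch request-processing path, before entering the guest. */
	if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) {
		vcpu->run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED;
		return 0;	/* exit to userspace */
	}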
@@ -3337,6 +3351,9 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
struct kvm_memory_slot *memslot;
memslot = gfn_to_memslot(kvm, gfn);
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+ update_dirty_quota(kvm, PAGE_SIZE);
+#endif
mark_page_dirty_in_slot(kvm, memslot, gfn);
}
EXPORT_SYMBOL_GPL(mark_page_dirty);
@@ -3346,6 +3363,9 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
struct kvm_memory_slot *memslot;
memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+ update_dirty_quota(vcpu->kvm, PAGE_SIZE);
+#endif
mark_page_dirty_in_slot(vcpu->kvm, memslot, gfn);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
@@ -4526,6 +4546,8 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_BINARY_STATS_FD:
case KVM_CAP_SYSTEM_EVENT_DATA:
return 1;
+ case KVM_CAP_DIRTY_QUOTA:
+ return !!IS_ENABLED(CONFIG_HAVE_KVM_DIRTY_QUOTA);
default:
break;
}
@@ -4675,6 +4697,11 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
return r;
}
+#ifdef CONFIG_HAVE_KVM_DIRTY_QUOTA
+ case KVM_CAP_DIRTY_QUOTA:
+ WRITE_ONCE(kvm->dirty_quota_enabled, cap->args[0]);
+ return 0;
+#endif
default:
return kvm_vm_ioctl_enable_cap(kvm, cap);
}