@@ -6614,6 +6614,24 @@ array field represents return values. The userspace should update the return
values of the SBI call before resuming the VCPU. For more details on the
RISC-V SBI spec, refer to https://github.com/riscv/riscv-sbi-doc.
+::
+
+ /* KVM_EXIT_DIRTY_QUOTA_EXHAUSTED */
+ struct {
+ __u64 count;
+ __u64 quota;
+ } dirty_quota_exit;
+
+If the exit reason is KVM_EXIT_DIRTY_QUOTA_EXHAUSTED, it indicates that the
+VCPU has exhausted its dirty quota. The 'dirty_quota_exit' member of the
+kvm_run structure makes the following information available to userspace:
+
+ 'count' field: the current count of pages dirtied by the VCPU. This count
+ can be skewed depending on the size of the pages accessed by the vCPU.
+ 'quota' field: the dirty quota observed just before the exit to userspace.
+
+Userspace can design a strategy to distribute the VM's overall dirtying
+budget among its vCPUs. Based on that strategy and the current state of
+dirty quota throttling, userspace can decide either to update (increase) the
+quota or to put the VCPU to sleep for some time, as shown in the sketch
+below.
+
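+For illustration, a minimal handler for this exit might look like the sketch
+below. The helper over_dirty_budget() and the knobs QUOTA_INCREMENT and
+THROTTLE_DELAY_US are assumptions of this example; the actual policy is
+entirely up to userspace::
+
+ /* 'run' is the vCPU's mmapped kvm_run structure. */
+ if (run->exit_reason == KVM_EXIT_DIRTY_QUOTA_EXHAUSTED) {
+         /* Throttle the vCPU when it is dirtying faster than its budget. */
+         if (over_dirty_budget(vcpu_id))
+                 usleep(THROTTLE_DELAY_US);
+         /* Grant another slice on top of what has been dirtied so far. */
+         run->dirty_quota = run->dirty_quota_exit.count + QUOTA_INCREMENT;
+ }
+
+For example, granting 256 pages (1 MiB with 4KiB pages) at most once every
+10 ms caps the vCPU's dirty rate at roughly 100 MiB/s.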
::
/* KVM_EXIT_NOTIFY */
@@ -6668,6 +6686,20 @@ values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
::
+ /*
+ * Number of pages the vCPU is allowed to have dirtied over its entire
+ * lifetime. KVM_RUN exits with KVM_EXIT_DIRTY_QUOTA_EXHAUSTED if the quota
+ * is reached/exceeded.
+ */
+ __u64 dirty_quota;
+Please note that enforcing the quota is best effort, as the guest may dirty
+multiple pages before KVM can recheck the quota. However, unless KVM is using
+a hardware-based dirty ring buffer, e.g. Intel's Page Modification Logging,
+KVM will detect quota exhaustion within a handful of dirtied pages. If a
+hardware ring buffer is used, the overrun is bounded by the size of the
+buffer (512 entries for PML). See the sketch below for how userspace might
+arm the quota.
+
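+For illustration, the quota could be armed before (re)entering KVM_RUN as in
+the sketch below; pages_dirtied_so_far and pages_per_slice are assumptions
+of this example::
+
+ /*
+  * The quota is a lifetime count, so each new grant is stacked on top of
+  * what the vCPU has dirtied already (0 for a fresh vCPU, or the last
+  * dirty_quota_exit.count). Writing 0 disables throttling.
+  */
+ run->dirty_quota = pages_dirtied_so_far + pages_per_slice;
+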
+::
};
@@ -151,12 +151,13 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQUEST_NO_ACTION BIT(10)
/*
* Architecture-independent vcpu->requests bit members
- * Bits 4-7 are reserved for more arch-independent bits.
+ * Bits 5-7 are reserved for more arch-independent bits.
*/
#define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UNBLOCK 2
#define KVM_REQ_UNHALT 3
+#define KVM_REQ_DIRTY_QUOTA_EXIT 4
#define KVM_REQUEST_ARCH_BASE 8
/*
@@ -380,6 +381,8 @@ struct kvm_vcpu {
*/
struct kvm_memory_slot *last_used_slot;
u64 last_used_slot_gen;
+
+ /* Snapshot of run->dirty_quota taken when the quota-exit request was made. */
+ u64 dirty_quota;
};
/*
@@ -542,6 +545,21 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
}
+/*
+ * Returns 1 if the vCPU may continue running (no quota is set, or the quota
+ * has not been reached yet), 0 if it must exit to userspace with
+ * KVM_EXIT_DIRTY_QUOTA_EXHAUSTED.
+ */
+static inline int kvm_vcpu_check_dirty_quota(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ u64 dirty_quota = READ_ONCE(run->dirty_quota);
+ u64 pages_dirtied = vcpu->stat.generic.pages_dirtied;
+
+ if (!dirty_quota || (pages_dirtied < dirty_quota))
+ return 1;
+
+ run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED;
+ run->dirty_quota_exit.count = pages_dirtied;
+ run->dirty_quota_exit.quota = dirty_quota;
+ return 0;
+}
+
/*
* Some of the bitops functions do not support too long bitmaps.
* This number must be determined not to exceed such limits.
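The helper above is intended to be called from the arch-specific run loop
once KVM_REQ_DIRTY_QUOTA_EXIT is pending; that call site is not part of this
excerpt. A minimal sketch, assuming an x86-style vcpu_enter_guest() loop in
which r = 0 forces an exit to userspace::

 if (kvm_check_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu)) {
         /*
          * Re-check against the current run->dirty_quota, as userspace
          * may have raised the quota since the request was made.
          */
         r = kvm_vcpu_check_dirty_quota(vcpu);
         if (!r)
                 goto out; /* exit to userspace */
 }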
@@ -118,6 +118,7 @@ struct kvm_vcpu_stat_generic {
u64 halt_poll_fail_hist[HALT_POLL_HIST_COUNT];
u64 halt_wait_hist[HALT_POLL_HIST_COUNT];
u64 blocking;
+ u64 pages_dirtied;
};
#define KVM_STATS_NAME_SIZE 48
@@ -272,6 +272,7 @@ struct kvm_xen_exit {
#define KVM_EXIT_RISCV_SBI 35
#define KVM_EXIT_RISCV_CSR 36
#define KVM_EXIT_NOTIFY 37
+#define KVM_EXIT_DIRTY_QUOTA_EXHAUSTED 38
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -510,6 +511,11 @@ struct kvm_run {
#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0)
__u32 flags;
} notify;
+ /* KVM_EXIT_DIRTY_QUOTA_EXHAUSTED */
+ struct {
+ __u64 count;
+ __u64 quota;
+ } dirty_quota_exit;
/* Fix the size of the union. */
char padding[256];
};
@@ -531,6 +537,12 @@ struct kvm_run {
struct kvm_sync_regs regs;
char padding[SYNC_REGS_SIZE_BYTES];
} s;
+ /*
+ * Number of pages the vCPU is allowed to have dirtied over its entire
+ * lifetime. KVM_RUN exits with KVM_EXIT_DIRTY_QUOTA_EXHAUSTED if the
+ * quota is reached/exceeded.
+ */
+ __u64 dirty_quota;
};
/* for KVM_REGISTER_COALESCED_MMIO / KVM_UNREGISTER_COALESCED_MMIO */
@@ -3298,18 +3298,36 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
}
EXPORT_SYMBOL_GPL(kvm_clear_guest);
+static void kvm_vcpu_is_dirty_quota_exhausted(struct kvm_vcpu *vcpu)
+{
+ u64 dirty_quota = READ_ONCE(vcpu->run->dirty_quota);
+
+ if (!dirty_quota || (vcpu->stat.generic.pages_dirtied < dirty_quota))
+ return;
+
+ /*
+ * Snapshot the quota to report it to userspace. The dirty count will be
+ * captured when the request is processed.
+ */
+ vcpu->dirty_quota = dirty_quota;
+ kvm_make_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu);
+}
+
void mark_page_dirty_in_slot(struct kvm *kvm,
const struct kvm_memory_slot *memslot,
gfn_t gfn)
{
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
-#ifdef CONFIG_HAVE_KVM_DIRTY_RING
if (WARN_ON_ONCE(!vcpu) || WARN_ON_ONCE(vcpu->kvm != kvm))
return;
-#endif
- if (memslot && kvm_slot_dirty_track_enabled(memslot)) {
+ if (!memslot)
+ return;
+
+ /* Count the dirtied page; warn once if the 64-bit counter ever wraps. */
+ WARN_ON_ONCE(!++vcpu->stat.generic.pages_dirtied);
+
+ if (kvm_slot_dirty_track_enabled(memslot)) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
u32 slot = (memslot->as_id << 16) | memslot->id;
@@ -3318,6 +3336,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
slot, rel_gfn);
else
set_bit_le(rel_gfn, memslot->dirty_bitmap);
+
+ kvm_vcpu_is_dirty_quota_exhausted(vcpu);
}
}
EXPORT_SYMBOL_GPL(mark_page_dirty_in_slot);