@@ -1006,10 +1006,15 @@ documentation when it pops into existence).
4.37 KVM_ENABLE_CAP
-Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM
-Architectures: x86 (only KVM_CAP_ENABLE_CAP_VM),
- mips (only KVM_CAP_ENABLE_CAP), ppc, s390
-Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM)
+Capability: KVM_CAP_ENABLE_CAP
+Architectures: mips, ppc, s390
+Type: vcpu ioctl
+Parameters: struct kvm_enable_cap (in)
+Returns: 0 on success; -1 on error
+
+Capability: KVM_CAP_ENABLE_CAP_VM
+Architectures: all
+Type: vcpu ioctl
Parameters: struct kvm_enable_cap (in)
Returns: 0 on success; -1 on error
@@ -3942,3 +3947,69 @@ In order to use SynIC, it has to be activated by setting this
capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this
will disable the use of APIC hardware virtualization even if supported
by the CPU, as it's incompatible with SynIC auto-EOI behavior.
+
+8.3 KVM_CAP_DIRTY_LOG_RING
+
+Architectures: x86
+Parameters: args[0] - size of the dirty log ring
+
+Kernel is capable of tracking dirty memory using rings, which
+are stored in memory regions that can be mmaped into userspace.
+
+There is one dirty ring per vcpu and one global ring.
+
+The dirty ring has the following structure.
+
+struct kvm_dirty_gfn {
+ __u32 pad;
+ __u32 slot; /* as_id | slot_id */
+ __u64 offset;
+};
+
+struct kvm_dirty_list {
+ union {
+ struct {
+ __u16 avail_index; /* set by kernel */
+ __u16 fetch_index; /* set by userspace */
+ } indices;
+ struct kvm_dirty_gfn dirty_gfns[0];
+ };
+};
+
+Userspace calls KVM_ENABLE_CAP ioctl right after KVM_CREATE_VM
+ioctl to enable this capability for the new guest and set the
+size of the rings. The size of the ring should be page aligned
+and be 16 pages at a minimum. The larger the ring, the less
+likely the ring is full and the VM is forced to exit to
+userspace. The optimal size is workload dependent.
+
+After the capability is enabled, userspace mmaps the global
+dirty ring. The per-vcpu dirty ring is mmapped along with kvm_run
+when vcpu is created.
+
+To enable dirty logging with ring, userspace calls
+KVM_SET_USER_MEMORY_REGION ioctls on all the user memory regions
+with KVM_MEM_LOG_DIRTY_PAGES bit set.
+
+To disable dirty logging with ring, userspace calls
+KVM_SET_USER_MEMORY_REGION ioctls on all the user memory regions
+with KVM_MEM_LOG_DIRTY_PAGES bit clear.
+
+Once the dirty logging is enabled, userspace can start harvesting
+dirty pages.
+
+To harvest the dirty pages, userspace accesses the mmaped dirty
+list to read the dirty GFNs up to avail_index and set the
+fetch_index accordingly. Harvest can be done when the guest is
+running or paused. Dirty pages don't need to be harvest all at
+once.
+
+To rearm the dirty traps, userspace calls KVM_RESET_DIRTY_PAGES
+ioctl. This should be done only when the guest is paused and
+all the dirty pages have been harvested.
+
+If one of the dirty lists is full, the guest will exit to userspace
+with the exit reason set to KVM_EXIT_DIRTY_LOG_FULL, and the
+KVM_RUN ioctl will return -EINTR. Once that happens, userspace
+should pause all the vcpus, then harvest all the dirty pages and
+rearm the dirty traps. It can unpause the guest after that.
@@ -507,7 +507,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PPC_UNSET_IRQ:
case KVM_CAP_PPC_IRQ_LEVEL:
case KVM_CAP_ENABLE_CAP:
- case KVM_CAP_ENABLE_CAP_VM:
case KVM_CAP_ONE_REG:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
@@ -1358,8 +1357,8 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
}
-static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
- struct kvm_enable_cap *cap)
+int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+ struct kvm_enable_cap *cap)
{
int r;
@@ -1412,15 +1411,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
- case KVM_ENABLE_CAP:
- {
- struct kvm_enable_cap cap;
- r = -EFAULT;
- if (copy_from_user(&cap, argp, sizeof(cap)))
- goto out;
- r = kvm_vm_ioctl_enable_cap(kvm, &cap);
- break;
- }
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CREATE_SPAPR_TCE_64: {
struct kvm_create_spapr_tce_64 create_tce_64;
@@ -366,7 +366,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_CSS_SUPPORT:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
- case KVM_CAP_ENABLE_CAP_VM:
case KVM_CAP_S390_IRQCHIP:
case KVM_CAP_VM_ATTRIBUTES:
case KVM_CAP_MP_STATE:
@@ -480,7 +479,7 @@ static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
}
}
-static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
int r;
@@ -1232,14 +1231,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_s390_inject_vm(kvm, &s390int);
break;
}
- case KVM_ENABLE_CAP: {
- struct kvm_enable_cap cap;
- r = -EFAULT;
- if (copy_from_user(&cap, argp, sizeof(cap)))
- break;
- r = kvm_vm_ioctl_enable_cap(kvm, &cap);
- break;
- }
case KVM_CREATE_IRQCHIP: {
struct kvm_irq_routing_entry routing;
@@ -2629,7 +2629,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_HYPERV_TIME:
case KVM_CAP_IOAPIC_POLARITY_IGNORED:
case KVM_CAP_TSC_DEADLINE_TIMER:
- case KVM_CAP_ENABLE_CAP_VM:
case KVM_CAP_DISABLE_QUIRKS:
case KVM_CAP_SET_BOOT_CPU_ID:
case KVM_CAP_SPLIT_IRQCHIP:
@@ -3868,8 +3867,8 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
return 0;
}
-static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
- struct kvm_enable_cap *cap)
+int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+ struct kvm_enable_cap *cap)
{
int r;
@@ -4176,15 +4175,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
- case KVM_ENABLE_CAP: {
- struct kvm_enable_cap cap;
-
- r = -EFAULT;
- if (copy_from_user(&cap, argp, sizeof(cap)))
- goto out;
- r = kvm_vm_ioctl_enable_cap(kvm, &cap);
- break;
- }
default:
r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
}
@@ -718,6 +718,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
bool line_status);
+int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+ struct kvm_enable_cap *cap);
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
@@ -871,6 +871,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_S390_USER_INSTR0 130
#define KVM_CAP_MSI_DEVID 131
#define KVM_CAP_PPC_HTM 132
+#define KVM_CAP_DIRTY_LOG_RING 133
#ifdef KVM_CAP_IRQ_ROUTING
@@ -2927,6 +2927,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
#endif
case KVM_CAP_IOEVENTFD_ANY_LENGTH:
case KVM_CAP_CHECK_EXTENSION_VM:
+ case KVM_CAP_ENABLE_CAP_VM:
return 1;
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
case KVM_CAP_IRQ_ROUTING:
@@ -2944,6 +2945,28 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
return kvm_vm_ioctl_check_extension(kvm, arg);
}
+static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, __u32 size)
+{
+ return -EINVAL;
+}
+
+int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+ struct kvm_enable_cap *cap)
+{
+ return -EINVAL;
+}
+
+static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
+ struct kvm_enable_cap *cap)
+{
+ switch (cap->cap) {
+ case KVM_CAP_DIRTY_LOG_RING:
+ return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]);
+ default:
+ return kvm_vm_ioctl_enable_cap(kvm, cap);
+ }
+}
+
static long kvm_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2957,6 +2980,15 @@ static long kvm_vm_ioctl(struct file *filp,
case KVM_CREATE_VCPU:
r = kvm_vm_ioctl_create_vcpu(kvm, arg);
break;
+ case KVM_ENABLE_CAP: {
+ struct kvm_enable_cap cap;
+
+ r = -EFAULT;
+ if (copy_from_user(&cap, argp, sizeof(cap)))
+ goto out;
+ r = kvm_vm_ioctl_enable_cap_generic(kvm, &cap);
+ break;
+ }
case KVM_SET_USER_MEMORY_REGION: {
struct kvm_userspace_memory_region kvm_userspace_mem;
Add support for capabilities that can be enabled in a generic way. Introduce new capability: ring-based dirty memory logging Signed-off-by: Lei Cao <lei.cao@stratus.com> --- Documentation/virtual/kvm/api.txt | 79 +++++++++++++++++++++++++++++++++++++-- arch/powerpc/kvm/powerpc.c | 14 +------ arch/s390/kvm/kvm-s390.c | 11 +----- arch/x86/kvm/x86.c | 14 +------ include/linux/kvm_host.h | 2 + include/uapi/linux/kvm.h | 1 + virt/kvm/kvm_main.c | 32 ++++++++++++++++ 7 files changed, 115 insertions(+), 38 deletions(-)