new file mode 100644
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARM64_PAGE_TRACKING_DEVICE_H
+#define _ARM64_PAGE_TRACKING_DEVICE_H
+
+#include <linux/types.h>
+#include <linux/kvm_types.h>
+
+/*
+ * Page tracking mode requested for a per-VM tracker.
+ * Dirty page tracking is the only mode defined here.
+ */
+enum pt_mode {
+ dirty_pages, /* Track dirtied pages (dirty logging) */
+};
+
+/*
+ * Configuration of a per-VM page tracker. It is passed by value to
+ * page_tracking_allocate() and to the device's allocate_tracker callback.
+ */
+struct pt_config {
+ enum pt_mode mode; /* Tracking mode */
+ u32 vmid; /* VMID to track */
+};
+
+/*
+ * Operations provided by a page tracking device.
+ *
+ * @ctx in the callbacks below is the tracking context handed out by
+ * ->allocate_tracker(). A @cpu value of -1 stands for "all CPUs".
+ */
+struct page_tracking_device {
+	/* Allocate a per-VM tracker for @config and return its tracking context. */
+	void *(*allocate_tracker)(struct pt_config config);
+
+	/* Release the per-VM tracker identified by @ctx. */
+	int (*release_tracker)(void *ctx);
+
+	/*
+	 * Enable tracking for @ctx on @cpu (-1: on all CPUs).
+	 *
+	 * This may be called several times for the same @ctx and @cpu; the
+	 * implementation has to reference count the calls so that tracking
+	 * is disabled correctly later on.
+	 *
+	 * Returns 0 on success or a negative errno on failure.
+	 */
+	int (*enable_tracking)(void *ctx, int cpu);
+
+	/*
+	 * Drop one enable reference for @ctx and @cpu.
+	 *
+	 * Tracking is really switched off only when the number of disable
+	 * calls matches the number of enable calls, i.e. when the reference
+	 * counter reaches 0.
+	 *
+	 * Returns 0 when tracking has been disabled, -EBUSY while the
+	 * reference counter is still above 0, or a negative errno on failure.
+	 */
+	int (*disable_tracking)(void *ctx, int cpu);
+
+	/*
+	 * Flush any tracking data available for @ctx.
+	 *
+	 * Returns 0 on success or a negative errno on failure.
+	 */
+	int (*flush)(void *ctx);
+
+	/*
+	 * Read up to @max dirty pages available for @ctx. When @cpu is not
+	 * -1, only pages dirtied by that CPU are read.
+	 *
+	 * NOTE(review): @pages is presumably the caller's buffer of at least
+	 * @max entries that receives the page addresses - confirm with the
+	 * driver implementation.
+	 *
+	 * Returns the number of pages read or a negative errno on failure.
+	 */
+	int (*read_dirty_pages)(void *ctx, int cpu, gpa_t *pages, u32 max);
+};
+
+/*
+ * Page tracking device tear-down, bring-up and existence checks.
+ *
+ * page_tracking_device_register() returns -EBUSY while another device is
+ * registered. page_tracking_device_registered() returns non-zero when a
+ * device is registered. With CONFIG_HAVE_KVM_PAGE_TRACKING_DEVICE disabled
+ * these are stubs that return 0.
+ */
+void page_tracking_device_unregister(struct page_tracking_device *pt_dev);
+int page_tracking_device_register(struct page_tracking_device *pt_dev);
+int page_tracking_device_registered(void);
+
+/*
+ * Page tracking device wrappers: each one forwards to the matching callback
+ * of the registered struct page_tracking_device. page_tracking_allocate()
+ * returns NULL when no device is registered.
+ */
+void *page_tracking_allocate(struct pt_config config);
+int page_tracking_release(void *ctx);
+int page_tracking_enable(void *ctx, int cpu);
+int page_tracking_disable(void *ctx, int cpu);
+int page_tracking_flush(void *ctx);
+int page_tracking_read_dirty_pages(void *ctx, int cpu, gpa_t *pages, u32 max);
+
+#endif /* _ARM64_PAGE_TRACKING_DEVICE_H */
@@ -66,4 +66,16 @@ config PROTECTED_NVHE_STACKTRACE
If unsure, or not using protected nVHE (pKVM), say N.
+config HAVE_KVM_PAGE_TRACKING_DEVICE
+ bool "Support for hardware accelerated dirty tracking"
+ default n
+ help
+ Say Y to enable hardware-accelerated dirty page tracking.
+
+ Adds support for hardware-accelerated dirty page tracking during live
+ migration of a virtual machine. Requires a hardware accelerator that
+ implements the page tracking device interface.
+
+ If the required hardware is not present, say N.
+
+
endif # VIRTUALIZATION
@@ -18,6 +18,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
guest.o debug.o reset.o sys_regs.o stacktrace.o \
vgic-sys-reg-v3.o fpsimd.o pkvm.o \
arch_timer.o trng.o vmid.o emulate-nested.o nested.o \
+ page_tracking.o \
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
vgic/vgic-v3.o vgic/vgic-v4.o \
new file mode 100644
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/page_tracking.h>
+#include <linux/mutex.h>
+#include <linux/rcupdate.h>
+
+#ifndef CONFIG_HAVE_KVM_PAGE_TRACKING_DEVICE
+
+/*
+ * Page tracking device support is compiled out: registration is a no-op
+ * that reports success, no device is ever reported as registered, and every
+ * wrapper returns 0 (NULL for the allocator) without doing any work.
+ */
+int page_tracking_device_register(struct page_tracking_device *dev)
+{
+	return 0;
+}
+
+void page_tracking_device_unregister(struct page_tracking_device *dev)
+{
+}
+
+int page_tracking_device_registered(void)
+{
+	return 0;
+}
+
+void *page_tracking_allocate(struct pt_config config)
+{
+	return NULL;
+}
+
+int page_tracking_release(void *ctx)
+{
+	return 0;
+}
+
+int page_tracking_enable(void *ctx, int cpu)
+{
+	return 0;
+}
+
+int page_tracking_disable(void *ctx, int cpu)
+{
+	return 0;
+}
+
+int page_tracking_flush(void *ctx)
+{
+	return 0;
+}
+
+int page_tracking_read_dirty_pages(void *ctx, int cpu, gpa_t *pages, u32 max)
+{
+	return 0;
+}
+
+#else
+
+/* Serializes registration and unregistration of the page tracking device. */
+static DEFINE_MUTEX(page_tracking_device_mutex);
+
+/*
+ * The single registered page tracking device, or NULL when there is none.
+ * Updated with page_tracking_device_mutex held, read through RCU accessors.
+ */
+static struct page_tracking_device __rcu *pt_dev __read_mostly;
+
+/*
+ * Registers @dev as the page tracking device.
+ *
+ * The page_tracking_*() wrappers in this file call every callback of the
+ * registered device unconditionally, so a NULL @dev or a device with a
+ * missing callback is rejected here instead of crashing later.
+ *
+ * Returns 0 on success, -EINVAL if @dev is NULL or incomplete, and -EBUSY
+ * if a device is already registered.
+ */
+int page_tracking_device_register(struct page_tracking_device *dev)
+{
+	int rc = 0;
+
+	if (!dev || !dev->allocate_tracker || !dev->release_tracker ||
+	    !dev->enable_tracking || !dev->disable_tracking ||
+	    !dev->flush || !dev->read_dirty_pages)
+		return -EINVAL;
+
+	mutex_lock(&page_tracking_device_mutex);
+
+	/* Only one device can be registered at a time. */
+	if (rcu_dereference_protected(pt_dev,
+				      lockdep_is_held(&page_tracking_device_mutex)))
+		rc = -EBUSY;
+	else
+		rcu_assign_pointer(pt_dev, dev);
+
+	mutex_unlock(&page_tracking_device_mutex);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(page_tracking_device_register);
+
+/*
+ * Unregisters @dev if it is the currently registered page tracking device;
+ * otherwise does nothing. After clearing the pointer it waits for an RCU
+ * grace period (synchronize_rcu()) before returning, so RCU readers that
+ * picked up the old pointer have finished by then.
+ */
+void page_tracking_device_unregister(struct page_tracking_device *dev)
+{
+	struct page_tracking_device *registered;
+
+	mutex_lock(&page_tracking_device_mutex);
+
+	registered = rcu_dereference_protected(pt_dev,
+			lockdep_is_held(&page_tracking_device_mutex));
+	if (registered != dev)
+		goto unlock;
+
+	/* Disable page tracking device */
+	RCU_INIT_POINTER(pt_dev, NULL);
+	synchronize_rcu();
+
+unlock:
+	mutex_unlock(&page_tracking_device_mutex);
+}
+EXPORT_SYMBOL_GPL(page_tracking_device_unregister);
+
+/*
+ * Reports whether a page tracking device is currently registered.
+ *
+ * Only the pointer value is tested and it is never dereferenced, so
+ * rcu_access_pointer() is sufficient and no RCU read-side critical section
+ * is required. The result is a snapshot: a device may be registered or
+ * unregistered right after this function returns.
+ *
+ * Returns 1 if a device is registered, 0 otherwise.
+ */
+int page_tracking_device_registered(void)
+{
+	return rcu_access_pointer(pt_dev) != NULL;
+}
+EXPORT_SYMBOL_GPL(page_tracking_device_registered);
+
+/*
+ * Allocates a per-VM tracker for @config on the registered device.
+ *
+ * Returns the tracking context produced by the device's allocate_tracker
+ * callback, or NULL when no device is registered.
+ *
+ * NOTE(review): the callback runs inside an RCU read-side critical section,
+ * so it presumably must not sleep - confirm against the driver.
+ */
+void *page_tracking_allocate(struct pt_config config)
+{
+	struct page_tracking_device *dev;
+	void *ctx;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	ctx = likely(dev) ? dev->allocate_tracker(config) : NULL;
+	rcu_read_unlock();
+
+	return ctx;
+}
+EXPORT_SYMBOL_GPL(page_tracking_allocate);
+
+/*
+ * Releases the per-VM tracker identified by @ctx.
+ *
+ * Returns the result of the device's release_tracker callback, or -ENODEV
+ * when no page tracking device is registered (the result used to be an
+ * uninitialized value in that case).
+ */
+int page_tracking_release(void *ctx)
+{
+	struct page_tracking_device *dev;
+	int r = -ENODEV;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	if (likely(dev))
+		r = dev->release_tracker(ctx);
+	rcu_read_unlock();
+
+	return r;
+}
+EXPORT_SYMBOL_GPL(page_tracking_release);
+
+/*
+ * Enables tracking for the specified @ctx and @cpu (-1 for all cpus).
+ *
+ * Returns the result of the device's enable_tracking callback, or -ENODEV
+ * when no page tracking device is registered (the result used to be an
+ * uninitialized value in that case).
+ */
+int page_tracking_enable(void *ctx, int cpu)
+{
+	struct page_tracking_device *dev;
+	int r = -ENODEV;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	if (likely(dev))
+		r = dev->enable_tracking(ctx, cpu);
+	rcu_read_unlock();
+
+	return r;
+}
+EXPORT_SYMBOL_GPL(page_tracking_enable);
+
+/*
+ * Disables tracking for the @ctx and @cpu.
+ *
+ * Returns the result of the device's disable_tracking callback, or -ENODEV
+ * when no page tracking device is registered (the result used to be an
+ * uninitialized value in that case).
+ */
+int page_tracking_disable(void *ctx, int cpu)
+{
+	struct page_tracking_device *dev;
+	int r = -ENODEV;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	if (likely(dev))
+		r = dev->disable_tracking(ctx, cpu);
+	rcu_read_unlock();
+
+	return r;
+}
+EXPORT_SYMBOL_GPL(page_tracking_disable);
+
+/*
+ * Flushes any tracking data available for the @ctx.
+ *
+ * Returns the result of the device's flush callback, or -ENODEV when no
+ * page tracking device is registered (the result used to be an
+ * uninitialized value in that case).
+ */
+int page_tracking_flush(void *ctx)
+{
+	struct page_tracking_device *dev;
+	int r = -ENODEV;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	if (likely(dev))
+		r = dev->flush(ctx);
+	rcu_read_unlock();
+
+	return r;
+}
+EXPORT_SYMBOL_GPL(page_tracking_flush);
+
+/*
+ * Reads up to @max dirty pages available for the @ctx and @cpu (-1 for all cpus)
+ *
+ * Returns the result of the device's read_dirty_pages callback (number of
+ * read pages, or -errno in case of error), or -ENODEV when no page tracking
+ * device is registered (the result used to be an uninitialized value in
+ * that case).
+ */
+int page_tracking_read_dirty_pages(void *ctx, int cpu, gpa_t *pages, u32 max)
+{
+	struct page_tracking_device *dev;
+	int r = -ENODEV;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	if (likely(dev))
+		r = dev->read_dirty_pages(ctx, cpu, pages, max);
+	rcu_read_unlock();
+
+	return r;
+}
+EXPORT_SYMBOL_GPL(page_tracking_read_dirty_pages);
+
+#endif
Add an interface for tracking stage-2 page accesses. The interface can be implemented by a driver for a device that has the required capabilities, e.g. the AWS Graviton Page Tracking Agent accelerator. When a device implementing the page_tracking_device interface is available, KVM will use it to accelerate dirty logging. The initial version of the interface supports dirty logging only, but the interface can be extended to other use cases, such as a WSS calculation. page_tracking_device supports tracking stage-2 translations by VMID and by CPU ID. While the VMID filter is required, the CPU ID filter is optional: a CPU ID of -1 denotes any CPU. Similarly, page_tracking_device allows getting the pages logged either for a particular CPU or for all CPUs. KVM can use a CPU ID of -1 to populate dirty bitmaps and a specific CPU ID for per-vCPU dirty rings. Signed-off-by: Lilit Janpoladyan <lilitj@amazon.com> --- arch/arm64/include/asm/page_tracking.h | 79 +++++++++++++ arch/arm64/kvm/Kconfig | 12 ++ arch/arm64/kvm/Makefile | 1 + arch/arm64/kvm/page_tracking.c | 158 +++++++++++++++++++++++++ 4 files changed, 250 insertions(+) create mode 100644 arch/arm64/include/asm/page_tracking.h create mode 100644 arch/arm64/kvm/page_tracking.c