diff mbox series

[1/8] arm64: add an interface for stage-2 page tracking

Message ID 20240918152807.25135-2-lilitj@amazon.com (mailing list archive)
State New, archived
Headers show
Series *** RFC: ARM KVM dirty tracking device *** | expand

Commit Message

Lilit Janpoladyan Sept. 18, 2024, 3:28 p.m. UTC
Add an interface for tracking stage-2 page accesses. The interface
can be implemented by a driver for a device that has the required
capabilities, e.g. the AWS Graviton Page Tracking Agent accelerator. When a device
implementing page_tracking_device interface is available, KVM will
use it to accelerate dirty logging. The initial version of the
interface supports dirty logging only, but the interface can be
extended to other use cases, such as a WSS calculation.

page_tracking_device supports tracking stage-2 translations by VMID
and by CPU ID. While VMID filter is required, CPU ID is optional.
CPU ID == -1 denotes any CPU. Similarly, page_tracking_device allows
getting pages logged for either a particular CPU or for all. KVM
can use CPU ID of -1 to populate dirty bitmaps and a specific
CPU ID for per vCPU dirty rings.

Signed-off-by: Lilit Janpoladyan <lilitj@amazon.com>
---
 arch/arm64/include/asm/page_tracking.h |  79 +++++++++++++
 arch/arm64/kvm/Kconfig                 |  12 ++
 arch/arm64/kvm/Makefile                |   1 +
 arch/arm64/kvm/page_tracking.c         | 158 +++++++++++++++++++++++++
 4 files changed, 250 insertions(+)
 create mode 100644 arch/arm64/include/asm/page_tracking.h
 create mode 100644 arch/arm64/kvm/page_tracking.c
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/page_tracking.h b/arch/arm64/include/asm/page_tracking.h
new file mode 100644
index 000000000000..5162fb5b648e
--- /dev/null
+++ b/arch/arm64/include/asm/page_tracking.h
@@ -0,0 +1,79 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARM64_PAGE_TRACKING_DEVICE_H
+#define _ARM64_PAGE_TRACKING_DEVICE_H
+
+#include <linux/types.h>
+#include <linux/kvm_types.h>
+
+/* Page tracking mode */
+enum pt_mode {
+	dirty_pages,	/* Dirty page tracking; the only mode defined here */
+};
+
+/*
+ * Configuration of a per-VM page tracker.
+ * Passed by value to page_tracking_allocate() and to the device's
+ * allocate_tracker() callback.
+ */
+struct pt_config {
+	enum pt_mode mode; /* Tracking mode */
+	u32 vmid;	/* VMID to track */
+};
+
+/*
+ * Interface provided by the page tracking device.
+ *
+ * A table of callbacks that is handed to page_tracking_device_register().
+ * Apart from allocate_tracker(), every callback takes an opaque tracking
+ * context @ctx (presumably the value returned by allocate_tracker() -
+ * confirm against the implementing driver).
+ */
+struct page_tracking_device {
+
+	/* Allocates a per-VM tracker for @config, returns tracking context */
+	void* (*allocate_tracker)(struct pt_config config);
+
+	/* Releases a per-VM tracker identified by @ctx */
+	int (*release_tracker)(void *ctx);
+
+	/*
+	 * Enables tracking for the specified @ctx and the specified @cpu,
+	 * @cpu = -1 enables tracking for all cpus
+	 *
+	 * The function may be called for the same @ctx and @cpu multiple
+	 * times and the implementation has to do reference counting to
+	 * correctly disable the tracking.
+	 * @returns 0 on success, negative errno in case of a failure
+	 */
+	int (*enable_tracking)(void *ctx, int cpu);
+
+	/*
+	 * Disables tracking for the @ctx and the @cpu
+	 *
+	 * Does actually disable the tracking of the @ctx and the @cpu only
+	 * when the number of disable and enable calls matches, i.e. when the
+	 * reference counter is at 0. @returns 0 in this case, -EBUSY while
+	 * reference counter > 0 and negative errno in case of a failure
+	 */
+	int (*disable_tracking)(void *ctx, int cpu);
+
+	/*
+	 * Flushes any tracking data available for the @ctx,
+	 * @returns 0 on success, negative errno in case of a failure
+	 */
+	int (*flush)(void *ctx);
+
+	/*
+	 * Reads up to @max dirty pages available for the @ctx into @pages
+	 * In case @cpu id is not -1, reads only pages dirtied by the specified cpu
+	 * @returns number of read pages and -errno in case of a failure
+	 */
+	int (*read_dirty_pages)(void *ctx,
+				int cpu,
+				gpa_t *pages,
+				u32 max);
+};
+
+/*
+ * Page tracking device tear-down, bring-up and existence checks.
+ *
+ * At most one device is registered at a time: registering while another
+ * device is present fails with -EBUSY, and unregistering only takes effect
+ * when @pt_dev is the currently registered device.
+ */
+void page_tracking_device_unregister(struct page_tracking_device *pt_dev);
+int page_tracking_device_register(struct page_tracking_device *pt_dev);
+int page_tracking_device_registered(void);
+
+/*
+ * Page tracking device wrappers.
+ *
+ * Each wrapper forwards to the matching callback of the registered
+ * page_tracking_device.
+ */
+void *page_tracking_allocate(struct pt_config config);
+int page_tracking_release(void *ctx);
+int page_tracking_enable(void *ctx, int cpu);
+int page_tracking_disable(void *ctx, int cpu);
+int page_tracking_flush(void *ctx);
+int page_tracking_read_dirty_pages(void *ctx, int cpu, gpa_t *pages, u32 max);
+
+#endif /* _ARM64_PAGE_TRACKING_DEVICE_H */
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 8304eb342be9..33844658279b 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -66,4 +66,16 @@  config PROTECTED_NVHE_STACKTRACE
 
 	  If unsure, or not using protected nVHE (pKVM), say N.
 
+config HAVE_KVM_PAGE_TRACKING_DEVICE
+	bool "Support for hardware accelerated dirty tracking"
+	default n
+	help
+	  Say Y to enable hardware-accelerated dirty tracking.
+
+	  Adds support for hardware-accelerated dirty tracking during live
+	  migration of a virtual machine. Requires a hardware accelerator.
+
+	  If the required hardware is not present, say N.
+
+
 endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 86a629aaf0a1..4e4f5c63baf2 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -18,6 +18,7 @@  kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
 	 guest.o debug.o reset.o sys_regs.o stacktrace.o \
 	 vgic-sys-reg-v3.o fpsimd.o pkvm.o \
 	 arch_timer.o trng.o vmid.o emulate-nested.o nested.o \
+	 page_tracking.o \
 	 vgic/vgic.o vgic/vgic-init.o \
 	 vgic/vgic-irqfd.o vgic/vgic-v2.o \
 	 vgic/vgic-v3.o vgic/vgic-v4.o \
diff --git a/arch/arm64/kvm/page_tracking.c b/arch/arm64/kvm/page_tracking.c
new file mode 100644
index 000000000000..a81c917d4faa
--- /dev/null
+++ b/arch/arm64/kvm/page_tracking.c
@@ -0,0 +1,158 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/page_tracking.h>
+#include <linux/mutex.h>
+#include <linux/rcupdate.h>
+
+#ifndef CONFIG_HAVE_KVM_PAGE_TRACKING_DEVICE
+
+/*
+ * Stubs used when CONFIG_HAVE_KVM_PAGE_TRACKING_DEVICE is not set.
+ * No device can ever be registered in this configuration: every call is a
+ * no-op that reports 0 (or NULL for the allocator).
+ */
+int page_tracking_device_register(struct page_tracking_device *dev) { return 0; }
+void page_tracking_device_unregister(struct page_tracking_device *dev) {}
+int page_tracking_device_registered(void) { return 0; }
+void *page_tracking_allocate(struct pt_config config) { return NULL; }
+int page_tracking_release(void *ctx) { return 0; }
+int page_tracking_enable(void *ctx, int cpu) { return 0; }
+int page_tracking_disable(void *ctx, int cpu) { return 0; }
+int page_tracking_flush(void *ctx) { return 0; }
+int page_tracking_read_dirty_pages(void *ctx, int cpu, gpa_t *pages, u32 max) { return 0; }
+
+#else
+
+/* Serializes updates of pt_dev in the register/unregister paths */
+static DEFINE_MUTEX(page_tracking_device_mutex);
+/*
+ * The currently registered page tracking device, NULL when there is none.
+ * Written under page_tracking_device_mutex, read under rcu_read_lock().
+ */
+static struct page_tracking_device __rcu *pt_dev __read_mostly;
+
+/*
+ * Publishes @dev as the page tracking device.
+ *
+ * Only one device may be registered at a time.
+ * @returns 0 on success, -EBUSY if a device is already registered
+ */
+int page_tracking_device_register(struct page_tracking_device *dev)
+{
+	int ret = -EBUSY;
+
+	mutex_lock(&page_tracking_device_mutex);
+
+	/* Claim the slot only when it is still empty */
+	if (!rcu_dereference_protected(pt_dev,
+				       lockdep_is_held(&page_tracking_device_mutex))) {
+		rcu_assign_pointer(pt_dev, dev);
+		ret = 0;
+	}
+
+	mutex_unlock(&page_tracking_device_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(page_tracking_device_register);
+
+/*
+ * Removes @dev if it is the currently registered page tracking device.
+ *
+ * After clearing the pointer, waits for an RCU grace period so that readers
+ * that picked up the old pointer under rcu_read_lock() have finished.
+ * Does nothing when @dev is not the registered device.
+ */
+void page_tracking_device_unregister(struct page_tracking_device *dev)
+{
+	struct page_tracking_device *registered;
+
+	mutex_lock(&page_tracking_device_mutex);
+
+	registered = rcu_dereference_protected(pt_dev,
+					       lockdep_is_held(&page_tracking_device_mutex));
+	if (registered == dev) {
+		/* Disable page tracking device */
+		RCU_INIT_POINTER(pt_dev, NULL);
+		synchronize_rcu();
+	}
+
+	mutex_unlock(&page_tracking_device_mutex);
+}
+EXPORT_SYMBOL_GPL(page_tracking_device_unregister);
+
+/*
+ * Reports whether a page tracking device is currently registered.
+ * @returns 1 if a device is registered, 0 otherwise
+ */
+int page_tracking_device_registered(void)
+{
+	struct page_tracking_device *dev;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	rcu_read_unlock();
+
+	/* Only the pointer value is tested; it is never dereferenced here */
+	return dev != NULL;
+}
+EXPORT_SYMBOL_GPL(page_tracking_device_registered);
+
+/*
+ * Allocates a per-VM tracker described by @config.
+ * @returns the tracking context produced by the device, or NULL when no
+ * page tracking device is registered
+ */
+void *page_tracking_allocate(struct pt_config config)
+{
+	struct page_tracking_device *tracker_dev;
+	void *tracker = NULL;
+
+	rcu_read_lock();
+	tracker_dev = rcu_dereference(pt_dev);
+	if (unlikely(!tracker_dev))
+		goto unlock;
+
+	tracker = tracker_dev->allocate_tracker(config);
+unlock:
+	rcu_read_unlock();
+	return tracker;
+}
+EXPORT_SYMBOL_GPL(page_tracking_allocate);
+
+/*
+ * Releases a per-VM tracker.
+ *
+ * @ctx: tracking context of the tracker to release
+ * @returns the result of the device's release_tracker() callback, or
+ * -ENODEV when no page tracking device is registered
+ */
+int page_tracking_release(void *ctx)
+{
+	/* Well-defined result for the "no device registered" path */
+	int r = -ENODEV;
+	struct page_tracking_device *dev;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	if (likely(dev))
+		r = dev->release_tracker(ctx);
+	rcu_read_unlock();
+	return r;
+}
+EXPORT_SYMBOL_GPL(page_tracking_release);
+
+/*
+ * Enables tracking for the specified @ctx and @cpu (-1 for all cpus).
+ *
+ * @returns the result of the device's enable_tracking() callback, or
+ * -ENODEV when no page tracking device is registered
+ */
+int page_tracking_enable(void *ctx, int cpu)
+{
+	/* Well-defined result for the "no device registered" path */
+	int r = -ENODEV;
+	struct page_tracking_device *dev;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	if (likely(dev))
+		r = dev->enable_tracking(ctx, cpu);
+	rcu_read_unlock();
+	return r;
+}
+EXPORT_SYMBOL_GPL(page_tracking_enable);
+
+/*
+ * Disables tracking for the @ctx and @cpu.
+ *
+ * @returns the result of the device's disable_tracking() callback, or
+ * -ENODEV when no page tracking device is registered
+ */
+int page_tracking_disable(void *ctx, int cpu)
+{
+	/* Well-defined result for the "no device registered" path */
+	int r = -ENODEV;
+	struct page_tracking_device *dev;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	if (likely(dev))
+		r = dev->disable_tracking(ctx, cpu);
+	rcu_read_unlock();
+	return r;
+}
+EXPORT_SYMBOL_GPL(page_tracking_disable);
+
+/*
+ * Flushes any tracking data available for the @ctx.
+ *
+ * @returns the result of the device's flush() callback, or -ENODEV when
+ * no page tracking device is registered
+ */
+int page_tracking_flush(void *ctx)
+{
+	/* Well-defined result for the "no device registered" path */
+	int r = -ENODEV;
+	struct page_tracking_device *dev;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	if (likely(dev))
+		r = dev->flush(ctx);
+	rcu_read_unlock();
+	return r;
+}
+EXPORT_SYMBOL_GPL(page_tracking_flush);
+
+/*
+ * Reads up to @max dirty pages available for the @ctx and @cpu (-1 for all cpus)
+ * into @pages.
+ *
+ * @returns number of read pages and -errno in case of error; -ENODEV when
+ * no page tracking device is registered
+ */
+int page_tracking_read_dirty_pages(void *ctx, int cpu, gpa_t *pages, u32 max)
+{
+	/* Well-defined result for the "no device registered" path */
+	int r = -ENODEV;
+	struct page_tracking_device *dev;
+
+	rcu_read_lock();
+	dev = rcu_dereference(pt_dev);
+	if (likely(dev))
+		r = dev->read_dirty_pages(ctx, cpu, pages, max);
+	rcu_read_unlock();
+	return r;
+}
+EXPORT_SYMBOL_GPL(page_tracking_read_dirty_pages);
+
+#endif