diff mbox series

[RFC,18/19] drm/xe/uapi: HAX: Add the xe_madvise_prefer_devmem IOCTL

Message ID 20250312210416.3120-19-thomas.hellstrom@linux.intel.com (mailing list archive)
State New
Headers show
Series drm, drm/xe: Multi-device GPUSVM | expand

Commit Message

Thomas Hellström March 12, 2025, 9:04 p.m. UTC
As a POC, add an xe_madvise_prefer_devmem IOCTL so that the user
can set the preferred pagemap to migrate to for a given memory
region (in this POC, the memory region is the whole GPU VM).

This is intended to be replaced by a proper madvise IOCTL, probably
with improved functionality.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_device.c |  2 +
 drivers/gpu/drm/xe/xe_svm.c    | 72 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_svm.h    |  9 +++++
 include/uapi/drm/xe_drm.h      | 10 +++++
 4 files changed, 93 insertions(+)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 160b3c189de0..a6ac699e9d12 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -196,6 +196,8 @@  static const struct drm_ioctl_desc xe_ioctls[] = {
 			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(XE_DEVMEM_OPEN, xe_devmem_open_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(XE_MADVISE_PREFER_DEVMEM, xe_madvise_prefer_devmem_ioctl,
+			  DRM_RENDER_ALLOW),
 };
 
 static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index ebdd27b02be7..56c2c731be27 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -1338,3 +1338,75 @@  void xe_svm_vma_fini(struct xe_svm_vma *svma)
 		svma->pref_dpagemap = NULL;
 	}
 }
+
+/**
+ * xe_madvise_prefer_devmem_ioctl() - POC IOCTL callback implementing a rudimentary
+ * version of a madvise prefer_devmem() functionality.
+ * @dev: The struct drm_device.
+ * @data: The ioctl argument, a struct drm_xe_madvise_prefer_devmem.
+ * @file: The drm file.
+ *
+ * For the given gpu vm, look up all SVM gpu vmas and assign the preferred
+ * drm pagemap for migration to the one associated with the file-descriptor
+ * given in the ioctl argument. If a negative (invalid) file descriptor is
+ * given, the function instead clears the preferred drm pagemap, meaning that
+ * at fault time, the drm pagemap associated with the same tile as the client
+ * is used.
+ *
+ * Return: %0 on success. Negative error code on failure.
+ */
+int xe_madvise_prefer_devmem_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct xe_device *xe = to_xe_device(dev);
+	struct xe_file *xef = to_xe_file(file);
+	struct drm_xe_madvise_prefer_devmem *args = data;
+	struct xe_vm *vm;
+	struct drm_pagemap *dpagemap = NULL;
+	struct drm_gpuva *gpuva;
+	struct xe_vma *gvma;
+	int err = 0;
+
+	if (XE_IOCTL_DBG(xe, args->extensions) ||
+	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+		return -EINVAL;
+
+	vm = xe_vm_lookup(xef, args->vm_id);
+	if (XE_IOCTL_DBG(xe, !vm))
+		return -EINVAL;
+
+	/* Force a signed compare so that a negative fd reaches the clear-preference path. */
+	if ((s32)args->devmem_fd >= 0) {
+		dpagemap = drm_pagemap_from_fd(args->devmem_fd);
+		if (XE_IOCTL_DBG(xe, IS_ERR(dpagemap))) {
+			err = PTR_ERR(dpagemap);
+			goto out_no_dpagemap;
+		}
+
+		if (XE_IOCTL_DBG(xe, drm_dev_is_unplugged(dpagemap->drm))) {
+			err = -ENODEV;
+			goto out_no_lock;
+		}
+	}
+
+	err = down_write_killable(&vm->lock);
+	if (err)
+		goto out_no_lock;
+
+	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
+		gvma = gpuva_to_vma(gpuva);
+		if (!xe_vma_is_cpu_addr_mirror(gvma))
+			continue;
+
+		/* Swap references only on change: drop the old one, take a new one. */
+		if (dpagemap != gvma->svm.pref_dpagemap) {
+			drm_pagemap_put(gvma->svm.pref_dpagemap);
+			gvma->svm.pref_dpagemap = drm_pagemap_get(dpagemap);
+		}
+	}
+	up_write(&vm->lock);
+out_no_lock:
+	drm_pagemap_put(dpagemap);
+out_no_dpagemap:
+	xe_vm_put(vm);
+	return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 4f1a9e410dad..7c076c36c1c5 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -146,6 +146,8 @@  static inline void xe_svm_vma_assign_dpagemap(struct xe_svm_vma *svma,
 	svma->pref_dpagemap = drm_pagemap_get(dpagemap);
 }
 
+int xe_madvise_prefer_devmem_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
 #else
 #include <linux/interval_tree.h>
 
@@ -237,6 +239,12 @@  static inline void xe_svm_notifier_unlock(struct xe_vm *vm)
 
 #define xe_svm_vma_assign_dpagemap(...) do {} while (0)
 
+static inline int xe_madvise_prefer_devmem_ioctl(struct drm_device *dev,
+						 void *data, struct drm_file *file)
+{
+	return -EOPNOTSUPP; /* fallback stub: the real ioctl is compiled out here */
+}
+
 #endif
 
 #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
@@ -259,6 +267,7 @@  static inline int xe_devmem_open_ioctl(struct drm_device *dev, void *data, struc
 {
 	return -EOPNOTSUPP;
 }
+
 #endif
 
 #endif
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index bb22413713f0..d9572cfb5a10 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -103,6 +103,7 @@  extern "C" {
 #define DRM_XE_WAIT_USER_FENCE		0x0a
 #define DRM_XE_OBSERVATION		0x0b
 #define DRM_XE_DEVMEM_OPEN		0x0c
+#define DRM_XE_MADVISE_PREFER_DEVMEM	0x0d
 
 /* Must be kept compact -- no holes */
 
@@ -119,6 +120,7 @@  extern "C" {
 #define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
 #define DRM_IOCTL_XE_OBSERVATION		DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param)
 #define DRM_IOCTL_XE_DEVMEM_OPEN                DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVMEM_OPEN, struct drm_xe_devmem_open)
+#define DRM_IOCTL_XE_MADVISE_PREFER_DEVMEM      DRM_IOW(DRM_COMMAND_BASE + DRM_XE_MADVISE_PREFER_DEVMEM, struct drm_xe_madvise_prefer_devmem)
 
 /**
  * DOC: Xe IOCTL Extensions
@@ -1990,6 +1992,14 @@  struct drm_xe_devmem_open {
 	__u64 reserved[2];
 };
 
+struct drm_xe_madvise_prefer_devmem {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+	__u32 vm_id;		/* id of the GPU VM whose SVM vmas are updated */
+	__s32 devmem_fd;	/* devmem fd; negative clears the preferred pagemap */
+	__u64 reserved[2];	/* must be zero, rejected with -EINVAL otherwise */
+};
+
 #if defined(__cplusplus)
 }
 #endif