diff mbox series

[05/26] drm/xe/eudebug: Introduce exec queue placements event

Message ID 20241209133318.1806472-6-mika.kuoppala@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Intel Xe GPU debug support (eudebug) v3 | expand

Commit Message

Mika Kuoppala Dec. 9, 2024, 1:32 p.m. UTC
From: Dominik Grzegorzek <dominik.grzegorzek@intel.com>

This commit introduces the DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE_PLACEMENTS,
which provides dbgUMD with information about the hw engines utilized
during execution. The event is sent for every logical ring context (lrc)
in scenarios involving parallel submission.

Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_eudebug.c       | 99 ++++++++++++++++++++++++---
 drivers/gpu/drm/xe/xe_eudebug_types.h | 26 +++++++
 include/uapi/drm/xe_drm_eudebug.h     | 17 +++++
 3 files changed, 133 insertions(+), 9 deletions(-)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/xe/xe_eudebug.c b/drivers/gpu/drm/xe/xe_eudebug.c
index 3ca46ec838b9..cbcf7a72fdba 100644
--- a/drivers/gpu/drm/xe/xe_eudebug.c
+++ b/drivers/gpu/drm/xe/xe_eudebug.c
@@ -717,7 +717,7 @@  static struct xe_eudebug_event *
 xe_eudebug_create_event(struct xe_eudebug *d, u16 type, u64 seqno, u16 flags,
 			u32 len)
 {
-	const u16 max_event = DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE;
+	const u16 max_event = DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE_PLACEMENTS;
 	const u16 known_flags =
 		DRM_XE_EUDEBUG_EVENT_CREATE |
 		DRM_XE_EUDEBUG_EVENT_DESTROY |
@@ -752,7 +752,7 @@  static long xe_eudebug_read_event(struct xe_eudebug *d,
 		u64_to_user_ptr(arg);
 	struct drm_xe_eudebug_event user_event;
 	struct xe_eudebug_event *event;
-	const unsigned int max_event = DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE;
+	const unsigned int max_event = DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE_PLACEMENTS;
 	long ret = 0;
 
 	if (XE_IOCTL_DBG(xe, copy_from_user(&user_event, user_orig, sizeof(user_event))))
@@ -1206,12 +1206,88 @@  static int send_exec_queue_event(struct xe_eudebug *d, u32 flags,
 	return xe_eudebug_queue_event(d, event);
 }
 
-static int exec_queue_create_event(struct xe_eudebug *d,
-				   struct xe_file *xef, struct xe_exec_queue *q)
+static int send_exec_queue_placements_event(struct xe_eudebug *d,
+					    u64 client_handle, u64 vm_handle,
+					    u64 exec_queue_handle, u64 lrc_handle,
+					    u32 num_placements, u64 *instances,
+					    u64 seqno)
+{
+	struct xe_eudebug_event_exec_queue_placements *e;
+	const u32 sz = struct_size(e, instances, num_placements);
+	struct xe_eudebug_event *event;
+
+	event = xe_eudebug_create_event(d,
+					DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE_PLACEMENTS,
+					seqno, DRM_XE_EUDEBUG_EVENT_CREATE, sz);
+	if (!event)
+		return -ENOMEM;
+
+	e = cast_event(e, event);
+
+	write_member(struct drm_xe_eudebug_event_exec_queue_placements, e, client_handle,
+		     client_handle);
+	write_member(struct drm_xe_eudebug_event_exec_queue_placements, e, vm_handle, vm_handle);
+	write_member(struct drm_xe_eudebug_event_exec_queue_placements, e, exec_queue_handle,
+		     exec_queue_handle);
+	write_member(struct drm_xe_eudebug_event_exec_queue_placements, e, lrc_handle, lrc_handle);
+	write_member(struct drm_xe_eudebug_event_exec_queue_placements, e, num_placements,
+		     num_placements);
+
+	memcpy(e->instances, instances, num_placements * sizeof(*instances));
+
+	return xe_eudebug_queue_event(d, event);
+}
+
+static int send_exec_queue_placements_events(struct xe_eudebug *d, struct xe_exec_queue *q,
+					     u64 client_handle, u64 vm_handle,
+					     u64 exec_queue_handle, u64 *lrc_handles)
+{
+
+	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE] = {};
+	unsigned long mask = q->logical_mask;
+	u32 num_placements = 0;
+	int ret, i, j;
+	u64 seqno;
+
+	for_each_set_bit(i, &mask, sizeof(q->logical_mask) * 8) {
+		if (XE_WARN_ON(num_placements == XE_HW_ENGINE_MAX_INSTANCE))
+			break;
+
+		eci[num_placements].engine_class = xe_to_user_engine_class[q->class];
+		eci[num_placements].engine_instance = i;
+		eci[num_placements++].gt_id = q->gt->info.id;
+	}
+
+	ret = 0;
+	for (i = 0; i < q->width; i++) {
+		seqno = atomic_long_inc_return(&d->events.seqno);
+
+		ret = send_exec_queue_placements_event(d, client_handle, vm_handle,
+						       exec_queue_handle, lrc_handles[i],
+						       num_placements, (u64 *)eci, seqno);
+		if (ret)
+			return ret;
+
+		/*
+		 * Parallel submissions must be logically contiguous,
+		 * so the next placement is just q->logical_mask >> 1
+		 */
+		for (j = 0; j < num_placements; j++) {
+			eci[j].engine_instance++;
+			XE_WARN_ON(eci[j].engine_instance >= XE_HW_ENGINE_MAX_INSTANCE);
+		}
+	}
+
+	return ret;
+}
+
+static int exec_queue_create_events(struct xe_eudebug *d,
+				    struct xe_file *xef, struct xe_exec_queue *q)
 {
 	int h_c, h_vm, h_queue;
 	u64 h_lrc[XE_HW_ENGINE_MAX_INSTANCE], seqno;
 	int i;
+	int ret = 0;
 
 	if (!xe_exec_queue_is_lr(q))
 		return 0;
@@ -1252,9 +1328,14 @@  static int exec_queue_create_event(struct xe_eudebug *d,
 	 * we disconnect
 	 */
 
-	return send_exec_queue_event(d, DRM_XE_EUDEBUG_EVENT_CREATE,
-				     h_c, h_vm, h_queue, q->class,
-				     q->width, h_lrc, seqno);
+	ret = send_exec_queue_event(d, DRM_XE_EUDEBUG_EVENT_CREATE,
+				  h_c, h_vm, h_queue, q->class,
+				  q->width, h_lrc, seqno);
+
+	if (ret)
+		return ret;
+
+	return send_exec_queue_placements_events(d, q, h_c, h_vm, h_queue, h_lrc);
 }
 
 static int exec_queue_destroy_event(struct xe_eudebug *d,
@@ -1317,7 +1398,7 @@  void xe_eudebug_exec_queue_create(struct xe_file *xef, struct xe_exec_queue *q)
 	if (!d)
 		return;
 
-	xe_eudebug_event_put(d, exec_queue_create_event(d, xef, q));
+	xe_eudebug_event_put(d, exec_queue_create_events(d, xef, q));
 }
 
 void xe_eudebug_exec_queue_destroy(struct xe_file *xef, struct xe_exec_queue *q)
@@ -1355,7 +1436,7 @@  static int discover_client(struct xe_eudebug *d, struct xe_file *xef)
 		if (!exec_queue_class_is_tracked(q->class))
 			continue;
 
-		err = exec_queue_create_event(d, xef, q);
+		err = exec_queue_create_events(d, xef, q);
 		if (err)
 			break;
 	}
diff --git a/drivers/gpu/drm/xe/xe_eudebug_types.h b/drivers/gpu/drm/xe/xe_eudebug_types.h
index 4824c4159036..bdffdfb1abff 100644
--- a/drivers/gpu/drm/xe/xe_eudebug_types.h
+++ b/drivers/gpu/drm/xe/xe_eudebug_types.h
@@ -202,4 +202,30 @@  struct xe_eudebug_event_exec_queue {
 	u64 lrc_handle[] __counted_by(width);
 };
 
+struct xe_eudebug_event_exec_queue_placements {
+	/** @base: base event */
+	struct xe_eudebug_event base;
+
+	/** @client_handle: client for the engine create/destroy */
+	u64 client_handle;
+
+	/** @vm_handle: vm handle for the engine create/destroy */
+	u64 vm_handle;
+
+	/** @exec_queue_handle: engine handle */
+	u64 exec_queue_handle;
+
+	/** @engine_handle: engine class */
+	u64 lrc_handle;
+
+	/** @num_placements: all possible placements for given lrc */
+	u32 num_placements;
+
+	/** @pad: padding */
+	u32 pad;
+
+	/** @instances: num_placements sized array containing drm_xe_engine_class_instance*/
+	u64 instances[]; __counted_by(num_placements);
+};
+
 #endif
diff --git a/include/uapi/drm/xe_drm_eudebug.h b/include/uapi/drm/xe_drm_eudebug.h
index ac44e890152a..21690008a869 100644
--- a/include/uapi/drm/xe_drm_eudebug.h
+++ b/include/uapi/drm/xe_drm_eudebug.h
@@ -27,6 +27,7 @@  struct drm_xe_eudebug_event {
 #define DRM_XE_EUDEBUG_EVENT_OPEN		2
 #define DRM_XE_EUDEBUG_EVENT_VM			3
 #define DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE		4
+#define DRM_XE_EUDEBUG_EVENT_EXEC_QUEUE_PLACEMENTS 5
 
 	__u16 flags;
 #define DRM_XE_EUDEBUG_EVENT_CREATE		(1 << 0)
@@ -61,6 +62,22 @@  struct drm_xe_eudebug_event_exec_queue {
 	__u64 lrc_handle[];
 };
 
+struct drm_xe_eudebug_event_exec_queue_placements {
+	struct drm_xe_eudebug_event base;
+
+	__u64 client_handle;
+	__u64 vm_handle;
+	__u64 exec_queue_handle;
+	__u64 lrc_handle;
+	__u32 num_placements;
+	__u32 pad;
+	/**
+	 * @instances: user pointer to num_placements sized array of struct
+	 * drm_xe_engine_class_instance
+	 */
+	__u64 instances[];
+};
+
 #if defined(__cplusplus)
 }
 #endif