diff mbox series

[07/14] drm/xe: Introduce guc_submit_types.h with relevant structs.

Message ID 20230426205713.512695-8-rodrigo.vivi@intel.com (mailing list archive)
State New, archived
Headers show
Series Introduce xe_devcoredump. | expand

Commit Message

Rodrigo Vivi April 26, 2023, 8:57 p.m. UTC
These structs and definitions are only used for the guc_submit
and they were added specifically for the parallel submission.

While doing that also delete the unused struct guc_wq_item.

Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_fwif.h         | 29 -----------
 drivers/gpu/drm/xe/xe_guc_submit.c       | 40 ++++-----------
 drivers/gpu/drm/xe/xe_guc_submit_types.h | 64 ++++++++++++++++++++++++
 3 files changed, 75 insertions(+), 58 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_guc_submit_types.h

Comments

Matthew Brost May 2, 2023, 7:44 a.m. UTC | #1
On Wed, Apr 26, 2023 at 04:57:06PM -0400, Rodrigo Vivi wrote:
> These structs and definitions are only used for the guc_submit
> and they were added specifically for the parallel submission.
> 
> While doing that also delete the unused struct guc_wq_item.
> 
> Cc: Matthew Brost <matthew.brost@intel.com>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

Reviewed-by: Matthew Brost <matthew.brost@intel.com>

Side note: this reminds me that we need to implement the parallel submission
handshake in xe_ring_ops.c if we want parallel submission to truly work.
It should be a straight port from the i915... We should also write an IGT
for that.

> ---
>  drivers/gpu/drm/xe/xe_guc_fwif.h         | 29 -----------
>  drivers/gpu/drm/xe/xe_guc_submit.c       | 40 ++++-----------
>  drivers/gpu/drm/xe/xe_guc_submit_types.h | 64 ++++++++++++++++++++++++
>  3 files changed, 75 insertions(+), 58 deletions(-)
>  create mode 100644 drivers/gpu/drm/xe/xe_guc_submit_types.h
> 
> diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
> index 20155ba4ef07..27d132ce2087 100644
> --- a/drivers/gpu/drm/xe/xe_guc_fwif.h
> +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
> @@ -46,35 +46,6 @@
>  #define GUC_MAX_ENGINE_CLASSES		16
>  #define GUC_MAX_INSTANCES_PER_CLASS	32
>  
> -/* Work item for submitting workloads into work queue of GuC. */
> -#define WQ_STATUS_ACTIVE		1
> -#define WQ_STATUS_SUSPENDED		2
> -#define WQ_STATUS_CMD_ERROR		3
> -#define WQ_STATUS_ENGINE_ID_NOT_USED	4
> -#define WQ_STATUS_SUSPENDED_FROM_RESET	5
> -#define WQ_TYPE_NOOP			0x4
> -#define WQ_TYPE_MULTI_LRC		0x5
> -#define WQ_TYPE_MASK			GENMASK(7, 0)
> -#define WQ_LEN_MASK			GENMASK(26, 16)
> -
> -#define WQ_GUC_ID_MASK			GENMASK(15, 0)
> -#define WQ_RING_TAIL_MASK		GENMASK(28, 18)
> -
> -struct guc_wq_item {
> -	u32 header;
> -	u32 context_desc;
> -	u32 submit_element_info;
> -	u32 fence_id;
> -} __packed;
> -
> -struct guc_sched_wq_desc {
> -	u32 head;
> -	u32 tail;
> -	u32 error_offset;
> -	u32 wq_status;
> -	u32 reserved[28];
> -} __packed;
> -
>  /* Helper for context registration H2G */
>  struct guc_ctxt_registration_info {
>  	u32 flags;
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index 231fb4145297..a5fe7755ce4c 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -22,6 +22,7 @@
>  #include "xe_guc.h"
>  #include "xe_guc_ct.h"
>  #include "xe_guc_engine_types.h"
> +#include "xe_guc_submit_types.h"
>  #include "xe_hw_engine.h"
>  #include "xe_hw_fence.h"
>  #include "xe_lrc.h"
> @@ -378,32 +379,12 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e)
>  		       __guc_engine_policy_action_size(&policy), 0, 0);
>  }
>  
> -#define PARALLEL_SCRATCH_SIZE	2048
> -#define WQ_SIZE			(PARALLEL_SCRATCH_SIZE / 2)
> -#define WQ_OFFSET		(PARALLEL_SCRATCH_SIZE - WQ_SIZE)
> -#define CACHELINE_BYTES		64
> -
> -struct sync_semaphore {
> -	u32 semaphore;
> -	u8 unused[CACHELINE_BYTES - sizeof(u32)];
> -};
> -
> -struct parallel_scratch {
> -	struct guc_sched_wq_desc wq_desc;
> -
> -	struct sync_semaphore go;
> -	struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE];
> -
> -	u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
> -		sizeof(struct sync_semaphore) * (XE_HW_ENGINE_MAX_INSTANCE + 1)];
> -
> -	u32 wq[WQ_SIZE / sizeof(u32)];
> -};
> -
>  #define parallel_read(xe_, map_, field_) \
> -	xe_map_rd_field(xe_, &map_, 0, struct parallel_scratch, field_)
> +	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
> +			field_)
>  #define parallel_write(xe_, map_, field_, val_) \
> -	xe_map_wr_field(xe_, &map_, 0, struct parallel_scratch, field_, val_)
> +	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
> +			field_, val_)
>  
>  static void __register_mlrc_engine(struct xe_guc *guc,
>  				   struct xe_engine *e,
> @@ -486,13 +467,13 @@ static void register_engine(struct xe_engine *e)
>  		struct iosys_map map = xe_lrc_parallel_map(lrc);
>  
>  		info.wq_desc_lo = lower_32_bits(ggtt_addr +
> -			offsetof(struct parallel_scratch, wq_desc));
> +			offsetof(struct guc_submit_parallel_scratch, wq_desc));
>  		info.wq_desc_hi = upper_32_bits(ggtt_addr +
> -			offsetof(struct parallel_scratch, wq_desc));
> +			offsetof(struct guc_submit_parallel_scratch, wq_desc));
>  		info.wq_base_lo = lower_32_bits(ggtt_addr +
> -			offsetof(struct parallel_scratch, wq[0]));
> +			offsetof(struct guc_submit_parallel_scratch, wq[0]));
>  		info.wq_base_hi = upper_32_bits(ggtt_addr +
> -			offsetof(struct parallel_scratch, wq[0]));
> +			offsetof(struct guc_submit_parallel_scratch, wq[0]));
>  		info.wq_size = WQ_SIZE;
>  
>  		e->guc->wqi_head = 0;
> @@ -594,7 +575,7 @@ static void wq_item_append(struct xe_engine *e)
>  
>  	XE_BUG_ON(i != wqi_size / sizeof(u32));
>  
> -	iosys_map_incr(&map, offsetof(struct parallel_scratch,
> +	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
>  					wq[e->guc->wqi_tail / sizeof(u32)]));
>  	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
>  	e->guc->wqi_tail += wqi_size;
> @@ -1674,6 +1655,7 @@ static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
>  		guc_engine_wq_print(e, p);
>  
>  	spin_lock(&sched->job_list_lock);
> +
>  	list_for_each_entry(job, &sched->pending_list, drm.list)
>  		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
>  			   xe_sched_job_seqno(job),
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h
> new file mode 100644
> index 000000000000..d369ea0bad60
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h
> @@ -0,0 +1,64 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2023 Intel Corporation
> + */
> +
> +#ifndef _XE_GUC_SUBMIT_TYPES_H_
> +#define _XE_GUC_SUBMIT_TYPES_H_
> +
> +#include "xe_hw_engine_types.h"
> +
> +/* Work item for submitting workloads into work queue of GuC. */
> +#define WQ_STATUS_ACTIVE		1
> +#define WQ_STATUS_SUSPENDED		2
> +#define WQ_STATUS_CMD_ERROR		3
> +#define WQ_STATUS_ENGINE_ID_NOT_USED	4
> +#define WQ_STATUS_SUSPENDED_FROM_RESET	5
> +#define WQ_TYPE_NOOP			0x4
> +#define WQ_TYPE_MULTI_LRC		0x5
> +#define WQ_TYPE_MASK			GENMASK(7, 0)
> +#define WQ_LEN_MASK			GENMASK(26, 16)
> +
> +#define WQ_GUC_ID_MASK			GENMASK(15, 0)
> +#define WQ_RING_TAIL_MASK		GENMASK(28, 18)
> +
> +#define PARALLEL_SCRATCH_SIZE	2048
> +#define WQ_SIZE			(PARALLEL_SCRATCH_SIZE / 2)
> +#define WQ_OFFSET		(PARALLEL_SCRATCH_SIZE - WQ_SIZE)
> +#define CACHELINE_BYTES		64
> +
> +struct guc_sched_wq_desc {
> +	u32 head;
> +	u32 tail;
> +	u32 error_offset;
> +	u32 wq_status;
> +	u32 reserved[28];
> +} __packed;
> +
> +struct sync_semaphore {
> +	u32 semaphore;
> +	u8 unused[CACHELINE_BYTES - sizeof(u32)];
> +};
> +
> +/**
> + * struct guc_submit_parallel_scratch - A scratch shared mapped buffer.
> + */
> +struct guc_submit_parallel_scratch {
> +	/** @wq_desc: Guc scheduler workqueue descriptor */
> +	struct guc_sched_wq_desc wq_desc;
> +
> +	/** @go: Go Semaphore */
> +	struct sync_semaphore go;
> +	/** @join: Joined semaphore for the relevant hw engine instances */
> +	struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE];
> +
> +	/** @unused: Unused/Reserved memory space */
> +	u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
> +		  sizeof(struct sync_semaphore) *
> +		  (XE_HW_ENGINE_MAX_INSTANCE + 1)];
> +
> +	/** @wq: Workqueue info */
> +	u32 wq[WQ_SIZE / sizeof(u32)];
> +};
> +
> +#endif
> -- 
> 2.39.2
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 20155ba4ef07..27d132ce2087 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -46,35 +46,6 @@ 
 #define GUC_MAX_ENGINE_CLASSES		16
 #define GUC_MAX_INSTANCES_PER_CLASS	32
 
-/* Work item for submitting workloads into work queue of GuC. */
-#define WQ_STATUS_ACTIVE		1
-#define WQ_STATUS_SUSPENDED		2
-#define WQ_STATUS_CMD_ERROR		3
-#define WQ_STATUS_ENGINE_ID_NOT_USED	4
-#define WQ_STATUS_SUSPENDED_FROM_RESET	5
-#define WQ_TYPE_NOOP			0x4
-#define WQ_TYPE_MULTI_LRC		0x5
-#define WQ_TYPE_MASK			GENMASK(7, 0)
-#define WQ_LEN_MASK			GENMASK(26, 16)
-
-#define WQ_GUC_ID_MASK			GENMASK(15, 0)
-#define WQ_RING_TAIL_MASK		GENMASK(28, 18)
-
-struct guc_wq_item {
-	u32 header;
-	u32 context_desc;
-	u32 submit_element_info;
-	u32 fence_id;
-} __packed;
-
-struct guc_sched_wq_desc {
-	u32 head;
-	u32 tail;
-	u32 error_offset;
-	u32 wq_status;
-	u32 reserved[28];
-} __packed;
-
 /* Helper for context registration H2G */
 struct guc_ctxt_registration_info {
 	u32 flags;
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 231fb4145297..a5fe7755ce4c 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -22,6 +22,7 @@ 
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
 #include "xe_guc_engine_types.h"
+#include "xe_guc_submit_types.h"
 #include "xe_hw_engine.h"
 #include "xe_hw_fence.h"
 #include "xe_lrc.h"
@@ -378,32 +379,12 @@  static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e)
 		       __guc_engine_policy_action_size(&policy), 0, 0);
 }
 
-#define PARALLEL_SCRATCH_SIZE	2048
-#define WQ_SIZE			(PARALLEL_SCRATCH_SIZE / 2)
-#define WQ_OFFSET		(PARALLEL_SCRATCH_SIZE - WQ_SIZE)
-#define CACHELINE_BYTES		64
-
-struct sync_semaphore {
-	u32 semaphore;
-	u8 unused[CACHELINE_BYTES - sizeof(u32)];
-};
-
-struct parallel_scratch {
-	struct guc_sched_wq_desc wq_desc;
-
-	struct sync_semaphore go;
-	struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE];
-
-	u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
-		sizeof(struct sync_semaphore) * (XE_HW_ENGINE_MAX_INSTANCE + 1)];
-
-	u32 wq[WQ_SIZE / sizeof(u32)];
-};
-
 #define parallel_read(xe_, map_, field_) \
-	xe_map_rd_field(xe_, &map_, 0, struct parallel_scratch, field_)
+	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
+			field_)
 #define parallel_write(xe_, map_, field_, val_) \
-	xe_map_wr_field(xe_, &map_, 0, struct parallel_scratch, field_, val_)
+	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
+			field_, val_)
 
 static void __register_mlrc_engine(struct xe_guc *guc,
 				   struct xe_engine *e,
@@ -486,13 +467,13 @@  static void register_engine(struct xe_engine *e)
 		struct iosys_map map = xe_lrc_parallel_map(lrc);
 
 		info.wq_desc_lo = lower_32_bits(ggtt_addr +
-			offsetof(struct parallel_scratch, wq_desc));
+			offsetof(struct guc_submit_parallel_scratch, wq_desc));
 		info.wq_desc_hi = upper_32_bits(ggtt_addr +
-			offsetof(struct parallel_scratch, wq_desc));
+			offsetof(struct guc_submit_parallel_scratch, wq_desc));
 		info.wq_base_lo = lower_32_bits(ggtt_addr +
-			offsetof(struct parallel_scratch, wq[0]));
+			offsetof(struct guc_submit_parallel_scratch, wq[0]));
 		info.wq_base_hi = upper_32_bits(ggtt_addr +
-			offsetof(struct parallel_scratch, wq[0]));
+			offsetof(struct guc_submit_parallel_scratch, wq[0]));
 		info.wq_size = WQ_SIZE;
 
 		e->guc->wqi_head = 0;
@@ -594,7 +575,7 @@  static void wq_item_append(struct xe_engine *e)
 
 	XE_BUG_ON(i != wqi_size / sizeof(u32));
 
-	iosys_map_incr(&map, offsetof(struct parallel_scratch,
+	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
 					wq[e->guc->wqi_tail / sizeof(u32)]));
 	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
 	e->guc->wqi_tail += wqi_size;
@@ -1674,6 +1655,7 @@  static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
 		guc_engine_wq_print(e, p);
 
 	spin_lock(&sched->job_list_lock);
+
 	list_for_each_entry(job, &sched->pending_list, drm.list)
 		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
 			   xe_sched_job_seqno(job),
diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h
new file mode 100644
index 000000000000..d369ea0bad60
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h
@@ -0,0 +1,64 @@ 
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GUC_SUBMIT_TYPES_H_
+#define _XE_GUC_SUBMIT_TYPES_H_
+
+#include "xe_hw_engine_types.h"
+
+/* Work item for submitting workloads into work queue of GuC. */
+#define WQ_STATUS_ACTIVE		1
+#define WQ_STATUS_SUSPENDED		2
+#define WQ_STATUS_CMD_ERROR		3
+#define WQ_STATUS_ENGINE_ID_NOT_USED	4
+#define WQ_STATUS_SUSPENDED_FROM_RESET	5
+#define WQ_TYPE_NOOP			0x4
+#define WQ_TYPE_MULTI_LRC		0x5
+#define WQ_TYPE_MASK			GENMASK(7, 0)
+#define WQ_LEN_MASK			GENMASK(26, 16)
+
+#define WQ_GUC_ID_MASK			GENMASK(15, 0)
+#define WQ_RING_TAIL_MASK		GENMASK(28, 18)
+
+#define PARALLEL_SCRATCH_SIZE	2048
+#define WQ_SIZE			(PARALLEL_SCRATCH_SIZE / 2)
+#define WQ_OFFSET		(PARALLEL_SCRATCH_SIZE - WQ_SIZE)
+#define CACHELINE_BYTES		64
+
+struct guc_sched_wq_desc {
+	u32 head;
+	u32 tail;
+	u32 error_offset;
+	u32 wq_status;
+	u32 reserved[28];
+} __packed;
+
+struct sync_semaphore {
+	u32 semaphore;
+	u8 unused[CACHELINE_BYTES - sizeof(u32)];
+};
+
+/**
+ * struct guc_submit_parallel_scratch - A scratch shared mapped buffer.
+ */
+struct guc_submit_parallel_scratch {
+	/** @wq_desc: Guc scheduler workqueue descriptor */
+	struct guc_sched_wq_desc wq_desc;
+
+	/** @go: Go Semaphore */
+	struct sync_semaphore go;
+	/** @join: Joined semaphore for the relevant hw engine instances */
+	struct sync_semaphore join[XE_HW_ENGINE_MAX_INSTANCE];
+
+	/** @unused: Unused/Reserved memory space */
+	u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
+		  sizeof(struct sync_semaphore) *
+		  (XE_HW_ENGINE_MAX_INSTANCE + 1)];
+
+	/** @wq: Workqueue info */
+	u32 wq[WQ_SIZE / sizeof(u32)];
+};
+
+#endif