diff mbox series

[05/14] drm/xe: Convert GuC CT print to snapshot capture and print.

Message ID 20230426205713.512695-6-rodrigo.vivi@intel.com (mailing list archive)
State New, archived
Headers show
Series Introduce xe_devcoredump. | expand

Commit Message

Rodrigo Vivi April 26, 2023, 8:57 p.m. UTC
The goal is to allow for a snapshot capture to be taken at the time
of the crash, while the print out can happen at a later time through
the exposed devcoredump virtual device.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/xe/xe_guc_ct.c       | 132 +++++++++++++++++++++++----
 drivers/gpu/drm/xe/xe_guc_ct.h       |   7 +-
 drivers/gpu/drm/xe/xe_guc_ct_types.h |  26 ++++++
 3 files changed, 145 insertions(+), 20 deletions(-)

Comments

Matthew Brost May 2, 2023, 5:27 a.m. UTC | #1
On Wed, Apr 26, 2023 at 04:57:04PM -0400, Rodrigo Vivi wrote:
> The goal is to allow for a snapshot capture to be taken at the time
> of the crash, while the print out can happen at a later time through
> the exposed devcoredump virtual device.
> 
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_guc_ct.c       | 132 +++++++++++++++++++++++----
>  drivers/gpu/drm/xe/xe_guc_ct.h       |   7 +-
>  drivers/gpu/drm/xe/xe_guc_ct_types.h |  26 ++++++
>  3 files changed, 145 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> index e16e5fe37ed4..0b7b95dbd9be 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> @@ -1095,31 +1095,26 @@ static void g2h_worker_func(struct work_struct *w)
>  	xe_device_mem_access_put(ct_to_xe(ct));
>  }
>  
> -static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
> -			     struct drm_printer *p)
> +static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
> +				     struct guc_ctb_snapshot *snapshot)
>  {
>  	u32 head, tail;
>  
> -	drm_printf(p, "\tsize: %d\n", ctb->info.size);
> -	drm_printf(p, "\tresv_space: %d\n", ctb->info.resv_space);
> -	drm_printf(p, "\thead: %d\n", ctb->info.head);
> -	drm_printf(p, "\ttail: %d\n", ctb->info.tail);
> -	drm_printf(p, "\tspace: %d\n", ctb->info.space);
> -	drm_printf(p, "\tbroken: %d\n", ctb->info.broken);
> +	snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32), GFP_ATOMIC);

So since this GFP_ATOMIC I assume this so we can call this code from the
TDR or a CTB handler (dma fence signaling paths)? Also I don't see
where you check for this allocation failing.

>  
> -	head = desc_read(xe, ctb, head);
> -	tail = desc_read(xe, ctb, tail);
> -	drm_printf(p, "\thead (memory): %d\n", head);
> -	drm_printf(p, "\ttail (memory): %d\n", tail);
> -	drm_printf(p, "\tstatus (memory): 0x%x\n", desc_read(xe, ctb, status));
> +	xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
> +			   sizeof(struct guc_ct_buffer_desc));
> +	memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
> +
> +	head = snapshot->desc.head;
> +	tail = snapshot->desc.tail;
>  
>  	if (head != tail) {
>  		struct iosys_map map =
>  			IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32));
>  
>  		while (head != tail) {
> -			drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
> -				   xe_map_rd(xe, &map, 0, u32));
> +			snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32);
>  			++head;
>  			if (head == ctb->info.size) {
>  				head = 0;
> @@ -1131,20 +1126,119 @@ static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
>  	}
>  }
>  
> -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
> +static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
> +				   struct drm_printer *p)
> +{
> +	u32 head, tail;
> +
> +	drm_printf(p, "\tsize: %d\n", snapshot->info.size);
> +	drm_printf(p, "\tresv_space: %d\n", snapshot->info.space);
> +	drm_printf(p, "\thead: %d\n", snapshot->info.head);
> +	drm_printf(p, "\ttail: %d\n", snapshot->info.tail);
> +	drm_printf(p, "\tspace: %d\n", snapshot->info.space);
> +	drm_printf(p, "\tbroken: %d\n", snapshot->info.broken);
> +	drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
> +	drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
> +	drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
> +
> +	head = snapshot->desc.head;
> +	tail = snapshot->desc.tail;
> +
> +	while (head != tail) {
> +		drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
> +			   snapshot->cmds[head]);
> +		++head;
> +		if (head == snapshot->info.size)
> +			head = 0;
> +	}
> +}
> +
> +static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot)
> +{
> +	kfree(snapshot->cmds);
> +}
> +
> +/**
> + * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
> + * @ct: GuC CT object.
> + *
> + * This can be printed out in a later stage like during dev_coredump
> + * analysis.
> + *
> + * Returns: a GuC CT snapshot object that must be freed by the caller
> + * 	    by using `xe_guc_ct_snapshot_free`.
> + */
> +struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct)
>  {
> +	struct xe_guc_ct_snapshot *snapshot;
> +
> +	snapshot = kzalloc(sizeof(struct xe_guc_ct_snapshot), GFP_ATOMIC);
> +

Same here, need to check for an alloc failure.

Also maybe we should a flag to switch between GFP_ATOMIC (signaling
path) and GFP_KERNEL (debugfs). In the case above, CMDs might be huge as
deadlock workaround (like 16 MBs or something) so atomic seems risky.

Aside from these comments, I do rather like what you have done here.

Matt

>  	if (ct->enabled) {
> +		snapshot->ct_enabled = true;
> +		guc_ctb_snapshot_capture(ct_to_xe(ct), &ct->ctbs.h2g,
> +					 &snapshot->h2g);
> +		guc_ctb_snapshot_capture(ct_to_xe(ct), &ct->ctbs.g2h,
> +					 &snapshot->g2h);
> +	}
> +
> +	return snapshot;
> +}
> +
> +/**
> + * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot.
> + * @snapshot: GuC CT snapshot object.
> + * @p: drm_printer where it will be printed out.
> + *
> + * This function prints out a given GuC CT snapshot object.
> + */
> +void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
> +			      struct drm_printer *p)
> +{
> +	if (snapshot->ct_enabled) {
>  		drm_puts(p, "\nH2G CTB (all sizes in DW):\n");
> -		guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.h2g, p);
> +		guc_ctb_snapshot_print(&snapshot->h2g, p);
>  
>  		drm_puts(p, "\nG2H CTB (all sizes in DW):\n");
> -		guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.g2h, p);
> -		drm_printf(p, "\tg2h outstanding: %d\n", ct->g2h_outstanding);
> +		guc_ctb_snapshot_print(&snapshot->g2h, p);
> +
> +		drm_printf(p, "\tg2h outstanding: %d\n",
> +			   snapshot->g2h_outstanding);
>  	} else {
>  		drm_puts(p, "\nCT disabled\n");
>  	}
>  }
>  
> +/**
> + * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot.
> + * @snapshot: GuC CT snapshot object.
> + *
> + * This function free all the memory that needed to be allocated at capture
> + * time.
> + */
> +void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
> +{
> +	guc_ctb_snapshot_free(&snapshot->h2g);
> +	guc_ctb_snapshot_free(&snapshot->g2h);
> +	kfree(snapshot);
> +}
> +
> +/**
> + * xe_guc_ct_print - GuC CT Print.
> + * @ct: GuC CT.
> + * @p: drm_printer where it will be printed out.
> + *
> + * This function quickly capture a snapshot and immediately print it out.
> + */
> +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
> +{
> +	struct xe_guc_ct_snapshot *snapshot;
> +
> +	snapshot = xe_guc_ct_snapshot_capture(ct);
> +	xe_guc_ct_snapshot_print(snapshot, p);
> +	xe_guc_ct_snapshot_free(snapshot);
> +}
> +
>  #ifdef XE_GUC_CT_SELFTEST
>  /*
>   * Disable G2H processing in IRQ handler to force xe_guc_ct_send to enter flow
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
> index 49fb74f91e4d..29e0dff7ad9b 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.h
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.h
> @@ -13,9 +13,14 @@ struct drm_printer;
>  int xe_guc_ct_init(struct xe_guc_ct *ct);
>  int xe_guc_ct_enable(struct xe_guc_ct *ct);
>  void xe_guc_ct_disable(struct xe_guc_ct *ct);
> -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
>  void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
>  
> +struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct);
> +void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
> +			      struct drm_printer *p);
> +void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
> +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
> +
>  static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
>  {
>  	wake_up_all(&ct->wq);
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
> index 64e3dd14d4b2..93046d95b009 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
> @@ -48,6 +48,32 @@ struct guc_ctb {
>  	struct guc_ctb_info info;
>  };
>  
> +/**
> + * struct guc_ctb_snapshot - GuC command transport buffer (CTB) snapshot
> + */
> +struct guc_ctb_snapshot {
> +	/** @desc: snapshot of the CTB descriptor */
> +	struct guc_ct_buffer_desc desc;
> +	/** @cmds: snapshot of the CTB commands */
> +	u32 *cmds;
> +	/** @info: snapshot of the CTB info */
> +	struct guc_ctb_info info;
> +};
> +
> +/**
> + * struct xe_guc_ct_snapshot - GuC command transport (CT) snapshot
> + */
> +struct xe_guc_ct_snapshot {
> +	/** @ct_enabled: CT enabled info at capture time. */
> +	bool ct_enabled;
> +	/** @g2h_outstanding: G2H outstanding info at the capture time */
> +	u32 g2h_outstanding;
> +	/** @g2h: G2H CTB snapshot */
> +	struct guc_ctb_snapshot g2h;
> +	/** @h2g: H2G CTB snapshot */
> +	struct guc_ctb_snapshot h2g;
> +};
> +
>  /**
>   * struct xe_guc_ct - GuC command transport (CT) layer
>   *
> -- 
> 2.39.2
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index e16e5fe37ed4..0b7b95dbd9be 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -1095,31 +1095,26 @@  static void g2h_worker_func(struct work_struct *w)
 	xe_device_mem_access_put(ct_to_xe(ct));
 }
 
-static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
-			     struct drm_printer *p)
+static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
+				     struct guc_ctb_snapshot *snapshot)
 {
 	u32 head, tail;
 
-	drm_printf(p, "\tsize: %d\n", ctb->info.size);
-	drm_printf(p, "\tresv_space: %d\n", ctb->info.resv_space);
-	drm_printf(p, "\thead: %d\n", ctb->info.head);
-	drm_printf(p, "\ttail: %d\n", ctb->info.tail);
-	drm_printf(p, "\tspace: %d\n", ctb->info.space);
-	drm_printf(p, "\tbroken: %d\n", ctb->info.broken);
+	snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32), GFP_ATOMIC);
 
-	head = desc_read(xe, ctb, head);
-	tail = desc_read(xe, ctb, tail);
-	drm_printf(p, "\thead (memory): %d\n", head);
-	drm_printf(p, "\ttail (memory): %d\n", tail);
-	drm_printf(p, "\tstatus (memory): 0x%x\n", desc_read(xe, ctb, status));
+	xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
+			   sizeof(struct guc_ct_buffer_desc));
+	memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
+
+	head = snapshot->desc.head;
+	tail = snapshot->desc.tail;
 
 	if (head != tail) {
 		struct iosys_map map =
 			IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32));
 
 		while (head != tail) {
-			drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
-				   xe_map_rd(xe, &map, 0, u32));
+			snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32);
 			++head;
 			if (head == ctb->info.size) {
 				head = 0;
@@ -1131,20 +1126,119 @@  static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
 	}
 }
 
-void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
+static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
+				   struct drm_printer *p)
+{
+	u32 head, tail;
+
+	drm_printf(p, "\tsize: %d\n", snapshot->info.size);
+	drm_printf(p, "\tresv_space: %d\n", snapshot->info.space);
+	drm_printf(p, "\thead: %d\n", snapshot->info.head);
+	drm_printf(p, "\ttail: %d\n", snapshot->info.tail);
+	drm_printf(p, "\tspace: %d\n", snapshot->info.space);
+	drm_printf(p, "\tbroken: %d\n", snapshot->info.broken);
+	drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
+	drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
+	drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
+
+	head = snapshot->desc.head;
+	tail = snapshot->desc.tail;
+
+	while (head != tail) {
+		drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
+			   snapshot->cmds[head]);
+		++head;
+		if (head == snapshot->info.size)
+			head = 0;
+	}
+}
+
+static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot)
+{
+	kfree(snapshot->cmds);
+}
+
+/**
+ * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
+ * @ct: GuC CT object.
+ *
+ * This can be printed out in a later stage like during dev_coredump
+ * analysis.
+ *
+ * Returns: a GuC CT snapshot object that must be freed by the caller
+ * 	    by using `xe_guc_ct_snapshot_free`.
+ */
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct)
 {
+	struct xe_guc_ct_snapshot *snapshot;
+
+	snapshot = kzalloc(sizeof(struct xe_guc_ct_snapshot), GFP_ATOMIC);
+
 	if (ct->enabled) {
+		snapshot->ct_enabled = true;
+		guc_ctb_snapshot_capture(ct_to_xe(ct), &ct->ctbs.h2g,
+					 &snapshot->h2g);
+		guc_ctb_snapshot_capture(ct_to_xe(ct), &ct->ctbs.g2h,
+					 &snapshot->g2h);
+	}
+
+	return snapshot;
+}
+
+/**
+ * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot.
+ * @snapshot: GuC CT snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given GuC CT snapshot object.
+ */
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
+			      struct drm_printer *p)
+{
+	if (snapshot->ct_enabled) {
 		drm_puts(p, "\nH2G CTB (all sizes in DW):\n");
-		guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.h2g, p);
+		guc_ctb_snapshot_print(&snapshot->h2g, p);
 
 		drm_puts(p, "\nG2H CTB (all sizes in DW):\n");
-		guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.g2h, p);
-		drm_printf(p, "\tg2h outstanding: %d\n", ct->g2h_outstanding);
+		guc_ctb_snapshot_print(&snapshot->g2h, p);
+
+		drm_printf(p, "\tg2h outstanding: %d\n",
+			   snapshot->g2h_outstanding);
 	} else {
 		drm_puts(p, "\nCT disabled\n");
 	}
 }
 
+/**
+ * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot.
+ * @snapshot: GuC CT snapshot object.
+ *
+ * This function free all the memory that needed to be allocated at capture
+ * time.
+ */
+void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
+{
+	guc_ctb_snapshot_free(&snapshot->h2g);
+	guc_ctb_snapshot_free(&snapshot->g2h);
+	kfree(snapshot);
+}
+
+/**
+ * xe_guc_ct_print - GuC CT Print.
+ * @ct: GuC CT.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function quickly capture a snapshot and immediately print it out.
+ */
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
+{
+	struct xe_guc_ct_snapshot *snapshot;
+
+	snapshot = xe_guc_ct_snapshot_capture(ct);
+	xe_guc_ct_snapshot_print(snapshot, p);
+	xe_guc_ct_snapshot_free(snapshot);
+}
+
 #ifdef XE_GUC_CT_SELFTEST
 /*
  * Disable G2H processing in IRQ handler to force xe_guc_ct_send to enter flow
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index 49fb74f91e4d..29e0dff7ad9b 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -13,9 +13,14 @@  struct drm_printer;
 int xe_guc_ct_init(struct xe_guc_ct *ct);
 int xe_guc_ct_enable(struct xe_guc_ct *ct);
 void xe_guc_ct_disable(struct xe_guc_ct *ct);
-void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
 void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
 
+struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct);
+void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
+			      struct drm_printer *p);
+void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
+void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
+
 static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
 {
 	wake_up_all(&ct->wq);
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index 64e3dd14d4b2..93046d95b009 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -48,6 +48,32 @@  struct guc_ctb {
 	struct guc_ctb_info info;
 };
 
+/**
+ * struct guc_ctb_snapshot - GuC command transport buffer (CTB) snapshot
+ */
+struct guc_ctb_snapshot {
+	/** @desc: snapshot of the CTB descriptor */
+	struct guc_ct_buffer_desc desc;
+	/** @cmds: snapshot of the CTB commands */
+	u32 *cmds;
+	/** @info: snapshot of the CTB info */
+	struct guc_ctb_info info;
+};
+
+/**
+ * struct xe_guc_ct_snapshot - GuC command transport (CT) snapshot
+ */
+struct xe_guc_ct_snapshot {
+	/** @ct_enabled: CT enabled info at capture time. */
+	bool ct_enabled;
+	/** @g2h_outstanding: G2H outstanding info at the capture time */
+	u32 g2h_outstanding;
+	/** @g2h: G2H CTB snapshot */
+	struct guc_ctb_snapshot g2h;
+	/** @h2g: H2G CTB snapshot */
+	struct guc_ctb_snapshot h2g;
+};
+
 /**
  * struct xe_guc_ct - GuC command transport (CT) layer
  *