diff mbox

drm/i915: Add Guc/HuC firmware details to error state

Message ID 20171019125706.38516-1-michal.wajdeczko@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michal Wajdeczko Oct. 19, 2017, 12:57 p.m. UTC
Include GuC and HuC firmware details in captured error state
to provide additional debug information. To reuse existing
uc firmware pretty printer, introduce new drm-printer variant
that works with our i915_error_state_buf output.

Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |  4 ++++
 drivers/gpu/drm/i915/i915_gpu_error.c | 42 +++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

Comments

Chris Wilson Oct. 19, 2017, 1:30 p.m. UTC | #1
Quoting Michal Wajdeczko (2017-10-19 13:57:06)
> @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>         err_print_capabilities(m, &error->device_info);
>         err_print_params(m, &error->params);
>  
> +       if (HAS_GUC(dev_priv)) {

Could we not peek inside the struct to know if it has been set? Looking
at the current device should be a last resort -- better to assume that
everything and anything may have been overwritten since the error.

> +               intel_uc_fw_dump(&dev_priv->guc.fw, &p);
> +               intel_uc_fw_dump(&dev_priv->huc.fw, &p);
> +       }

Looks really useful, thanks.
-Chris
Chris Wilson Oct. 19, 2017, 1:31 p.m. UTC | #2
Quoting Michal Wajdeczko (2017-10-19 13:57:06)
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 653fb69..0105b87 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -30,6 +30,8 @@
>  #include <generated/utsrelease.h>
>  #include <linux/stop_machine.h>
>  #include <linux/zlib.h>
> +#include <drm/drm_print.h>
> +
>  #include "i915_drv.h"
>  
>  static const char *engine_str(int engine)
> @@ -175,6 +177,21 @@ static void i915_error_puts(struct drm_i915_error_state_buf *e,
>  #define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
>  #define err_puts(e, s) i915_error_puts(e, s)
>  
> +void __i915_printfn_error(struct drm_printer *p, struct va_format *vaf)

This isn't exported so don't forget the static?
-Chris
Michal Wajdeczko Oct. 19, 2017, 6:44 p.m. UTC | #3
On Thu, 19 Oct 2017 15:30:44 +0200, Chris Wilson  
<chris@chris-wilson.co.uk> wrote:

> Quoting Michal Wajdeczko (2017-10-19 13:57:06)
>> @@ -774,6 +793,11 @@ int i915_error_state_to_str(struct  
>> drm_i915_error_state_buf *m,
>>         err_print_capabilities(m, &error->device_info);
>>         err_print_params(m, &error->params);
>>
>> +       if (HAS_GUC(dev_priv)) {
>
> Could we not peek inside the struct to know if it has been set? Looking
> at the current device should be a last resort -- better to assume that
> everything and anything may have been overwritten since the error.

I was following earlier patterns where HAS_CSR/IS_GEN were used.
Will change into:

	if (error->device_info.has_guc) {

>
>> +               intel_uc_fw_dump(&dev_priv->guc.fw, &p);
>> +               intel_uc_fw_dump(&dev_priv->huc.fw, &p);
>> +       }

Ooops, I don't know how it happen but above dump should be:

		intel_uc_fw_dump(&error->guc_fw, &p);
		intel_uc_fw_dump(&error->huc_fw, &p);

Michal
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f01c800..f608d84 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -912,6 +912,10 @@  struct i915_gpu_state {
 	struct intel_device_info device_info;
 	struct i915_params params;
 
+	/* uC state */
+	struct intel_uc_fw guc_fw;
+	struct intel_uc_fw huc_fw;
+
 	/* Generic register state */
 	u32 eir;
 	u32 pgtbl_er;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 653fb69..0105b87 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -30,6 +30,8 @@ 
 #include <generated/utsrelease.h>
 #include <linux/stop_machine.h>
 #include <linux/zlib.h>
+#include <drm/drm_print.h>
+
 #include "i915_drv.h"
 
 static const char *engine_str(int engine)
@@ -175,6 +177,21 @@  static void i915_error_puts(struct drm_i915_error_state_buf *e,
 #define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
 #define err_puts(e, s) i915_error_puts(e, s)
 
+void __i915_printfn_error(struct drm_printer *p, struct va_format *vaf)
+{
+	i915_error_vprintf(p->arg, vaf->fmt, *vaf->va);
+}
+
+static inline struct drm_printer
+i915_error_printer(struct drm_i915_error_state_buf *e)
+{
+	struct drm_printer p = {
+		.printfn = __i915_printfn_error,
+		.arg = e,
+	};
+	return p;
+}
+
 #ifdef CONFIG_DRM_I915_COMPRESS_ERROR
 
 struct compress {
@@ -592,8 +609,10 @@  static void err_print_pciid(struct drm_i915_error_state_buf *m,
 int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			    const struct i915_gpu_state *error)
 {
+	struct drm_printer p = i915_error_printer(m);
 	struct drm_i915_private *dev_priv = m->i915;
 	struct drm_i915_error_object *obj;
+
 	int i, j;
 
 	if (!error) {
@@ -774,6 +793,11 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	err_print_capabilities(m, &error->device_info);
 	err_print_params(m, &error->params);
 
+	if (HAS_GUC(dev_priv)) {
+		intel_uc_fw_dump(&dev_priv->guc.fw, &p);
+		intel_uc_fw_dump(&dev_priv->huc.fw, &p);
+	}
+
 	if (m->bytes == 0 && m->err)
 		return m->err;
 
@@ -1559,6 +1583,23 @@  static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv,
 	error->pinned_bo = bo;
 }
 
+static void i915_capture_uc_state(struct drm_i915_private *dev_priv,
+				  struct i915_gpu_state *error)
+{
+	error->guc_fw = dev_priv->guc.fw;
+	error->huc_fw = dev_priv->huc.fw;
+
+	/* Make sure to capture custom firmware paths */
+	if (i915_modparams.guc_firmware_path) {
+		error->guc_fw.path = error->params.guc_firmware_path;
+		GEM_BUG_ON(strcmp(error->guc_fw.path, dev_priv->guc.fw.path));
+	}
+	if (i915_modparams.huc_firmware_path) {
+		error->huc_fw.path = error->params.huc_firmware_path;
+		GEM_BUG_ON(strcmp(error->huc_fw.path, dev_priv->huc.fw.path));
+	}
+}
+
 static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv,
 					    struct i915_gpu_state *error)
 {
@@ -1710,6 +1751,7 @@  static int capture(void *data)
 	I915_PARAMS_FOR_EACH(DUP);
 #undef DUP
 
+	i915_capture_uc_state(error->i915, error);
 	i915_capture_gen_state(error->i915, error);
 	i915_capture_reg_state(error->i915, error);
 	i915_gem_record_fences(error->i915, error);