diff mbox

[v2] drm/i915: Add Guc/HuC firmware details to error state

Message ID 20171019190257.30932-1-michal.wajdeczko@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michal Wajdeczko Oct. 19, 2017, 7:02 p.m. UTC
Include GuC and HuC firmware details in captured error state
to provide additional debug information. To reuse existing
uc firmware pretty printer, introduce new drm-printer variant
that works with our i915_error_state_buf output. Also update
uc firmware pretty printer to accept const input.

v2: don't rely on current caps (Chris)
    dump correct fw info (Michal)

Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |  4 ++++
 drivers/gpu/drm/i915/i915_gpu_error.c | 42 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_uc_fw.c    |  2 +-
 drivers/gpu/drm/i915/intel_uc_fw.h    |  2 +-
 4 files changed, 48 insertions(+), 2 deletions(-)

Comments

Chris Wilson Oct. 19, 2017, 7:27 p.m. UTC | #1
Quoting Michal Wajdeczko (2017-10-19 20:02:57)
> +static void i915_capture_uc_state(struct drm_i915_private *dev_priv,
> +                                 struct i915_gpu_state *error)
> +{
> +       error->guc_fw = dev_priv->guc.fw;
> +       error->huc_fw = dev_priv->huc.fw;
> +
> +       /* Make sure to capture custom firmware paths */
> +       if (i915_modparams.guc_firmware_path) {

If we pretend we are not inside a stop_machine() right now...

i915_modparams may be overwritten at any point by userspace. So first of
all this perhaps should be if (error->params.guc_firmware_path) but then
what about that GEM_BUG_ON?

> +               error->guc_fw.path = error->params.guc_firmware_path;
> +               GEM_BUG_ON(strcmp(error->guc_fw.path, dev_priv->guc.fw.path));
> +       }

Don't you just want to
	error->guc_fw.path = kstrdup(dev_priv->guc.fw.path, GFP_ATOMIC) 
? The goal is to take a copy of the parameters that match the firmware
at the time of the hang, which may not match the state of the modparam
at this moment in time.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3c2649c..4d6519b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -911,6 +911,10 @@  struct i915_gpu_state {
 	struct intel_device_info device_info;
 	struct i915_params params;
 
+	/* uC state */
+	struct intel_uc_fw guc_fw;
+	struct intel_uc_fw huc_fw;
+
 	/* Generic register state */
 	u32 eir;
 	u32 pgtbl_er;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 653fb69..2defbb4 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -30,6 +30,8 @@ 
 #include <generated/utsrelease.h>
 #include <linux/stop_machine.h>
 #include <linux/zlib.h>
+#include <drm/drm_print.h>
+
 #include "i915_drv.h"
 
 static const char *engine_str(int engine)
@@ -175,6 +177,21 @@  static void i915_error_puts(struct drm_i915_error_state_buf *e,
 #define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
 #define err_puts(e, s) i915_error_puts(e, s)
 
+static void __i915_printfn_error(struct drm_printer *p, struct va_format *vaf)
+{
+	i915_error_vprintf(p->arg, vaf->fmt, *vaf->va);
+}
+
+static inline struct drm_printer
+i915_error_printer(struct drm_i915_error_state_buf *e)
+{
+	struct drm_printer p = {
+		.printfn = __i915_printfn_error,
+		.arg = e,
+	};
+	return p;
+}
+
 #ifdef CONFIG_DRM_I915_COMPRESS_ERROR
 
 struct compress {
@@ -592,8 +609,10 @@  static void err_print_pciid(struct drm_i915_error_state_buf *m,
 int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			    const struct i915_gpu_state *error)
 {
+	struct drm_printer p = i915_error_printer(m);
 	struct drm_i915_private *dev_priv = m->i915;
 	struct drm_i915_error_object *obj;
+
 	int i, j;
 
 	if (!error) {
@@ -774,6 +793,11 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	err_print_capabilities(m, &error->device_info);
 	err_print_params(m, &error->params);
 
+	if (error->device_info.has_guc) {
+		intel_uc_fw_dump(&error->guc_fw, &p);
+		intel_uc_fw_dump(&error->huc_fw, &p);
+	}
+
 	if (m->bytes == 0 && m->err)
 		return m->err;
 
@@ -1559,6 +1583,23 @@  static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv,
 	error->pinned_bo = bo;
 }
 
+static void i915_capture_uc_state(struct drm_i915_private *dev_priv,
+				  struct i915_gpu_state *error)
+{
+	error->guc_fw = dev_priv->guc.fw;
+	error->huc_fw = dev_priv->huc.fw;
+
+	/* Make sure to capture custom firmware paths */
+	if (i915_modparams.guc_firmware_path) {
+		error->guc_fw.path = error->params.guc_firmware_path;
+		GEM_BUG_ON(strcmp(error->guc_fw.path, dev_priv->guc.fw.path));
+	}
+	if (i915_modparams.huc_firmware_path) {
+		error->huc_fw.path = error->params.huc_firmware_path;
+		GEM_BUG_ON(strcmp(error->huc_fw.path, dev_priv->huc.fw.path));
+	}
+}
+
 static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv,
 					    struct i915_gpu_state *error)
 {
@@ -1710,6 +1751,7 @@  static int capture(void *data)
 	I915_PARAMS_FOR_EACH(DUP);
 #undef DUP
 
+	i915_capture_uc_state(error->i915, error);
 	i915_capture_gen_state(error->i915, error);
 	i915_capture_reg_state(error->i915, error);
 	i915_gem_record_fences(error->i915, error);
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c
index 973888e..4bc82d3 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/intel_uc_fw.c
@@ -299,7 +299,7 @@  void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
  *
  * Pretty printer for uC firmware.
  */
-void intel_uc_fw_dump(struct intel_uc_fw *uc_fw, struct drm_printer *p)
+void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p)
 {
 	drm_printf(p, "%s firmware: %s\n",
 		   intel_uc_fw_type_repr(uc_fw->type), uc_fw->path);
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h
index 1329036..5394d9d 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/intel_uc_fw.h
@@ -116,6 +116,6 @@  int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
 		       int (*xfer)(struct intel_uc_fw *uc_fw,
 				   struct i915_vma *vma));
 void intel_uc_fw_fini(struct intel_uc_fw *uc_fw);
-void intel_uc_fw_dump(struct intel_uc_fw *uc_fw, struct drm_printer *p);
+void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p);
 
 #endif