[v2] drm/i915/guc: capture GuC logs if FW fails to load
diff mbox

Message ID 1494006347-32364-1-git-send-email-daniele.ceraolospurio@intel.com
State New
Headers show

Commit Message

Daniele Ceraolo Spurio May 5, 2017, 5:45 p.m. UTC
We're currently deleting the GuC logs if the FW fails to load, but those
are still useful to understand why the loading failed. Keeping the
object around allows us to access them after driver load is completed.

v2: keep the object around instead of using kernel memory (chris)
    don't store the logs in the gpu_error struct (Chris)
    add a check on guc_log_level to avoid snapshotting empty logs

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Oscar Mateo <oscar.mateo@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c  | 30 ++++++++++++++++++------------
 drivers/gpu/drm/i915/i915_drv.c      |  3 +++
 drivers/gpu/drm/i915/intel_guc_log.c | 17 +++++++++++++++++
 drivers/gpu/drm/i915/intel_uc.c      |  7 +++++--
 drivers/gpu/drm/i915/intel_uc.h      |  5 +++++
 5 files changed, 48 insertions(+), 14 deletions(-)

Comments

Chris Wilson May 5, 2017, 7:19 p.m. UTC | #1
On Fri, May 05, 2017 at 10:45:47AM -0700, Daniele Ceraolo Spurio wrote:
> We're currently deleting the GuC logs if the FW fails to load, but those
> are still useful to understand why the loading failed. Keeping the
> object around allows us to access them after driver load is completed.
> 
> v2: keep the object around instead of using kernel memory (chris)
>     don't store the logs in the gpu_error struct (Chris)
>     add a check on guc_log_level to avoid snapshotting empty logs
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Oscar Mateo <oscar.mateo@intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c  | 30 ++++++++++++++++++------------
>  drivers/gpu/drm/i915/i915_drv.c      |  3 +++
>  drivers/gpu/drm/i915/intel_guc_log.c | 17 +++++++++++++++++
>  drivers/gpu/drm/i915/intel_uc.c      |  7 +++++--
>  drivers/gpu/drm/i915/intel_uc.h      |  5 +++++
>  5 files changed, 48 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 870c470..a37ab3b 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2544,25 +2544,31 @@ static int i915_guc_log_dump(struct seq_file *m, void *data)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>  	struct drm_i915_gem_object *obj;
> -	int i = 0, pg;
> +	u32 *log;
> +	int i = 0;
>  
> -	if (!dev_priv->guc.log.vma)
> +	if (dev_priv->guc.log.vma)
> +		obj = dev_priv->guc.log.vma->obj;
> +	else if (dev_priv->guc.err_load_log)
> +		obj = dev_priv->guc.err_load_log;
> +	else
>  		return 0;

Since both could be available at the same time, and to be clear which you
are reporting, I think you want a new debugfs entry.
-Chris

Patch
diff mbox

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 870c470..a37ab3b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2544,25 +2544,31 @@  static int i915_guc_log_dump(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct drm_i915_gem_object *obj;
-	int i = 0, pg;
+	u32 *log;
+	int i = 0;
 
-	if (!dev_priv->guc.log.vma)
+	if (dev_priv->guc.log.vma)
+		obj = dev_priv->guc.log.vma->obj;
+	else if (dev_priv->guc.err_load_log)
+		obj = dev_priv->guc.err_load_log;
+	else
 		return 0;
 
-	obj = dev_priv->guc.log.vma->obj;
-	for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
-		u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
-
-		for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
-			seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
-				   *(log + i), *(log + i + 1),
-				   *(log + i + 2), *(log + i + 3));
-
-		kunmap_atomic(log);
+	log = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(log)) {
+		DRM_ERROR("Failed to pin guc_log object\n");
+		return PTR_ERR(log);
 	}
 
+	for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
+		seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+			   *(log + i), *(log + i + 1),
+			   *(log + i + 2), *(log + i + 3));
+
 	seq_putc(m, '\n');
 
+	i915_gem_object_unpin_map(obj);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 452c265..d8c82ac 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1354,6 +1354,9 @@  void i915_driver_unload(struct drm_device *dev)
 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
 	i915_reset_error_state(dev_priv);
 
+	/* release GuC error log (if any) */
+	i915_guc_load_error_log_free(&dev_priv->guc);
+
 	/* Flush any outstanding unpin_work. */
 	drain_workqueue(dev_priv->wq);
 
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 16d3b87..691da42 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -660,3 +660,20 @@  void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
 	guc_log_runtime_destroy(&dev_priv->guc);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 }
+
+void i915_guc_load_error_log_capture(struct intel_guc *guc)
+{
+	if (!guc->log.vma || i915.guc_log_level < 0)
+		return;
+
+	if (!guc->err_load_log)
+		guc->err_load_log = i915_gem_object_get(guc->log.vma->obj);
+
+	return;
+}
+
+void i915_guc_load_error_log_free(struct intel_guc *guc)
+{
+	if (guc->err_load_log)
+		i915_gem_object_put(guc->err_load_log);
+}
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 7fd75ca..d66ffab 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -274,6 +274,7 @@  int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 
 	guc_disable_communication(guc);
 	gen9_reset_guc_interrupts(dev_priv);
+	i915_guc_load_error_log_free(guc);
 
 	/* We need to notify the guc whenever we change the GGTT */
 	i915_ggtt_enable_guc(dev_priv);
@@ -320,11 +321,11 @@  int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 
 	/* Did we succeded or run out of retries? */
 	if (ret)
-		goto err_submission;
+		goto err_log_capture;
 
 	ret = guc_enable_communication(guc);
 	if (ret)
-		goto err_submission;
+		goto err_log_capture;
 
 	intel_guc_auth_huc(dev_priv);
 	if (i915.enable_guc_submission) {
@@ -350,6 +351,8 @@  int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 err_interrupts:
 	guc_disable_communication(guc);
 	gen9_disable_guc_interrupts(dev_priv);
+err_log_capture:
+	i915_guc_load_error_log_capture(guc);
 err_submission:
 	if (i915.enable_guc_submission)
 		i915_guc_submission_fini(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 1e0eecd..c46e1ea 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -210,6 +210,9 @@  struct intel_guc {
 
 	/* GuC's FW specific send function */
 	int (*send)(struct intel_guc *guc, const u32 *data, u32 len);
+
+	/* Log snapshot if GuC errors during load */
+	struct drm_i915_gem_object *err_load_log;
 };
 
 struct intel_huc {
@@ -256,6 +259,8 @@  static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 l
 int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
 void i915_guc_log_register(struct drm_i915_private *dev_priv);
 void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
+void i915_guc_load_error_log_capture(struct intel_guc *guc);
+void i915_guc_load_error_log_free(struct intel_guc *guc);
 
 static inline u32 guc_ggtt_offset(struct i915_vma *vma)
 {