diff mbox

[14/19] drm/i915: Forcefully flush GuC log buffer on reset

Message ID 1471596198-30748-15-git-send-email-akash.goel@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

akash.goel@intel.com Aug. 19, 2016, 8:43 a.m. UTC
From: Sagar Arun Kamble <sagar.a.kamble@intel.com>

Before capturing the GuC logs as a part of error state, there should be a
force log buffer flush action sent to GuC before proceeding with GPU reset
and re-initializing GUC. There could be some data in the log buffer which
is yet to be captured and those logs would be particularly useful to
understand that why the GPU reset was initiated.

v2:
- Avoid the wait via flush_work, to serialize against an ongoing log
  buffer flush, from the error state capture path. (Chris)
- Rebase.

Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Signed-off-by: Akash Goel <akash.goel@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c      |  2 ++
 drivers/gpu/drm/i915/i915_guc_submission.c | 30 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_guc.h           |  1 +
 3 files changed, 33 insertions(+)

Comments

Chris Wilson Aug. 19, 2016, 6:10 p.m. UTC | #1
On Fri, Aug 19, 2016 at 02:13:13PM +0530, akash.goel@intel.com wrote:
> From: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> 
> Before capturing the GuC logs as a part of error state, there should be a
> force log buffer flush action sent to GuC before proceeding with GPU reset
> and re-initializing GUC. There could be some data in the log buffer which
> is yet to be captured and those logs would be particularly useful to
> understand that why the GPU reset was initiated.

There's no point if we can't wait for any writes to complete, so just take
the snapshot of the log at the time of the hang.

> +void i915_guc_flush_logs(struct drm_i915_private *dev_priv, bool can_wait)
> +{
> +	if (!i915.enable_guc_submission || (i915.guc_log_level < 0))
> +		return;
> +
> +	/* First disable the interrupts, will be renabled afterwards */
> +	gen9_disable_guc_interrupts(dev_priv);

calls synchronize_irq() which is also illegal from the atomic context of
error capture.
-Chris
akash.goel@intel.com Aug. 20, 2016, 6:49 a.m. UTC | #2
On 8/19/2016 11:40 PM, Chris Wilson wrote:
> On Fri, Aug 19, 2016 at 02:13:13PM +0530, akash.goel@intel.com wrote:
>> From: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>>
>> Before capturing the GuC logs as a part of error state, there should be a
>> force log buffer flush action sent to GuC before proceeding with GPU reset
>> and re-initializing GUC. There could be some data in the log buffer which
>> is yet to be captured and those logs would be particularly useful to
>> understand that why the GPU reset was initiated.
>
> There's no point if we can't wait for any writes to complete, so just take
> the snapshot of the log at the time of the hang.
>


>> +void i915_guc_flush_logs(struct drm_i915_private *dev_priv, bool can_wait)
>> +{
>> +	if (!i915.enable_guc_submission || (i915.guc_log_level < 0))
>> +		return;
>> +
>> +	/* First disable the interrupts, will be renabled afterwards */
>> +	gen9_disable_guc_interrupts(dev_priv);
>
> calls synchronize_irq() which is also illegal from the atomic context of
> error capture.

Fine, will not call gen9_disable_guc_interrupts, just like flush_work, 
from the error state capture path.

But I feel it could still be useful to invoke 
host2guc_force_logbuffer_flush().

Best regards
Akash
> -Chris
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 94297aa..b73c671 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1301,6 +1301,8 @@  static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv,
 	if (!dev_priv->guc.log.vma || (i915.guc_log_level < 0))
 		return;
 
+	i915_guc_flush_logs(dev_priv, false);
+
 	error->guc_log = i915_error_object_create(dev_priv,
 						  dev_priv->guc.log.vma);
 }
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 27b4047..7ce586d 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -185,6 +185,16 @@  static int host2guc_logbuffer_flush_complete(struct intel_guc *guc)
 	return host2guc_action(guc, data, 1);
 }
 
+static int host2guc_force_logbuffer_flush(struct intel_guc *guc)
+{
+	u32 data[2];
+
+	data[0] = HOST2GUC_ACTION_FORCE_LOG_BUFFER_FLUSH;
+	data[1] = 0;
+
+	return host2guc_action(guc, data, 2);
+}
+
 /*
  * Initialise, update, or clear doorbell data shared with the GuC
  *
@@ -1527,6 +1537,26 @@  void i915_guc_capture_logs(struct drm_i915_private *dev_priv)
 	intel_runtime_pm_put(dev_priv);
 }
 
+void i915_guc_flush_logs(struct drm_i915_private *dev_priv, bool can_wait)
+{
+	if (!i915.enable_guc_submission || (i915.guc_log_level < 0))
+		return;
+
+	/* First disable the interrupts, will be renabled afterwards */
+	gen9_disable_guc_interrupts(dev_priv);
+
+	/* Before initiating the forceful flush, wait for any pending/ongoing
+	 * flush to complete otherwise forceful flush may not happen, but wait
+	 * can't be done for some paths like error state capture in which case
+	 * take a chance & directly attempt the forceful flush.
+	 */
+	if (can_wait)
+		flush_work(&dev_priv->guc.log.flush_work);
+
+	/* Ask GuC to update the log buffer state */
+	host2guc_force_logbuffer_flush(&dev_priv->guc);
+}
+
 void i915_guc_unregister(struct drm_i915_private *dev_priv)
 {
 	if (!i915.enable_guc_submission)
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 8598f38..d7eda42 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -182,6 +182,7 @@  int i915_guc_wq_check_space(struct drm_i915_gem_request *rq);
 void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
 void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
 void i915_guc_capture_logs(struct drm_i915_private *dev_priv);
+void i915_guc_flush_logs(struct drm_i915_private *dev_priv, bool can_wait);
 void i915_guc_register(struct drm_i915_private *dev_priv);
 void i915_guc_unregister(struct drm_i915_private *dev_priv);