diff mbox

[14/20] drm/i915: Forcefully flush GuC log buffer on reset

Message ID 1470983123-22127-15-git-send-email-akash.goel@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

akash.goel@intel.com Aug. 12, 2016, 6:25 a.m. UTC
From: Sagar Arun Kamble <sagar.a.kamble@intel.com>

Before capturing the GuC logs as a part of error state, a force log buffer
flush action should be sent to GuC ahead of proceeding with GPU reset and
re-initializing GuC. There could be some data in the log buffer which is yet
to be captured, and those logs would be particularly useful for understanding
why the GPU reset was initiated.

Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c      |  2 ++
 drivers/gpu/drm/i915/i915_guc_submission.c | 27 +++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_guc.h           |  1 +
 3 files changed, 30 insertions(+)

Comments

Chris Wilson Aug. 12, 2016, 6:33 a.m. UTC | #1
On Fri, Aug 12, 2016 at 11:55:17AM +0530, akash.goel@intel.com wrote:
> From: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> 
> Before capturing the GuC logs as a part of error state, there should be a
> force log buffer flush action sent to GuC before proceeding with GPU reset
> and re-initializing GUC. There could be some data in the log buffer which is
> yet to be captured and those logs would be particularly useful to understand
> that why the GPU reset was initiated.
> 
> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> Signed-off-by: Akash Goel <akash.goel@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gpu_error.c      |  2 ++
>  drivers/gpu/drm/i915/i915_guc_submission.c | 27 +++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_guc.h           |  1 +
>  3 files changed, 30 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 561b523..5e358e2 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1232,6 +1232,8 @@ static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv,
>  	if (!dev_priv->guc.log.obj)
>  		return;
>  
> +	i915_guc_flush_logs(dev_priv);

This is an invalid context for this function, flush_work() is illegal
inside error capture.
-Chris
akash.goel@intel.com Aug. 12, 2016, 7:02 a.m. UTC | #2
On 8/12/2016 12:03 PM, Chris Wilson wrote:
> On Fri, Aug 12, 2016 at 11:55:17AM +0530, akash.goel@intel.com wrote:
>> From: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>>
>> Before capturing the GuC logs as a part of error state, there should be a
>> force log buffer flush action sent to GuC before proceeding with GPU reset
>> and re-initializing GUC. There could be some data in the log buffer which is
>> yet to be captured and those logs would be particularly useful to understand
>> that why the GPU reset was initiated.
>>
>> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>> Signed-off-by: Akash Goel <akash.goel@intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_gpu_error.c      |  2 ++
>>  drivers/gpu/drm/i915/i915_guc_submission.c | 27 +++++++++++++++++++++++++++
>>  drivers/gpu/drm/i915/intel_guc.h           |  1 +
>>  3 files changed, 30 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
>> index 561b523..5e358e2 100644
>> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>> @@ -1232,6 +1232,8 @@ static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv,
>>  	if (!dev_priv->guc.log.obj)
>>  		return;
>>
>> +	i915_guc_flush_logs(dev_priv);
>
> This is an invalid context for this function, flush_work() is illegal
> inside error capture.

Actually, the work item in question should not take long to execute, and
it does not acquire any locks that could cause it to block.

Should there be no wait whatsoever in error capture?
Will have to drop this patch.

Best regards
Akash
> -Chris
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 561b523..5e358e2 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1232,6 +1232,8 @@  static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv,
 	if (!dev_priv->guc.log.obj)
 		return;
 
+	i915_guc_flush_logs(dev_priv);
+
 	error->guc_log_obj = i915_error_ggtt_object_create(dev_priv,
 						dev_priv->guc.log.obj);
 }
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 8e0f360..4a75c16 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -185,6 +185,16 @@  static int host2guc_logbuffer_flush_complete(struct intel_guc *guc)
 	return host2guc_action(guc, data, 1);
 }
 
+static int host2guc_force_logbuffer_flush(struct intel_guc *guc)
+{
+	u32 data[2];
+
+	data[0] = HOST2GUC_ACTION_FORCE_LOG_BUFFER_FLUSH;
+	data[1] = 0;
+
+	return host2guc_action(guc, data, 2);
+}
+
 /*
  * Initialise, update, or clear doorbell data shared with the GuC
  *
@@ -1492,6 +1502,23 @@  void i915_guc_capture_logs(struct drm_i915_private *dev_priv)
 	intel_runtime_pm_put(dev_priv);
 }
 
+void i915_guc_flush_logs(struct drm_i915_private *dev_priv)
+{
+	if (!i915.enable_guc_submission || (i915.guc_log_level < 0))
+		return;
+
+	/* First disable the interrupts, will be renabled afterwards */
+	gen9_disable_guc_interrupts(dev_priv);
+
+	/* Before initiating the forceful flush wait for the pending/ongoing
+	 * flush to complete.
+	 */
+	flush_work(&dev_priv->guc.events_work);
+
+	/* Ask GuC to update the log buffer state */
+	host2guc_force_logbuffer_flush(&dev_priv->guc);
+}
+
 void i915_guc_unregister(struct drm_i915_private *dev_priv)
 {
 	if (!i915.enable_guc_submission)
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index ed87e98..d3a5447 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -183,6 +183,7 @@  int i915_guc_wq_check_space(struct drm_i915_gem_request *rq);
 void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
 void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
 void i915_guc_capture_logs(struct drm_i915_private *dev_priv);
+void i915_guc_flush_logs(struct drm_i915_private *dev_priv);
 void i915_guc_register(struct drm_i915_private *dev_priv);
 void i915_guc_unregister(struct drm_i915_private *dev_priv);