diff mbox

drm/i915: Park before resetting the submission backend

Message ID 20180409101819.5572-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson April 9, 2018, 10:18 a.m. UTC
As different backends may have different park/unpark callbacks, we
should only ever switch backends (reset_default_submission on wedge
recovery, or on enabling the guc) while parked.

v2: Remove the assert from the guc code, as we are currently trying to
modify the engine vfuncs pointer on a live system after reset (not just
wedging). We will just have to hope that the system is balanced.
v3: Rebase onto __i915_gem_park and improve grammar.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c        | 15 ++++++++++++---
 drivers/gpu/drm/i915/intel_engine_cs.c |  3 +++
 2 files changed, 15 insertions(+), 3 deletions(-)

Comments

sagar.a.kamble@intel.com April 9, 2018, 10:38 a.m. UTC | #1
On 4/9/2018 3:48 PM, Chris Wilson wrote:
> As different backends may have different park/unpark callbacks, we
> should only ever switch backends (reset_default_submission on wedge
> recovery, or on enabling the guc) while parked.
>
> v2: Remove the assert from the guc code, as we are currently trying to
> modify the engine vfuncs pointer on a live system after reset (not just
> wedging). We will just have to hope that the system is balanced.
> v3: Rebase onto __i915_gem_park and improve grammar.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c        | 15 ++++++++++++---
>   drivers/gpu/drm/i915/intel_engine_cs.c |  3 +++
>   2 files changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 28ab0beff86c..dd3e292ba243 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -144,8 +144,6 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
>   	if (!i915->gt.awake)
>   		return I915_EPOCH_INVALID;
>   
> -	GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID);
> -
>   	/*
>   	 * Be paranoid and flush a concurrent interrupt to make sure
>   	 * we don't reactivate any irq tasklets after parking.
> @@ -173,6 +171,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
>   
>   	intel_runtime_pm_put(i915);
>   
> +	GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID);
>   	return i915->gt.epoch;
>   }
>   
> @@ -3435,7 +3434,17 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>   		}
>   	}
>   	i915_retire_requests(i915);
> -	GEM_BUG_ON(i915->gt.active_requests);
> +
> +	/*
> +	 * Park before disengaging the old submit mechanism as different
> +	 * backends may have different park/unpack callbacks.
> +	 *
> +	 * We are idle; the idle-worker will be queued, but we need to run
> +	 * it now. As we already hold the struct mutex, we can park the GPU
> +	 * right away, letting the lazy worker see that we are already active
> +	 * again by the time it acquires the mutex.
> +	 */
> +	__i915_gem_park(i915);
>   
>   	/*
>   	 * Undo nop_submit_request. We prevent all new i915 requests from
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 12486d8f534b..b4ea77a2896c 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1651,6 +1651,9 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
>   	struct intel_engine_cs *engine;
>   	enum intel_engine_id id;
>   
> +	/* Must be parked first! */
> +	GEM_BUG_ON(i915->gt.awake);
> +
>   	for_each_engine(engine, i915, id)
>   		engine->set_default_submission(engine);
>   }
Chris Wilson April 9, 2018, 10:41 a.m. UTC | #2
Quoting Sagar Arun Kamble (2018-04-09 11:38:34)
> 
> 
> On 4/9/2018 3:48 PM, Chris Wilson wrote:
> > As different backends may have different park/unpark callbacks, we
> > should only ever switch backends (reset_default_submission on wedge
> > recovery, or on enabling the guc) while parked.
> >
> > v2: Remove the assert from the guc code, as we are currently trying to
> > modify the engine vfuncs pointer on a live system after reset (not just
> > wedging). We will just have to hope that the system is balanced.
> > v3: Rebase onto __i915_gem_park and improve grammar.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> > Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>

Michal, do you want to take this and merge it into your series?
-Chris
Michal Wajdeczko April 9, 2018, 10:49 a.m. UTC | #3
On Mon, 09 Apr 2018 12:41:22 +0200, Chris Wilson  
<chris@chris-wilson.co.uk> wrote:

> Quoting Sagar Arun Kamble (2018-04-09 11:38:34)
>>
>>
>> On 4/9/2018 3:48 PM, Chris Wilson wrote:
>> > As different backends may have different park/unpark callbacks, we
>> > should only ever switch backends (reset_default_submission on wedge
>> > recovery, or on enabling the guc) while parked.
>> >
>> > v2: Remove the assert from the guc code, as we are currently trying to
>> > modify the engine vfuncs pointer on a live system after reset (not  
>> just
>> > wedging). We will just have to hope that the system is balanced.
>> > v3: Rebase onto __i915_gem_park and improve grammar.
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
>> > Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>
> Michal, do you want to take this and merge it into your series?

Yes, I can take it.
Then we will see results from CI.IGT with GuC enabled.

/Michal

ps. this patch is also

Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 28ab0beff86c..dd3e292ba243 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -144,8 +144,6 @@  static u32 __i915_gem_park(struct drm_i915_private *i915)
 	if (!i915->gt.awake)
 		return I915_EPOCH_INVALID;
 
-	GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID);
-
 	/*
 	 * Be paranoid and flush a concurrent interrupt to make sure
 	 * we don't reactivate any irq tasklets after parking.
@@ -173,6 +171,7 @@  static u32 __i915_gem_park(struct drm_i915_private *i915)
 
 	intel_runtime_pm_put(i915);
 
+	GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID);
 	return i915->gt.epoch;
 }
 
@@ -3435,7 +3434,17 @@  bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 		}
 	}
 	i915_retire_requests(i915);
-	GEM_BUG_ON(i915->gt.active_requests);
+
+	/*
+	 * Park before disengaging the old submit mechanism as different
+	 * backends may have different park/unpack callbacks.
+	 *
+	 * We are idle; the idle-worker will be queued, but we need to run
+	 * it now. As we already hold the struct mutex, we can park the GPU
+	 * right away, letting the lazy worker see that we are already active
+	 * again by the time it acquires the mutex.
+	 */
+	__i915_gem_park(i915);
 
 	/*
 	 * Undo nop_submit_request. We prevent all new i915 requests from
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 12486d8f534b..b4ea77a2896c 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1651,6 +1651,9 @@  void intel_engines_reset_default_submission(struct drm_i915_private *i915)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
+	/* Must be parked first! */
+	GEM_BUG_ON(i915->gt.awake);
+
 	for_each_engine(engine, i915, id)
 		engine->set_default_submission(engine);
 }