[3/5] drm/i915: Move engine reset prepare/finish to backends

Message ID 20180320001848.4405-3-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson March 20, 2018, 12:18 a.m. UTC
In preparation for more carefully handling incomplete preemption during
reset by execlists, we move the existing code wholesale to the backends
under a couple of new reset vfuncs.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Jeff McGee <jeff.mcgee@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c         | 42 ++++----------------------
 drivers/gpu/drm/i915/intel_lrc.c        | 52 +++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.c | 20 +++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |  9 ++++--
 4 files changed, 78 insertions(+), 45 deletions(-)
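
For orientation before reading the hunks, the sketch below condenses the new
per-backend interface and the order in which the common reset path drives it;
it is assembled from the diff that follows and adds nothing beyond it.

	/* Hooks added to struct intel_engine_cs in intel_ringbuffer.h */
	struct {
		/* stop submission and return the active (possibly hung) request */
		struct i915_request *(*prepare)(struct intel_engine_cs *engine);
		/* set up the CS to resume from the breadcrumb of @rq */
		void (*reset)(struct intel_engine_cs *engine,
			      struct i915_request *rq);
		/* re-enable submission once the reset is complete */
		void (*finish)(struct intel_engine_cs *engine);
	} reset;

	/*
	 * Driven by the common reset path in i915_gem.c, in this order:
	 *   i915_gem_reset_prepare_engine() -> engine->reset.prepare()
	 *   i915_gem_reset_engine()         -> engine->reset.reset()
	 *   i915_gem_reset_finish_engine()  -> engine->reset.finish()
	 *
	 * Execlists implements these as execlists_reset_prepare(),
	 * execlists_reset() and execlists_reset_finish(); the legacy
	 * ringbuffer as reset_prepare(), reset_ring() and reset_finish().
	 */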

Comments

jeff.mcgee@intel.com March 22, 2018, 3:28 p.m. UTC | #1
On Tue, Mar 20, 2018 at 12:18:46AM +0000, Chris Wilson wrote:
> In preparation for more carefully handling incomplete preemption during
> reset by execlists, we move the existing code wholesale to the backends
> under a couple of new reset vfuncs.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Jeff McGee <jeff.mcgee@intel.com>

Reviewed-by: Jeff McGee <jeff.mcgee@intel.com>

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 802df8e1a544..38f7160d99c9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2917,7 +2917,7 @@  static bool engine_stalled(struct intel_engine_cs *engine)
 struct i915_request *
 i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
 {
-	struct i915_request *request = NULL;
+	struct i915_request *request;
 
 	/*
 	 * During the reset sequence, we must prevent the engine from
@@ -2940,40 +2940,7 @@  i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
 	 */
 	kthread_park(engine->breadcrumbs.signaler);
 
-	/*
-	 * Prevent request submission to the hardware until we have
-	 * completed the reset in i915_gem_reset_finish(). If a request
-	 * is completed by one engine, it may then queue a request
-	 * to a second via its execlists->tasklet *just* as we are
-	 * calling engine->init_hw() and also writing the ELSP.
-	 * Turning off the execlists->tasklet until the reset is over
-	 * prevents the race.
-	 *
-	 * Note that this needs to be a single atomic operation on the
-	 * tasklet (flush existing tasks, prevent new tasks) to prevent
-	 * a race between reset and set-wedged. It is not, so we do the best
-	 * we can atm and make sure we don't lock the machine up in the more
-	 * common case of recursively being called from set-wedged from inside
-	 * i915_reset.
-	 */
-	if (!atomic_read(&engine->execlists.tasklet.count))
-		tasklet_kill(&engine->execlists.tasklet);
-	tasklet_disable(&engine->execlists.tasklet);
-
-	/*
-	 * We're using worker to queue preemption requests from the tasklet in
-	 * GuC submission mode.
-	 * Even though tasklet was disabled, we may still have a worker queued.
-	 * Let's make sure that all workers scheduled before disabling the
-	 * tasklet are completed before continuing with the reset.
-	 */
-	if (engine->i915->guc.preempt_wq)
-		flush_workqueue(engine->i915->guc.preempt_wq);
-
-	if (engine->irq_seqno_barrier)
-		engine->irq_seqno_barrier(engine);
-
-	request = i915_gem_find_active_request(engine);
+	request = engine->reset.prepare(engine);
 	if (request && request->fence.error == -EIO)
 		request = ERR_PTR(-EIO); /* Previous reset failed! */
 
@@ -3120,7 +3087,7 @@  void i915_gem_reset_engine(struct intel_engine_cs *engine,
 	}
 
 	/* Setup the CS to resume from the breadcrumb of the hung request */
-	engine->reset_hw(engine, request);
+	engine->reset.reset(engine, request);
 }
 
 void i915_gem_reset(struct drm_i915_private *dev_priv)
@@ -3172,7 +3139,8 @@  void i915_gem_reset(struct drm_i915_private *dev_priv)
 
 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
 {
-	tasklet_enable(&engine->execlists.tasklet);
+	engine->reset.finish(engine);
+
 	kthread_unpark(engine->breadcrumbs.signaler);
 
 	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0bfaeb56b8c7..f662a9524233 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1663,6 +1663,44 @@  static int gen9_init_render_ring(struct intel_engine_cs *engine)
 	return init_workarounds_ring(engine);
 }
 
+static struct i915_request *
+execlists_reset_prepare(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+
+	/*
+	 * Prevent request submission to the hardware until we have
+	 * completed the reset in i915_gem_reset_finish(). If a request
+	 * is completed by one engine, it may then queue a request
+	 * to a second via its execlists->tasklet *just* as we are
+	 * calling engine->init_hw() and also writing the ELSP.
+	 * Turning off the execlists->tasklet until the reset is over
+	 * prevents the race.
+	 *
+	 * Note that this needs to be a single atomic operation on the
+	 * tasklet (flush existing tasks, prevent new tasks) to prevent
+	 * a race between reset and set-wedged. It is not, so we do the best
+	 * we can atm and make sure we don't lock the machine up in the more
+	 * common case of recursively being called from set-wedged from inside
+	 * i915_reset.
+	 */
+	if (!atomic_read(&execlists->tasklet.count))
+		tasklet_kill(&execlists->tasklet);
+	tasklet_disable(&execlists->tasklet);
+
+	/*
+	 * We're using worker to queue preemption requests from the tasklet in
+	 * GuC submission mode.
+	 * Even though tasklet was disabled, we may still have a worker queued.
+	 * Let's make sure that all workers scheduled before disabling the
+	 * tasklet are completed before continuing with the reset.
+	 */
+	if (engine->i915->guc.preempt_wq)
+		flush_workqueue(engine->i915->guc.preempt_wq);
+
+	return i915_gem_find_active_request(engine);
+}
+
 static void reset_irq(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -1692,8 +1730,8 @@  static void reset_irq(struct intel_engine_cs *engine)
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 }
 
-static void reset_common_ring(struct intel_engine_cs *engine,
-			      struct i915_request *request)
+static void execlists_reset(struct intel_engine_cs *engine,
+			    struct i915_request *request)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct intel_context *ce;
@@ -1766,6 +1804,11 @@  static void reset_common_ring(struct intel_engine_cs *engine,
 	unwind_wa_tail(request);
 }
 
+static void execlists_reset_finish(struct intel_engine_cs *engine)
+{
+	tasklet_enable(&engine->execlists.tasklet);
+}
+
 static int intel_logical_ring_emit_pdps(struct i915_request *rq)
 {
 	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
@@ -2090,7 +2133,10 @@  logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 {
 	/* Default vfuncs which can be overriden by each engine. */
 	engine->init_hw = gen8_init_common_ring;
-	engine->reset_hw = reset_common_ring;
+
+	engine->reset.prepare = execlists_reset_prepare;
+	engine->reset.reset = execlists_reset;
+	engine->reset.finish = execlists_reset_finish;
 
 	engine->context_pin = execlists_context_pin;
 	engine->context_unpin = execlists_context_unpin;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 04d9d9a946a7..eebcc877ef60 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -530,8 +530,16 @@  static int init_ring_common(struct intel_engine_cs *engine)
 	return ret;
 }
 
-static void reset_ring_common(struct intel_engine_cs *engine,
-			      struct i915_request *request)
+static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
+{
+	if (engine->irq_seqno_barrier)
+		engine->irq_seqno_barrier(engine);
+
+	return i915_gem_find_active_request(engine);
+}
+
+static void reset_ring(struct intel_engine_cs *engine,
+		       struct i915_request *request)
 {
 	/*
 	 * RC6 must be prevented until the reset is complete and the engine
@@ -595,6 +603,10 @@  static void reset_ring_common(struct intel_engine_cs *engine,
 	}
 }
 
+static void reset_finish(struct intel_engine_cs *engine)
+{
+}
+
 static int intel_rcs_ctx_init(struct i915_request *rq)
 {
 	int ret;
@@ -1987,7 +1999,9 @@  static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	intel_ring_init_semaphores(dev_priv, engine);
 
 	engine->init_hw = init_ring_common;
-	engine->reset_hw = reset_ring_common;
+	engine->reset.prepare = reset_prepare;
+	engine->reset.reset = reset_ring;
+	engine->reset.finish = reset_finish;
 
 	engine->context_pin = intel_ring_context_pin;
 	engine->context_unpin = intel_ring_context_unpin;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 1f50727a5ddb..e2681303ce21 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -418,8 +418,13 @@  struct intel_engine_cs {
 	void		(*irq_disable)(struct intel_engine_cs *engine);
 
 	int		(*init_hw)(struct intel_engine_cs *engine);
-	void		(*reset_hw)(struct intel_engine_cs *engine,
-				    struct i915_request *rq);
+
+	struct {
+		struct i915_request *(*prepare)(struct intel_engine_cs *engine);
+		void (*reset)(struct intel_engine_cs *engine,
+			      struct i915_request *rq);
+		void (*finish)(struct intel_engine_cs *engine);
+	} reset;
 
 	void		(*park)(struct intel_engine_cs *engine);
 	void		(*unpark)(struct intel_engine_cs *engine);