[2/2] drm/i915/execlists: Explicitly reset both reg and context runtime
diff mbox series

Message ID 20200326231810.16852-2-chris@chris-wilson.co.uk
State New
Headers show
Series
  • [1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context
Related show

Commit Message

Chris Wilson March 26, 2020, 11:18 p.m. UTC
Upon a GPU reset, we copy the default context image over the top of the
guilty image. This will roll back the CTX_TIMESTAMP register to before
our value of ce->runtime.last. Reset both back to 0 so that we do not
encounter an underflow on the next schedule out after resume.

This should not be a huge issue in practice, as hangs should be rare in
correct code.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

Comments

Tvrtko Ursulin March 27, 2020, 4:01 p.m. UTC | #1
On 26/03/2020 23:18, Chris Wilson wrote:
> Upon a GPU reset, we copy the default context image over top of the
> guilty image. This will rollback the CTX_TIMESTAMP register to before
> our value of ce->runtime.last. Reset both back to 0 so that we do not
> encounter an underflow on the next schedule out after resume.
> 
> This should not be a huge issue in practice, as hangs should be rare in
> correct code.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c | 24 +++++++++++++-----------
>   1 file changed, 13 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 4edda15eba26..47cec545a069 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -238,6 +238,17 @@ __execlists_update_reg_state(const struct intel_context *ce,
>   			     const struct intel_engine_cs *engine,
>   			     u32 head);
>   
> +static u32 intel_context_get_runtime(const struct intel_context *ce)
> +{
> +	/*
> +	 * We can use either ppHWSP[16] which is recorded before the context
> +	 * switch (and so excludes the cost of context switches) or use the
> +	 * value from the context image itself, which is saved/restored earlier
> +	 * and so includes the cost of the save.
> +	 */
> +	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
> +}
> +
>   static void mark_eio(struct i915_request *rq)
>   {
>   	if (i915_request_completed(rq))
> @@ -1154,6 +1165,7 @@ static void restore_default_state(struct intel_context *ce,
>   		       engine->context_size - PAGE_SIZE);
>   
>   	execlists_init_reg_state(regs, ce, engine, ce->ring, false);
> +	ce->runtime.last = intel_context_get_runtime(ce);
>   }
>   
>   static void reset_active(struct i915_request *rq,
> @@ -1195,17 +1207,6 @@ static void reset_active(struct i915_request *rq,
>   	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
>   }
>   
> -static u32 intel_context_get_runtime(const struct intel_context *ce)
> -{
> -	/*
> -	 * We can use either ppHWSP[16] which is recorded before the context
> -	 * switch (and so excludes the cost of context switches) or use the
> -	 * value from the context image itself, which is saved/restored earlier
> -	 * and so includes the cost of the save.
> -	 */
> -	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
> -}
> -
>   static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
>   {
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> @@ -4581,6 +4582,7 @@ static void init_common_reg_state(u32 * const regs,
>   	regs[CTX_CONTEXT_CONTROL] = ctl;
>   
>   	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
> +	regs[CTX_TIMESTAMP] = 0;
>   }
>   
>   static void init_wa_bb_reg_state(u32 * const regs,
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 4edda15eba26..47cec545a069 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -238,6 +238,17 @@  __execlists_update_reg_state(const struct intel_context *ce,
 			     const struct intel_engine_cs *engine,
 			     u32 head);
 
+static u32 intel_context_get_runtime(const struct intel_context *ce)
+{
+	/*
+	 * We can use either ppHWSP[16] which is recorded before the context
+	 * switch (and so excludes the cost of context switches) or use the
+	 * value from the context image itself, which is saved/restored earlier
+	 * and so includes the cost of the save.
+	 */
+	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+}
+
 static void mark_eio(struct i915_request *rq)
 {
 	if (i915_request_completed(rq))
@@ -1154,6 +1165,7 @@  static void restore_default_state(struct intel_context *ce,
 		       engine->context_size - PAGE_SIZE);
 
 	execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+	ce->runtime.last = intel_context_get_runtime(ce);
 }
 
 static void reset_active(struct i915_request *rq,
@@ -1195,17 +1207,6 @@  static void reset_active(struct i915_request *rq,
 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
 }
 
-static u32 intel_context_get_runtime(const struct intel_context *ce)
-{
-	/*
-	 * We can use either ppHWSP[16] which is recorded before the context
-	 * switch (and so excludes the cost of context switches) or use the
-	 * value from the context image itself, which is saved/restored earlier
-	 * and so includes the cost of the save.
-	 */
-	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
-}
-
 static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
 {
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
@@ -4581,6 +4582,7 @@  static void init_common_reg_state(u32 * const regs,
 	regs[CTX_CONTEXT_CONTROL] = ctl;
 
 	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+	regs[CTX_TIMESTAMP] = 0;
 }
 
 static void init_wa_bb_reg_state(u32 * const regs,