
[1/3] drm/i915/execlists: Reinitialise context image after GPU hang

Message ID 20160930075032.30967-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson Sept. 30, 2016, 7:50 a.m. UTC
On Braswell, at least, we observe that the context image is written in
multiple phases. The first phase clears the register state, which is
then rewritten in a later phase. A GPU reset at just the wrong moment
can interrupt the context update, leaving it corrupt, and our update of
the RING_HEAD
is not sufficient to restart the engine afterwards. To recover, we need
to reset the registers back to their original values. The context state
is lost. What we need is a better mechanism to serialise the reset with
pending flushes from the GPU.

Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 95 +++++++++++++++++++++++-----------------
 1 file changed, 56 insertions(+), 39 deletions(-)
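
In outline, the recovery path described above rebuilds the register
state from scratch and repositions the ring before resubmission. Below
is a condensed sketch of the reset_common_ring() changes in this patch
(the ring head/space bookkeeping and the execlist port handling are
omitted; see the full diff in the reply below):

	static void reset_common_ring(struct intel_engine_cs *engine,
				      struct drm_i915_gem_request *request)
	{
		struct intel_context *ce = &request->ctx->engine[engine->id];

		/* Rewrite the whole register state: the hang may have
		 * interrupted a multi-phase context write, leaving the
		 * image corrupt.
		 */
		execlists_init_reg_state(ce->lrc_reg_state,
					 request->ctx, engine, ce->ring);

		/* RING_START is normally patched in at pin time, so
		 * restore it here as well.
		 */
		ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
			i915_ggtt_offset(ce->ring->vma);

		/* Move the RING_HEAD onto the breadcrumb, past the
		 * hanging batch.
		 */
		ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;

		/* Rewind the tail over the WaIdleLiteRestore padding
		 * (WA_TAIL_DWORDS == 2 dwords, i.e. 8 bytes) so it is
		 * re-emitted when the request is resubmitted.
		 */
		request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32);
	}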

Comments

Mika Kuoppala Oct. 3, 2016, 12:25 p.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> On Braswell, at least, we observe that the context image is written in
> multiple phases. The first phase clears the register state, which is
> then rewritten in a later phase. A GPU reset at just the wrong moment
> can interrupt the context update, leaving it corrupt, and our update of
> the RING_HEAD
> is not sufficient to restart the engine afterwards. To recover, we need
> to reset the registers back to their original values. The context state
> is lost. What we need is a better mechanism to serialise the reset with
> pending flushes from the GPU.
>
> Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 95 +++++++++++++++++++++++-----------------
>  1 file changed, 56 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 2d8eb2eb2b72..d6e762718ff4 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -226,10 +226,16 @@ enum {
>  /* Typical size of the average request (2 pipecontrols and a MI_BB) */
>  #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
>  
> +#define WA_TAIL_DWORDS 2
> +
>  static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>  					    struct intel_engine_cs *engine);
>  static int intel_lr_context_pin(struct i915_gem_context *ctx,
>  				struct intel_engine_cs *engine);
> +static void execlists_init_reg_state(u32 *reg_state,
> +				     struct i915_gem_context *ctx,
> +				     struct intel_engine_cs *engine,
> +				     struct intel_ring *ring);
>  
>  /**
>   * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
> @@ -707,7 +713,6 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
>  {
>  	struct intel_context *ce = &ctx->engine[engine->id];
>  	void *vaddr;
> -	u32 *lrc_reg_state;
>  	int ret;
>  
>  	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
> @@ -726,17 +731,16 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
>  		goto unpin_vma;
>  	}
>  
> -	lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
> -
>  	ret = intel_ring_pin(ce->ring);
>  	if (ret)
>  		goto unpin_map;
>  
>  	intel_lr_context_descriptor_update(ctx, engine);
>  
> -	lrc_reg_state[CTX_RING_BUFFER_START+1] =
> +	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
> +	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
>  		i915_ggtt_offset(ce->ring->vma);
> -	ce->lrc_reg_state = lrc_reg_state;
> +
>  	ce->state->obj->dirty = true;
>  
>  	/* Invalidate GuC TLB. */
> @@ -1284,8 +1288,14 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>  	struct execlist_port *port = engine->execlist_port;
>  	struct intel_context *ce = &request->ctx->engine[engine->id];
>  
> +	execlists_init_reg_state(ce->lrc_reg_state,
> +				 request->ctx, engine, ce->ring);
> +
>  	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
> +	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
> +		i915_ggtt_offset(ce->ring->vma);
>  	ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
> +
>  	request->ring->head = request->postfix;
>  	request->ring->last_retired_head = -1;
>  	intel_ring_update_space(request->ring);
> @@ -1305,6 +1315,9 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>  	GEM_BUG_ON(request->ctx != port[0].request->ctx);
>  	port[0].count = 0;
>  	port[1].count = 0;
> +
> +	/* Reset WaIdleLiteRestore:bdw,skl as well */
> +	request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32);
>  }
>  
>  static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
> @@ -1542,7 +1555,6 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine)
>   * used as a workaround for not being allowed to do lite
>   * restore with HEAD==TAIL (WaIdleLiteRestore).
>   */
> -#define WA_TAIL_DWORDS 2
>  
>  static int gen8_emit_request(struct drm_i915_gem_request *request)
>  {
> @@ -1889,38 +1901,13 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
>  	return indirect_ctx_offset;
>  }
>  
> -static int
> -populate_lr_context(struct i915_gem_context *ctx,
> -		    struct drm_i915_gem_object *ctx_obj,
> -		    struct intel_engine_cs *engine,
> -		    struct intel_ring *ring)
> +static void execlists_init_reg_state(u32 *reg_state,
> +				     struct i915_gem_context *ctx,
> +				     struct intel_engine_cs *engine,
> +				     struct intel_ring *ring)
>  {
> -	struct drm_i915_private *dev_priv = ctx->i915;
> -	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
> -	void *vaddr;
> -	u32 *reg_state;
> -	int ret;
> -
> -	if (!ppgtt)
> -		ppgtt = dev_priv->mm.aliasing_ppgtt;
> -
> -	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
> -	if (ret) {
> -		DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
> -		return ret;
> -	}
> -
> -	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
> -	if (IS_ERR(vaddr)) {
> -		ret = PTR_ERR(vaddr);
> -		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
> -		return ret;
> -	}
> -	ctx_obj->dirty = true;
> -
> -	/* The second page of the context object contains some fields which must
> -	 * be set up prior to the first execution. */
> -	reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
> +	struct drm_i915_private *dev_priv = engine->i915;
> +	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt;
>  
>  	/* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
>  	 * commands followed by (reg, value) pairs. The values we are setting here are
> @@ -1934,7 +1921,7 @@ populate_lr_context(struct i915_gem_context *ctx,
>  		       _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
>  					  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>  					  (HAS_RESOURCE_STREAMER(dev_priv) ?
> -					    CTX_CTRL_RS_CTX_ENABLE : 0)));
> +					   CTX_CTRL_RS_CTX_ENABLE : 0)));
>  	ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(engine->mmio_base),
>  		       0);
>  	ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(engine->mmio_base),
> @@ -1946,7 +1933,7 @@ populate_lr_context(struct i915_gem_context *ctx,
>  		       RING_START(engine->mmio_base), 0);
>  	ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL,
>  		       RING_CTL(engine->mmio_base),
> -		       ((ring->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID);
> +		       (ring->size - PAGE_SIZE) | RING_VALID);

Patch looks good.

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

Not exactly a problem with this patch, but while we are in the
territory:

I would still like the ring->size setting to be accompanied by a
comment about it matching the page shift. I have fallen for it twice
now, so I suspect the next reader will too.
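
For illustration, the RING_CTL assignment might carry a comment (and
optionally an assertion) along these lines. This is a sketch only; the
wording and the GEM_BUG_ON are suggestions, not part of this patch:

	/* RING_CTL encodes the ring length as a page count: the
	 * hardware expects (size - PAGE_SIZE) in the "number of pages"
	 * field, so ring->size must be page aligned (and within the
	 * field's range) for the encoding to be valid.
	 */
	GEM_BUG_ON(!IS_ALIGNED(ring->size, PAGE_SIZE));
	ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL,
		       RING_CTL(engine->mmio_base),
		       (ring->size - PAGE_SIZE) | RING_VALID);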

And, for that matter, the misleading comment

/* It is written to the context image in execlists_update_context() */

in execlists_init_reg_state() should be removed.

Thanks,
-Mika


>  	ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U,
>  		       RING_BBADDR_UDW(engine->mmio_base), 0);
>  	ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L,
> @@ -2024,6 +2011,36 @@ populate_lr_context(struct i915_gem_context *ctx,
>  		ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
>  			       make_rpcs(dev_priv));
>  	}
> +}
> +
> +static int
> +populate_lr_context(struct i915_gem_context *ctx,
> +		    struct drm_i915_gem_object *ctx_obj,
> +		    struct intel_engine_cs *engine,
> +		    struct intel_ring *ring)
> +{
> +	void *vaddr;
> +	int ret;
> +
> +	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
> +	if (ret) {
> +		DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
> +		return ret;
> +	}
> +
> +	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
> +	if (IS_ERR(vaddr)) {
> +		ret = PTR_ERR(vaddr);
> +		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
> +		return ret;
> +	}
> +	ctx_obj->dirty = true;
> +
> +	/* The second page of the context object contains some fields which must
> +	 * be set up prior to the first execution. */
> +
> +	execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
> +				 ctx, engine, ring);
>  
>  	i915_gem_object_unpin_map(ctx_obj);
>  
> -- 
> 2.9.3
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx