
[2/2] drm/i915: Track the previous pinned context inside the request

Message ID: 1461070748-767-3-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State: New, archived

Commit Message

Chris Wilson April 19, 2016, 12:59 p.m. UTC
As the contexts are accessed by the hardware until the switch is completed
to a new context, the hardware may still be writing to the context object
after the breadcrumb is visible. We must not unpin/unbind/prune that
object whilst still active and so we keep the previous context pinned until
the following request. If we move this tracking onto the request, we can
simplify the code and treat execlists/GuC dispatch identically.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h  | 11 +++++++++++
 drivers/gpu/drm/i915/i915_gem.c  |  8 ++++----
 drivers/gpu/drm/i915/intel_lrc.c | 17 ++++++++---------
 3 files changed, 23 insertions(+), 13 deletions(-)

Comments

Tvrtko Ursulin April 20, 2016, 2:08 p.m. UTC | #1
On 19/04/16 13:59, Chris Wilson wrote:
> As the contexts are accessed by the hardware until the switch is completed
> to a new context, the hardware may still be writing to the context object
> after the breadcrumb is visible. We must not unpin/unbind/prune that
> object whilst still active and so we keep the previous context pinned until
> the following request. If we move this tracking onto the request, we can
> simplify the code and treat execlists/GuC dispatch identically.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h  | 11 +++++++++++
>   drivers/gpu/drm/i915/i915_gem.c  |  8 ++++----
>   drivers/gpu/drm/i915/intel_lrc.c | 17 ++++++++---------
>   3 files changed, 23 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c59b2670cc36..be98e9643072 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2302,6 +2302,17 @@ struct drm_i915_gem_request {
>   	struct intel_context *ctx;
>   	struct intel_ringbuffer *ringbuf;
>
> +	/**
> +	 * Context related to the previous request.
> +	 * As the contexts are accessed by the hardware until the switch is
> +	 * completed to a new context, the hardware may still be writing
> +	 * to the context object after the breadcrumb is visible. We must
> +	 * not unpin/unbind/prune that object whilst still active and so
> +	 * we keep the previous context pinned until the following (this)
> +	 * request is retired.
> +	 */
> +	struct intel_context *previous_context;
> +
>   	/** Batch buffer related to this request if any (used for
>   	    error state dump only) */
>   	struct drm_i915_gem_object *batch_obj;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 9b4854a17264..537aacfda3eb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1413,13 +1413,13 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>   	list_del_init(&request->list);
>   	i915_gem_request_remove_from_client(request);
>
> -	if (request->ctx) {
> +	if (request->previous_context) {
>   		if (i915.enable_execlists)
> -			intel_lr_context_unpin(request->ctx, request->engine);
> -
> -		i915_gem_context_unreference(request->ctx);
> +			intel_lr_context_unpin(request->previous_context,
> +					       request->engine);
>   	}
>
> +	i915_gem_context_unreference(request->ctx);
>   	i915_gem_request_unreference(request);
>   }
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index ee4e9bb80042..06e013293ec6 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -590,7 +590,6 @@ static void execlists_context_queue(struct drm_i915_gem_request *request)
>   	struct drm_i915_gem_request *cursor;
>   	int num_elements = 0;
>
> -	intel_lr_context_pin(request->ctx, request->engine);

I really really think this must go in a separate, subsequent patch.

Both from the conceptual side, so that this patch only extends pinning
rather than limiting it; and from the POV that there is a bug unless a
patch like the one I pasted in yesterday is inserted between them
("drm/i915: Store LRC hardware id in the context"; note the summary is
wrong, it stores the id in requests rather than contexts, so I will
have to rename it).

Otherwise the head_req->ctx access in execlists_check_remove_request is
a use-after-free. And I can demonstrate that easily via gem-close-race.
Put a WARN_ON(atomic_read(&head_req->ctx->ref.refcount) == 0); in there
and see. :)
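
For reference, roughly where that check would sit. This is only a
sketch: the surrounding body is an approximation of
execlists_check_remove_request as it looks around this point, and the
intel_execlists_ctx_id() arguments may differ in your tree.

static bool execlists_check_remove_request(struct intel_engine_cs *engine,
					   u32 request_id)
{
	struct drm_i915_gem_request *head_req;

	assert_spin_locked(&engine->execlist_lock);

	head_req = list_first_entry_or_null(&engine->execlist_queue,
					    struct drm_i915_gem_request,
					    execlist_link);
	if (!head_req)
		return false;

	/* Debug check: if the context was unpinned and released while the
	 * request was still on the execlist queue, the dereference of
	 * head_req->ctx below is a use-after-free.
	 */
	WARN_ON(atomic_read(&head_req->ctx->ref.refcount) == 0);

	if (intel_execlists_ctx_id(head_req->ctx, engine) != request_id)
		return false;

	/* ... rest unchanged: account for elsp_submitted and move the
	 * request onto the retired list ...
	 */
	return true;
}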

What I think happens is that with two submission ports, we can get two
context completions aggregated into one interrupt which arrives after
the seqnos for both have been consumed by GEM and the LRCs unpinned.

But with your persistent ctx hw id patches, I think it is fine to do
this, including the complete elimination of the execlist retired queue.

You can just drop the two chunks from the patch and I will follow up
with two patches to finish it all off.

>   	i915_gem_request_reference(request);
>
>   	spin_lock_bh(&engine->execlist_lock);
> @@ -788,12 +787,14 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>   	if (intel_engine_stopped(engine))
>   		return 0;
>
> -	if (engine->last_context != request->ctx) {
> -		if (engine->last_context)
> -			intel_lr_context_unpin(engine->last_context, engine);
> -		intel_lr_context_pin(request->ctx, engine);
> -		engine->last_context = request->ctx;
> -	}
> +	/* We keep the previous context alive until we retire the following
> +	 * request. This ensures that the context object is still pinned
> +	 * for any residual writes the HW makes into it on the context switch
> +	 * into the next object following the breadcrumb. Otherwise, we may
> +	 * retire the context too early.
> +	 */
> +	request->previous_context = engine->last_context;
> +	engine->last_context = request->ctx;
>
>   	if (dev_priv->guc.execbuf_client)
>   		i915_guc_submit(dev_priv->guc.execbuf_client, request);
> @@ -1015,8 +1016,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *engine)
>   	spin_unlock_bh(&engine->execlist_lock);
>
>   	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> -		intel_lr_context_unpin(req->ctx, engine);
> -
>   		list_del(&req->execlist_link);
>   		i915_gem_request_unreference(req);
>   	}
>

Regards,

Tvrtko
Chris Wilson April 20, 2016, 2:18 p.m. UTC | #2
On Wed, Apr 20, 2016 at 03:08:19PM +0100, Tvrtko Ursulin wrote:
> 
> On 19/04/16 13:59, Chris Wilson wrote:
> >As the contexts are accessed by the hardware until the switch is completed
> >to a new context, the hardware may still be writing to the context object
> >after the breadcrumb is visible. We must not unpin/unbind/prune that
> >object whilst still active and so we keep the previous context pinned until
> >the following request. If we move this tracking onto the request, we can
> >simplify the code and treat execlists/GuC dispatch identically.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> >  drivers/gpu/drm/i915/i915_drv.h  | 11 +++++++++++
> >  drivers/gpu/drm/i915/i915_gem.c  |  8 ++++----
> >  drivers/gpu/drm/i915/intel_lrc.c | 17 ++++++++---------
> >  3 files changed, 23 insertions(+), 13 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index c59b2670cc36..be98e9643072 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -2302,6 +2302,17 @@ struct drm_i915_gem_request {
> >  	struct intel_context *ctx;
> >  	struct intel_ringbuffer *ringbuf;
> >
> >+	/**
> >+	 * Context related to the previous request.
> >+	 * As the contexts are accessed by the hardware until the switch is
> >+	 * completed to a new context, the hardware may still be writing
> >+	 * to the context object after the breadcrumb is visible. We must
> >+	 * not unpin/unbind/prune that object whilst still active and so
> >+	 * we keep the previous context pinned until the following (this)
> >+	 * request is retired.
> >+	 */
> >+	struct intel_context *previous_context;
> >+
> >  	/** Batch buffer related to this request if any (used for
> >  	    error state dump only) */
> >  	struct drm_i915_gem_object *batch_obj;
> >diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >index 9b4854a17264..537aacfda3eb 100644
> >--- a/drivers/gpu/drm/i915/i915_gem.c
> >+++ b/drivers/gpu/drm/i915/i915_gem.c
> >@@ -1413,13 +1413,13 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> >  	list_del_init(&request->list);
> >  	i915_gem_request_remove_from_client(request);
> >
> >-	if (request->ctx) {
> >+	if (request->previous_context) {
> >  		if (i915.enable_execlists)
> >-			intel_lr_context_unpin(request->ctx, request->engine);
> >-
> >-		i915_gem_context_unreference(request->ctx);
> >+			intel_lr_context_unpin(request->previous_context,
> >+					       request->engine);
> >  	}
> >
> >+	i915_gem_context_unreference(request->ctx);
> >  	i915_gem_request_unreference(request);
> >  }
> >
> >diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >index ee4e9bb80042..06e013293ec6 100644
> >--- a/drivers/gpu/drm/i915/intel_lrc.c
> >+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >@@ -590,7 +590,6 @@ static void execlists_context_queue(struct drm_i915_gem_request *request)
> >  	struct drm_i915_gem_request *cursor;
> >  	int num_elements = 0;
> >
> >-	intel_lr_context_pin(request->ctx, request->engine);
> 
> I really really think this must go in a separate, subsequent patch.
> 
> Both from the conceptual side, leaving this patch to just extend
> pinning, not limit it; and from the POV that there is a bug unless a
> patch like mine which I pasted in yesterday is inserted between them
> ("drm/i915: Store LRC hardware id in the context", note the summary
> is wrong, it is storing in requests not contexts so I have to rename
> it).
> 
> Otherwise execlists_check_remove_request when accessing
> head_req->ctx is use after free. And I can demonstrate that easily
> via gem-close-race. Put a
> WARN_ON(atomic_read(&head_req->ctx->ref.refcount) == 0); and see. :)

Oh, I don't have those racy accesses in my tree.
 
> What I think happens is that with two submission ports, we can get
> two context completions aggregated in an interrupt which comes after
> the seqno for both has been consumed by GEM and so LRCs unpinned.
> 
> But with your persistent ctx hw id patches, I think the course is
> fine to do this including the complete elimination of the execlist
> retired queue.
> 
> You can just drop the two chunks for the patch and I will follow up
> with two patches to finish it all off.

Or I could bring some more patches forward ;)
-Chris
Chris Wilson April 20, 2016, 2:22 p.m. UTC | #3
On Wed, Apr 20, 2016 at 03:08:19PM +0100, Tvrtko Ursulin wrote:
> Otherwise execlists_check_remove_request when accessing
> head_req->ctx is use after free. And I can demonstrate that easily
> via gem-close-race. Put a
> WARN_ON(atomic_read(&head_req->ctx->ref.refcount) == 0); and see. :)

More to the point, could we do a 10s burst of close race for BAT? What's
the likelihood of that capturing such faults?
With lockdep/kmemcheck etc. enabled?
-Chris
Tvrtko Ursulin April 20, 2016, 4:34 p.m. UTC | #4
On 20/04/16 15:22, Chris Wilson wrote:
> On Wed, Apr 20, 2016 at 03:08:19PM +0100, Tvrtko Ursulin wrote:
>> Otherwise execlists_check_remove_request when accessing
>> head_req->ctx is use after free. And I can demonstrate that easily
>> via gem-close-race. Put a
>> WARN_ON(atomic_read(&head_req->ctx->ref.refcount) == 0); and see. :)
>
> More to the point, could we do a 10s burst of close race for BAT. What's
> the likelihood of that capturing such faults?
> With lockdep/kmemcheck etc enabled?

No idea, but on a lean kernel it takes a lot less than 10s. So maybe add
a time-limited gem-close-race-basic, resend your series and see if it
catches it?
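
Roughly the shape of such a time-bounded burst (a plain C sketch, not
the actual IGT test; race_iteration() below is a hypothetical stand-in
for one pass of the gem-close-race body):

#include <time.h>

/* Hypothetical stand-in for one pass of the gem-close-race stress body
 * (submit work against a context and race it against gem_close /
 * context destruction).
 */
static void race_iteration(int drm_fd)
{
	(void)drm_fd;
}

/* Run the race for a fixed wall-clock budget, e.g. 10 seconds for BAT. */
static void close_race_burst(int drm_fd, int budget_secs)
{
	struct timespec start, now;

	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		race_iteration(drm_fd);
		clock_gettime(CLOCK_MONOTONIC, &now);
	} while (now.tv_sec - start.tv_sec < budget_secs);
}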

Regards,

Tvrtko

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c59b2670cc36..be98e9643072 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2302,6 +2302,17 @@  struct drm_i915_gem_request {
 	struct intel_context *ctx;
 	struct intel_ringbuffer *ringbuf;
 
+	/**
+	 * Context related to the previous request.
+	 * As the contexts are accessed by the hardware until the switch is
+	 * completed to a new context, the hardware may still be writing
+	 * to the context object after the breadcrumb is visible. We must
+	 * not unpin/unbind/prune that object whilst still active and so
+	 * we keep the previous context pinned until the following (this)
+	 * request is retired.
+	 */
+	struct intel_context *previous_context;
+
 	/** Batch buffer related to this request if any (used for
 	    error state dump only) */
 	struct drm_i915_gem_object *batch_obj;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9b4854a17264..537aacfda3eb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1413,13 +1413,13 @@  static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	list_del_init(&request->list);
 	i915_gem_request_remove_from_client(request);
 
-	if (request->ctx) {
+	if (request->previous_context) {
 		if (i915.enable_execlists)
-			intel_lr_context_unpin(request->ctx, request->engine);
-
-		i915_gem_context_unreference(request->ctx);
+			intel_lr_context_unpin(request->previous_context,
+					       request->engine);
 	}
 
+	i915_gem_context_unreference(request->ctx);
 	i915_gem_request_unreference(request);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ee4e9bb80042..06e013293ec6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -590,7 +590,6 @@  static void execlists_context_queue(struct drm_i915_gem_request *request)
 	struct drm_i915_gem_request *cursor;
 	int num_elements = 0;
 
-	intel_lr_context_pin(request->ctx, request->engine);
 	i915_gem_request_reference(request);
 
 	spin_lock_bh(&engine->execlist_lock);
@@ -788,12 +787,14 @@  intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
 	if (intel_engine_stopped(engine))
 		return 0;
 
-	if (engine->last_context != request->ctx) {
-		if (engine->last_context)
-			intel_lr_context_unpin(engine->last_context, engine);
-		intel_lr_context_pin(request->ctx, engine);
-		engine->last_context = request->ctx;
-	}
+	/* We keep the previous context alive until we retire the following
+	 * request. This ensures that the context object is still pinned
+	 * for any residual writes the HW makes into it on the context switch
+	 * into the next object following the breadcrumb. Otherwise, we may
+	 * retire the context too early.
+	 */
+	request->previous_context = engine->last_context;
+	engine->last_context = request->ctx;
 
 	if (dev_priv->guc.execbuf_client)
 		i915_guc_submit(dev_priv->guc.execbuf_client, request);
@@ -1015,8 +1016,6 @@  void intel_execlists_retire_requests(struct intel_engine_cs *engine)
 	spin_unlock_bh(&engine->execlist_lock);
 
 	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-		intel_lr_context_unpin(req->ctx, engine);
-
 		list_del(&req->execlist_link);
 		i915_gem_request_unreference(req);
 	}