[8/9] drm/i915: Track the previous pinned context inside the request

Message ID 1461048560-31983-9-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson April 19, 2016, 6:49 a.m. UTC
As the contexts are accessed by the hardware until the switch is completed
to a new context, the hardware may still be writing to the context object
after the breadcrumb is visible. We must not unpin/unbind/prune that
object whilst it is still active, and so we keep the previous context
pinned until the following request is retired. If we move this tracking
onto the request, we can
simplify the code and enable freeing of the request without the
struct_mutex in subsequent patches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c |  8 ++++----
 drivers/gpu/drm/i915/i915_gem_request.h | 11 +++++++++++
 drivers/gpu/drm/i915/intel_lrc.c        | 12 +++++-------
 3 files changed, 20 insertions(+), 11 deletions(-)
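
To make the life cycle described above concrete, here is a small toy model of
the hand-off in plain userspace C. The toy_context/toy_request/toy_engine
types and toy_submit/toy_retire functions are made-up stand-ins, and the real
driver's pin accounting is more involved; this is only a sketch of the idea
that the pin on the engine's outgoing context is inherited by the request
that switches away from it and dropped when that request retires.

#include <assert.h>
#include <stddef.h>

/* Made-up stand-ins for intel_context / drm_i915_gem_request / intel_engine_cs. */
struct toy_context { int pin_count; };

struct toy_request {
	struct toy_context *ctx;		/* context this request executes in */
	struct toy_context *pinned_context;	/* context to unpin once retired */
};

struct toy_engine { struct toy_context *last_context; };

static void toy_submit(struct toy_engine *engine, struct toy_request *rq)
{
	rq->ctx->pin_count++;				/* keep our own context resident */
	rq->pinned_context = engine->last_context;	/* inherit the pin on the outgoing context */
	engine->last_context = rq->ctx;
}

static void toy_retire(struct toy_request *rq)
{
	/* Only now is the switch away from pinned_context known to have completed. */
	if (rq->pinned_context)
		rq->pinned_context->pin_count--;
}

int main(void)
{
	struct toy_context A = { 0 }, B = { 0 };
	struct toy_engine engine = { NULL };
	struct toy_request rq1 = { &A, NULL }, rq2 = { &B, NULL };

	toy_submit(&engine, &rq1);	/* engine now runs A */
	toy_submit(&engine, &rq2);	/* engine now runs B; rq2 inherits the pin on A */

	toy_retire(&rq1);
	assert(A.pin_count == 1);	/* A stays pinned: the switch to B is not yet proven */

	toy_retire(&rq2);		/* rq2's breadcrumb implies the switch completed */
	assert(A.pin_count == 0);	/* A may now be unbound/pruned */
	assert(B.pin_count == 1);	/* B remains pinned as the engine's last_context */
	return 0;
}

The asserts capture the invariant from the commit message: A is only unpinned
once the request that replaced it has retired, i.e. once the hardware can no
longer be writing to A's context object.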

Comments

Tvrtko Ursulin April 19, 2016, 12:02 p.m. UTC | #1
On 19/04/16 07:49, Chris Wilson wrote:
> As the contexts are accessed by the hardware until the switch is completed
> to a new context, the hardware may still be writing to the context object
> after the breadcrumb is visible. We must not unpin/unbind/prune that
> object whilst it is still active, and so we keep the previous context
> pinned until the following request is retired. If we move this tracking
> onto the request, we can
> simplify the code and enable freeing of the request without the
> struct_mutex in subsequent patches.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_request.c |  8 ++++----
>   drivers/gpu/drm/i915/i915_gem_request.h | 11 +++++++++++
>   drivers/gpu/drm/i915/intel_lrc.c        | 12 +++++-------
>   3 files changed, 20 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 33aacf1725dd..8d7c415f1896 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -643,12 +643,12 @@ void i915_gem_request_free(struct kref *req_ref)
>   	if (req->file_priv)
>   		i915_gem_request_remove_from_client(req);
>
> -	if (ctx) {
> +	if (req->pinned_context) {
>   		if (i915.enable_execlists)
> -			intel_lr_context_unpin(ctx, req->engine);
> -
> -		i915_gem_context_unreference(ctx);
> +			intel_lr_context_unpin(req->pinned_context,
> +					       req->engine);
>   	}
>
> +	i915_gem_context_unreference(ctx);
>   	kmem_cache_free(to_i915(req)->requests, req);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 69a4d4e2c97b..389813cbc19a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -85,6 +85,17 @@ struct drm_i915_gem_request {
>   	struct intel_context *ctx;
>   	struct intel_ringbuffer *ringbuf;
>
> +	/**
> +	 * Context related to the previous request.
> +	 * As the contexts are accessed by the hardware until the switch is
> +	 * completed to a new context, the hardware may still be writing
> +	 * to the context object after the breadcrumb is visible. We must
> +	 * not unpin/unbind/prune that object whilst still active and so
> +	 * we keep the previous context pinned until the following (this)
> +	 * request is retired.
> +	 */
> +	struct intel_context *pinned_context;
> +
>   	/** Batch buffer related to this request if any (used for
>   	    error state dump only) */
>   	struct drm_i915_gem_object *batch_obj;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b0d20af38574..0e55f206e592 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -708,6 +708,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
>   		request->ctx->engine[engine->id].initialised = true;
>   	}
>
> +	request->pinned_context = request->ctx;

Add a little bit of comment to the big one above explaining the 
possibility of pinned_context being, not the previous, but the current 
one before submission?
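
To spell out that window, the field has three touch points in the patch as
posted; the fragment below merely restates the hunks with comments
(identifiers are the patch's own, and the reason suggested for the
allocation-time value is an inference, see the reply further down):

/* At request allocation (intel_logical_ring_alloc_request_extras): the field
 * starts out pointing at the request's own context, presumably so a request
 * cancelled before submission still unpins something sensible. */
request->pinned_context = request->ctx;

/* At submission (intel_logical_ring_advance_and_submit): overwritten with the
 * engine's outgoing context, the one the hardware may still be writing back. */
request->pinned_context = engine->last_context;
engine->last_context = request->ctx;

/* At retire/free: whichever context the field ended up holding is unpinned
 * (the free path additionally checks i915.enable_execlists). */
if (request->pinned_context)
	intel_lr_context_unpin(request->pinned_context, request->engine);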

>   	return 0;
>   }
>
> @@ -782,12 +783,8 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>   	intel_logical_ring_emit(ringbuf, MI_NOOP);
>   	intel_logical_ring_advance(ringbuf);
>
> -	if (engine->last_context != request->ctx) {
> -		if (engine->last_context)
> -			intel_lr_context_unpin(engine->last_context, engine);
> -		intel_lr_context_pin(request->ctx, engine);
> -		engine->last_context = request->ctx;
> -	}
> +	request->pinned_context = engine->last_context;
> +	engine->last_context = request->ctx;

I am not sure if this is very complicated or just very different from my 
approach. Either way after thinking long and hard I cannot fault it. 
Looks like it will work.

>
>   	if (dev_priv->guc.execbuf_client)
>   		i915_guc_submit(dev_priv->guc.execbuf_client, request);
> @@ -1009,7 +1006,8 @@ void intel_execlists_retire_requests(struct intel_engine_cs *engine)
>   	spin_unlock_bh(&engine->execlist_lock);
>
>   	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> -		intel_lr_context_unpin(req->ctx, engine);
> +		if (req->pinned_context)
> +			intel_lr_context_unpin(req->pinned_context, engine);
>
>   		list_del(&req->execlist_link);
>   		i915_gem_request_unreference(req);
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

I suppose you did not see any performance effect since you decided to 
turn it on for both GuC and execlists? (Assuming vma iomap is in place.)

Regards,

Tvrtko
Chris Wilson April 19, 2016, 12:14 p.m. UTC | #2
On Tue, Apr 19, 2016 at 01:02:26PM +0100, Tvrtko Ursulin wrote:
> On 19/04/16 07:49, Chris Wilson wrote:
> >diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >index b0d20af38574..0e55f206e592 100644
> >--- a/drivers/gpu/drm/i915/intel_lrc.c
> >+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >@@ -708,6 +708,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
> >  		request->ctx->engine[engine->id].initialised = true;
> >  	}
> >
> >+	request->pinned_context = request->ctx;
> 
> Add a little bit of comment to the big one above explaining the
> possibility of pinned_context being, not the previous, but the
> current one before submission?

This was here because we used to be able to cancel the context. Now that
we always go through intel_logical_ring_advance_and_submit, I've dropped
it. It makes me a little nervous because we are not clearly tracking the
pinned_context now.

I also switched to request->previous_context, but I'm undecided as to
whether that is a better name (still worrying over lack of pinned
context tracking).
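
So the follow-up version presumably keeps only the submit-time hand-off,
roughly as below (with the still-tentative previous_context name applied):

	/* in intel_logical_ring_advance_and_submit(), the only assignment left */
	request->previous_context = engine->last_context;
	engine->last_context = request->ctx;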

> >  	return 0;
> >  }
> >
> >@@ -782,12 +783,8 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
> >  	intel_logical_ring_emit(ringbuf, MI_NOOP);
> >  	intel_logical_ring_advance(ringbuf);
> >
> >-	if (engine->last_context != request->ctx) {
> >-		if (engine->last_context)
> >-			intel_lr_context_unpin(engine->last_context, engine);
> >-		intel_lr_context_pin(request->ctx, engine);
> >-		engine->last_context = request->ctx;
> >-	}
> >+	request->pinned_context = engine->last_context;
> >+	engine->last_context = request->ctx;
> 
> I am not sure if this is very complicated or just very different
> from my approach. Either way after thinking long and hard I cannot
> fault it. Looks like it will work.

Subtle enough that I gave it a comment.

> >  	if (dev_priv->guc.execbuf_client)
> >  		i915_guc_submit(dev_priv->guc.execbuf_client, request);
> >@@ -1009,7 +1006,8 @@ void intel_execlists_retire_requests(struct intel_engine_cs *engine)
> >  	spin_unlock_bh(&engine->execlist_lock);
> >
> >  	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> >-		intel_lr_context_unpin(req->ctx, engine);
> >+		if (req->pinned_context)
> >+			intel_lr_context_unpin(req->pinned_context, engine);
> >
> >  		list_del(&req->execlist_link);
> >  		i915_gem_request_unreference(req);
> >
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> I suppose you did not see any performance effect since you decided
> to turn it on for both GuC and execlists? (Assuming vma iomap is in
> place.)

Context unpinning (with the caching in place) doesn't appear in the
profiles enough for me to worry about. There is easier low-hanging fruit in
the needless locked atomic instructions and the like that we do. (Besides
which there are lots of reasons why execlists doesn't yet outperform
legacy...)
-Chris

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 33aacf1725dd..8d7c415f1896 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -643,12 +643,12 @@  void i915_gem_request_free(struct kref *req_ref)
 	if (req->file_priv)
 		i915_gem_request_remove_from_client(req);
 
-	if (ctx) {
+	if (req->pinned_context) {
 		if (i915.enable_execlists)
-			intel_lr_context_unpin(ctx, req->engine);
-
-		i915_gem_context_unreference(ctx);
+			intel_lr_context_unpin(req->pinned_context,
+					       req->engine);
 	}
 
+	i915_gem_context_unreference(ctx);
 	kmem_cache_free(to_i915(req)->requests, req);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 69a4d4e2c97b..389813cbc19a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -85,6 +85,17 @@  struct drm_i915_gem_request {
 	struct intel_context *ctx;
 	struct intel_ringbuffer *ringbuf;
 
+	/**
+	 * Context related to the previous request.
+	 * As the contexts are accessed by the hardware until the switch is
+	 * completed to a new context, the hardware may still be writing
+	 * to the context object after the breadcrumb is visible. We must
+	 * not unpin/unbind/prune that object whilst still active and so
+	 * we keep the previous context pinned until the following (this)
+	 * request is retired.
+	 */
+	struct intel_context *pinned_context;
+
 	/** Batch buffer related to this request if any (used for
 	    error state dump only) */
 	struct drm_i915_gem_object *batch_obj;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b0d20af38574..0e55f206e592 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -708,6 +708,7 @@  int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
 		request->ctx->engine[engine->id].initialised = true;
 	}
 
+	request->pinned_context = request->ctx;
 	return 0;
 }
 
@@ -782,12 +783,8 @@  intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
 	intel_logical_ring_emit(ringbuf, MI_NOOP);
 	intel_logical_ring_advance(ringbuf);
 
-	if (engine->last_context != request->ctx) {
-		if (engine->last_context)
-			intel_lr_context_unpin(engine->last_context, engine);
-		intel_lr_context_pin(request->ctx, engine);
-		engine->last_context = request->ctx;
-	}
+	request->pinned_context = engine->last_context;
+	engine->last_context = request->ctx;
 
 	if (dev_priv->guc.execbuf_client)
 		i915_guc_submit(dev_priv->guc.execbuf_client, request);
@@ -1009,7 +1006,8 @@  void intel_execlists_retire_requests(struct intel_engine_cs *engine)
 	spin_unlock_bh(&engine->execlist_lock);
 
 	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-		intel_lr_context_unpin(req->ctx, engine);
+		if (req->pinned_context)
+			intel_lr_context_unpin(req->pinned_context, engine);
 
 		list_del(&req->execlist_link);
 		i915_gem_request_unreference(req);