diff mbox

drm/i915: Skip waking the signaler when enabling before request submission

Message ID 20170426080659.28771-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson April 26, 2017, 8:06 a.m. UTC
If we are enabling the breadcrumbs signaling prior to submitting the
request, we know that we cannot have missed the interrupt and can
therefore skip immediately waking the signaler to check.

This reduces a significant chunk of the __i915_gem_request_submit()
overhead for inter-engine synchronisation, for example in gem_exec_whisper.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c    | 4 ++--
 drivers/gpu/drm/i915/i915_guc_submission.c | 2 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c   | 7 ++++---
 drivers/gpu/drm/i915/intel_ringbuffer.h    | 3 ++-
 4 files changed, 9 insertions(+), 7 deletions(-)

Comments

Tvrtko Ursulin April 26, 2017, 10:35 a.m. UTC | #1
On 26/04/2017 09:06, Chris Wilson wrote:
> If we are enabling the breadcrumbs signaling prior to submitting the
> request, we know that we cannot have missed the interrupt and can
> therefore skip immediately waking the signaler to check.
>
> This reduces a significant chunk of the __i915_gem_request_submit()
> overhead for inter-engine synchronisation, for example in gem_exec_whisper.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_request.c    | 4 ++--
>  drivers/gpu/drm/i915/i915_guc_submission.c | 2 +-
>  drivers/gpu/drm/i915/intel_breadcrumbs.c   | 7 ++++---
>  drivers/gpu/drm/i915/intel_ringbuffer.h    | 3 ++-
>  4 files changed, 9 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 8bbec19e267a..7b9a509f00f3 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -61,7 +61,7 @@ static bool i915_fence_enable_signaling(struct dma_fence *fence)
>  	if (i915_fence_signaled(fence))
>  		return false;
>
> -	intel_engine_enable_signaling(to_request(fence));
> +	intel_engine_enable_signaling(to_request(fence), true);
>  	return true;
>  }
>
> @@ -448,7 +448,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
>  	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
>  	request->global_seqno = seqno;
>  	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
> -		intel_engine_enable_signaling(request);
> +		intel_engine_enable_signaling(request, false);
>  	spin_unlock(&request->lock);
>
>  	engine->emit_breadcrumb(request,
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index a4eca6ac449b..7ccb22709efb 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -649,7 +649,7 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq)
>  	trace_dma_fence_enable_signal(&rq->fence);
>
>  	spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
> -	intel_engine_enable_signaling(rq);
> +	intel_engine_enable_signaling(rq, true);
>  	spin_unlock(&rq->lock);
>  }
>
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 0839f928bc57..8f52fd5f6102 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -683,12 +683,13 @@ static int intel_breadcrumbs_signaler(void *arg)
>  	return 0;
>  }
>
> -void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
> +void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
> +				   bool wakeup)
>  {
>  	struct intel_engine_cs *engine = request->engine;
>  	struct intel_breadcrumbs *b = &engine->breadcrumbs;
>  	struct rb_node *parent, **p;
> -	bool first, wakeup;
> +	bool first;
>  	u32 seqno;
>
>  	/* Note that we may be called from an interrupt handler on another
> @@ -721,7 +722,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
>  	 * If we are the oldest waiter, enable the irq (after which we
>  	 * must double check that the seqno did not complete).
>  	 */
> -	wakeup = __intel_engine_add_wait(engine, &request->signaling.wait);
> +	wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
>
>  	/* Now insert ourselves into the retirement ordered list of signals
>  	 * on this engine. We track the oldest seqno as that will be the
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 3f7d5666bcf6..de5b968f508a 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -672,7 +672,8 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
>  			   struct intel_wait *wait);
>  void intel_engine_remove_wait(struct intel_engine_cs *engine,
>  			      struct intel_wait *wait);
> -void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
> +void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
> +				   bool wakeup);
>  void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
>
>  static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Chris Wilson April 26, 2017, 10:55 a.m. UTC | #2
On Wed, Apr 26, 2017 at 11:35:33AM +0100, Tvrtko Ursulin wrote:
> 
> On 26/04/2017 09:06, Chris Wilson wrote:
> >If we are enabling the breadcrumbs signaling prior to submitting the
> >request, we know that we cannot have missed the interrupt and can
> >therefore skip immediately waking the signaler to check.
> >
> >This reduces a significant chunk of the __i915_gem_request_submit()
> >overhead for inter-engine synchronisation, for example in gem_exec_whisper.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >---
> > drivers/gpu/drm/i915/i915_gem_request.c    | 4 ++--
> > drivers/gpu/drm/i915/i915_guc_submission.c | 2 +-
> > drivers/gpu/drm/i915/intel_breadcrumbs.c   | 7 ++++---
> > drivers/gpu/drm/i915/intel_ringbuffer.h    | 3 ++-
> > 4 files changed, 9 insertions(+), 7 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> >index 8bbec19e267a..7b9a509f00f3 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_request.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_request.c
> >@@ -61,7 +61,7 @@ static bool i915_fence_enable_signaling(struct dma_fence *fence)
> > 	if (i915_fence_signaled(fence))
> > 		return false;
> >
> >-	intel_engine_enable_signaling(to_request(fence));
> >+	intel_engine_enable_signaling(to_request(fence), true);
> > 	return true;
> > }
> >
> >@@ -448,7 +448,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
> > 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
> > 	request->global_seqno = seqno;
> > 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
> >-		intel_engine_enable_signaling(request);
> >+		intel_engine_enable_signaling(request, false);
> > 	spin_unlock(&request->lock);
> >
> > 	engine->emit_breadcrumb(request,
> >diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> >index a4eca6ac449b..7ccb22709efb 100644
> >--- a/drivers/gpu/drm/i915/i915_guc_submission.c
> >+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> >@@ -649,7 +649,7 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq)
> > 	trace_dma_fence_enable_signal(&rq->fence);
> >
> > 	spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
> >-	intel_engine_enable_signaling(rq);
> >+	intel_engine_enable_signaling(rq, true);
> > 	spin_unlock(&rq->lock);
> > }
> >
> >diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> >index 0839f928bc57..8f52fd5f6102 100644
> >--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> >+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> >@@ -683,12 +683,13 @@ static int intel_breadcrumbs_signaler(void *arg)
> > 	return 0;
> > }
> >
> >-void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
> >+void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
> >+				   bool wakeup)
> > {
> > 	struct intel_engine_cs *engine = request->engine;
> > 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> > 	struct rb_node *parent, **p;
> >-	bool first, wakeup;
> >+	bool first;
> > 	u32 seqno;
> >
> > 	/* Note that we may be called from an interrupt handler on another
> >@@ -721,7 +722,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
> > 	 * If we are the oldest waiter, enable the irq (after which we
> > 	 * must double check that the seqno did not complete).
> > 	 */
> >-	wakeup = __intel_engine_add_wait(engine, &request->signaling.wait);
> >+	wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
> >
> > 	/* Now insert ourselves into the retirement ordered list of signals
> > 	 * on this engine. We track the oldest seqno as that will be the
> >diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >index 3f7d5666bcf6..de5b968f508a 100644
> >--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> >+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >@@ -672,7 +672,8 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
> > 			   struct intel_wait *wait);
> > void intel_engine_remove_wait(struct intel_engine_cs *engine,
> > 			      struct intel_wait *wait);
> >-void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
> >+void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
> >+				   bool wakeup);
> > void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
> >
> > static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
> >
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Ta, pushed this one as it's been the only thing to give a clear
improvement to gem_exec_whisper + execlist in yonks :)
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 8bbec19e267a..7b9a509f00f3 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -61,7 +61,7 @@  static bool i915_fence_enable_signaling(struct dma_fence *fence)
 	if (i915_fence_signaled(fence))
 		return false;
 
-	intel_engine_enable_signaling(to_request(fence));
+	intel_engine_enable_signaling(to_request(fence), true);
 	return true;
 }
 
@@ -448,7 +448,7 @@  void __i915_gem_request_submit(struct drm_i915_gem_request *request)
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 	request->global_seqno = seqno;
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
-		intel_engine_enable_signaling(request);
+		intel_engine_enable_signaling(request, false);
 	spin_unlock(&request->lock);
 
 	engine->emit_breadcrumb(request,
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index a4eca6ac449b..7ccb22709efb 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -649,7 +649,7 @@  static void nested_enable_signaling(struct drm_i915_gem_request *rq)
 	trace_dma_fence_enable_signal(&rq->fence);
 
 	spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
-	intel_engine_enable_signaling(rq);
+	intel_engine_enable_signaling(rq, true);
 	spin_unlock(&rq->lock);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 0839f928bc57..8f52fd5f6102 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -683,12 +683,13 @@  static int intel_breadcrumbs_signaler(void *arg)
 	return 0;
 }
 
-void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
+void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
+				   bool wakeup)
 {
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 	struct rb_node *parent, **p;
-	bool first, wakeup;
+	bool first;
 	u32 seqno;
 
 	/* Note that we may be called from an interrupt handler on another
@@ -721,7 +722,7 @@  void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	 * If we are the oldest waiter, enable the irq (after which we
 	 * must double check that the seqno did not complete).
 	 */
-	wakeup = __intel_engine_add_wait(engine, &request->signaling.wait);
+	wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
 
 	/* Now insert ourselves into the retirement ordered list of signals
 	 * on this engine. We track the oldest seqno as that will be the
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 3f7d5666bcf6..de5b968f508a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -672,7 +672,8 @@  bool intel_engine_add_wait(struct intel_engine_cs *engine,
 			   struct intel_wait *wait);
 void intel_engine_remove_wait(struct intel_engine_cs *engine,
 			      struct intel_wait *wait);
-void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
+void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
+				   bool wakeup);
 void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
 
 static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)