
[07/10] drm/i915: Reduce spinlock hold time during notify_ring() interrupt

Message ID 20180115212455.24046-8-chris@chris-wilson.co.uk (mailing list archive)
State: New, archived

Commit Message

Chris Wilson Jan. 15, 2018, 9:24 p.m. UTC
By taking advantage of the RCU protection of the task struct, we can find
the appropriate signaler under the spinlock and then release the spinlock
before waking the task and signaling the fence.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)
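
The pattern being moved to, in isolation: hold breadcrumbs.irq_lock only long
enough to pick out the waiter, then do the wake-up and fence signaling outside
the lock, with rcu_read_lock() keeping the task_struct valid in between. Below
is a minimal sketch against generic kernel primitives, not the i915 code itself;
the demo_* types and the seqno_passed() helper are placeholders for illustration.

#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct demo_wait {
        struct task_struct *tsk;
        u32 seqno;
};

struct demo_breadcrumbs {
        spinlock_t irq_lock;
        struct demo_wait *irq_wait;
};

/* Same wraparound-safe "a is at or after b" test as i915_seqno_passed(). */
static bool seqno_passed(u32 a, u32 b)
{
        return (s32)(a - b) >= 0;
}

static void demo_notify(struct demo_breadcrumbs *b, u32 seqno)
{
        struct task_struct *tsk = NULL;
        struct demo_wait *wait;

        /* task_struct is RCU-freed, so the read lock keeps *tsk valid
         * even after the spinlock below has been dropped. */
        rcu_read_lock();

        spin_lock(&b->irq_lock);
        wait = b->irq_wait;
        if (wait && seqno_passed(seqno, wait->seqno))
                tsk = wait->tsk;        /* remember the signaler under the lock... */
        spin_unlock(&b->irq_lock);

        /* ...but do the wake-up only after releasing it. */
        if (tsk && tsk->state != TASK_RUNNING)
                wake_up_process(tsk);

        rcu_read_unlock();
}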

Comments

Tvrtko Ursulin Jan. 17, 2018, 10:45 a.m. UTC | #1
On 15/01/2018 21:24, Chris Wilson wrote:
> By taking advantage of the RCU protection of the task struct, we can find
> the appropriate signaler under the spinlock and then release the spinlock
> before waking the task and signaling the fence.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_irq.c | 29 +++++++++++++++++++----------
>   1 file changed, 19 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 3517c6548e2c..0b272501b738 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1065,21 +1065,24 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
>   
>   static void notify_ring(struct intel_engine_cs *engine)
>   {
> +	const u32 seqno = intel_engine_get_seqno(engine);
>   	struct drm_i915_gem_request *rq = NULL;
> +	struct task_struct *tsk = NULL;
>   	struct intel_wait *wait;
>   
> -	if (!engine->breadcrumbs.irq_armed)
> +	if (unlikely(!engine->breadcrumbs.irq_armed))
>   		return;

It isn't unlikely in GuC mode, just sayin'...

>   
>   	atomic_inc(&engine->irq_count);
>   	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
>   
> +	rcu_read_lock();
> +
>   	spin_lock(&engine->breadcrumbs.irq_lock);
>   	wait = engine->breadcrumbs.irq_wait;
>   	if (wait) {
> -		bool wakeup = engine->irq_seqno_barrier;
> -
> -		/* We use a callback from the dma-fence to submit
> +		/*
> +		 * We use a callback from the dma-fence to submit
>   		 * requests after waiting on our own requests. To
>   		 * ensure minimum delay in queuing the next request to
>   		 * hardware, signal the fence now rather than wait for
> @@ -1090,19 +1093,20 @@ static void notify_ring(struct intel_engine_cs *engine)
>   		 * and to handle coalescing of multiple seqno updates
>   		 * and many waiters.
>   		 */
> -		if (i915_seqno_passed(intel_engine_get_seqno(engine),
> -				      wait->seqno)) {
> +		if (i915_seqno_passed(seqno, wait->seqno)) {
>   			struct drm_i915_gem_request *waiter = wait->request;
>   
> -			wakeup = true;
>   			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>   				      &waiter->fence.flags) &&
>   			    intel_wait_check_request(wait, waiter))
>   				rq = i915_gem_request_get(waiter);
> -		}
>   
> -		if (wakeup)
> -			wake_up_process(wait->tsk);
> +			tsk = wait->tsk;
> +		} else {
> +			if (engine->irq_seqno_barrier &&
> +			    i915_seqno_passed(seqno, wait->seqno - 1))

Hm, what is this about? Why -1 on platforms with coherency issues and not
some other number? It needs a comment at minimum, but it is also a
behaviour change that I could not immediately reconcile with the commit
message. If it is some additional optimization it needs to be split out
into a separate patch.

> +				tsk = wait->tsk;
> +		}
>   	} else {
>   		if (engine->breadcrumbs.irq_armed)
>   			__intel_engine_disarm_breadcrumbs(engine);
> @@ -1114,6 +1118,11 @@ static void notify_ring(struct intel_engine_cs *engine)
>   		i915_gem_request_put(rq);
>   	}
>   
> +	if (tsk && tsk->state != TASK_RUNNING)
> +		wake_up_process(tsk);
> +
> +	rcu_read_unlock();
> +
>   	trace_intel_engine_notify(engine, wait);
>   }
>   
> 

Regards,

Tvrtko
Chris Wilson Jan. 18, 2018, 6:08 p.m. UTC | #2
Quoting Tvrtko Ursulin (2018-01-17 10:45:16)
> 
> On 15/01/2018 21:24, Chris Wilson wrote:
> > By taking advantage of the RCU protection of the task struct, we can find
> > the appropriate signaler under the spinlock and then release the spinlock
> > before waking the task and signaling the fence.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_irq.c | 29 +++++++++++++++++++----------
> >   1 file changed, 19 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index 3517c6548e2c..0b272501b738 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -1065,21 +1065,24 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
> >   
> >   static void notify_ring(struct intel_engine_cs *engine)
> >   {
> > +     const u32 seqno = intel_engine_get_seqno(engine);
> >       struct drm_i915_gem_request *rq = NULL;
> > +     struct task_struct *tsk = NULL;
> >       struct intel_wait *wait;
> >   
> > -     if (!engine->breadcrumbs.irq_armed)
> > +     if (unlikely(!engine->breadcrumbs.irq_armed))
> >               return;
> 
> It isn't unlikely in GuC mode, just sayin'...

Already taken care of; the GuC now "pins" the irq as opposed to arming it,
so we should be able to equate irq_armed with "has waiters".
-Chris
Chris Wilson Jan. 18, 2018, 6:10 p.m. UTC | #3
Quoting Tvrtko Ursulin (2018-01-17 10:45:16)
> 
> On 15/01/2018 21:24, Chris Wilson wrote:
> > -             if (wakeup)
> > -                     wake_up_process(wait->tsk);
> > +                     tsk = wait->tsk;
> > +             } else {
> > +                     if (engine->irq_seqno_barrier &&
> > +                         i915_seqno_passed(seqno, wait->seqno - 1))
> 
> Hm, what is this about? Why -1 on platforms with coherency issues and not
> some other number? It needs a comment at minimum, but it is also a
> behaviour change that I could not immediately reconcile with the commit
> message. If it is some additional optimization it needs to be split out
> into a separate patch.

It's a finger-in-the-air statement that I don't expect to be more than
one seqno behind in the interrupt-vs-breadcrumb race. So far I haven't
been disappointed.
-Chris
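
For reference, i915_seqno_passed() is the usual wraparound-safe "a is at or
after b" comparison, (s32)(a - b) >= 0, so testing against wait->seqno - 1
widens the wake-up window by exactly one seqno: a snapshot that lags the
hardware by one still wakes the waiter, which can then re-sample the seqno
after the engine's irq_seqno_barrier. A small standalone sketch of the
arithmetic follows; the values are arbitrary and seqno_passed() merely
mirrors that comparison.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Wraparound-safe: true if a is at or after b. */
static bool seqno_passed(uint32_t a, uint32_t b)
{
        return (int32_t)(a - b) >= 0;
}

int main(void)
{
        uint32_t wait_seqno = 100;

        /* Breadcrumb has landed: wake and signal. */
        printf("%d\n", seqno_passed(100, wait_seqno));      /* 1 */

        /* Snapshot one behind: the -1 check still wakes the waiter,
         * which re-reads the seqno after the barrier. */
        printf("%d\n", seqno_passed(99, wait_seqno - 1));    /* 1 */

        /* Two or more behind: not covered by the heuristic. */
        printf("%d\n", seqno_passed(98, wait_seqno - 1));    /* 0 */

        /* Wraparound still behaves. */
        printf("%d\n", seqno_passed(3, 0xfffffffeu));        /* 1 */
        return 0;
}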

Patch

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 3517c6548e2c..0b272501b738 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1065,21 +1065,24 @@  static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
 
 static void notify_ring(struct intel_engine_cs *engine)
 {
+	const u32 seqno = intel_engine_get_seqno(engine);
 	struct drm_i915_gem_request *rq = NULL;
+	struct task_struct *tsk = NULL;
 	struct intel_wait *wait;
 
-	if (!engine->breadcrumbs.irq_armed)
+	if (unlikely(!engine->breadcrumbs.irq_armed))
 		return;
 
 	atomic_inc(&engine->irq_count);
 	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
+	rcu_read_lock();
+
 	spin_lock(&engine->breadcrumbs.irq_lock);
 	wait = engine->breadcrumbs.irq_wait;
 	if (wait) {
-		bool wakeup = engine->irq_seqno_barrier;
-
-		/* We use a callback from the dma-fence to submit
+		/*
+		 * We use a callback from the dma-fence to submit
 		 * requests after waiting on our own requests. To
 		 * ensure minimum delay in queuing the next request to
 		 * hardware, signal the fence now rather than wait for
@@ -1090,19 +1093,20 @@  static void notify_ring(struct intel_engine_cs *engine)
 		 * and to handle coalescing of multiple seqno updates
 		 * and many waiters.
 		 */
-		if (i915_seqno_passed(intel_engine_get_seqno(engine),
-				      wait->seqno)) {
+		if (i915_seqno_passed(seqno, wait->seqno)) {
 			struct drm_i915_gem_request *waiter = wait->request;
 
-			wakeup = true;
 			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 				      &waiter->fence.flags) &&
 			    intel_wait_check_request(wait, waiter))
 				rq = i915_gem_request_get(waiter);
-		}
 
-		if (wakeup)
-			wake_up_process(wait->tsk);
+			tsk = wait->tsk;
+		} else {
+			if (engine->irq_seqno_barrier &&
+			    i915_seqno_passed(seqno, wait->seqno - 1))
+				tsk = wait->tsk;
+		}
 	} else {
 		if (engine->breadcrumbs.irq_armed)
 			__intel_engine_disarm_breadcrumbs(engine);
@@ -1114,6 +1118,11 @@  static void notify_ring(struct intel_engine_cs *engine)
 		i915_gem_request_put(rq);
 	}
 
+	if (tsk && tsk->state != TASK_RUNNING)
+		wake_up_process(tsk);
+
+	rcu_read_unlock();
+
 	trace_intel_engine_notify(engine, wait);
 }