
[12/31] drm/i915: Reduce spinlock hold time during notify_ring() interrupt

Message ID 20180625094842.8499-12-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson June 25, 2018, 9:48 a.m. UTC
By taking advantage of the RCU protection of the task struct, we can find
the appropriate signaler under the spinlock and then release the spinlock
before waking the task and signaling the fence.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)
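
As a rough illustration of the idea in the commit message (a simplified, kernel-style sketch using assumed shorthand names such as "b" for engine->breadcrumbs and "tsk" for the signaler's task; this is not the driver's actual code):

	/*
	 * task_struct is RCU-protected, so holding rcu_read_lock() keeps the
	 * signaler's task valid even after the breadcrumbs spinlock is dropped.
	 * That lets the lookup happen under the lock and the (potentially slow)
	 * wakeup happen outside it, shortening the spinlock hold time.
	 */
	rcu_read_lock();

	spin_lock(&b->irq_lock);
	tsk = b->irq_wait ? b->irq_wait->tsk : NULL;	/* find signaler under the lock */
	spin_unlock(&b->irq_lock);

	if (tsk)
		wake_up_process(tsk);			/* wake after releasing the lock */

	rcu_read_unlock();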

Comments

Mika Kuoppala June 27, 2018, 1:08 p.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> By taking advantage of the RCU protection of the task struct, we can find
> the appropriate signaler under the spinlock and then release the spinlock
> before waking the task and signaling the fence.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 33 ++++++++++++++++++++++-----------
>  1 file changed, 22 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 316d0b08d40f..53dad48f92ce 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1145,21 +1145,23 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
>  
>  static void notify_ring(struct intel_engine_cs *engine)
>  {
> +	const u32 seqno = intel_engine_get_seqno(engine);
>  	struct i915_request *rq = NULL;
> +	struct task_struct *tsk = NULL;
>  	struct intel_wait *wait;
>  
> -	if (!engine->breadcrumbs.irq_armed)
> +	if (unlikely(!engine->breadcrumbs.irq_armed))
>  		return;
>

Ok, so due to unlikeliness, you get the seqno early.

>  	atomic_inc(&engine->irq_count);
> -	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
> +
> +	rcu_read_lock();

As I understand from the irc discussion, we have our own const
or stable copy of the task struct from now on.
>  
>  	spin_lock(&engine->breadcrumbs.irq_lock);
>  	wait = engine->breadcrumbs.irq_wait;
>  	if (wait) {
> -		bool wakeup = engine->irq_seqno_barrier;
> -
> -		/* We use a callback from the dma-fence to submit
> +		/*
> +		 * We use a callback from the dma-fence to submit
>  		 * requests after waiting on our own requests. To
>  		 * ensure minimum delay in queuing the next request to
>  		 * hardware, signal the fence now rather than wait for
> @@ -1170,19 +1172,23 @@ static void notify_ring(struct intel_engine_cs *engine)
>  		 * and to handle coalescing of multiple seqno updates
>  		 * and many waiters.
>  		 */
> -		if (i915_seqno_passed(intel_engine_get_seqno(engine),
> -				      wait->seqno)) {
> +		if (i915_seqno_passed(seqno, wait->seqno)) {
>  			struct i915_request *waiter = wait->request;
>  
> -			wakeup = true;
>  			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>  				      &waiter->fence.flags) &&
>  			    intel_wait_check_request(wait, waiter))
>  				rq = i915_request_get(waiter);
> -		}
>  
> -		if (wakeup)
> -			wake_up_process(wait->tsk);
> +			tsk = wait->tsk;
> +		} else {
> +			if (engine->irq_seqno_barrier &&
> +			    i915_seqno_passed(seqno, wait->seqno - 1)) {
> +				set_bit(ENGINE_IRQ_BREADCRUMB,
> +					&engine->irq_posted);
> +				tsk = wait->tsk;

Hmm, you are optimistic that the wakeup latency will be on par with
or greater than the time to the next request completion?

And the wait side also notices that we are close and spins,
instead of going back to sleep?

> +			}
> +		}
>  	} else {
>  		if (engine->breadcrumbs.irq_armed)
>  			__intel_engine_disarm_breadcrumbs(engine);
> @@ -1195,6 +1201,11 @@ static void notify_ring(struct intel_engine_cs *engine)
>  		i915_request_put(rq);
>  	}
>  
> +	if (tsk && tsk->state & TASK_NORMAL)
> +		wake_up_process(tsk);
> +

Why the TASK_NORMAL check?

-Mika

> +	rcu_read_unlock();
> +
>  	trace_intel_engine_notify(engine, wait);
>  }
>  
> -- 
> 2.18.0
Chris Wilson June 27, 2018, 1:14 p.m. UTC | #2
Quoting Mika Kuoppala (2018-06-27 14:08:34)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > By taking advantage of the RCU protection of the task struct, we can find
> > the appropriate signaler under the spinlock and then release the spinlock
> > before waking the task and signaling the fence.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_irq.c | 33 ++++++++++++++++++++++-----------
> >  1 file changed, 22 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index 316d0b08d40f..53dad48f92ce 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -1145,21 +1145,23 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
> >  
> >  static void notify_ring(struct intel_engine_cs *engine)
> >  {
> > +     const u32 seqno = intel_engine_get_seqno(engine);
> >       struct i915_request *rq = NULL;
> > +     struct task_struct *tsk = NULL;
> >       struct intel_wait *wait;
> >  
> > -     if (!engine->breadcrumbs.irq_armed)
> > +     if (unlikely(!engine->breadcrumbs.irq_armed))
> >               return;
> >
> 
> Ok, so due to unlikeliness, you get the seqno early.
> 
> >       atomic_inc(&engine->irq_count);
> > -     set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
> > +
> > +     rcu_read_lock();
> 
> As I understand from the irc discussion, we have our own const
> or stable copy of the task struct from now on.
> >  
> >       spin_lock(&engine->breadcrumbs.irq_lock);
> >       wait = engine->breadcrumbs.irq_wait;
> >       if (wait) {
> > -             bool wakeup = engine->irq_seqno_barrier;
> > -
> > -             /* We use a callback from the dma-fence to submit
> > +             /*
> > +              * We use a callback from the dma-fence to submit
> >                * requests after waiting on our own requests. To
> >                * ensure minimum delay in queuing the next request to
> >                * hardware, signal the fence now rather than wait for
> > @@ -1170,19 +1172,23 @@ static void notify_ring(struct intel_engine_cs *engine)
> >                * and to handle coalescing of multiple seqno updates
> >                * and many waiters.
> >                */
> > -             if (i915_seqno_passed(intel_engine_get_seqno(engine),
> > -                                   wait->seqno)) {
> > +             if (i915_seqno_passed(seqno, wait->seqno)) {
> >                       struct i915_request *waiter = wait->request;
> >  
> > -                     wakeup = true;
> >                       if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> >                                     &waiter->fence.flags) &&
> >                           intel_wait_check_request(wait, waiter))
> >                               rq = i915_request_get(waiter);
> > -             }
> >  
> > -             if (wakeup)
> > -                     wake_up_process(wait->tsk);
> > +                     tsk = wait->tsk;
> > +             } else {
> > +                     if (engine->irq_seqno_barrier &&
> > +                         i915_seqno_passed(seqno, wait->seqno - 1)) {
> > +                             set_bit(ENGINE_IRQ_BREADCRUMB,
> > +                                     &engine->irq_posted);
> > +                             tsk = wait->tsk;
> 
> Hmm, you are optimistic that the wakeup latency will be on par with
> or greater than the time to the next request completion?
> 
> And the wait side also notices that we are close and spins,
> instead of going back to sleep?

Don't forget this is the missed breadcrumb mitigation for gen5-gen7, and
only for it. The intent is to keep that away from the paths that do not
need the extra delays.

> > +                     }
> > +             }
> >       } else {
> >               if (engine->breadcrumbs.irq_armed)
> >                       __intel_engine_disarm_breadcrumbs(engine);
> > @@ -1195,6 +1201,11 @@ static void notify_ring(struct intel_engine_cs *engine)
> >               i915_request_put(rq);
> >       }
> >  
> > +     if (tsk && tsk->state & TASK_NORMAL)
> > +             wake_up_process(tsk);
> > +
> 
> Why the TASK_NORMAL check?

*shrug* too long spent watching the signaler on gen6, i.e. I've grown
accustomed to expecting the tsk to be running at this point.
-Chris
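
As an aside (not from the thread): wake_up_process() only acts on tasks sleeping in one of the TASK_NORMAL states, so the check is an optimisation that skips the call when the signaler is already running, not a correctness requirement. Roughly, using the definitions from include/linux/sched.h of that era:

	#define TASK_RUNNING		0x0000	/* tsk->state == 0 while runnable */
	#define TASK_INTERRUPTIBLE	0x0001
	#define TASK_UNINTERRUPTIBLE	0x0002
	#define TASK_NORMAL		(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)

	/*
	 * wake_up_process(tsk) is a no-op for a task that is already
	 * TASK_RUNNING, so gating the call on tsk->state & TASK_NORMAL merely
	 * avoids the wakeup overhead when the signaler is known to be awake.
	 */
	if (tsk && tsk->state & TASK_NORMAL)
		wake_up_process(tsk);
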
Mika Kuoppala June 27, 2018, 2:01 p.m. UTC | #3
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2018-06-27 14:08:34)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > By taking advantage of the RCU protection of the task struct, we can find
>> > the appropriate signaler under the spinlock and then release the spinlock
>> > before waking the task and signaling the fence.
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > ---
>> >  drivers/gpu/drm/i915/i915_irq.c | 33 ++++++++++++++++++++++-----------
>> >  1 file changed, 22 insertions(+), 11 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
>> > index 316d0b08d40f..53dad48f92ce 100644
>> > --- a/drivers/gpu/drm/i915/i915_irq.c
>> > +++ b/drivers/gpu/drm/i915/i915_irq.c
>> > @@ -1145,21 +1145,23 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
>> >  
>> >  static void notify_ring(struct intel_engine_cs *engine)
>> >  {
>> > +     const u32 seqno = intel_engine_get_seqno(engine);
>> >       struct i915_request *rq = NULL;
>> > +     struct task_struct *tsk = NULL;
>> >       struct intel_wait *wait;
>> >  
>> > -     if (!engine->breadcrumbs.irq_armed)
>> > +     if (unlikely(!engine->breadcrumbs.irq_armed))
>> >               return;
>> >
>> 
>> Ok, so due to unlikeliness, you get the seqno early.
>> 
>> >       atomic_inc(&engine->irq_count);
>> > -     set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
>> > +
>> > +     rcu_read_lock();
>> 
>> As I understand from the irc discussion, we have our own const
>> or stable copy of the task struct from now on.
>> >  
>> >       spin_lock(&engine->breadcrumbs.irq_lock);
>> >       wait = engine->breadcrumbs.irq_wait;
>> >       if (wait) {
>> > -             bool wakeup = engine->irq_seqno_barrier;
>> > -
>> > -             /* We use a callback from the dma-fence to submit
>> > +             /*
>> > +              * We use a callback from the dma-fence to submit
>> >                * requests after waiting on our own requests. To
>> >                * ensure minimum delay in queuing the next request to
>> >                * hardware, signal the fence now rather than wait for
>> > @@ -1170,19 +1172,23 @@ static void notify_ring(struct intel_engine_cs *engine)
>> >                * and to handle coalescing of multiple seqno updates
>> >                * and many waiters.
>> >                */
>> > -             if (i915_seqno_passed(intel_engine_get_seqno(engine),
>> > -                                   wait->seqno)) {
>> > +             if (i915_seqno_passed(seqno, wait->seqno)) {
>> >                       struct i915_request *waiter = wait->request;
>> >  
>> > -                     wakeup = true;
>> >                       if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>> >                                     &waiter->fence.flags) &&
>> >                           intel_wait_check_request(wait, waiter))
>> >                               rq = i915_request_get(waiter);
>> > -             }
>> >  
>> > -             if (wakeup)
>> > -                     wake_up_process(wait->tsk);
>> > +                     tsk = wait->tsk;
>> > +             } else {
>> > +                     if (engine->irq_seqno_barrier &&
>> > +                         i915_seqno_passed(seqno, wait->seqno - 1)) {
>> > +                             set_bit(ENGINE_IRQ_BREADCRUMB,
>> > +                                     &engine->irq_posted);
>> > +                             tsk = wait->tsk;
>> 
>> Hmm, you are optimistic that the wakeup latency will be on par with
>> or greater than the time to the next request completion?
>> 
>> And the wait side also notices that we are close and spins,
>> instead of going back to sleep?
>
> Don't forget this is the missed breadcrumb mitigation for gen5-gen7, and
> only for it. The intent is to keep that away from the paths that do not
> need the extra delays.
>

Looks like a sensible optimization, but I would split it out of this
patch into its own, to keep the content in sync with the commit message.

-Mika
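
For background on the mitigation discussed above (an illustrative aside, not part of the thread): i915 seqno comparisons are wraparound-safe, and on the gen5-gen7 engines that need an irq_seqno_barrier the seqno visible to the CPU may lag the breadcrumb that raised the interrupt, so the handler wakes the waiter when it is one breadcrumb short and lets the waiter apply the barrier and re-check. A sketch, with i915_seqno_passed() shown roughly as the driver defines it:

	/* Wraparound-safe "seq1 is at or after seq2" test on 32-bit seqnos. */
	static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
	{
		return (s32)(seq1 - seq2) >= 0;
	}

	/*
	 * Near-miss case (illustrative): the seqno read by the CPU may still be
	 * one behind the awaited breadcrumb, so mark the breadcrumb interrupt
	 * as posted and wake the waiter early; on wakeup it can perform the
	 * engine's irq_seqno_barrier and re-read the seqno.
	 */
	if (engine->irq_seqno_barrier &&
	    i915_seqno_passed(seqno, wait->seqno - 1)) {
		set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
		tsk = wait->tsk;
	}
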

Patch

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 316d0b08d40f..53dad48f92ce 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1145,21 +1145,23 @@  static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
 
 static void notify_ring(struct intel_engine_cs *engine)
 {
+	const u32 seqno = intel_engine_get_seqno(engine);
 	struct i915_request *rq = NULL;
+	struct task_struct *tsk = NULL;
 	struct intel_wait *wait;
 
-	if (!engine->breadcrumbs.irq_armed)
+	if (unlikely(!engine->breadcrumbs.irq_armed))
 		return;
 
 	atomic_inc(&engine->irq_count);
-	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
+
+	rcu_read_lock();
 
 	spin_lock(&engine->breadcrumbs.irq_lock);
 	wait = engine->breadcrumbs.irq_wait;
 	if (wait) {
-		bool wakeup = engine->irq_seqno_barrier;
-
-		/* We use a callback from the dma-fence to submit
+		/*
+		 * We use a callback from the dma-fence to submit
 		 * requests after waiting on our own requests. To
 		 * ensure minimum delay in queuing the next request to
 		 * hardware, signal the fence now rather than wait for
@@ -1170,19 +1172,23 @@  static void notify_ring(struct intel_engine_cs *engine)
 		 * and to handle coalescing of multiple seqno updates
 		 * and many waiters.
 		 */
-		if (i915_seqno_passed(intel_engine_get_seqno(engine),
-				      wait->seqno)) {
+		if (i915_seqno_passed(seqno, wait->seqno)) {
 			struct i915_request *waiter = wait->request;
 
-			wakeup = true;
 			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 				      &waiter->fence.flags) &&
 			    intel_wait_check_request(wait, waiter))
 				rq = i915_request_get(waiter);
-		}
 
-		if (wakeup)
-			wake_up_process(wait->tsk);
+			tsk = wait->tsk;
+		} else {
+			if (engine->irq_seqno_barrier &&
+			    i915_seqno_passed(seqno, wait->seqno - 1)) {
+				set_bit(ENGINE_IRQ_BREADCRUMB,
+					&engine->irq_posted);
+				tsk = wait->tsk;
+			}
+		}
 	} else {
 		if (engine->breadcrumbs.irq_armed)
 			__intel_engine_disarm_breadcrumbs(engine);
@@ -1195,6 +1201,11 @@  static void notify_ring(struct intel_engine_cs *engine)
 		i915_request_put(rq);
 	}
 
+	if (tsk && tsk->state & TASK_NORMAL)
+		wake_up_process(tsk);
+
+	rcu_read_unlock();
+
 	trace_intel_engine_notify(engine, wait);
 }