diff mbox

[v2,6/7] drm/i915/execlists: Direct submission from irq handler

Message ID 20180507135731.10587-6-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson May 7, 2018, 1:57 p.m. UTC
Continuing the theme of bypassing ksoftirqd latency, also first try to
directly submit from the CS interrupt handler to clear the ELSP and
queue the next.

In the past, we have been hesitant to do this as the context switch
processing has been quite heavy, requiring forcewaked mmio. However, as
we now can read the GPU state from the cacheable HWSP, it is relatively
cheap!

Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c             | 13 ++++++-------
 drivers/gpu/drm/i915/intel_guc_submission.c |  2 ++
 drivers/gpu/drm/i915/intel_ringbuffer.h     | 16 ++++++++++++++++
 3 files changed, 24 insertions(+), 7 deletions(-)

Comments

Tvrtko Ursulin May 8, 2018, 10:54 a.m. UTC | #1
On 07/05/2018 14:57, Chris Wilson wrote:
> Continuing the theme of bypassing ksoftirqd latency, also first try to
> directly submit from the CS interrupt handler to clear the ELSP and
> queue the next.
> 
> In the past, we have been hesitant to do this as the context switch
> processing has been quite heavy, requiring forcewaked mmio. However, as
> we now can read the GPU state from the cacheable HWSP, it is relatively
> cheap!
> 
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_irq.c             | 13 ++++++-------
>   drivers/gpu/drm/i915/intel_guc_submission.c |  2 ++
>   drivers/gpu/drm/i915/intel_ringbuffer.h     | 16 ++++++++++++++++
>   3 files changed, 24 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index f9bc3aaa90d0..775cf167d938 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1465,19 +1465,18 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	bool tasklet = false;
>   
> -	if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
> -		if (READ_ONCE(engine->execlists.active))
> -			tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
> -						    &engine->irq_posted);
> -	}
> +	if (iir & GT_CONTEXT_SWITCH_INTERRUPT && READ_ONCE(execlists->active))
> +		tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
> +					    &engine->irq_posted);
>   
>   	if (iir & GT_RENDER_USER_INTERRUPT) {
>   		notify_ring(engine);
> -		tasklet |= USES_GUC_SUBMISSION(engine->i915);
> +		if (!test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> +			tasklet = USES_GUC_SUBMISSION(engine->i915);

I don't understand this change. In the GuC case IRQ_EXECLISTS is never 
set so the conditional is pointless. In execlist mode user interrupt 
has nothing to do with scheduling the tasklet.

>   	}
>   
>   	if (tasklet)
> -		tasklet_hi_schedule(&execlists->tasklet);
> +		execlists_tasklet(execlists);
>   }
>   
>   static void gen8_gt_irq_ack(struct drm_i915_private *i915,
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 6bfe30af7826..7d4542b46f5e 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -782,6 +782,8 @@ static void guc_submission_tasklet(unsigned long data)
>   	struct execlist_port *port = execlists->port;
>   	struct i915_request *rq;
>   
> +	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> +

I don't understand this either - there is no changed code path which 
sets this in GuC mode.

Regards,

Tvrtko

>   	rq = port_request(port);
>   	while (rq && i915_request_completed(rq)) {
>   		trace_i915_request_out(rq);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index f5545391d76a..da7e00ff2c6b 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -717,6 +717,22 @@ execlists_port_complete(struct intel_engine_execlists * const execlists,
>   	return port;
>   }
>   
> +static inline void
> +execlists_tasklet(struct intel_engine_execlists * const execlists)
> +{
> +	struct tasklet_struct * const t = &execlists->tasklet;
> +
> +	if (unlikely(atomic_read(&t->count))) /* GPU reset active */
> +		return;
> +
> +	if (tasklet_trylock(t)) {
> +		t->func(t->data);
> +		tasklet_unlock(t);
> +	} else {
> +		tasklet_hi_schedule(t);
> +	}
> +}
> +
>   static inline unsigned int
>   intel_engine_flag(const struct intel_engine_cs *engine)
>   {
>
Chris Wilson May 8, 2018, 11:10 a.m. UTC | #2
Quoting Tvrtko Ursulin (2018-05-08 11:54:27)
> 
> On 07/05/2018 14:57, Chris Wilson wrote:
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index f9bc3aaa90d0..775cf167d938 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -1465,19 +1465,18 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
> >       struct intel_engine_execlists * const execlists = &engine->execlists;
> >       bool tasklet = false;
> >   
> > -     if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
> > -             if (READ_ONCE(engine->execlists.active))
> > -                     tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
> > -                                                 &engine->irq_posted);
> > -     }
> > +     if (iir & GT_CONTEXT_SWITCH_INTERRUPT && READ_ONCE(execlists->active))
> > +             tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
> > +                                         &engine->irq_posted);
> >   
> >       if (iir & GT_RENDER_USER_INTERRUPT) {
> >               notify_ring(engine);
> > -             tasklet |= USES_GUC_SUBMISSION(engine->i915);
> > +             if (!test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> > +                     tasklet = USES_GUC_SUBMISSION(engine->i915);
> 
> I don't understand this change. In the GuC case IRQ_EXECLISTS is never 
> set so the conditional is pointless. In execlist mode user interrupt 
> has nothing to do with scheduling the tasklet.

Because notify_ring() may have just executed the tasklet and cleared the
bit from irq_posted. I didn't want to then do a second dequeue.

> >   static void gen8_gt_irq_ack(struct drm_i915_private *i915,
> > diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> > index 6bfe30af7826..7d4542b46f5e 100644
> > --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> > @@ -782,6 +782,8 @@ static void guc_submission_tasklet(unsigned long data)
> >       struct execlist_port *port = execlists->port;
> >       struct i915_request *rq;
> >   
> > +     clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> > +
> 
> I don't understand this either - there is no changed code path which 
> sets this in GuC mode.

The guc may takeover with the bit set. And since we aren't particularly
careful with parking before takeover, it was prudent to always clear it
here as a direct analogue to the execlists context switch handler.
-Chris
Tvrtko Ursulin May 8, 2018, 11:53 a.m. UTC | #3
On 08/05/2018 12:10, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-05-08 11:54:27)
>>
>> On 07/05/2018 14:57, Chris Wilson wrote:
>>> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
>>> index f9bc3aaa90d0..775cf167d938 100644
>>> --- a/drivers/gpu/drm/i915/i915_irq.c
>>> +++ b/drivers/gpu/drm/i915/i915_irq.c
>>> @@ -1465,19 +1465,18 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
>>>        struct intel_engine_execlists * const execlists = &engine->execlists;
>>>        bool tasklet = false;
>>>    
>>> -     if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
>>> -             if (READ_ONCE(engine->execlists.active))
>>> -                     tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
>>> -                                                 &engine->irq_posted);
>>> -     }
>>> +     if (iir & GT_CONTEXT_SWITCH_INTERRUPT && READ_ONCE(execlists->active))
>>> +             tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
>>> +                                         &engine->irq_posted);
>>>    
>>>        if (iir & GT_RENDER_USER_INTERRUPT) {
>>>                notify_ring(engine);
>>> -             tasklet |= USES_GUC_SUBMISSION(engine->i915);
>>> +             if (!test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
>>> +                     tasklet = USES_GUC_SUBMISSION(engine->i915);
>>
>> I don't understand this change. In the GuC case IRQ_EXECLISTS is never
>> set so the conditional is pointless. In execlist mode user interrupt
>> has nothing to do with scheduling the tasklet.
> 
> Because notify_ring() may have just executed the tasklet and cleared the
> bit from irq_posted. I didn't want to then do a second dequeue.

But IRQ_EXECLISTS is never set in GuC mode. So set-if-clear in this case 
is equivalent to unconditional-or.

If you want to clear the tasklet bool in execlist mode then this is 
extremely non-obvious. More readable would be something like

if (iir & USER_IRQ)
	tasklet = notify_ring(...) ? 0 : USES_GUC(...);

Where notify_ring would return true if it signalled anything.

We wouldn't know though if that means the tasklet actually ran. :(

Oh well.. put a comment please, because it really is non-obvious.

>>>    static void gen8_gt_irq_ack(struct drm_i915_private *i915,
>>> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
>>> index 6bfe30af7826..7d4542b46f5e 100644
>>> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
>>> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
>>> @@ -782,6 +782,8 @@ static void guc_submission_tasklet(unsigned long data)
>>>        struct execlist_port *port = execlists->port;
>>>        struct i915_request *rq;
>>>    
>>> +     clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>>> +
>>
>> I don't understand this either - there is no changed code path which
>> sets this in GuC mode.
> 
> The guc may takeover with the bit set. And since we aren't particularly
> careful with parking before takeover, it was prudent to always clear it
> here as a direct analogue to the execlists context switch handler.

Okay.

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index f9bc3aaa90d0..775cf167d938 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1465,19 +1465,18 @@  gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	bool tasklet = false;
 
-	if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
-		if (READ_ONCE(engine->execlists.active))
-			tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
-						    &engine->irq_posted);
-	}
+	if (iir & GT_CONTEXT_SWITCH_INTERRUPT && READ_ONCE(execlists->active))
+		tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
+					    &engine->irq_posted);
 
 	if (iir & GT_RENDER_USER_INTERRUPT) {
 		notify_ring(engine);
-		tasklet |= USES_GUC_SUBMISSION(engine->i915);
+		if (!test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
+			tasklet = USES_GUC_SUBMISSION(engine->i915);
 	}
 
 	if (tasklet)
-		tasklet_hi_schedule(&execlists->tasklet);
+		execlists_tasklet(execlists);
 }
 
 static void gen8_gt_irq_ack(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 6bfe30af7826..7d4542b46f5e 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -782,6 +782,8 @@  static void guc_submission_tasklet(unsigned long data)
 	struct execlist_port *port = execlists->port;
 	struct i915_request *rq;
 
+	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+
 	rq = port_request(port);
 	while (rq && i915_request_completed(rq)) {
 		trace_i915_request_out(rq);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f5545391d76a..da7e00ff2c6b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -717,6 +717,22 @@  execlists_port_complete(struct intel_engine_execlists * const execlists,
 	return port;
 }
 
+static inline void
+execlists_tasklet(struct intel_engine_execlists * const execlists)
+{
+	struct tasklet_struct * const t = &execlists->tasklet;
+
+	if (unlikely(atomic_read(&t->count))) /* GPU reset active */
+		return;
+
+	if (tasklet_trylock(t)) {
+		t->func(t->data);
+		tasklet_unlock(t);
+	} else {
+		tasklet_hi_schedule(t);
+	}
+}
+
 static inline unsigned int
 intel_engine_flag(const struct intel_engine_cs *engine)
 {