[v2,3/7] drm/i915/execlists: Make submission tasklet hardirq safe

Message ID 20180507135731.10587-3-chris@chris-wilson.co.uk
State New, archived

Commit Message

Chris Wilson May 7, 2018, 1:57 p.m. UTC
Prepare to allow the execlists submission to be run from underneath a
hardirq timer context (and not just the current softirq context) as is
required for fast preemption resets and context switches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 42 ++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 13 deletions(-)
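
The conversion below hinges on the difference between the two spinlock
flavours. As a minimal illustrative sketch (not part of the patch):
spin_lock_irq() unconditionally disables interrupts and spin_unlock_irq()
unconditionally re-enables them, so the pair is only safe where interrupts
are known to be enabled, whereas spin_lock_irqsave() records the current
interrupt state and spin_unlock_irqrestore() puts it back, which is what
makes the same critical section safe to enter from hardirq context too.

	unsigned long flags;

	/* Only valid with IRQs enabled; the unlock force-enables them. */
	spin_lock_irq(&engine->timeline.lock);
	__unwind_incomplete_requests(engine);
	spin_unlock_irq(&engine->timeline.lock);

	/* Safe from process, softirq or hardirq context: the saved IRQ
	 * state, not an unconditional enable, is restored on unlock. */
	spin_lock_irqsave(&engine->timeline.lock, flags);
	__unwind_incomplete_requests(engine);
	spin_unlock_irqrestore(&engine->timeline.lock, flags);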

Comments

Tvrtko Ursulin May 8, 2018, 10:10 a.m. UTC | #1
On 07/05/2018 14:57, Chris Wilson wrote:
> Prepare to allow the execlists submission to be run from underneath a
> hardirq timer context (and not just the current softirq context) as is
> required for fast preemption resets and context switches.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 42 ++++++++++++++++++++++----------
>   1 file changed, 29 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index f9f4064dec0e..15c373ea5b7e 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -357,10 +357,13 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
>   {
>   	struct intel_engine_cs *engine =
>   		container_of(execlists, typeof(*engine), execlists);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&engine->timeline.lock, flags);
>   
> -	spin_lock_irq(&engine->timeline.lock);
>   	__unwind_incomplete_requests(engine);
> -	spin_unlock_irq(&engine->timeline.lock);
> +
> +	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }
>   
>   static inline void
> @@ -554,7 +557,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>   	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>   }
>   
> -static void execlists_dequeue(struct intel_engine_cs *engine)
> +static bool __execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct execlist_port *port = execlists->port;
> @@ -564,6 +567,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	struct rb_node *rb;
>   	bool submit = false;
>   
> +	lockdep_assert_held(&engine->timeline.lock);
> +
>   	/* Hardware submission is through 2 ports. Conceptually each port
>   	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
>   	 * static for a context, and unique to each, so we only execute
> @@ -585,7 +590,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	 * and context switches) submission.
>   	 */
>   
> -	spin_lock_irq(&engine->timeline.lock);
>   	rb = execlists->first;
>   	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
>   
> @@ -600,7 +604,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   						EXECLISTS_ACTIVE_USER));
>   		GEM_BUG_ON(!port_count(&port[0]));
>   		if (port_count(&port[0]) > 1)
> -			goto unlock;
> +			return false;
>   
>   		/*
>   		 * If we write to ELSP a second time before the HW has had
> @@ -610,11 +614,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		 * the HW to indicate that it has had a chance to respond.
>   		 */
>   		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
> -			goto unlock;
> +			return false;
>   
>   		if (need_preempt(engine, last, execlists->queue_priority)) {
>   			inject_preempt_context(engine);
> -			goto unlock;
> +			return false;
>   		}
>   
>   		/*
> @@ -639,7 +643,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		 * priorities of the ports haven't been switch.
>   		 */
>   		if (port_count(&port[1]))
> -			goto unlock;
> +			return false;
>   
>   		/*
>   		 * WaIdleLiteRestore:bdw,skl
> @@ -744,13 +748,25 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	/* We must always keep the beast fed if we have work piled up */
>   	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
>   
> -unlock:
> -	spin_unlock_irq(&engine->timeline.lock);
> -
> -	if (submit) {
> +	/* Re-evaluate the executing context setup after each preemptive kick */
> +	if (last)
>   		execlists_user_begin(execlists, execlists->port);

Last can be non-null and submit false, so this is not equivalent.

By the looks of it, it makes no difference, since it is OK to set the
execlists user active bit multiple times, even though the helper is
called execlists_set_active_once. But the return value is not looked at.

Still, why not keep doing this when submit is true?

Regards,

Tvrtko

> +
> +	return submit;
> +}
> +
> +static void execlists_dequeue(struct intel_engine_cs *engine)
> +{
> +	struct intel_engine_execlists * const execlists = &engine->execlists;
> +	unsigned long flags;
> +	bool submit;
> +
> +	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	submit = __execlists_dequeue(engine);
> +	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +
> +	if (submit)
>   		execlists_submit_ports(engine);
> -	}
>   
>   	GEM_BUG_ON(port_isset(execlists->port) &&
>   		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
>
Chris Wilson May 8, 2018, 10:24 a.m. UTC | #2
Quoting Tvrtko Ursulin (2018-05-08 11:10:41)
> On 07/05/2018 14:57, Chris Wilson wrote:
> > @@ -744,13 +748,25 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >       /* We must always keep the beast fed if we have work piled up */
> >       GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
> >   
> > -unlock:
> > -     spin_unlock_irq(&engine->timeline.lock);
> > -
> > -     if (submit) {
> > +     /* Re-evaluate the executing context setup after each preemptive kick */
> > +     if (last)
> >               execlists_user_begin(execlists, execlists->port);
> 
> Last can be non-null and submit false, so this is not equivalent.
> 
> By the looks of it, it makes no difference, since it is OK to set the
> execlists user active bit multiple times, even though the helper is
> called execlists_set_active_once. But the return value is not looked at.
> 
> Still, why not keep doing this when submit is true?

It's a subtle difference, in that we want the context reevaluated every
time we kick the queue as we may have changed state that we want to
reload, and not just ELSP. Sometimes we need inheritance of more than
just priority...
-Chris
Tvrtko Ursulin May 8, 2018, 10:56 a.m. UTC | #3
On 08/05/2018 11:24, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-05-08 11:10:41)
>> On 07/05/2018 14:57, Chris Wilson wrote:
>>> @@ -744,13 +748,25 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>>>        /* We must always keep the beast fed if we have work piled up */
>>>        GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
>>>    
>>> -unlock:
>>> -     spin_unlock_irq(&engine->timeline.lock);
>>> -
>>> -     if (submit) {
>>> +     /* Re-evaluate the executing context setup after each preemptive kick */
>>> +     if (last)
>>>                execlists_user_begin(execlists, execlists->port);
>>
>> Last can be non-null and submit false, so this is not equivalent.
>>
>> By the looks of it, it makes no difference, since it is OK to set the
>> execlists user active bit multiple times, even though the helper is
>> called execlists_set_active_once. But the return value is not looked at.
>>
>> Still, why not keep doing this when submit is true?
> 
> It's a subtle difference, in that we want the context reevaluated every
> time we kick the queue as we may have changed state that we want to
> reload, and not just ELSP. Sometimes we need inheritance of more than
> just priority...

What do you mean by context re-evaluated?

ACTIVE_USER is set from first to last request, no? I don't understand 
what would change if you would set it multiple times while it is already 
set.

Regards,

Tvrtko
Chris Wilson May 8, 2018, 11:05 a.m. UTC | #4
Quoting Tvrtko Ursulin (2018-05-08 11:56:37)
> 
> On 08/05/2018 11:24, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-05-08 11:10:41)
> >> On 07/05/2018 14:57, Chris Wilson wrote:
> >>> @@ -744,13 +748,25 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >>>        /* We must always keep the beast fed if we have work piled up */
> >>>        GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
> >>>    
> >>> -unlock:
> >>> -     spin_unlock_irq(&engine->timeline.lock);
> >>> -
> >>> -     if (submit) {
> >>> +     /* Re-evaluate the executing context setup after each preemptive kick */
> >>> +     if (last)
> >>>                execlists_user_begin(execlists, execlists->port);
> >>
> >> Last can be non-null and submit false, so this is not equivalent.
> >>
> >> By the looks of it, it makes no difference, since it is OK to set the
> >> execlists user active bit multiple times, even though the helper is
> >> called execlists_set_active_once. But the return value is not looked at.
> >>
> >> Still, why not keep doing this when submit is true?
> > 
> > It's a subtle difference, in that we want the context reevaluated every
> > time we kick the queue as we may have changed state that we want to
> > reload, and not just ELSP. Sometimes we need inheritance of more than
> > just priority...
> 
> What do you mean by context re-evaluated?
> 
> ACTIVE_USER is set from first to last request, no? I don't understand 
> what would change if you would set it multiple times while it is already 
> set.

It's not about ACTIVE_USER. This is a hook to indicate a change in
context state being executed on the GPU. It's to be hooked into by the
cpufreq/pstate code, gpufreq code, etc. Later realisation is that they
need to be notified for all context changes here.
-Chris
Tvrtko Ursulin May 8, 2018, 11:38 a.m. UTC | #5
On 08/05/2018 12:05, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-05-08 11:56:37)
>>
>> On 08/05/2018 11:24, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-05-08 11:10:41)
>>>> On 07/05/2018 14:57, Chris Wilson wrote:
>>>>> @@ -744,13 +748,25 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>>>>>         /* We must always keep the beast fed if we have work piled up */
>>>>>         GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
>>>>>     
>>>>> -unlock:
>>>>> -     spin_unlock_irq(&engine->timeline.lock);
>>>>> -
>>>>> -     if (submit) {
>>>>> +     /* Re-evaluate the executing context setup after each preemptive kick */
>>>>> +     if (last)
>>>>>                 execlists_user_begin(execlists, execlists->port);
>>>>
>>>> Last can be non-null and submit false, so this is not equivalent.
>>>>
>>>> By the looks of it, it makes no difference, since it is OK to set the
>>>> execlists user active bit multiple times, even though the helper is
>>>> called execlists_set_active_once. But the return value is not looked at.
>>>>
>>>> Still, why not keep doing this when submit is true?
>>>
>>> It's a subtle difference, in that we want the context reevaluated every
>>> time we kick the queue as we may have changed state that we want to
>>> reload, and not just ELSP. Sometimes we need inheritance of more than
>>> just priority...
>>
>> What do you mean by context re-evaluated?
>>
>> ACTIVE_USER is set from first to last request, no? I don't understand
>> what would change if you would set it multiple times while it is already
>> set.
> 
> It's not about ACTIVE_USER. This is a hook to indicate a change in
> context state being executed on the GPU. It's to be hooked into by the
> cpufreq/pstate code, gpufreq code, etc. Later realisation is that they
> need to be notified for all context changes here.

Ok so nothing as such relating to making tasklets hardirq safe.

Is the execlists_user_begin still the right name then?

Any future use for execlists_set_active_once or we could drop it 
(replacing with execlists_set_active)?

Regards,

Tvrtko
Chris Wilson May 8, 2018, 11:43 a.m. UTC | #6
Quoting Tvrtko Ursulin (2018-05-08 12:38:06)
> 
> On 08/05/2018 12:05, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-05-08 11:56:37)
> >>
> >> On 08/05/2018 11:24, Chris Wilson wrote:
> >>> Quoting Tvrtko Ursulin (2018-05-08 11:10:41)
> >>>> On 07/05/2018 14:57, Chris Wilson wrote:
> >>>>> @@ -744,13 +748,25 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >>>>>         /* We must always keep the beast fed if we have work piled up */
> >>>>>         GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
> >>>>>     
> >>>>> -unlock:
> >>>>> -     spin_unlock_irq(&engine->timeline.lock);
> >>>>> -
> >>>>> -     if (submit) {
> >>>>> +     /* Re-evaluate the executing context setup after each preemptive kick */
> >>>>> +     if (last)
> >>>>>                 execlists_user_begin(execlists, execlists->port);
> >>>>
> >>>> Last can be non-null and submit false, so this is not equivalent.
> >>>>
> >>>> By the looks of it, it makes no difference, since it is OK to set the
> >>>> execlists user active bit multiple times, even though the helper is
> >>>> called execlists_set_active_once. But the return value is not looked at.
> >>>>
> >>>> Still, why not keep doing this when submit is true?
> >>>
> >>> It's a subtle difference, in that we want the context reevaluated every
> >>> time we kick the queue as we may have changed state that we want to
> >>> reload, and not just ELSP. Sometimes we need inheritance of more than
> >>> just priority...
> >>
> >> What do you mean by context re-evaluated?
> >>
> >> ACTIVE_USER is set from first to last request, no? I don't understand
> >> what would change if you would set it multiple times while it is already
> >> set.
> > 
> > It's not about ACTIVE_USER. This is a hook to indicate a change in
> > context state being executed on the GPU. It's to be hooked into by the
> > cpufreq/pstate code, gpufreq code, etc. Later realisation is that they
> > need to be notified for all context changes here.
> 
> Ok so nothing as such relating to making tasklets hardirq safe.

Ssh.
 
> Is the execlists_user_begin still the right name then?

It's never been quite the right name. Just the best I could come up
with. execlists_user_busy()/execlists_user_idle() might be more
sensible.
 
> Any future use for execlists_set_active_once or we could drop it 
> (replacing with execlists_set_active)?

set_active_once is intended to be used by the cpufreq/pstate code, or
anything else that only cares about the off->on/on->off transition and
not every reload. I'm waiting for that code to be merged...
-Chris
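
For reference, a simplified sketch of the two helpers being contrasted
here (illustrative; the exact i915 definitions may differ in detail):
execlists_set_active() has plain level semantics, while
execlists_set_active_once() also reports the off->on edge, which is the
transition a cpufreq/pstate consumer would latch onto.

	static inline void
	execlists_set_active(struct intel_engine_execlists *execlists,
			     unsigned int bit)
	{
		/* Level semantics: just ensure the bit is set. */
		__set_bit(bit, (unsigned long *)&execlists->active);
	}

	static inline bool
	execlists_set_active_once(struct intel_engine_execlists *execlists,
				  unsigned int bit)
	{
		/* Edge semantics: true only on the 0 -> 1 transition. */
		return !__test_and_set_bit(bit,
					   (unsigned long *)&execlists->active);
	}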
Tvrtko Ursulin May 8, 2018, 5:38 p.m. UTC | #7
On 07/05/2018 14:57, Chris Wilson wrote:
> Prepare to allow the execlists submission to be run from underneath a
> hardirq timer context (and not just the current softirq context) as is
> required for fast preemption resets and context switches.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 42 ++++++++++++++++++++++----------
>   1 file changed, 29 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index f9f4064dec0e..15c373ea5b7e 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -357,10 +357,13 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
>   {
>   	struct intel_engine_cs *engine =
>   		container_of(execlists, typeof(*engine), execlists);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&engine->timeline.lock, flags);
>   
> -	spin_lock_irq(&engine->timeline.lock);
>   	__unwind_incomplete_requests(engine);
> -	spin_unlock_irq(&engine->timeline.lock);
> +
> +	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }
>   
>   static inline void
> @@ -554,7 +557,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>   	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>   }
>   
> -static void execlists_dequeue(struct intel_engine_cs *engine)
> +static bool __execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct execlist_port *port = execlists->port;
> @@ -564,6 +567,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	struct rb_node *rb;
>   	bool submit = false;
>   
> +	lockdep_assert_held(&engine->timeline.lock);
> +
>   	/* Hardware submission is through 2 ports. Conceptually each port
>   	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
>   	 * static for a context, and unique to each, so we only execute
> @@ -585,7 +590,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	 * and context switches) submission.
>   	 */
>   
> -	spin_lock_irq(&engine->timeline.lock);
>   	rb = execlists->first;
>   	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
>   
> @@ -600,7 +604,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   						EXECLISTS_ACTIVE_USER));
>   		GEM_BUG_ON(!port_count(&port[0]));
>   		if (port_count(&port[0]) > 1)
> -			goto unlock;
> +			return false;
>   
>   		/*
>   		 * If we write to ELSP a second time before the HW has had
> @@ -610,11 +614,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		 * the HW to indicate that it has had a chance to respond.
>   		 */
>   		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
> -			goto unlock;
> +			return false;
>   
>   		if (need_preempt(engine, last, execlists->queue_priority)) {
>   			inject_preempt_context(engine);
> -			goto unlock;
> +			return false;
>   		}
>   
>   		/*
> @@ -639,7 +643,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		 * priorities of the ports haven't been switch.
>   		 */
>   		if (port_count(&port[1]))
> -			goto unlock;
> +			return false;
>   
>   		/*
>   		 * WaIdleLiteRestore:bdw,skl
> @@ -744,13 +748,25 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	/* We must always keep the beast fed if we have work piled up */
>   	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
>   
> -unlock:
> -	spin_unlock_irq(&engine->timeline.lock);
> -
> -	if (submit) {
> +	/* Re-evaluate the executing context setup after each preemptive kick */
> +	if (last)
>   		execlists_user_begin(execlists, execlists->port);
> +
> +	return submit;
> +}
> +
> +static void execlists_dequeue(struct intel_engine_cs *engine)
> +{
> +	struct intel_engine_execlists * const execlists = &engine->execlists;
> +	unsigned long flags;
> +	bool submit;
> +
> +	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	submit = __execlists_dequeue(engine);
> +	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +
> +	if (submit)
>   		execlists_submit_ports(engine);
> -	}
>   
>   	GEM_BUG_ON(port_isset(execlists->port) &&
>   		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Tvrtko Ursulin May 8, 2018, 5:45 p.m. UTC | #8
On 07/05/2018 14:57, Chris Wilson wrote:
> Prepare to allow the execlists submission to be run from underneath a
> hardirq timer context (and not just the current softirq context) as is
> required for fast preemption resets and context switches.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 42 ++++++++++++++++++++++----------
>   1 file changed, 29 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index f9f4064dec0e..15c373ea5b7e 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -357,10 +357,13 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
>   {
>   	struct intel_engine_cs *engine =
>   		container_of(execlists, typeof(*engine), execlists);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&engine->timeline.lock, flags);
>   
> -	spin_lock_irq(&engine->timeline.lock);
>   	__unwind_incomplete_requests(engine);
> -	spin_unlock_irq(&engine->timeline.lock);
> +
> +	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }
>   
>   static inline void
> @@ -554,7 +557,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>   	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>   }
>   
> -static void execlists_dequeue(struct intel_engine_cs *engine)
> +static bool __execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct execlist_port *port = execlists->port;
> @@ -564,6 +567,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	struct rb_node *rb;
>   	bool submit = false;
>   
> +	lockdep_assert_held(&engine->timeline.lock);
> +
>   	/* Hardware submission is through 2 ports. Conceptually each port
>   	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
>   	 * static for a context, and unique to each, so we only execute
> @@ -585,7 +590,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	 * and context switches) submission.
>   	 */
>   
> -	spin_lock_irq(&engine->timeline.lock);
>   	rb = execlists->first;
>   	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
>   
> @@ -600,7 +604,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   						EXECLISTS_ACTIVE_USER));
>   		GEM_BUG_ON(!port_count(&port[0]));
>   		if (port_count(&port[0]) > 1)
> -			goto unlock;
> +			return false;
>   
>   		/*
>   		 * If we write to ELSP a second time before the HW has had
> @@ -610,11 +614,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		 * the HW to indicate that it has had a chance to respond.
>   		 */
>   		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
> -			goto unlock;
> +			return false;
>   
>   		if (need_preempt(engine, last, execlists->queue_priority)) {
>   			inject_preempt_context(engine);
> -			goto unlock;
> +			return false;
>   		}
>   
>   		/*
> @@ -639,7 +643,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		 * priorities of the ports haven't been switch.
>   		 */
>   		if (port_count(&port[1]))
> -			goto unlock;
> +			return false;
>   
>   		/*
>   		 * WaIdleLiteRestore:bdw,skl
> @@ -744,13 +748,25 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	/* We must always keep the beast fed if we have work piled up */
>   	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
>   
> -unlock:
> -	spin_unlock_irq(&engine->timeline.lock);
> -
> -	if (submit) {
> +	/* Re-evaluate the executing context setup after each preemptive kick */
> +	if (last)
>   		execlists_user_begin(execlists, execlists->port);
> +
> +	return submit;
> +}
> +
> +static void execlists_dequeue(struct intel_engine_cs *engine)
> +{
> +	struct intel_engine_execlists * const execlists = &engine->execlists;
> +	unsigned long flags;
> +	bool submit;
> +
> +	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	submit = __execlists_dequeue(engine);
> +	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +
> +	if (submit)
>   		execlists_submit_ports(engine);
> -	}

Actually, having read the guc version, why doesn't 
execlists_submit_ports need to be hardirq safe?

Regards,

Tvrtko

>   
>   	GEM_BUG_ON(port_isset(execlists->port) &&
>   		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
>
Chris Wilson May 8, 2018, 8:59 p.m. UTC | #9
Quoting Tvrtko Ursulin (2018-05-08 18:45:44)
> 
> On 07/05/2018 14:57, Chris Wilson wrote:
> > Prepare to allow the execlists submission to be run from underneath a
> > hardirq timer context (and not just the current softirq context) as is
> > required for fast preemption resets and context switches.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/intel_lrc.c | 42 ++++++++++++++++++++++----------
> >   1 file changed, 29 insertions(+), 13 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index f9f4064dec0e..15c373ea5b7e 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -357,10 +357,13 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
> >   {
> >       struct intel_engine_cs *engine =
> >               container_of(execlists, typeof(*engine), execlists);
> > +     unsigned long flags;
> > +
> > +     spin_lock_irqsave(&engine->timeline.lock, flags);
> >   
> > -     spin_lock_irq(&engine->timeline.lock);
> >       __unwind_incomplete_requests(engine);
> > -     spin_unlock_irq(&engine->timeline.lock);
> > +
> > +     spin_unlock_irqrestore(&engine->timeline.lock, flags);
> >   }
> >   
> >   static inline void
> > @@ -554,7 +557,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
> >       execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
> >   }
> >   
> > -static void execlists_dequeue(struct intel_engine_cs *engine)
> > +static bool __execlists_dequeue(struct intel_engine_cs *engine)
> >   {
> >       struct intel_engine_execlists * const execlists = &engine->execlists;
> >       struct execlist_port *port = execlists->port;
> > @@ -564,6 +567,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >       struct rb_node *rb;
> >       bool submit = false;
> >   
> > +     lockdep_assert_held(&engine->timeline.lock);
> > +
> >       /* Hardware submission is through 2 ports. Conceptually each port
> >        * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
> >        * static for a context, and unique to each, so we only execute
> > @@ -585,7 +590,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >        * and context switches) submission.
> >        */
> >   
> > -     spin_lock_irq(&engine->timeline.lock);
> >       rb = execlists->first;
> >       GEM_BUG_ON(rb_first(&execlists->queue) != rb);
> >   
> > @@ -600,7 +604,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >                                               EXECLISTS_ACTIVE_USER));
> >               GEM_BUG_ON(!port_count(&port[0]));
> >               if (port_count(&port[0]) > 1)
> > -                     goto unlock;
> > +                     return false;
> >   
> >               /*
> >                * If we write to ELSP a second time before the HW has had
> > @@ -610,11 +614,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >                * the HW to indicate that it has had a chance to respond.
> >                */
> >               if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
> > -                     goto unlock;
> > +                     return false;
> >   
> >               if (need_preempt(engine, last, execlists->queue_priority)) {
> >                       inject_preempt_context(engine);
> > -                     goto unlock;
> > +                     return false;
> >               }
> >   
> >               /*
> > @@ -639,7 +643,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >                * priorities of the ports haven't been switch.
> >                */
> >               if (port_count(&port[1]))
> > -                     goto unlock;
> > +                     return false;
> >   
> >               /*
> >                * WaIdleLiteRestore:bdw,skl
> > @@ -744,13 +748,25 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >       /* We must always keep the beast fed if we have work piled up */
> >       GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
> >   
> > -unlock:
> > -     spin_unlock_irq(&engine->timeline.lock);
> > -
> > -     if (submit) {
> > +     /* Re-evaluate the executing context setup after each preemptive kick */
> > +     if (last)
> >               execlists_user_begin(execlists, execlists->port);
> > +
> > +     return submit;
> > +}
> > +
> > +static void execlists_dequeue(struct intel_engine_cs *engine)
> > +{
> > +     struct intel_engine_execlists * const execlists = &engine->execlists;
> > +     unsigned long flags;
> > +     bool submit;
> > +
> > +     spin_lock_irqsave(&engine->timeline.lock, flags);
> > +     submit = __execlists_dequeue(engine);
> > +     spin_unlock_irqrestore(&engine->timeline.lock, flags);
> > +
> > +     if (submit)
> >               execlists_submit_ports(engine);
> > -     }
> 
> Actually, having read the guc version, why doesn't 
> execlists_submit_ports need to be hardirq safe?

> execlists->port[] and the ELSP register are guarded by the tasklet (they
are only accessed from inside the tasklet). guc caught me off guard
because it uses a shared wq (and spinlock) for all tasklets. So guc
requires extending the irq-off section across the shared wq.
-Chris
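
Put another way (an illustrative sketch with simplified structure; the
real tasklet also consumes CSB events first): a tasklet never runs
concurrently with itself, so state that is only ever touched from inside
the tasklet needs no lock of its own.

	static void execlists_submission_tasklet(unsigned long data)
	{
		struct intel_engine_cs *engine = (struct intel_engine_cs *)data;

		/*
		 * engine->timeline.lock is taken (irqsave) inside the
		 * dequeue, but the ELSP write in execlists_submit_ports()
		 * is not locked: only this tasklet ever touches
		 * execlists->port[] or ELSP, so the tasklet's own
		 * non-reentrancy is the serialisation.
		 */
		execlists_dequeue(engine);
	}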
Chris Wilson May 9, 2018, 9:23 a.m. UTC | #10
Quoting Chris Wilson (2018-05-08 21:59:20)
> Quoting Tvrtko Ursulin (2018-05-08 18:45:44)
> > 
> > On 07/05/2018 14:57, Chris Wilson wrote:
> > > Prepare to allow the execlists submission to be run from underneath a
> > > hardirq timer context (and not just the current softirq context) as is
> > > required for fast preemption resets and context switches.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > ---
> > >   drivers/gpu/drm/i915/intel_lrc.c | 42 ++++++++++++++++++++++----------
> > >   1 file changed, 29 insertions(+), 13 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > > index f9f4064dec0e..15c373ea5b7e 100644
> > > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > > @@ -357,10 +357,13 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
> > >   {
> > >       struct intel_engine_cs *engine =
> > >               container_of(execlists, typeof(*engine), execlists);
> > > +     unsigned long flags;
> > > +
> > > +     spin_lock_irqsave(&engine->timeline.lock, flags);
> > >   
> > > -     spin_lock_irq(&engine->timeline.lock);
> > >       __unwind_incomplete_requests(engine);
> > > -     spin_unlock_irq(&engine->timeline.lock);
> > > +
> > > +     spin_unlock_irqrestore(&engine->timeline.lock, flags);
> > >   }
> > >   
> > >   static inline void
> > > @@ -554,7 +557,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
> > >       execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
> > >   }
> > >   
> > > -static void execlists_dequeue(struct intel_engine_cs *engine)
> > > +static bool __execlists_dequeue(struct intel_engine_cs *engine)
> > >   {
> > >       struct intel_engine_execlists * const execlists = &engine->execlists;
> > >       struct execlist_port *port = execlists->port;
> > > @@ -564,6 +567,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> > >       struct rb_node *rb;
> > >       bool submit = false;
> > >   
> > > +     lockdep_assert_held(&engine->timeline.lock);
> > > +
> > >       /* Hardware submission is through 2 ports. Conceptually each port
> > >        * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
> > >        * static for a context, and unique to each, so we only execute
> > > @@ -585,7 +590,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> > >        * and context switches) submission.
> > >        */
> > >   
> > > -     spin_lock_irq(&engine->timeline.lock);
> > >       rb = execlists->first;
> > >       GEM_BUG_ON(rb_first(&execlists->queue) != rb);
> > >   
> > > @@ -600,7 +604,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> > >                                               EXECLISTS_ACTIVE_USER));
> > >               GEM_BUG_ON(!port_count(&port[0]));
> > >               if (port_count(&port[0]) > 1)
> > > -                     goto unlock;
> > > +                     return false;
> > >   
> > >               /*
> > >                * If we write to ELSP a second time before the HW has had
> > > @@ -610,11 +614,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> > >                * the HW to indicate that it has had a chance to respond.
> > >                */
> > >               if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
> > > -                     goto unlock;
> > > +                     return false;
> > >   
> > >               if (need_preempt(engine, last, execlists->queue_priority)) {
> > >                       inject_preempt_context(engine);
> > > -                     goto unlock;
> > > +                     return false;
> > >               }
> > >   
> > >               /*
> > > @@ -639,7 +643,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> > >                * priorities of the ports haven't been switch.
> > >                */
> > >               if (port_count(&port[1]))
> > > -                     goto unlock;
> > > +                     return false;
> > >   
> > >               /*
> > >                * WaIdleLiteRestore:bdw,skl
> > > @@ -744,13 +748,25 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> > >       /* We must always keep the beast fed if we have work piled up */
> > >       GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
> > >   
> > > -unlock:
> > > -     spin_unlock_irq(&engine->timeline.lock);
> > > -
> > > -     if (submit) {
> > > +     /* Re-evaluate the executing context setup after each preemptive kick */
> > > +     if (last)
> > >               execlists_user_begin(execlists, execlists->port);
> > > +
> > > +     return submit;
> > > +}
> > > +
> > > +static void execlists_dequeue(struct intel_engine_cs *engine)
> > > +{
> > > +     struct intel_engine_execlists * const execlists = &engine->execlists;
> > > +     unsigned long flags;
> > > +     bool submit;
> > > +
> > > +     spin_lock_irqsave(&engine->timeline.lock, flags);
> > > +     submit = __execlists_dequeue(engine);
> > > +     spin_unlock_irqrestore(&engine->timeline.lock, flags);
> > > +
> > > +     if (submit)
> > >               execlists_submit_ports(engine);
> > > -     }
> > 
> > Actually, having read the guc version, why doesn't 
> > execlists_submit_ports need to be hardirq safe?
> 
> > execlists->port[] and the ELSP register are guarded by the tasklet (they
> are only accessed from inside the tasklet). guc caught me off guard
> because it uses a shared wq (and spinlock) for all tasklets. So guc
> requires extending the irq-off section across the shared wq.

I took your r-b and ran with it. The question you asked was exactly the
puzzle I fell into with the guc, so I believe we're on the same page :)

Pushed the irqsafe patches, as they are useful for a couple of series.
-Chris

Patch

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f9f4064dec0e..15c373ea5b7e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -357,10 +357,13 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
 {
 	struct intel_engine_cs *engine =
 		container_of(execlists, typeof(*engine), execlists);
+	unsigned long flags;
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
 
-	spin_lock_irq(&engine->timeline.lock);
 	__unwind_incomplete_requests(engine);
-	spin_unlock_irq(&engine->timeline.lock);
+
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static inline void
@@ -554,7 +557,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
-static void execlists_dequeue(struct intel_engine_cs *engine)
+static bool __execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
@@ -564,6 +567,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	struct rb_node *rb;
 	bool submit = false;
 
+	lockdep_assert_held(&engine->timeline.lock);
+
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
 	 * static for a context, and unique to each, so we only execute
@@ -585,7 +590,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
-	spin_lock_irq(&engine->timeline.lock);
 	rb = execlists->first;
 	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
 
@@ -600,7 +604,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 						EXECLISTS_ACTIVE_USER));
 		GEM_BUG_ON(!port_count(&port[0]));
 		if (port_count(&port[0]) > 1)
-			goto unlock;
+			return false;
 
 		/*
 		 * If we write to ELSP a second time before the HW has had
@@ -610,11 +614,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 * the HW to indicate that it has had a chance to respond.
 		 */
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
-			goto unlock;
+			return false;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
 			inject_preempt_context(engine);
-			goto unlock;
+			return false;
 		}
 
 		/*
@@ -639,7 +643,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 * priorities of the ports haven't been switch.
 		 */
 		if (port_count(&port[1]))
-			goto unlock;
+			return false;
 
 		/*
 		 * WaIdleLiteRestore:bdw,skl
@@ -744,13 +748,25 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	/* We must always keep the beast fed if we have work piled up */
 	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
 
-unlock:
-	spin_unlock_irq(&engine->timeline.lock);
-
-	if (submit) {
+	/* Re-evaluate the executing context setup after each preemptive kick */
+	if (last)
 		execlists_user_begin(execlists, execlists->port);
+
+	return submit;
+}
+
+static void execlists_dequeue(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+	unsigned long flags;
+	bool submit;
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+	submit = __execlists_dequeue(engine);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
+	if (submit)
 		execlists_submit_ports(engine);
-	}
 
 	GEM_BUG_ON(port_isset(execlists->port) &&
 		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
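
For reference, the payoff the commit message alludes to would look
roughly like the hypothetical sketch below, in which every name except
execlists_submission_tasklet() is invented for illustration: once the
locking is irqsave throughout, a preemption-timeout hrtimer running in
hardirq context can drive submission directly rather than waiting for
the softirq tasklet to be scheduled.

	/* Hypothetical: preempt_timer is not a field added by this series. */
	static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
	{
		struct intel_engine_execlists *execlists =
			container_of(hrtimer, typeof(*execlists), preempt_timer);
		struct intel_engine_cs *engine =
			container_of(execlists, typeof(*engine), execlists);

		/* hardirq context: safe only because the submission path now
		 * uses spin_lock_irqsave() rather than spin_lock_irq(). */
		execlists_submission_tasklet((unsigned long)engine);

		return HRTIMER_NORESTART;
	}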