Message ID | 20180507135731.10587-3-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
On 07/05/2018 14:57, Chris Wilson wrote:
> Prepare to allow the execlists submission to be run from underneath a
> hardirq timer context (and not just the current softirq context) as is
> required for fast preemption resets and context switches.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 42 ++++++++++++++++++++++----------
>  1 file changed, 29 insertions(+), 13 deletions(-)
>
[snip]
>
> @@ -744,13 +748,25 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  	/* We must always keep the beast fed if we have work piled up */
>  	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
>
> -unlock:
> -	spin_unlock_irq(&engine->timeline.lock);
> -
> -	if (submit) {
> +	/* Re-evaluate the executing context setup after each preemptive kick */
> +	if (last)
>  		execlists_user_begin(execlists, execlists->port);

last can be non-NULL and submit false, so this is not equivalent.

By the looks of it, it makes no difference, since it is OK to set the
execlists user active bit multiple times, even though the helper is
called execlists_set_active_once. But the return value is not looked at.

Still, why not keep doing this only when submit is true?

Regards,

Tvrtko
Quoting Tvrtko Ursulin (2018-05-08 11:10:41)
> On 07/05/2018 14:57, Chris Wilson wrote:
> > @@ -744,13 +748,25 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >  	/* We must always keep the beast fed if we have work piled up */
> >  	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
> >
> > -unlock:
> > -	spin_unlock_irq(&engine->timeline.lock);
> > -
> > -	if (submit) {
> > +	/* Re-evaluate the executing context setup after each preemptive kick */
> > +	if (last)
> >  		execlists_user_begin(execlists, execlists->port);
>
> last can be non-NULL and submit false, so this is not equivalent.
>
> By the looks of it, it makes no difference, since it is OK to set the
> execlists user active bit multiple times, even though the helper is
> called execlists_set_active_once. But the return value is not looked at.
>
> Still, why not keep doing this only when submit is true?

It's a subtle difference, in that we want the context reevaluated every
time we kick the queue, as we may have changed state that we want to
reload, and not just ELSP. Sometimes we need inheritance of more than
just priority...
-Chris
On 08/05/2018 11:24, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-05-08 11:10:41)
[snip]
>> Still, why not keep doing this only when submit is true?
>
> It's a subtle difference, in that we want the context reevaluated every
> time we kick the queue, as we may have changed state that we want to
> reload, and not just ELSP. Sometimes we need inheritance of more than
> just priority...

What do you mean by the context being re-evaluated?

ACTIVE_USER is set from the first to the last request, no? I don't
understand what would change if you set it multiple times while it is
already set.

Regards,

Tvrtko
Quoting Tvrtko Ursulin (2018-05-08 11:56:37)
> On 08/05/2018 11:24, Chris Wilson wrote:
[snip]
> What do you mean by the context being re-evaluated?
>
> ACTIVE_USER is set from the first to the last request, no? I don't
> understand what would change if you set it multiple times while it is
> already set.

It's not about ACTIVE_USER. This is a hook to indicate a change in the
context state being executed on the GPU. It's to be hooked into by the
cpufreq/pstate code, gpufreq code, etc. The later realisation is that
they need to be notified of all context changes here.
-Chris
On 08/05/2018 12:05, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-05-08 11:56:37)
[snip]
> It's not about ACTIVE_USER. This is a hook to indicate a change in the
> context state being executed on the GPU. It's to be hooked into by the
> cpufreq/pstate code, gpufreq code, etc. The later realisation is that
> they need to be notified of all context changes here.

Ok, so nothing as such relating to making the tasklets hardirq safe.

Is execlists_user_begin still the right name then? And is there any
future use for execlists_set_active_once, or could we drop it (replacing
it with execlists_set_active)?

Regards,

Tvrtko
Quoting Tvrtko Ursulin (2018-05-08 12:38:06)
> On 08/05/2018 12:05, Chris Wilson wrote:
[snip]
> Ok, so nothing as such relating to making the tasklets hardirq safe.

Ssh.

> Is execlists_user_begin still the right name then?

It's never been quite the right name, just the best I could come up
with. execlists_user_busy()/execlists_user_idle() might be more
sensible.

> And is there any future use for execlists_set_active_once, or could we
> drop it (replacing it with execlists_set_active)?

set_active_once is intended to be used by the cpufreq/pstate code, or
anything else that only cares about the off->on/on->off transition and
not every reload. I'm waiting for that code to be merged...
-Chris
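The "once" semantics under discussion amount to a test-and-set over the
execlists state bits. Below is a minimal sketch of the idea, assuming a
bitmask-style active field as in i915; the names are illustrative, not
the exact upstream helpers:

/*
 * Minimal sketch of the set_active()/set_active_once() split discussed
 * above (illustrative names, not the exact upstream helpers).
 * set_active() unconditionally sets the bit, while set_active_once()
 * additionally reports whether this call performed the off->on
 * transition, so a consumer such as cpufreq/pstate code can react only
 * to the first activation and ignore re-arming on every queue kick.
 */
#include <linux/bitops.h>

struct execlists_sketch {
	unsigned int active;	/* EXECLISTS_ACTIVE_* style bits */
};

static inline void
sketch_set_active(struct execlists_sketch *el, unsigned int bit)
{
	__set_bit(bit, (unsigned long *)&el->active);
}

static inline bool
sketch_set_active_once(struct execlists_sketch *el, unsigned int bit)
{
	/* True only on the off->on edge; a no-op report otherwise. */
	return !__test_and_set_bit(bit, (unsigned long *)&el->active);
}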
On 07/05/2018 14:57, Chris Wilson wrote:
> Prepare to allow the execlists submission to be run from underneath a
> hardirq timer context (and not just the current softirq context) as is
> required for fast preemption resets and context switches.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
[snip]

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
On 07/05/2018 14:57, Chris Wilson wrote:
[snip]
> +static void execlists_dequeue(struct intel_engine_cs *engine)
> +{
> +	struct intel_engine_execlists * const execlists = &engine->execlists;
> +	unsigned long flags;
> +	bool submit;
> +
> +	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	submit = __execlists_dequeue(engine);
> +	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +
> +	if (submit)
>  		execlists_submit_ports(engine);
> -	}

Actually, having read the guc version, why doesn't
execlists_submit_ports need to be hardirq safe?

Regards,

Tvrtko
Quoting Tvrtko Ursulin (2018-05-08 18:45:44)
> On 07/05/2018 14:57, Chris Wilson wrote:
[snip]
> Actually, having read the guc version, why doesn't
> execlists_submit_ports need to be hardirq safe?

execlists->port[] and the ELSP register are guarded by the tasklet (they
are only accessed from inside the tasklet). guc caught me off guard
because it uses a shared wq (and spinlock) for all tasklets. So guc
requires extending the irq-off section across the shared wq.
-Chris
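A rough sketch of the two submission shapes being contrasted here; the
function names are invented for illustration and only the locking
pattern is the point:

/*
 * Invented names; the bodies of the helpers are elided. Assumes the
 * driver's struct intel_engine_cs and engine->timeline.lock.
 */
#include <linux/spinlock.h>

static bool __sketch_dequeue(struct intel_engine_cs *engine);
static void sketch_submit_ports(struct intel_engine_cs *engine);
static void sketch_guc_wq_submit(struct intel_engine_cs *engine);

static void sketch_execlists_tasklet(struct intel_engine_cs *engine)
{
	unsigned long flags;
	bool submit;

	spin_lock_irqsave(&engine->timeline.lock, flags);
	submit = __sketch_dequeue(engine);
	spin_unlock_irqrestore(&engine->timeline.lock, flags);

	/*
	 * port[] and ELSP are only touched from this tasklet, so the
	 * actual submission may safely happen after the lock is dropped.
	 */
	if (submit)
		sketch_submit_ports(engine);
}

static void sketch_guc_tasklet(struct intel_engine_cs *engine)
{
	unsigned long flags;

	spin_lock_irqsave(&engine->timeline.lock, flags);
	/*
	 * The guc work queue is shared between engines (and guarded by
	 * its own spinlock), so the submission must stay inside the
	 * irq-off section rather than follow it.
	 */
	if (__sketch_dequeue(engine))
		sketch_guc_wq_submit(engine);
	spin_unlock_irqrestore(&engine->timeline.lock, flags);
}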
Quoting Chris Wilson (2018-05-08 21:59:20)
> Quoting Tvrtko Ursulin (2018-05-08 18:45:44)
> > On 07/05/2018 14:57, Chris Wilson wrote:
[snip]
> > Actually, having read the guc version, why doesn't
> > execlists_submit_ports need to be hardirq safe?
>
> execlists->port[] and the ELSP register are guarded by the tasklet (they
> are only accessed from inside the tasklet). guc caught me off guard
> because it uses a shared wq (and spinlock) for all tasklets. So guc
> requires extending the irq-off section across the shared wq.

I took your r-b and ran with it. The question you asked was exactly the
puzzle I fell into with the guc, so I believe we're on the same page :)

Pushed the irqsafe patches, as they are useful for a couple of series.
-Chris
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f9f4064dec0e..15c373ea5b7e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -357,10 +357,13 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
 {
 	struct intel_engine_cs *engine =
 		container_of(execlists, typeof(*engine), execlists);
+	unsigned long flags;
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
 
-	spin_lock_irq(&engine->timeline.lock);
 	__unwind_incomplete_requests(engine);
-	spin_unlock_irq(&engine->timeline.lock);
+
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static inline void
@@ -554,7 +557,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
-static void execlists_dequeue(struct intel_engine_cs *engine)
+static bool __execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
@@ -564,6 +567,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	struct rb_node *rb;
 	bool submit = false;
 
+	lockdep_assert_held(&engine->timeline.lock);
+
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
 	 * static for a context, and unique to each, so we only execute
@@ -585,7 +590,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
-	spin_lock_irq(&engine->timeline.lock);
 	rb = execlists->first;
 	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
 
@@ -600,7 +604,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 					  EXECLISTS_ACTIVE_USER));
 		GEM_BUG_ON(!port_count(&port[0]));
 		if (port_count(&port[0]) > 1)
-			goto unlock;
+			return false;
 
 		/*
 		 * If we write to ELSP a second time before the HW has had
@@ -610,11 +614,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 * the HW to indicate that it has had a chance to respond.
 		 */
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
-			goto unlock;
+			return false;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
 			inject_preempt_context(engine);
-			goto unlock;
+			return false;
 		}
 
 		/*
@@ -639,7 +643,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 * priorities of the ports haven't been switch.
 		 */
 		if (port_count(&port[1]))
-			goto unlock;
+			return false;
 
 		/*
 		 * WaIdleLiteRestore:bdw,skl
@@ -744,13 +748,25 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	/* We must always keep the beast fed if we have work piled up */
 	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
 
-unlock:
-	spin_unlock_irq(&engine->timeline.lock);
-
-	if (submit) {
+	/* Re-evaluate the executing context setup after each preemptive kick */
+	if (last)
 		execlists_user_begin(execlists, execlists->port);
+
+	return submit;
+}
+
+static void execlists_dequeue(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+	unsigned long flags;
+	bool submit;
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+	submit = __execlists_dequeue(engine);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
+	if (submit)
 		execlists_submit_ports(engine);
-	}
 
 	GEM_BUG_ON(port_isset(execlists->port) &&
 		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
Prepare to allow the execlists submission to be run from underneath a
hardirq timer context (and not just the current softirq context) as is
required for fast preemption resets and context switches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 42 ++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 13 deletions(-)
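As background for the irqsave conversion above, a minimal,
self-contained illustration (not i915 code) of the locking rule the
patch relies on: spin_unlock_irq() unconditionally re-enables
interrupts, which is only safe when the caller is known to have entered
with interrupts enabled, whereas the irqsave/irqrestore pair preserves
whatever state the caller had, so the same path can run from process,
softirq (tasklet) or hardirq (timer) context.

/*
 * Illustration only. demo_softirq_only() may be called solely from
 * contexts where interrupts are enabled; demo_any_context() is safe
 * from process, softirq and hardirq context alike.
 */
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);

static void demo_softirq_only(void)
{
	spin_lock_irq(&demo_lock);	/* assumes irqs were enabled */
	/* ... critical section ... */
	spin_unlock_irq(&demo_lock);	/* unconditionally re-enables irqs */
}

static void demo_any_context(void)
{
	unsigned long flags;

	spin_lock_irqsave(&demo_lock, flags);	/* saves current irq state */
	/* ... critical section ... */
	spin_unlock_irqrestore(&demo_lock, flags); /* restores saved state */
}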