[03/11] drm/i915/execlists: Pull submit after dequeue under timeline lock

Message ID 20180531185204.19520-4-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson May 31, 2018, 6:51 p.m. UTC
In the next patch, we will begin processing the CSB from inside the
interrupt handler. This means that updating the execlists->port[] will
no longer be locked by the tasklet but by the engine->timeline.lock
instead. Pull dequeue and submit under the same lock for protection.
(An alternative, future, plan is to keep the in/out arrays separate for
concurrent processing and reduced lock coverage.)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)
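
In effect, execlists_submit_ports() moves from execlists_dequeue() into
__execlists_dequeue(), so that both dequeue and port submission run under
engine->timeline.lock. A minimal sketch of the resulting outer function,
condensed from the diff below:

    static void execlists_dequeue(struct intel_engine_cs *engine)
    {
            unsigned long flags;

            /* irqsafe: the lock will also be taken from irq context */
            spin_lock_irqsave(&engine->timeline.lock, flags);
            __execlists_dequeue(engine); /* assigns the ports and submits them */
            spin_unlock_irqrestore(&engine->timeline.lock, flags);
    }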

Comments

Tvrtko Ursulin June 1, 2018, 2:02 p.m. UTC | #1
On 31/05/2018 19:51, Chris Wilson wrote:
> In the next patch, we will begin processing the CSB from inside the
> interrupt handler. This means that updating the execlists->port[] will

The message that we will be processing CSB from irq handler, here and in 
following patch 5/11, doesn't seem to be true. So why is this needed? 
Why not just drop some patches from the series?

Regards,

Tvrtko

> no longer be locked by the tasklet but by the engine->timeline.lock
> instead. Pull dequeue and submit under the same lock for protection.
> (An alternative, future, plan is to keep the in/out arrays separate for
> concurrent processing and reduced lock coverage.)
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 32 ++++++++++++--------------------
>   1 file changed, 12 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index e5cf049c18f8..d207a1bf9dc9 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -562,7 +562,7 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
>   	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
>   }
>   
> -static bool __execlists_dequeue(struct intel_engine_cs *engine)
> +static void __execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct execlist_port *port = execlists->port;
> @@ -617,11 +617,11 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
>   		 * the HW to indicate that it has had a chance to respond.
>   		 */
>   		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
> -			return false;
> +			return;
>   
>   		if (need_preempt(engine, last, execlists->queue_priority)) {
>   			inject_preempt_context(engine);
> -			return false;
> +			return;
>   		}
>   
>   		/*
> @@ -646,7 +646,7 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
>   		 * priorities of the ports haven't been switch.
>   		 */
>   		if (port_count(&port[1]))
> -			return false;
> +			return;
>   
>   		/*
>   		 * WaIdleLiteRestore:bdw,skl
> @@ -746,8 +746,10 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
>   		port != execlists->port ? rq_prio(last) : INT_MIN;
>   
>   	execlists->first = rb;
> -	if (submit)
> +	if (submit) {
>   		port_assign(port, last);
> +		execlists_submit_ports(engine);
> +	}
>   
>   	/* We must always keep the beast fed if we have work piled up */
>   	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
> @@ -756,24 +758,19 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
>   	if (last)
>   		execlists_user_begin(execlists, execlists->port);
>   
> -	return submit;
> +	/* If the engine is now idle, so should be the flag; and vice versa. */
> +	GEM_BUG_ON(execlists_is_active(&engine->execlists,
> +				       EXECLISTS_ACTIVE_USER) ==
> +		   !port_isset(engine->execlists.port));
>   }
>   
>   static void execlists_dequeue(struct intel_engine_cs *engine)
>   {
> -	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	unsigned long flags;
> -	bool submit;
>   
>   	spin_lock_irqsave(&engine->timeline.lock, flags);
> -	submit = __execlists_dequeue(engine);
> +	__execlists_dequeue(engine);
>   	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> -
> -	if (submit)
> -		execlists_submit_ports(engine);
> -
> -	GEM_BUG_ON(port_isset(execlists->port) &&
> -		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
>   }
>   
>   void
> @@ -1156,11 +1153,6 @@ static void execlists_submission_tasklet(unsigned long data)
>   
>   	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
>   		execlists_dequeue(engine);
> -
> -	/* If the engine is now idle, so should be the flag; and vice versa. */
> -	GEM_BUG_ON(execlists_is_active(&engine->execlists,
> -				       EXECLISTS_ACTIVE_USER) ==
> -		   !port_isset(engine->execlists.port));
>   }
>   
>   static void queue_request(struct intel_engine_cs *engine,
>
Chris Wilson June 1, 2018, 2:07 p.m. UTC | #2
Quoting Tvrtko Ursulin (2018-06-01 15:02:38)
> 
> On 31/05/2018 19:51, Chris Wilson wrote:
> > In the next patch, we will begin processing the CSB from inside the
> > interrupt handler. This means that updating the execlists->port[] will
> 
> The message that we will be processing CSB from irq handler, here and in 
> following patch 5/11, doesn't seem to be true. So why is this needed? 
> Why not just drop some patches from the series?

It will still be called from irq context; as submit_request is called
from irq context. Hence we still require irqsafe variants.

So yes, while the overall direction of the patchset changed slightly to
be less enthusiastic about calling from irq context, such calls were not
eliminated.
-Chris
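
A minimal sketch of the constraint above, using hypothetical names rather
than the actual driver code: a lock that is taken both on the dequeue path
and by callers running in hard irq context needs the irqsave variants, so
that an irq-context caller cannot deadlock against a holder that left
interrupts enabled.

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(timeline_lock); /* stands in for engine->timeline.lock */

    /* May be entered from process, softirq/tasklet or hard irq context. */
    static void submit_path(void)
    {
            unsigned long flags;

            spin_lock_irqsave(&timeline_lock, flags);
            /* ... dequeue requests and fill the ELSP ports here ... */
            spin_unlock_irqrestore(&timeline_lock, flags);
    }
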
Tvrtko Ursulin June 4, 2018, 9:25 a.m. UTC | #3
On 01/06/2018 15:07, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-01 15:02:38)
>>
>> On 31/05/2018 19:51, Chris Wilson wrote:
>>> In the next patch, we will begin processing the CSB from inside the
>>> interrupt handler. This means that updating the execlists->port[] will
>>
>> The message that we will be processing CSB from irq handler, here and in
>> following patch 5/11, doesn't seem to be true. So why is this needed?
>> Why not just drop some patches from the series?
> 
> It will still be called from irq context; as submit_request is called
> from irq context. Hence we still require irqsafe variants.

submit_request is already called from irqoff contexts so I don't get it.

> So yes, while the overall direction of the patchset changed slightly to
> be less enthusiastic about calling from irq context, such calls were not
> eliminated.

I have a problem with commit messages where one says we'll be processing 
CSB directly from hard irq, while another says we'll always use the tasklet.

It is very hard to review it like this, since I cannot rely on the high 
level narrative for a little bit of help to figure out what is needed in 
this latest incarnation, and what perhaps isn't. And we shouldn't really 
do it like this in general (have inconsistent commit messages).

Regards,

Tvrtko
Chris Wilson June 4, 2018, 10:12 a.m. UTC | #4
Quoting Tvrtko Ursulin (2018-06-04 10:25:46)
> 
> On 01/06/2018 15:07, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-06-01 15:02:38)
> >>
> >> On 31/05/2018 19:51, Chris Wilson wrote:
> >>> In the next patch, we will begin processing the CSB from inside the
> >>> interrupt handler. This means that updating the execlists->port[] will
> >>
> >> The message that we will be processing CSB from irq handler, here and in
> >> following patch 5/11, doesn't seem to be true. So why is this needed?
> >> Why not just drop some patches from the series?
> > 
> > It will still be called from irq context; as submit_request is called
> > from irq context. Hence we still require irqsafe variants.
> 
> submit_request is already called from irqoff contexts so I don't get it.

Right, but the patch series pulls the CSB processing into
submit_request as well.
 
> > So yes, while the overall direction of the patchset changed slightly to
> > be less enthusiastic about calling from irq context, such calls were not
> > eliminated.
> 
> I have a problem with commit messages where one says we'll be processing 
> CSB directly from hard irq, while another says we'll always use the tasklet.

It's done inside the tasklet callback; the tasklet callback may be
invoked directly, and that may also happen inside an irq.
-Chris
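
A rough illustration of that calling pattern, with a hypothetical helper
(the field names follow the tasklet API of that kernel era, not any actual
driver function): the same callback can either be deferred to softirq or
invoked inline, including from within an interrupt handler, provided the
data it touches is protected by an irqsafe lock.

    #include <linux/interrupt.h>

    /* Hypothetical helper: kick submission either inline or via softirq. */
    static void kick_submission(struct intel_engine_cs *engine, bool direct)
    {
            struct tasklet_struct *t = &engine->execlists.tasklet;

            if (direct)
                    t->func(t->data);       /* run the callback inline, possibly from an irq */
            else
                    tasklet_hi_schedule(t); /* defer to softirq as usual */
    }
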
Tvrtko Ursulin June 4, 2018, 10:58 a.m. UTC | #5
On 04/06/2018 11:12, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-04 10:25:46)
>>
>> On 01/06/2018 15:07, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-06-01 15:02:38)
>>>>
>>>> On 31/05/2018 19:51, Chris Wilson wrote:
>>>>> In the next patch, we will begin processing the CSB from inside the
>>>>> interrupt handler. This means that updating the execlists->port[] will
>>>>
>>>> The message that we will be processing CSB from irq handler, here and in
>>>> following patch 5/11, doesn't seem to be true. So why is this needed?
>>>> Why not just drop some patches from the series?
>>>
>>> It will still be called from irq context; as submit_request is called
>>> from irq context. Hence we still require irqsafe variants.
>>
>> submit_request is already called from irqoff contexts so I don't get it.
> 
> Right, but the patch series pulls the CSB processing into
> submit_request as well.
>   
>>> So yes, while the overall direction of the patchset changed slightly to
>>> be less enthusiastic about calling from irq context, such calls were not
>>> eliminated.
>>
>> I have a problem with commit messages where one says we'll be processing
>> CSB directly from hard irq, while another says we'll always use the tasklet.
> 
> It's done inside the tasklet callback; the tasklet callback may be
> invoked directly, and that may also happen inside an irq.

Via notify_ring yes. I was looking for something on the context switch 
path all this time.

So CSB processing via notify_ring is quite optimistic, and my question is 
whether it brings anything. Or is the whole change purely due to dequeue?

Another mystery later in the patch series is what happens with 
writel(execlists->csb_read), which is mmio, but I see you have removed 
forcewake handling and one commit says there is no more mmio.

Regards,

Tvrtko
Chris Wilson June 4, 2018, 11:15 a.m. UTC | #6
Quoting Tvrtko Ursulin (2018-06-04 11:58:00)
> 
> On 04/06/2018 11:12, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-06-04 10:25:46)
> >>
> >> On 01/06/2018 15:07, Chris Wilson wrote:
> >>> Quoting Tvrtko Ursulin (2018-06-01 15:02:38)
> >>>>
> >>>> On 31/05/2018 19:51, Chris Wilson wrote:
> >>>>> In the next patch, we will begin processing the CSB from inside the
> >>>>> interrupt handler. This means that updating the execlists->port[] will
> >>>>
> >>>> The message that we will be processing CSB from irq handler, here and in
> >>>> following patch 5/11, doesn't seem to be true. So why is this needed?
> >>>> Why not just drop some patches from the series?
> >>>
> >>> It will still be called from irq context; as submit_request is called
> >>> from irq context. Hence we still require irqsafe variants.
> >>
> >> submit_request is already called from irqoff contexts so I don't get it.
> > 
> > Right, but the patch series pulls the CSB processing into
> > submit_request as well.
> >   
> >>> So yes, while the overall direction of the patchset changed slightly to
> >>> be less enthusiastic about calling from irq context, such calls were not
> >>> eliminated.
> >>
> >> I have a problem with commit messages where one says we'll be processing
> >> CSB directly from hard irq, while another says we'll always use the tasklet.
> > 
> > It's done inside the tasklet callback; the tasklet callback may be
> > invoked directly, and that may also happen inside an irq.
> 
> Via notify_ring yes. I was looking for something on the context switch 
> path all this time.

Also don't forget third-party callers may come in from under their irq.
 
> So CSB processing via notify_ring is quite optimistic and my question is 
> whether it brings anything? Or whole change is purely due dequeue?

That's the essence of the major improvement for ring switching. Pretty
sure I cooked up gem_exec_latency/rthog to show that one as well; if not,
that's certainly one I'd like to show.

> Another mystery later in the patch series is what happens with 
> writel(execlists->csb_read), which is mmio, but I see you have removed 
> forcewake handling and one commit says there is no more mmio.

We removed forcewake for that last year. Where I say mmio, think
forcewaked mmio reads.
-Chris
Tvrtko Ursulin June 4, 2018, 2:19 p.m. UTC | #7
On 31/05/2018 19:51, Chris Wilson wrote:
> In the next patch, we will begin processing the CSB from inside the
> interrupt handler. This means that updating the execlists->port[] will

I'd put an extra note here that this is via the user interrupt hooks and 
not the more obvious csb interrupt.

> no longer be locked by the tasklet but by the engine->timeline.lock
> instead. Pull dequeue and submit under the same lock for protection.
> (An alternative, future, plan is to keep the in/out arrays separate for
> concurrent processing and reduced lock coverage.)
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 32 ++++++++++++--------------------
>   1 file changed, 12 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index e5cf049c18f8..d207a1bf9dc9 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -562,7 +562,7 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
>   	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
>   }
>   
> -static bool __execlists_dequeue(struct intel_engine_cs *engine)
> +static void __execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct execlist_port *port = execlists->port;
> @@ -617,11 +617,11 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
>   		 * the HW to indicate that it has had a chance to respond.
>   		 */
>   		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
> -			return false;
> +			return;
>   
>   		if (need_preempt(engine, last, execlists->queue_priority)) {
>   			inject_preempt_context(engine);
> -			return false;
> +			return;
>   		}
>   
>   		/*
> @@ -646,7 +646,7 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
>   		 * priorities of the ports haven't been switch.
>   		 */
>   		if (port_count(&port[1]))
> -			return false;
> +			return;
>   
>   		/*
>   		 * WaIdleLiteRestore:bdw,skl
> @@ -746,8 +746,10 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
>   		port != execlists->port ? rq_prio(last) : INT_MIN;
>   
>   	execlists->first = rb;
> -	if (submit)
> +	if (submit) {
>   		port_assign(port, last);
> +		execlists_submit_ports(engine);
> +	}
>   
>   	/* We must always keep the beast fed if we have work piled up */
>   	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
> @@ -756,24 +758,19 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine)
>   	if (last)
>   		execlists_user_begin(execlists, execlists->port);
>   
> -	return submit;
> +	/* If the engine is now idle, so should be the flag; and vice versa. */
> +	GEM_BUG_ON(execlists_is_active(&engine->execlists,
> +				       EXECLISTS_ACTIVE_USER) ==
> +		   !port_isset(engine->execlists.port));
>   }
>   
>   static void execlists_dequeue(struct intel_engine_cs *engine)
>   {
> -	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	unsigned long flags;
> -	bool submit;
>   
>   	spin_lock_irqsave(&engine->timeline.lock, flags);
> -	submit = __execlists_dequeue(engine);
> +	__execlists_dequeue(engine);
>   	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> -
> -	if (submit)
> -		execlists_submit_ports(engine);
> -
> -	GEM_BUG_ON(port_isset(execlists->port) &&
> -		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
>   }
>   
>   void
> @@ -1156,11 +1153,6 @@ static void execlists_submission_tasklet(unsigned long data)
>   
>   	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
>   		execlists_dequeue(engine);
> -
> -	/* If the engine is now idle, so should be the flag; and vice versa. */
> -	GEM_BUG_ON(execlists_is_active(&engine->execlists,
> -				       EXECLISTS_ACTIVE_USER) ==
> -		   !port_isset(engine->execlists.port));
>   }
>   
>   static void queue_request(struct intel_engine_cs *engine,
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

Patch

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e5cf049c18f8..d207a1bf9dc9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -562,7 +562,7 @@  static void complete_preempt_context(struct intel_engine_execlists *execlists)
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
-static bool __execlists_dequeue(struct intel_engine_cs *engine)
+static void __execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
@@ -617,11 +617,11 @@  static bool __execlists_dequeue(struct intel_engine_cs *engine)
 		 * the HW to indicate that it has had a chance to respond.
 		 */
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
-			return false;
+			return;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
 			inject_preempt_context(engine);
-			return false;
+			return;
 		}
 
 		/*
@@ -646,7 +646,7 @@  static bool __execlists_dequeue(struct intel_engine_cs *engine)
 		 * priorities of the ports haven't been switch.
 		 */
 		if (port_count(&port[1]))
-			return false;
+			return;
 
 		/*
 		 * WaIdleLiteRestore:bdw,skl
@@ -746,8 +746,10 @@  static bool __execlists_dequeue(struct intel_engine_cs *engine)
 		port != execlists->port ? rq_prio(last) : INT_MIN;
 
 	execlists->first = rb;
-	if (submit)
+	if (submit) {
 		port_assign(port, last);
+		execlists_submit_ports(engine);
+	}
 
 	/* We must always keep the beast fed if we have work piled up */
 	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
@@ -756,24 +758,19 @@  static bool __execlists_dequeue(struct intel_engine_cs *engine)
 	if (last)
 		execlists_user_begin(execlists, execlists->port);
 
-	return submit;
+	/* If the engine is now idle, so should be the flag; and vice versa. */
+	GEM_BUG_ON(execlists_is_active(&engine->execlists,
+				       EXECLISTS_ACTIVE_USER) ==
+		   !port_isset(engine->execlists.port));
 }
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct intel_engine_execlists * const execlists = &engine->execlists;
 	unsigned long flags;
-	bool submit;
 
 	spin_lock_irqsave(&engine->timeline.lock, flags);
-	submit = __execlists_dequeue(engine);
+	__execlists_dequeue(engine);
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
-	if (submit)
-		execlists_submit_ports(engine);
-
-	GEM_BUG_ON(port_isset(execlists->port) &&
-		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
 }
 
 void
@@ -1156,11 +1153,6 @@  static void execlists_submission_tasklet(unsigned long data)
 
 	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
 		execlists_dequeue(engine);
-
-	/* If the engine is now idle, so should be the flag; and vice versa. */
-	GEM_BUG_ON(execlists_is_active(&engine->execlists,
-				       EXECLISTS_ACTIVE_USER) ==
-		   !port_isset(engine->execlists.port));
 }
 
 static void queue_request(struct intel_engine_cs *engine,