diff mbox series

drm/i915/gt: Move execlists_reset() out of line

Message ID 20210121003253.27225-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series drm/i915/gt: Move execlists_reset() out of line | expand

Commit Message

Chris Wilson Jan. 21, 2021, 12:32 a.m. UTC
Reduce the bulk of execlists_submission_tasklet by moving the unlikely
reset function out of line.

add/remove: 1/0 grow/shrink: 0/1 up/down: 960/-935 (25)
Function                                     old     new   delta
execlists_reset                                -     960    +960
execlists_submission_tasklet                6629    5694    -935

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../drm/i915/gt/intel_execlists_submission.c  | 36 +++++++++----------
 1 file changed, 17 insertions(+), 19 deletions(-)

Comments

Tvrtko Ursulin Jan. 21, 2021, 10:55 a.m. UTC | #1
On 21/01/2021 00:32, Chris Wilson wrote:
> Reduce the bulk of execlists_submission_tasklet by moving the unlikely
> reset function out of line.
> 
> add/remove: 1/0 grow/shrink: 0/1 up/down: 960/-935 (25)
> Function                                     old     new   delta
> execlists_reset                                -     960    +960
> execlists_submission_tasklet                6629    5694    -935
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   .../drm/i915/gt/intel_execlists_submission.c  | 36 +++++++++----------
>   1 file changed, 17 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index 740ff05fd692..43cc85241886 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -2299,10 +2299,13 @@ static void execlists_capture(struct intel_engine_cs *engine)
>   	kfree(cap);
>   }
>   
> -static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
> +static noinline void execlists_reset(struct intel_engine_cs *engine)
>   {
> +	struct intel_engine_execlists *el = &engine->execlists;
>   	const unsigned int bit = I915_RESET_ENGINE + engine->id;
>   	unsigned long *lock = &engine->gt->reset.flags;
> +	unsigned long eir = fetch_and_zero(&el->error_interrupt);

We got the locking wrong for this one. Irq handler side is under 
gt->irq_lock, but there are unlocked rmw cycles in the tasklet. Not 
counting this fetch_and_zero which is also unlocked.

If we nest gt->irq_lock under the engine->active.lock here, or the 
opposite from the irq_handler, plus short locked sections in the tasklet 
and csb handling. I *think* both options could be fine from locking order.

Regards,

Tvrtko

> +	const char *msg;
>   
>   	if (!intel_has_reset_engine(engine->gt))
>   		return;
> @@ -2310,16 +2313,25 @@ static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
>   	if (test_and_set_bit(bit, lock))
>   		return;
>   
> +	/* Generate the error message in priority wrt to the user! */
> +	if (eir & GENMASK(15, 0))
> +		msg = "CS error"; /* thrown by a user payload */
> +	else if (eir & ERROR_CSB)
> +		msg = "invalid CSB event";
> +	else if (eir & ERROR_PREEMPT)
> +		msg = "preemption time out";
> +	else
> +		msg = "internal error";
>   	ENGINE_TRACE(engine, "reset for %s\n", msg);
>   
>   	/* Mark this tasklet as disabled to avoid waiting for it to complete */
> -	tasklet_disable_nosync(&engine->execlists.tasklet);
> +	tasklet_disable_nosync(&el->tasklet);
>   
>   	ring_set_paused(engine, 1); /* Freeze the current request in place */
>   	execlists_capture(engine);
>   	intel_engine_reset(engine, msg);
>   
> -	tasklet_enable(&engine->execlists.tasklet);
> +	tasklet_enable(&el->tasklet);
>   	clear_and_wake_up_bit(bit, lock);
>   }
>   
> @@ -2355,22 +2367,8 @@ static void execlists_submission_tasklet(unsigned long data)
>   		engine->execlists.error_interrupt |= ERROR_PREEMPT;
>   	}
>   
> -	if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
> -		const char *msg;
> -
> -		/* Generate the error message in priority wrt to the user! */
> -		if (engine->execlists.error_interrupt & GENMASK(15, 0))
> -			msg = "CS error"; /* thrown by a user payload */
> -		else if (engine->execlists.error_interrupt & ERROR_CSB)
> -			msg = "invalid CSB event";
> -		else if (engine->execlists.error_interrupt & ERROR_PREEMPT)
> -			msg = "preemption time out";
> -		else
> -			msg = "internal error";
> -
> -		engine->execlists.error_interrupt = 0;
> -		execlists_reset(engine, msg);
> -	}
> +	if (unlikely(READ_ONCE(engine->execlists.error_interrupt)))
> +		execlists_reset(engine);
>   
>   	if (!engine->execlists.pending[0]) {
>   		execlists_dequeue_irq(engine);
>
Chris Wilson Jan. 21, 2021, 10:58 a.m. UTC | #2
Quoting Tvrtko Ursulin (2021-01-21 10:55:49)
> 
> On 21/01/2021 00:32, Chris Wilson wrote:
> > Reduce the bulk of execlists_submission_tasklet by moving the unlikely
> > reset function out of line.
> > 
> > add/remove: 1/0 grow/shrink: 0/1 up/down: 960/-935 (25)
> > Function                                     old     new   delta
> > execlists_reset                                -     960    +960
> > execlists_submission_tasklet                6629    5694    -935
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   .../drm/i915/gt/intel_execlists_submission.c  | 36 +++++++++----------
> >   1 file changed, 17 insertions(+), 19 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > index 740ff05fd692..43cc85241886 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > @@ -2299,10 +2299,13 @@ static void execlists_capture(struct intel_engine_cs *engine)
> >       kfree(cap);
> >   }
> >   
> > -static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
> > +static noinline void execlists_reset(struct intel_engine_cs *engine)
> >   {
> > +     struct intel_engine_execlists *el = &engine->execlists;
> >       const unsigned int bit = I915_RESET_ENGINE + engine->id;
> >       unsigned long *lock = &engine->gt->reset.flags;
> > +     unsigned long eir = fetch_and_zero(&el->error_interrupt);
> 
> We got the locking wrong for this one. Irq handler side is under 
> gt->irq_lock, but there are unlocked rmw cycles in the tasklet. Not 
> counting this fetch_and_zero which is also unlocked.

It doesn't require locking.
-Chris
Tvrtko Ursulin Jan. 21, 2021, 11:17 a.m. UTC | #3
On 21/01/2021 10:58, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2021-01-21 10:55:49)
>>
>> On 21/01/2021 00:32, Chris Wilson wrote:
>>> Reduce the bulk of execlists_submission_tasklet by moving the unlikely
>>> reset function out of line.
>>>
>>> add/remove: 1/0 grow/shrink: 0/1 up/down: 960/-935 (25)
>>> Function                                     old     new   delta
>>> execlists_reset                                -     960    +960
>>> execlists_submission_tasklet                6629    5694    -935
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    .../drm/i915/gt/intel_execlists_submission.c  | 36 +++++++++----------
>>>    1 file changed, 17 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>>> index 740ff05fd692..43cc85241886 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>>> @@ -2299,10 +2299,13 @@ static void execlists_capture(struct intel_engine_cs *engine)
>>>        kfree(cap);
>>>    }
>>>    
>>> -static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
>>> +static noinline void execlists_reset(struct intel_engine_cs *engine)
>>>    {
>>> +     struct intel_engine_execlists *el = &engine->execlists;
>>>        const unsigned int bit = I915_RESET_ENGINE + engine->id;
>>>        unsigned long *lock = &engine->gt->reset.flags;
>>> +     unsigned long eir = fetch_and_zero(&el->error_interrupt);
>>
>> We got the locking wrong for this one. Irq handler side is under
>> gt->irq_lock, but there are unlocked rmw cycles in the tasklet. Not
>> counting this fetch_and_zero which is also unlocked.
> 
> It doesn't require locking.

		/* Disable the error interrupt until after the reset */
		if (likely(eir)) {
			ENGINE_WRITE(engine, RING_EMR, ~0u);
			ENGINE_WRITE(engine, RING_EIR, eir);
			WRITE_ONCE(engine->execlists.error_interrupt, eir);
			tasklet = true;
		}

Okay I did not grep with enough context.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 740ff05fd692..43cc85241886 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2299,10 +2299,13 @@  static void execlists_capture(struct intel_engine_cs *engine)
 	kfree(cap);
 }
 
-static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
+static noinline void execlists_reset(struct intel_engine_cs *engine)
 {
+	struct intel_engine_execlists *el = &engine->execlists;
 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
 	unsigned long *lock = &engine->gt->reset.flags;
+	unsigned long eir = fetch_and_zero(&el->error_interrupt);
+	const char *msg;
 
 	if (!intel_has_reset_engine(engine->gt))
 		return;
@@ -2310,16 +2313,25 @@  static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
 	if (test_and_set_bit(bit, lock))
 		return;
 
+	/* Generate the error message in priority wrt to the user! */
+	if (eir & GENMASK(15, 0))
+		msg = "CS error"; /* thrown by a user payload */
+	else if (eir & ERROR_CSB)
+		msg = "invalid CSB event";
+	else if (eir & ERROR_PREEMPT)
+		msg = "preemption time out";
+	else
+		msg = "internal error";
 	ENGINE_TRACE(engine, "reset for %s\n", msg);
 
 	/* Mark this tasklet as disabled to avoid waiting for it to complete */
-	tasklet_disable_nosync(&engine->execlists.tasklet);
+	tasklet_disable_nosync(&el->tasklet);
 
 	ring_set_paused(engine, 1); /* Freeze the current request in place */
 	execlists_capture(engine);
 	intel_engine_reset(engine, msg);
 
-	tasklet_enable(&engine->execlists.tasklet);
+	tasklet_enable(&el->tasklet);
 	clear_and_wake_up_bit(bit, lock);
 }
 
@@ -2355,22 +2367,8 @@  static void execlists_submission_tasklet(unsigned long data)
 		engine->execlists.error_interrupt |= ERROR_PREEMPT;
 	}
 
-	if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
-		const char *msg;
-
-		/* Generate the error message in priority wrt to the user! */
-		if (engine->execlists.error_interrupt & GENMASK(15, 0))
-			msg = "CS error"; /* thrown by a user payload */
-		else if (engine->execlists.error_interrupt & ERROR_CSB)
-			msg = "invalid CSB event";
-		else if (engine->execlists.error_interrupt & ERROR_PREEMPT)
-			msg = "preemption time out";
-		else
-			msg = "internal error";
-
-		engine->execlists.error_interrupt = 0;
-		execlists_reset(engine, msg);
-	}
+	if (unlikely(READ_ONCE(engine->execlists.error_interrupt)))
+		execlists_reset(engine);
 
 	if (!engine->execlists.pending[0]) {
 		execlists_dequeue_irq(engine);