diff mbox series

drm/i915/gt: Do not consider preemption during execlists_dequeue for gen8

Message ID 20240709125302.861319-1-nitin.r.gote@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/gt: Do not consider preemption during execlists_dequeue for gen8 | expand

Commit Message

Nitin Gote July 9, 2024, 12:53 p.m. UTC
We're seeing a GPU HANG issue on a CHV platform, which was caused by
bac24f59f454 ("drm/i915/execlists: Enable coarse preemption boundaries for gen8").

Gen8 platform has only timeslice and doesn't support a preemption mechanism
as engines do not have a preemption timer and doesn't send an irq if the
preemption timeout expires. So, add a fix to not consider preemption
during dequeuing for gen8 platforms.

Also move can_preemt() above need_preempt() function to resolve implicit
declaration of function ‘can_preempt' error and make can_preempt()
function param as const to resolve error: passing argument 1 of
‘can_preempt’ discards ‘const’ qualifier from the pointer target type.

Fixes: bac24f59f454 ("drm/i915/execlists: Enable coarse preemption boundaries for gen8")
Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11396
Suggested-by: Andi Shyti <andi.shyti@intel.com>
Signed-off-by: Nitin Gote <nitin.r.gote@intel.com>
Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
CC: <stable@vger.kernel.org> # v5.2+
---
 .../drm/i915/gt/intel_execlists_submission.c  | 24 ++++++++++++-------
 1 file changed, 15 insertions(+), 9 deletions(-)

Comments

Tvrtko Ursulin July 9, 2024, 2:02 p.m. UTC | #1
On 09/07/2024 13:53, Nitin Gote wrote:
> We're seeing a GPU HANG issue on a CHV platform, which was caused by
> bac24f59f454 ("drm/i915/execlists: Enable coarse preemption boundaries for gen8").
> 
> Gen8 platform has only timeslice and doesn't support a preemption mechanism
> as engines do not have a preemption timer and doesn't send an irq if the
> preemption timeout expires. So, add a fix to not consider preemption
> during dequeuing for gen8 platforms.
> 
> Also move can_preemt() above need_preempt() function to resolve implicit
> declaration of function ‘can_preempt' error and make can_preempt()
> function param as const to resolve error: passing argument 1 of
> ‘can_preempt’ discards ‘const’ qualifier from the pointer target type.
> 
> Fixes: bac24f59f454 ("drm/i915/execlists: Enable coarse preemption boundaries for gen8")
> Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11396
> Suggested-by: Andi Shyti <andi.shyti@intel.com>
> Signed-off-by: Nitin Gote <nitin.r.gote@intel.com>
> Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
> CC: <stable@vger.kernel.org> # v5.2+
> ---
>   .../drm/i915/gt/intel_execlists_submission.c  | 24 ++++++++++++-------
>   1 file changed, 15 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index 21829439e686..30631cc690f2 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -294,11 +294,26 @@ static int virtual_prio(const struct intel_engine_execlists *el)
>   	return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
>   }
>   
> +static bool can_preempt(const struct intel_engine_cs *engine)
> +{
> +	if (GRAPHICS_VER(engine->i915) > 8)
> +		return true;
> +
> +	if (IS_CHERRYVIEW(engine->i915) || IS_BROADWELL(engine->i915))
> +		return false;
> +
> +	/* GPGPU on bdw requires extra w/a; not implemented */
> +	return engine->class != RENDER_CLASS;

Aren't BDW and CHV the only Gen8 platforms, in which case this function 
can be simplifies as:

...
{
	return GRAPHICS_VER(engine->i915) > 8;
}

?

> +}
> +
>   static bool need_preempt(const struct intel_engine_cs *engine,
>   			 const struct i915_request *rq)
>   {
>   	int last_prio;
>   
> +	if ((GRAPHICS_VER(engine->i915) <= 8) && can_preempt(engine))

The GRAPHICS_VER check here looks redundant with the one inside 
can_preempt().

Regards,

Tvrtko

> +		return false;
> +
>   	if (!intel_engine_has_semaphores(engine))
>   		return false;
>   
> @@ -3313,15 +3328,6 @@ static void remove_from_engine(struct i915_request *rq)
>   	i915_request_notify_execute_cb_imm(rq);
>   }
>   
> -static bool can_preempt(struct intel_engine_cs *engine)
> -{
> -	if (GRAPHICS_VER(engine->i915) > 8)
> -		return true;
> -
> -	/* GPGPU on bdw requires extra w/a; not implemented */
> -	return engine->class != RENDER_CLASS;
> -}
> -
>   static void kick_execlists(const struct i915_request *rq, int prio)
>   {
>   	struct intel_engine_cs *engine = rq->engine;
Tvrtko Ursulin July 10, 2024, 10:52 a.m. UTC | #2
On 09/07/2024 15:02, Tvrtko Ursulin wrote:
> 
> On 09/07/2024 13:53, Nitin Gote wrote:
>> We're seeing a GPU HANG issue on a CHV platform, which was caused by
>> bac24f59f454 ("drm/i915/execlists: Enable coarse preemption boundaries 
>> for gen8").
>>
>> Gen8 platform has only timeslice and doesn't support a preemption 
>> mechanism
>> as engines do not have a preemption timer and doesn't send an irq if the
>> preemption timeout expires. So, add a fix to not consider preemption
>> during dequeuing for gen8 platforms.
>>
>> Also move can_preemt() above need_preempt() function to resolve implicit
>> declaration of function ‘can_preempt' error and make can_preempt()
>> function param as const to resolve error: passing argument 1 of
>> ‘can_preempt’ discards ‘const’ qualifier from the pointer target type.
>>
>> Fixes: bac24f59f454 ("drm/i915/execlists: Enable coarse preemption 
>> boundaries for gen8")
>> Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11396
>> Suggested-by: Andi Shyti <andi.shyti@intel.com>
>> Signed-off-by: Nitin Gote <nitin.r.gote@intel.com>
>> Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
>> CC: <stable@vger.kernel.org> # v5.2+
>> ---
>>   .../drm/i915/gt/intel_execlists_submission.c  | 24 ++++++++++++-------
>>   1 file changed, 15 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 
>> b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> index 21829439e686..30631cc690f2 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> @@ -294,11 +294,26 @@ static int virtual_prio(const struct 
>> intel_engine_execlists *el)
>>       return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
>>   }
>> +static bool can_preempt(const struct intel_engine_cs *engine)
>> +{
>> +    if (GRAPHICS_VER(engine->i915) > 8)
>> +        return true;
>> +
>> +    if (IS_CHERRYVIEW(engine->i915) || IS_BROADWELL(engine->i915))
>> +        return false;
>> +
>> +    /* GPGPU on bdw requires extra w/a; not implemented */
>> +    return engine->class != RENDER_CLASS;
> 
> Aren't BDW and CHV the only Gen8 platforms, in which case this function 
> can be simplifies as:
> 
> ...
> {
>      return GRAPHICS_VER(engine->i915) > 8;
> }
> 
> ?
> 
>> +}
>> +
>>   static bool need_preempt(const struct intel_engine_cs *engine,
>>                const struct i915_request *rq)
>>   {
>>       int last_prio;
>> +    if ((GRAPHICS_VER(engine->i915) <= 8) && can_preempt(engine))
> 
> The GRAPHICS_VER check here looks redundant with the one inside 
> can_preempt().

One more thing - I think gen8_emit_bb_start() becomes dead code after 
this and can be removed.

Regards,

Tvrtko

>> +        return false;
>> +
>>       if (!intel_engine_has_semaphores(engine))
>>           return false;
>> @@ -3313,15 +3328,6 @@ static void remove_from_engine(struct 
>> i915_request *rq)
>>       i915_request_notify_execute_cb_imm(rq);
>>   }
>> -static bool can_preempt(struct intel_engine_cs *engine)
>> -{
>> -    if (GRAPHICS_VER(engine->i915) > 8)
>> -        return true;
>> -
>> -    /* GPGPU on bdw requires extra w/a; not implemented */
>> -    return engine->class != RENDER_CLASS;
>> -}
>> -
>>   static void kick_execlists(const struct i915_request *rq, int prio)
>>   {
>>       struct intel_engine_cs *engine = rq->engine;
Nitin Gote July 10, 2024, 2:58 p.m. UTC | #3
> -----Original Message-----
> From: Tvrtko Ursulin <tursulin@ursulin.net>
> Sent: Wednesday, July 10, 2024 4:22 PM
> To: Gote, Nitin R <nitin.r.gote@intel.com>; intel-gfx@lists.freedesktop.org
> Cc: dri-devel@lists.freedesktop.org; Shyti, Andi <andi.shyti@intel.com>;
> chris.p.wilson@linux.intel.com; Das, Nirmoy <nirmoy.das@intel.com>;
> janusz.krzysztofik@linux.intel.com; stable@vger.kernel.org
> Subject: Re: [PATCH] drm/i915/gt: Do not consider preemption during
> execlists_dequeue for gen8
> 
> 
> On 09/07/2024 15:02, Tvrtko Ursulin wrote:
> >
> > On 09/07/2024 13:53, Nitin Gote wrote:
> >> We're seeing a GPU HANG issue on a CHV platform, which was caused by
> >> bac24f59f454 ("drm/i915/execlists: Enable coarse preemption
> >> boundaries for gen8").
> >>
> >> Gen8 platform has only timeslice and doesn't support a preemption
> >> mechanism as engines do not have a preemption timer and doesn't send
> >> an irq if the preemption timeout expires. So, add a fix to not
> >> consider preemption during dequeuing for gen8 platforms.
> >>
> >> Also move can_preemt() above need_preempt() function to resolve
> >> implicit declaration of function ‘can_preempt' error and make
> >> can_preempt() function param as const to resolve error: passing
> >> argument 1 of ‘can_preempt’ discards ‘const’ qualifier from the pointer
> target type.
> >>
> >> Fixes: bac24f59f454 ("drm/i915/execlists: Enable coarse preemption
> >> boundaries for gen8")
> >> Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11396
> >> Suggested-by: Andi Shyti <andi.shyti@intel.com>
> >> Signed-off-by: Nitin Gote <nitin.r.gote@intel.com>
> >> Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
> >> CC: <stable@vger.kernel.org> # v5.2+
> >> ---
> >>   .../drm/i915/gt/intel_execlists_submission.c  | 24
> >> ++++++++++++-------
> >>   1 file changed, 15 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> >> b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> >> index 21829439e686..30631cc690f2 100644
> >> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> >> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> >> @@ -294,11 +294,26 @@ static int virtual_prio(const struct
> >> intel_engine_execlists *el)
> >>       return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
> >>   }
> >> +static bool can_preempt(const struct intel_engine_cs *engine) {
> >> +    if (GRAPHICS_VER(engine->i915) > 8)
> >> +        return true;
> >> +
> >> +    if (IS_CHERRYVIEW(engine->i915) || IS_BROADWELL(engine->i915))
> >> +        return false;
> >> +
> >> +    /* GPGPU on bdw requires extra w/a; not implemented */
> >> +    return engine->class != RENDER_CLASS;
> >
> > Aren't BDW and CHV the only Gen8 platforms, in which case this
> > function can be simplifies as:
> >
> > ...
> > {
> >      return GRAPHICS_VER(engine->i915) > 8; }
> >
> > ?
> >
> >> +}

Yes. I will simply this function.

> >> +
> >>   static bool need_preempt(const struct intel_engine_cs *engine,
> >>                const struct i915_request *rq)
> >>   {
> >>       int last_prio;
> >> +    if ((GRAPHICS_VER(engine->i915) <= 8) && can_preempt(engine))
> >
> > The GRAPHICS_VER check here looks redundant with the one inside
> > can_preempt().
True. I will update the condition.
> 
> One more thing - I think gen8_emit_bb_start() becomes dead code after this
> and can be removed.

I think gen8_emit_bb_start() still require for graphics version < 12 as it is
used in else part of if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) condition.
So, it will not be dead code.

Thanks and Regards,
Nitin
> 
> Regards,
> 
> Tvrtko
> 
> >> +        return false;
> >> +
> >>       if (!intel_engine_has_semaphores(engine))
> >>           return false;
> >> @@ -3313,15 +3328,6 @@ static void remove_from_engine(struct
> >> i915_request *rq)
> >>       i915_request_notify_execute_cb_imm(rq);
> >>   }
> >> -static bool can_preempt(struct intel_engine_cs *engine) -{
> >> -    if (GRAPHICS_VER(engine->i915) > 8)
> >> -        return true;
> >> -
> >> -    /* GPGPU on bdw requires extra w/a; not implemented */
> >> -    return engine->class != RENDER_CLASS; -}
> >> -
> >>   static void kick_execlists(const struct i915_request *rq, int prio)
> >>   {
> >>       struct intel_engine_cs *engine = rq->engine;
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 21829439e686..30631cc690f2 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -294,11 +294,26 @@  static int virtual_prio(const struct intel_engine_execlists *el)
 	return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
 }
 
+static bool can_preempt(const struct intel_engine_cs *engine)
+{
+	if (GRAPHICS_VER(engine->i915) > 8)
+		return true;
+
+	if (IS_CHERRYVIEW(engine->i915) || IS_BROADWELL(engine->i915))
+		return false;
+
+	/* GPGPU on bdw requires extra w/a; not implemented */
+	return engine->class != RENDER_CLASS;
+}
+
 static bool need_preempt(const struct intel_engine_cs *engine,
 			 const struct i915_request *rq)
 {
 	int last_prio;
 
+	if ((GRAPHICS_VER(engine->i915) <= 8) && can_preempt(engine))
+		return false;
+
 	if (!intel_engine_has_semaphores(engine))
 		return false;
 
@@ -3313,15 +3328,6 @@  static void remove_from_engine(struct i915_request *rq)
 	i915_request_notify_execute_cb_imm(rq);
 }
 
-static bool can_preempt(struct intel_engine_cs *engine)
-{
-	if (GRAPHICS_VER(engine->i915) > 8)
-		return true;
-
-	/* GPGPU on bdw requires extra w/a; not implemented */
-	return engine->class != RENDER_CLASS;
-}
-
 static void kick_execlists(const struct i915_request *rq, int prio)
 {
 	struct intel_engine_cs *engine = rq->engine;