[03/21] drm/i915/gem: Set the watchdog timeout directly in intel_context_set_gem

Message ID 20210423223131.879208-4-jason@jlekstrand.net (mailing list archive)
State New, archived
Series drm/i915/gem: ioctl clean-ups

Commit Message

Jason Ekstrand April 23, 2021, 10:31 p.m. UTC
Instead of handling it like a context param, unconditionally set it when
intel_contexts are created.  This doesn't fix anything but does simplify
the code a bit.

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 43 +++----------------
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  4 --
 drivers/gpu/drm/i915/gt/intel_context_param.h |  3 +-
 3 files changed, 6 insertions(+), 44 deletions(-)

Comments

Daniel Vetter April 27, 2021, 9:42 a.m. UTC | #1
On Fri, Apr 23, 2021 at 05:31:13PM -0500, Jason Ekstrand wrote:
> Instead of handling it like a context param, unconditionally set it when
> intel_contexts are created.  This doesn't fix anything but does simplify
> the code a bit.
> 
> Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>

So the idea here is that for years we've had a watchdog uapi floating
about. The aim was for media, so that they could set very tight deadlines
for their transcode jobs, so that a corrupt bitstream (especially for
decoding) doesn't hang your desktop unnecessarily long.

But it's been stuck in limbo since forever, plus I get how this gets a bit
in the way of the proto ctx work, so it makes sense to remove this prep
work again.

Maybe include the above in the commit message for a notch more context.

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Tvrtko Ursulin April 28, 2021, 3:55 p.m. UTC | #2
On 23/04/2021 23:31, Jason Ekstrand wrote:
> Instead of handling it like a context param, unconditionally set it when
> intel_contexts are created.  This doesn't fix anything but does simplify
> the code a bit.
> 
> Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_context.c   | 43 +++----------------
>   .../gpu/drm/i915/gem/i915_gem_context_types.h |  4 --
>   drivers/gpu/drm/i915/gt/intel_context_param.h |  3 +-
>   3 files changed, 6 insertions(+), 44 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 35bcdeddfbf3f..1091cc04a242a 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -233,7 +233,11 @@ static void intel_context_set_gem(struct intel_context *ce,
>   	    intel_engine_has_timeslices(ce->engine))
>   		__set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
>   
> -	intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us);
> +	if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) &&
> +	    ctx->i915->params.request_timeout_ms) {
> +		unsigned int timeout_ms = ctx->i915->params.request_timeout_ms;
> +		intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000);

Blank line between declarations and code please, or just lose the local.
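
E.g. with the blank line added (untested):

	if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) &&
	    ctx->i915->params.request_timeout_ms) {
		unsigned int timeout_ms = ctx->i915->params.request_timeout_ms;

		intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000);
	}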

Otherwise looks okay. Slight change that same GEM context can now have a 
mix of different request expirations isn't interesting I think. At least 
the change goes away by the end of the series.

Regards,

Tvrtko

Jason Ekstrand April 28, 2021, 5:24 p.m. UTC | #3
On Wed, Apr 28, 2021 at 10:55 AM Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
> On 23/04/2021 23:31, Jason Ekstrand wrote:
> > Instead of handling it like a context param, unconditionally set it when
> > intel_contexts are created.  This doesn't fix anything but does simplify
> > the code a bit.
> >
> > Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_context.c   | 43 +++----------------
> >   .../gpu/drm/i915/gem/i915_gem_context_types.h |  4 --
> >   drivers/gpu/drm/i915/gt/intel_context_param.h |  3 +-
> >   3 files changed, 6 insertions(+), 44 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > index 35bcdeddfbf3f..1091cc04a242a 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > @@ -233,7 +233,11 @@ static void intel_context_set_gem(struct intel_context *ce,
> >           intel_engine_has_timeslices(ce->engine))
> >               __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
> >
> > -     intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us);
> > +     if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) &&
> > +         ctx->i915->params.request_timeout_ms) {
> > +             unsigned int timeout_ms = ctx->i915->params.request_timeout_ms;
> > +             intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000);
>
> Blank line between declarations and code please, or just lose the local.
>
> Otherwise looks okay. Slight change that same GEM context can now have a
> mix of different request expirations isn't interesting I think. At least
> the change goes away by the end of the series.

In order for that to happen, I think you'd have to have a race between
CREATE_CONTEXT and someone smashing the request_timeout_ms param via
sysfs.  Or am I missing something?  Given that timeouts are really
per-engine anyway, I don't think we need to care too much about that.

--Jason

Tvrtko Ursulin April 29, 2021, 8:04 a.m. UTC | #4
On 28/04/2021 18:24, Jason Ekstrand wrote:
> On Wed, Apr 28, 2021 at 10:55 AM Tvrtko Ursulin
> <tvrtko.ursulin@linux.intel.com> wrote:
>> On 23/04/2021 23:31, Jason Ekstrand wrote:
>>> Instead of handling it like a context param, unconditionally set it when
>>> intel_contexts are created.  This doesn't fix anything but does simplify
>>> the code a bit.
>>>
>>> Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
>>> ---
>>>    drivers/gpu/drm/i915/gem/i915_gem_context.c   | 43 +++----------------
>>>    .../gpu/drm/i915/gem/i915_gem_context_types.h |  4 --
>>>    drivers/gpu/drm/i915/gt/intel_context_param.h |  3 +-
>>>    3 files changed, 6 insertions(+), 44 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>>> index 35bcdeddfbf3f..1091cc04a242a 100644
>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>>> @@ -233,7 +233,11 @@ static void intel_context_set_gem(struct intel_context *ce,
>>>            intel_engine_has_timeslices(ce->engine))
>>>                __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
>>>
>>> -     intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us);
>>> +     if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) &&
>>> +         ctx->i915->params.request_timeout_ms) {
>>> +             unsigned int timeout_ms = ctx->i915->params.request_timeout_ms;
>>> +             intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000);
>>
>> Blank line between declarations and code please, or just lose the local.
>>
>> Otherwise looks okay. Slight change that same GEM context can now have a
>> mix of different request expirations isn't interesting I think. At least
>> the change goes away by the end of the series.
> 
> In order for that to happen, I think you'd have to have a race between
> CREATE_CONTEXT and someone smashing the request_timeout_ms param via
> sysfs.  Or am I missing something?  Given that timeouts are really
> per-engine anyway, I don't think we need to care too much about that.

We don't care, no.

For completeness only - by the end of the series it is as you say. But
at _this_ point in the series a mix can happen if the modparam changes at
any point between context create and replacing engines. Which is a change
compared to before this patch, since the modparam was cached in the GEM
context so far. So one GEM context had a single request_timeout_ms.

Regards,

Tvrtko

Jason Ekstrand April 29, 2021, 2:54 p.m. UTC | #5
On Thu, Apr 29, 2021 at 3:04 AM Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
>
>
> On 28/04/2021 18:24, Jason Ekstrand wrote:
> > On Wed, Apr 28, 2021 at 10:55 AM Tvrtko Ursulin
> > <tvrtko.ursulin@linux.intel.com> wrote:
> >> On 23/04/2021 23:31, Jason Ekstrand wrote:
> >>> Instead of handling it like a context param, unconditionally set it when
> >>> intel_contexts are created.  This doesn't fix anything but does simplify
> >>> the code a bit.
> >>>
> >>> Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
> >>> ---
> >>>    drivers/gpu/drm/i915/gem/i915_gem_context.c   | 43 +++----------------
> >>>    .../gpu/drm/i915/gem/i915_gem_context_types.h |  4 --
> >>>    drivers/gpu/drm/i915/gt/intel_context_param.h |  3 +-
> >>>    3 files changed, 6 insertions(+), 44 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >>> index 35bcdeddfbf3f..1091cc04a242a 100644
> >>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >>> @@ -233,7 +233,11 @@ static void intel_context_set_gem(struct intel_context *ce,
> >>>            intel_engine_has_timeslices(ce->engine))
> >>>                __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
> >>>
> >>> -     intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us);
> >>> +     if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) &&
> >>> +         ctx->i915->params.request_timeout_ms) {
> >>> +             unsigned int timeout_ms = ctx->i915->params.request_timeout_ms;
> >>> +             intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000);
> >>
> >> Blank line between declarations and code please, or just lose the local.
> >>
> >> Otherwise looks okay. Slight change that same GEM context can now have a
> >> mix of different request expirations isn't interesting I think. At least
> >> the change goes away by the end of the series.
> >
> > In order for that to happen, I think you'd have to have a race between
> > CREATE_CONTEXT and someone smashing the request_timeout_ms param via
> > sysfs.  Or am I missing something?  Given that timeouts are really
> > per-engine anyway, I don't think we need to care too much about that.
>
> We don't care, no.
>
> For completeness only - by the end of the series it is as you say. But
> at _this_ point in the series a mix can happen if the modparam changes at
> any point between context create and replacing engines. Which is a change
> compared to before this patch, since the modparam was cached in the GEM
> context so far. So one GEM context had a single request_timeout_ms.

I've added the following to the commit message:

It also means that sync files exported from different engines on a
SINGLE_TIMELINE context will have different fence contexts.  This is
visible to userspace if it looks at the obj_name field of
sync_fence_info.

How's that sound?

--Jason

Daniel Vetter April 29, 2021, 5:12 p.m. UTC | #6
On Thu, Apr 29, 2021 at 09:54:15AM -0500, Jason Ekstrand wrote:
> On Thu, Apr 29, 2021 at 3:04 AM Tvrtko Ursulin
> <tvrtko.ursulin@linux.intel.com> wrote:
> >
> >
> > On 28/04/2021 18:24, Jason Ekstrand wrote:
> > > On Wed, Apr 28, 2021 at 10:55 AM Tvrtko Ursulin
> > > <tvrtko.ursulin@linux.intel.com> wrote:
> > >> On 23/04/2021 23:31, Jason Ekstrand wrote:
> > >>> Instead of handling it like a context param, unconditionally set it when
> > >>> intel_contexts are created.  This doesn't fix anything but does simplify
> > >>> the code a bit.
> > >>>
> > >>> Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
> > >>> ---
> > >>>    drivers/gpu/drm/i915/gem/i915_gem_context.c   | 43 +++----------------
> > >>>    .../gpu/drm/i915/gem/i915_gem_context_types.h |  4 --
> > >>>    drivers/gpu/drm/i915/gt/intel_context_param.h |  3 +-
> > >>>    3 files changed, 6 insertions(+), 44 deletions(-)
> > >>>
> > >>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > >>> index 35bcdeddfbf3f..1091cc04a242a 100644
> > >>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > >>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > >>> @@ -233,7 +233,11 @@ static void intel_context_set_gem(struct intel_context *ce,
> > >>>            intel_engine_has_timeslices(ce->engine))
> > >>>                __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
> > >>>
> > >>> -     intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us);
> > >>> +     if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) &&
> > >>> +         ctx->i915->params.request_timeout_ms) {
> > >>> +             unsigned int timeout_ms = ctx->i915->params.request_timeout_ms;
> > >>> +             intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000);
> > >>
> > >> Blank line between declarations and code please, or just lose the local.
> > >>
> > >> Otherwise looks okay. Slight change that same GEM context can now have a
> > >> mix of different request expirations isn't interesting I think. At least
> > >> the change goes away by the end of the series.
> > >
> > > In order for that to happen, I think you'd have to have a race between
> > > CREATE_CONTEXT and someone smashing the request_timeout_ms param via
> > > sysfs.  Or am I missing something?  Given that timeouts are really
> > > per-engine anyway, I don't think we need to care too much about that.
> >
> > We don't care, no.
> >
> > For completeness only - by the end of the series it is as you say. But
> > at _this_ point in the series a mix can happen if the modparam changes at
> > any point between context create and replacing engines. Which is a change
> > compared to before this patch, since the modparam was cached in the GEM
> > context so far. So one GEM context had a single request_timeout_ms.
> 
> I've added the following to the commit message:
> 
> It also means that sync files exported from different engines on a
> SINGLE_TIMELINE context will have different fence contexts.  This is
> visible to userspace if it looks at the obj_name field of
> sync_fence_info.
> 
> How's that sound?

If you add "Which media-driver as the sole user of this doesn't do" then I
think it's perfect.
-Daniel

Daniel Vetter April 29, 2021, 5:13 p.m. UTC | #7
On Thu, Apr 29, 2021 at 07:12:05PM +0200, Daniel Vetter wrote:
> On Thu, Apr 29, 2021 at 09:54:15AM -0500, Jason Ekstrand wrote:
> > On Thu, Apr 29, 2021 at 3:04 AM Tvrtko Ursulin
> > <tvrtko.ursulin@linux.intel.com> wrote:
> > >
> > >
> > > On 28/04/2021 18:24, Jason Ekstrand wrote:
> > > > On Wed, Apr 28, 2021 at 10:55 AM Tvrtko Ursulin
> > > > <tvrtko.ursulin@linux.intel.com> wrote:
> > > >> On 23/04/2021 23:31, Jason Ekstrand wrote:
> > > >>> Instead of handling it like a context param, unconditionally set it when
> > > >>> intel_contexts are created.  This doesn't fix anything but does simplify
> > > >>> the code a bit.
> > > >>>
> > > >>> Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
> > > >>> ---
> > > >>>    drivers/gpu/drm/i915/gem/i915_gem_context.c   | 43 +++----------------
> > > >>>    .../gpu/drm/i915/gem/i915_gem_context_types.h |  4 --
> > > >>>    drivers/gpu/drm/i915/gt/intel_context_param.h |  3 +-
> > > >>>    3 files changed, 6 insertions(+), 44 deletions(-)
> > > >>>
> > > >>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > > >>> index 35bcdeddfbf3f..1091cc04a242a 100644
> > > >>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > > >>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > > >>> @@ -233,7 +233,11 @@ static void intel_context_set_gem(struct intel_context *ce,
> > > >>>            intel_engine_has_timeslices(ce->engine))
> > > >>>                __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
> > > >>>
> > > >>> -     intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us);
> > > >>> +     if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) &&
> > > >>> +         ctx->i915->params.request_timeout_ms) {
> > > >>> +             unsigned int timeout_ms = ctx->i915->params.request_timeout_ms;
> > > >>> +             intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000);
> > > >>
> > > >> Blank line between declarations and code please, or just lose the local.
> > > >>
> > > >> Otherwise looks okay. Slight change that same GEM context can now have a
> > > >> mix of different request expirations isn't interesting I think. At least
> > > >> the change goes away by the end of the series.
> > > >
> > > > In order for that to happen, I think you'd have to have a race between
> > > > CREATE_CONTEXT and someone smashing the request_timeout_ms param via
> > > > sysfs.  Or am I missing something?  Given that timeouts are really
> > > > per-engine anyway, I don't think we need to care too much about that.
> > >
> > > We don't care, no.
> > >
> > > For completeness only - by the end of the series it is as you say. But
> > > at _this_ point in the series a mix can happen if the modparam changes at
> > > any point between context create and replacing engines. Which is a change
> > > compared to before this patch, since the modparam was cached in the GEM
> > > context so far. So one GEM context had a single request_timeout_ms.
> > 
> > I've added the following to the commit message:
> > 
> > It also means that sync files exported from different engines on a
> > SINGLE_TIMELINE context will have different fence contexts.  This is
> > visible to userspace if it looks at the obj_name field of
> > sync_fence_info.
> > 
> > How's that sound?
> 
> If you add "Which media-driver as the sole user of this doesn't do" then I
> think it's perfect.

Uh I think you replied to the wrong thread :-)

This here is about watchdog, not timeline.
-Daniel
Jason Ekstrand April 29, 2021, 6:41 p.m. UTC | #8
On Thu, Apr 29, 2021 at 12:13 PM Daniel Vetter <daniel@ffwll.ch> wrote:
>
> On Thu, Apr 29, 2021 at 07:12:05PM +0200, Daniel Vetter wrote:
> > On Thu, Apr 29, 2021 at 09:54:15AM -0500, Jason Ekstrand wrote:
> > > On Thu, Apr 29, 2021 at 3:04 AM Tvrtko Ursulin
> > > <tvrtko.ursulin@linux.intel.com> wrote:
> > > >
> > > >
> > > > On 28/04/2021 18:24, Jason Ekstrand wrote:
> > > > > On Wed, Apr 28, 2021 at 10:55 AM Tvrtko Ursulin
> > > > > <tvrtko.ursulin@linux.intel.com> wrote:
> > > > >> On 23/04/2021 23:31, Jason Ekstrand wrote:
> > > > >>> Instead of handling it like a context param, unconditionally set it when
> > > > >>> intel_contexts are created.  This doesn't fix anything but does simplify
> > > > >>> the code a bit.
> > > > >>>
> > > > >>> Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
> > > > >>> ---
> > > > >>>    drivers/gpu/drm/i915/gem/i915_gem_context.c   | 43 +++----------------
> > > > >>>    .../gpu/drm/i915/gem/i915_gem_context_types.h |  4 --
> > > > >>>    drivers/gpu/drm/i915/gt/intel_context_param.h |  3 +-
> > > > >>>    3 files changed, 6 insertions(+), 44 deletions(-)
> > > > >>>
> > > > >>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > > > >>> index 35bcdeddfbf3f..1091cc04a242a 100644
> > > > >>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > > > >>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > > > >>> @@ -233,7 +233,11 @@ static void intel_context_set_gem(struct intel_context *ce,
> > > > >>>            intel_engine_has_timeslices(ce->engine))
> > > > >>>                __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
> > > > >>>
> > > > >>> -     intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us);
> > > > >>> +     if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) &&
> > > > >>> +         ctx->i915->params.request_timeout_ms) {
> > > > >>> +             unsigned int timeout_ms = ctx->i915->params.request_timeout_ms;
> > > > >>> +             intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000);
> > > > >>
> > > > >> Blank line between declarations and code please, or just lose the local.
> > > > >>
> > > > >> Otherwise looks okay. Slight change that same GEM context can now have a
> > > > >> mix of different request expirations isn't interesting I think. At least
> > > > >> the change goes away by the end of the series.
> > > > >
> > > > > In order for that to happen, I think you'd have to have a race between
> > > > > CREATE_CONTEXT and someone smashing the request_timeout_ms param via
> > > > > sysfs.  Or am I missing something?  Given that timeouts are really
> > > > > per-engine anyway, I don't think we need to care too much about that.
> > > >
> > > > We don't care, no.
> > > >
> > > > For completeness only - by the end of the series it is as you say. But
> > > > at _this_ point in the series a mix can happen if the modparam changes at
> > > > any point between context create and replacing engines. Which is a change
> > > > compared to before this patch, since the modparam was cached in the GEM
> > > > context so far. So one GEM context had a single request_timeout_ms.
> > >
> > > I've added the following to the commit message:
> > >
> > > It also means that sync files exported from different engines on a
> > > SINGLE_TIMELINE context will have different fence contexts.  This is
> > > visible to userspace if it looks at the obj_name field of
> > > sync_fence_info.
> > >
> > > How's that sound?
> >
> > If you add "Which media-driver as the sole user of this doesn't do" then I
> > think it's perfect.
>
> Uh I think you replied to the wrong thread :-)

Indeed!

Tvrtko Ursulin April 30, 2021, 11:18 a.m. UTC | #9
On 29/04/2021 15:54, Jason Ekstrand wrote:
> On Thu, Apr 29, 2021 at 3:04 AM Tvrtko Ursulin
> <tvrtko.ursulin@linux.intel.com> wrote:
>>
>>
>> On 28/04/2021 18:24, Jason Ekstrand wrote:
>>> On Wed, Apr 28, 2021 at 10:55 AM Tvrtko Ursulin
>>> <tvrtko.ursulin@linux.intel.com> wrote:
>>>> On 23/04/2021 23:31, Jason Ekstrand wrote:
>>>>> Instead of handling it like a context param, unconditionally set it when
>>>>> intel_contexts are created.  This doesn't fix anything but does simplify
>>>>> the code a bit.
>>>>>
>>>>> Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
>>>>> ---
>>>>>     drivers/gpu/drm/i915/gem/i915_gem_context.c   | 43 +++----------------
>>>>>     .../gpu/drm/i915/gem/i915_gem_context_types.h |  4 --
>>>>>     drivers/gpu/drm/i915/gt/intel_context_param.h |  3 +-
>>>>>     3 files changed, 6 insertions(+), 44 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>>>>> index 35bcdeddfbf3f..1091cc04a242a 100644
>>>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
>>>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>>>>> @@ -233,7 +233,11 @@ static void intel_context_set_gem(struct intel_context *ce,
>>>>>             intel_engine_has_timeslices(ce->engine))
>>>>>                 __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
>>>>>
>>>>> -     intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us);
>>>>> +     if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) &&
>>>>> +         ctx->i915->params.request_timeout_ms) {
>>>>> +             unsigned int timeout_ms = ctx->i915->params.request_timeout_ms;
>>>>> +             intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000);
>>>>
>>>> Blank line between declarations and code please, or just lose the local.
>>>>
>>>> Otherwise looks okay. Slight change that same GEM context can now have a
>>>> mix of different request expirations isn't interesting I think. At least
>>>> the change goes away by the end of the series.
>>>
>>> In order for that to happen, I think you'd have to have a race between
>>> CREATE_CONTEXT and someone smashing the request_timeout_ms param via
>>> sysfs.  Or am I missing something?  Given that timeouts are really
>>> per-engine anyway, I don't think we need to care too much about that.
>>
>> We don't care, no.
>>
>> For completeness only - by the end of the series it is as you say. But
>> at _this_ point in the series a mix can happen if the modparam changes at
>> any point between context create and replacing engines. Which is a change
>> compared to before this patch, since the modparam was cached in the GEM
>> context so far. So one GEM context had a single request_timeout_ms.
> 
> I've added the following to the commit message:
> 
> It also means that sync files exported from different engines on a
> SINGLE_TIMELINE context will have different fence contexts.  This is
> visible to userspace if it looks at the obj_name field of
> sync_fence_info.
> 
> How's that sound?

Wrong thread but sounds good.

I haven't looked into the fence merge logic apart from noticing context
is used there. So I'd suggest a quick look there on top, just to make
sure the merging logic does not hold any surprises if contexts start to
differ. Probably it just results in more inefficiency somewhere, in theory.

Regards,

Tvrtko
Jason Ekstrand April 30, 2021, 3:35 p.m. UTC | #10
On Fri, Apr 30, 2021 at 6:18 AM Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
>
>
> On 29/04/2021 15:54, Jason Ekstrand wrote:
> > On Thu, Apr 29, 2021 at 3:04 AM Tvrtko Ursulin
> > <tvrtko.ursulin@linux.intel.com> wrote:
> >>
> >>
> >> On 28/04/2021 18:24, Jason Ekstrand wrote:
> >>> On Wed, Apr 28, 2021 at 10:55 AM Tvrtko Ursulin
> >>> <tvrtko.ursulin@linux.intel.com> wrote:
> >>>> On 23/04/2021 23:31, Jason Ekstrand wrote:
> >>>>> Instead of handling it like a context param, unconditionally set it when
> >>>>> intel_contexts are created.  This doesn't fix anything but does simplify
> >>>>> the code a bit.
> >>>>>
> >>>>> Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
> >>>>> ---
> >>>>>     drivers/gpu/drm/i915/gem/i915_gem_context.c   | 43 +++----------------
> >>>>>     .../gpu/drm/i915/gem/i915_gem_context_types.h |  4 --
> >>>>>     drivers/gpu/drm/i915/gt/intel_context_param.h |  3 +-
> >>>>>     3 files changed, 6 insertions(+), 44 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >>>>> index 35bcdeddfbf3f..1091cc04a242a 100644
> >>>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >>>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >>>>> @@ -233,7 +233,11 @@ static void intel_context_set_gem(struct intel_context *ce,
> >>>>>             intel_engine_has_timeslices(ce->engine))
> >>>>>                 __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
> >>>>>
> >>>>> -     intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us);
> >>>>> +     if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) &&
> >>>>> +         ctx->i915->params.request_timeout_ms) {
> >>>>> +             unsigned int timeout_ms = ctx->i915->params.request_timeout_ms;
> >>>>> +             intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000);
> >>>>
> >>>> Blank line between declarations and code please, or just lose the local.
> >>>>
> >>>> Otherwise looks okay. Slight change that same GEM context can now have a
> >>>> mix of different request expirations isn't interesting I think. At least
> >>>> the change goes away by the end of the series.
> >>>
> >>> In order for that to happen, I think you'd have to have a race between
> >>> CREATE_CONTEXT and someone smashing the request_timeout_ms param via
> >>> sysfs.  Or am I missing something?  Given that timeouts are really
> >>> per-engine anyway, I don't think we need to care too much about that.
> >>
> >> We don't care, no.
> >>
> >> For completeness only - by the end of the series it is as you say. But
> >> at _this_ point in the series a mix can happen if the modparam changes at
> >> any point between context create and replacing engines. Which is a change
> >> compared to before this patch, since the modparam was cached in the GEM
> >> context so far. So one GEM context had a single request_timeout_ms.
> >
> > I've added the following to the commit message:
> >
> > It also means that sync files exported from different engines on a
> > SINGLE_TIMELINE context will have different fence contexts.  This is
> > visible to userspace if it looks at the obj_name field of
> > sync_fence_info.
> >
> > How's that sound?
>
> Wrong thread but sounds good.
>
> I haven't looked into the fence merge logic apart from noticing context
> is used there. So I'd suggest a quick look there on top, just to make
> sure the merging logic does not hold any surprises if contexts start to
> differ. Probably it just results in more inefficiency somewhere, in theory.

Looked at it yesterday.  It really does just create a fence array with
all the fences. :-)
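
For the record, the rule boils down to: fences from the same fence context
collapse to the latest one, fences from different contexts are all kept and
wrapped in a fence array. A toy model of just that rule (an illustration,
not the verbatim drivers/dma-buf code):

#include <stdint.h>
#include <stdio.h>

struct fence { uint64_t context; uint64_t seqno; };

/* Same context: collapse to the later seqno. Different contexts: keep
 * both (the kernel then wraps them in a dma_fence_array). Returns the
 * number of fences written to out[]. */
static int merge(struct fence a, struct fence b, struct fence out[2])
{
	if (a.context == b.context) {
		out[0] = a.seqno >= b.seqno ? a : b;
		return 1;
	}
	out[0] = a;
	out[1] = b;
	return 2;
}

int main(void)
{
	struct fence out[2];

	/* Same context 1: one fence survives (the one with seqno 7). */
	printf("%d\n", merge((struct fence){1, 5}, (struct fence){1, 7}, out));
	/* Contexts 1 and 2: both fences are kept -> a fence array. */
	printf("%d\n", merge((struct fence){1, 5}, (struct fence){2, 3}, out));
	return 0;
}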

--Jason

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 35bcdeddfbf3f..1091cc04a242a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -233,7 +233,11 @@  static void intel_context_set_gem(struct intel_context *ce,
 	    intel_engine_has_timeslices(ce->engine))
 		__set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
 
-	intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us);
+	if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) &&
+	    ctx->i915->params.request_timeout_ms) {
+		unsigned int timeout_ms = ctx->i915->params.request_timeout_ms;
+		intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000);
+	}
 }
 
 static void __free_engines(struct i915_gem_engines *e, unsigned int count)
@@ -792,41 +796,6 @@  static void __assign_timeline(struct i915_gem_context *ctx,
 	context_apply_all(ctx, __apply_timeline, timeline);
 }
 
-static int __apply_watchdog(struct intel_context *ce, void *timeout_us)
-{
-	return intel_context_set_watchdog_us(ce, (uintptr_t)timeout_us);
-}
-
-static int
-__set_watchdog(struct i915_gem_context *ctx, unsigned long timeout_us)
-{
-	int ret;
-
-	ret = context_apply_all(ctx, __apply_watchdog,
-				(void *)(uintptr_t)timeout_us);
-	if (!ret)
-		ctx->watchdog.timeout_us = timeout_us;
-
-	return ret;
-}
-
-static void __set_default_fence_expiry(struct i915_gem_context *ctx)
-{
-	struct drm_i915_private *i915 = ctx->i915;
-	int ret;
-
-	if (!IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) ||
-	    !i915->params.request_timeout_ms)
-		return;
-
-	/* Default expiry for user fences. */
-	ret = __set_watchdog(ctx, i915->params.request_timeout_ms * 1000);
-	if (ret)
-		drm_notice(&i915->drm,
-			   "Failed to configure default fence expiry! (%d)",
-			   ret);
-}
-
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
 {
@@ -871,8 +840,6 @@  i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
 		intel_timeline_put(timeline);
 	}
 
-	__set_default_fence_expiry(ctx);
-
 	trace_i915_context_create(ctx);
 
 	return ctx;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 5ae71ec936f7c..676592e27e7d2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -153,10 +153,6 @@  struct i915_gem_context {
 	 */
 	atomic_t active_count;
 
-	struct {
-		u64 timeout_us;
-	} watchdog;
-
 	/**
 	 * @hang_timestamp: The last time(s) this context caused a GPU hang
 	 */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.h b/drivers/gpu/drm/i915/gt/intel_context_param.h
index dffedd983693d..0c69cb42d075c 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_param.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_param.h
@@ -10,11 +10,10 @@ 
 
 #include "intel_context.h"
 
-static inline int
+static inline void
 intel_context_set_watchdog_us(struct intel_context *ce, u64 timeout_us)
 {
 	ce->watchdog.timeout_us = timeout_us;
-	return 0;
 }
 
 #endif /* INTEL_CONTEXT_PARAM_H */