[05/10] drm/i915: Track runtime spent in unreachable intel_contexts

Message ID: 20200318121138.909-6-tvrtko.ursulin@linux.intel.com
State: New, archived
Series: Per client engine busyness

Commit Message

Tvrtko Ursulin March 18, 2020, 12:11 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

As contexts are abandoned we want to remember how much GPU time they used
(per class) so later we can use it for smarter purposes.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c       | 13 ++++++++++++-
 drivers/gpu/drm/i915/gem/i915_gem_context_types.h |  5 +++++
 2 files changed, 17 insertions(+), 1 deletion(-)

Comments

Chris Wilson March 18, 2020, 1:55 p.m. UTC | #1
Quoting Tvrtko Ursulin (2020-03-18 12:11:34)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> As contexts are abandoned we want to remember how much GPU time they used
> (per class) so later we can use it for smarter purposes.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> [snip]
> @@ -257,7 +257,19 @@ static void free_engines_rcu(struct rcu_head *rcu)
>  {
>         struct i915_gem_engines *engines =
>                 container_of(rcu, struct i915_gem_engines, rcu);
> +       struct i915_gem_context *ctx = engines->ctx;
> +       struct i915_gem_engines_iter it;
> +       struct intel_context *ce;
> +
> +       /* Transfer accumulated runtime to the parent GEM context. */
> +       for_each_gem_engine(ce, engines, it) {
> +               unsigned int class = ce->engine->uabi_class;
>  
> +               GEM_BUG_ON(class >= ARRAY_SIZE(ctx->past_runtime));
> +               atomic64_add(ce->runtime.total, &ctx->past_runtime[class]);

Hmm, there's an odd situation where the free_engines_rcu could fire
before we do the final schedule_out of the context.

GEM_BUG_ON(intel_context_inflight(ce)) to see if that's being too
paranoid.
-Chris
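
For reference, a minimal sketch of what the suggested paranoia check could look like inside the loop added by this patch (the exact placement is an assumption; intel_context_inflight() reports whether the context is still in flight on an engine):

	/* Transfer accumulated runtime to the parent GEM context. */
	for_each_gem_engine(ce, engines, it) {
		unsigned int class = ce->engine->uabi_class;

		/* Catch the race noted above: the final schedule_out must
		 * have happened before the runtime is read off. */
		GEM_BUG_ON(intel_context_inflight(ce));

		GEM_BUG_ON(class >= ARRAY_SIZE(ctx->past_runtime));
		atomic64_add(ce->runtime.total, &ctx->past_runtime[class]);
	}
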
Tvrtko Ursulin March 18, 2020, 2:13 p.m. UTC | #2
On 18/03/2020 13:55, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-03-18 12:11:34)
>> [snip]
>> +               GEM_BUG_ON(class >= ARRAY_SIZE(ctx->past_runtime));
>> +               atomic64_add(ce->runtime.total, &ctx->past_runtime[class]);
> 
> Hmm, there's an odd situation where the free_engines_rcu could fire
> before we do the final schedule_out of the context.
> 
> GEM_BUG_ON(intel_context_inflight(ce)) to see if that's being too
> paranoid.

Deja vu... have I forgotten to move this into intel_context_free, while 
the purpose of keeping ce->gem_context valid was exactly to enable that?

Regards,

Tvrtko
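
A rough sketch of the alternative being floated here: doing the accumulation at final context free in gt/intel_context.c instead, relying on ce->gem_context staying valid that long (the helper and its call site are hypothetical):

	/* Hypothetical helper, called from intel_context_free() just
	 * before the intel_context is released. */
	static void accumulate_runtime(struct intel_context *ce)
	{
		struct i915_gem_context *ctx = ce->gem_context;
		unsigned int class = ce->engine->uabi_class;

		GEM_BUG_ON(class >= ARRAY_SIZE(ctx->past_runtime));
		atomic64_add(ce->runtime.total, &ctx->past_runtime[class]);
	}
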
Chris Wilson March 18, 2020, 2:32 p.m. UTC | #3
Quoting Tvrtko Ursulin (2020-03-18 14:13:14)
> 
> On 18/03/2020 13:55, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2020-03-18 12:11:34)
> >> [snip]
> >> +               GEM_BUG_ON(class >= ARRAY_SIZE(ctx->past_runtime));
> >> +               atomic64_add(ce->runtime.total, &ctx->past_runtime[class]);
> > 
> > Hmm, there's an odd situation where the free_engines_rcu could fire
> > before we do the final schedule_out of the context.
> > 
> > GEM_BUG_ON(intel_context_inflight(ce)) to see if that's being too
> > paranoid.
> 
> Deja vu... have I forgotten to move this into intel_context_free, while
> the purpose of keeping ce->gem_context valid was exactly to enable that?

I would rather not have it in gt/ if possible. The delay should be
nominal at worst.
-Chris
Tvrtko Ursulin March 18, 2020, 2:38 p.m. UTC | #4
On 18/03/2020 14:32, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-03-18 14:13:14)
>>
>> On 18/03/2020 13:55, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2020-03-18 12:11:34)
>>>> [snip]
>>>
>>> Hmm, there's an odd situation where the free_engines_rcu could fire
>>> before we do the final schedule_out of the context.
>>>
>>> GEM_BUG_ON(intel_context_inflight(ce)) to see if that's being too
>>> paranoid.
>>
>> Deja vu... have I forgotten to move this into intel_context_free, while
>> the purpose of keeping ce->gem_context valid was exactly to enable that?
> 
> I would rather not have it in gt/ if possible. The delay should be
> nominal at worst.

But I thought your concern was that this can miss the accumulation of the 
last active period, due to the active tracker triggering the RCU cleanup 
before the last context save. What do you recommend then?

Regards,

Tvrtko
Chris Wilson March 18, 2020, 2:48 p.m. UTC | #5
Quoting Tvrtko Ursulin (2020-03-18 14:38:53)
> 
> On 18/03/2020 14:32, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2020-03-18 14:13:14)
> >>
> >> On 18/03/2020 13:55, Chris Wilson wrote:
> >>> Quoting Tvrtko Ursulin (2020-03-18 12:11:34)
> >>>> [snip]
> >>>
> >>> Hmm, there's an odd situation where the free_engines_rcu could fire
> >>> before we do the final schedule_out of the context.
> >>>
> >>> GEM_BUG_ON(intel_context_inflight(ce)) to see if that's being too
> >>> paranoid.
> >>
> >> Deja vu... have I forgotten to move this into intel_context_free, while
> >> the purpose of keeping ce->gem_context valid was exactly to enable that?
> > 
> > I would rather not have it in gt/ if possible. The delay should be
> > nominal at worst.
> 
> But I thought your concern was that this can miss the accumulation of the 
> last active period, due to the active tracker triggering the RCU cleanup 
> before the last context save. What do you recommend then?

That is the concern, but it's not a huge concern for me :)

I was tempted just to put a busywait here, rather than couple up a
notification scheme. Although...

We do have a notification here for the context_retire we could listen to
instead of listening to the request idling. If we use

	i915_sw_fence_await_active(&engines->fence,
				   &ce->active,
				   I915_ACTIVE_AWAIT_ALL);

instead, then the fence will not fire until the final barrier has
executed.

Tada!
-Chris
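
To make that concrete, a sketch of wiring the await in when the engines are retired off the context (the surrounding loop and error handling are assumptions; only the i915_sw_fence_await_active() call is taken from the suggestion above):

	/* Make engines->fence also wait on each context's active
	 * tracker, so free_engines_rcu() cannot run until the final
	 * barrier, and hence the final context save, has executed. */
	for_each_gem_engine(ce, engines, it) {
		int err;

		err = i915_sw_fence_await_active(&engines->fence,
						 &ce->active,
						 I915_ACTIVE_AWAIT_ALL);
		if (err < 0)
			break;
	}
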
Chris Wilson March 18, 2020, 3:07 p.m. UTC | #6
Quoting Chris Wilson (2020-03-18 14:48:05)
> We do have a notification here for the context_retire we could listen to
> instead of listening to the request idling. If we use
> 
>         i915_sw_fence_await_active(&engines->fence,
>                                    &ce->active,
>                                    I915_ACTIVE_AWAIT_ALL);
> 
> instead, then the fence will not fire until the final barrier has
> executed.
> 
> Tada!

It's close. It's still strictly firing on the pulse request signaling,
which is currently not guaranteed to be after the context_out.
Although we can arrange that with a sentinel.
-Chris
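
(A sentinel in this codebase is a request flagged so that no further requests are coalesced into the same submission behind it. Purely as a hypothetical sketch, marking the pulse request would be along the lines of:

	/* Hypothetical: flag the pulse as a sentinel so its completion
	 * implies the context has been scheduled out. */
	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);

with rq being the heartbeat pulse; whether this alone gives the required ordering is exactly the open question here.)
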
Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 7c119a3a2cbd..5edf79ed6247 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -257,7 +257,19 @@ static void free_engines_rcu(struct rcu_head *rcu)
 {
 	struct i915_gem_engines *engines =
 		container_of(rcu, struct i915_gem_engines, rcu);
+	struct i915_gem_context *ctx = engines->ctx;
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
+
+	/* Transfer accumulated runtime to the parent GEM context. */
+	for_each_gem_engine(ce, engines, it) {
+		unsigned int class = ce->engine->uabi_class;
 
+		GEM_BUG_ON(class >= ARRAY_SIZE(ctx->past_runtime));
+		atomic64_add(ce->runtime.total, &ctx->past_runtime[class]);
+	}
+
+	i915_gem_context_put(ctx);
 	i915_sw_fence_fini(&engines->fence);
 	free_engines(engines);
 }
@@ -278,7 +290,6 @@ engines_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 			list_del(&engines->link);
 			spin_unlock_irqrestore(&ctx->stale.lock, flags);
 		}
-		i915_gem_context_put(engines->ctx);
 		break;
 
 	case FENCE_FREE:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index b0e03380c690..f0d7441aafc8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -177,6 +177,11 @@  struct i915_gem_context {
 		spinlock_t lock;
 		struct list_head engines;
 	} stale;
+
+	/**
+	 * @past_runtime: Accumulation of freed intel_context pphwsp runtimes.
+	 */
+	atomic64_t past_runtime[MAX_ENGINE_CLASS + 1];
 };
 
 #endif /* __I915_GEM_CONTEXT_TYPES_H__ */
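
As an illustration of the "smarter purposes" the commit message alludes to (the rest of the series exposes per-client engine busyness), a consumer of the new field could total per-class GPU time as banked past runtime plus the live contexts' runtime. A sketch only; the helper name is hypothetical, and values are in pphwsp runtime units (CS timestamp ticks), not nanoseconds:

	/* Total class busyness: time transferred from freed engine
	 * sets plus time accumulated by the live intel_contexts. */
	static u64 total_class_runtime(struct i915_gem_context *ctx,
				       unsigned int class)
	{
		struct i915_gem_engines_iter it;
		struct intel_context *ce;
		u64 total = atomic64_read(&ctx->past_runtime[class]);

		for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
			if (ce->engine->uabi_class == class)
				total += ce->runtime.total;
		}
		i915_gem_context_unlock_engines(ctx);

		return total;
	}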