diff mbox series

[5/6] drm/i915: Track per drm client engine class busyness

Message ID 20200207161331.23447-6-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Per client engine busyness | expand

Commit Message

Tvrtko Ursulin Feb. 7, 2020, 4:13 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Add per client, per engine class tracking of on GPU runtime on top of the
previously added per context tracking.

This data will then be exported via sysfs in a following patch in order to
implement per DRM client view of GPU utilization.

To implement this we add a stats structure to each drm client, which is
per engine class, used to track the total time spent on engines of a
certain class before the last activity transition, count of currently
active contexts and a timestamp of the last activity transition (any
increase or decrease). This allows accumulating activity segments and
reporting in-progress activity.

There is a small locked section (seqlock) from which time accounting
updates are made as contexts are entering and exiting from the GPU. Lock
is per class so contention is limited to same client activity on the same
engine class. Other scenarios add no new lock contention.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.h   | 32 ++++++++++++++++++----
 drivers/gpu/drm/i915/gt/intel_engine_cs.c |  1 +
 drivers/gpu/drm/i915/gt/intel_lrc.c       | 33 ++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_drm_client.c    |  4 ++-
 drivers/gpu/drm/i915/i915_drm_client.h    | 10 +++++++
 5 files changed, 67 insertions(+), 13 deletions(-)

Comments

Chris Wilson Feb. 7, 2020, 4:33 p.m. UTC | #1
Quoting Tvrtko Ursulin (2020-02-07 16:13:30)
>  static inline void
> -__intel_context_stats_start(struct intel_context *ce, ktime_t now)
> +__intel_context_stats_start(struct intel_context *ce,
> +                           struct intel_engine_cs *engine,
> +                           ktime_t now)
>  {
>         struct intel_context_stats *stats = &ce->stats;
> -
> -       if (!stats->active) {
> -               stats->start = now;
> -               stats->active = true;
> +       struct i915_gem_context *ctx;
> +
> +       if (stats->active)
> +               return;
> +
> +       stats->start = now;
> +       stats->active = true;
> +
> +       rcu_read_lock();
> +       ctx = rcu_dereference(ce->gem_context);
> +       if (ctx && ctx->client) {

I'd rather avoid having to dig into the GEM context down here next to
the HW.

First thought would be to keep the stats local on the intel_context and
for the client to chase collate them when the user reads the fd.

Hmm, didn't you structure it like so earlier? What made you change your
mind?
-Chris
Tvrtko Ursulin Feb. 7, 2020, 4:49 p.m. UTC | #2
On 07/02/2020 16:33, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-02-07 16:13:30)
>>   static inline void
>> -__intel_context_stats_start(struct intel_context *ce, ktime_t now)
>> +__intel_context_stats_start(struct intel_context *ce,
>> +                           struct intel_engine_cs *engine,
>> +                           ktime_t now)
>>   {
>>          struct intel_context_stats *stats = &ce->stats;
>> -
>> -       if (!stats->active) {
>> -               stats->start = now;
>> -               stats->active = true;
>> +       struct i915_gem_context *ctx;
>> +
>> +       if (stats->active)
>> +               return;
>> +
>> +       stats->start = now;
>> +       stats->active = true;
>> +
>> +       rcu_read_lock();
>> +       ctx = rcu_dereference(ce->gem_context);
>> +       if (ctx && ctx->client) {
> 
> I'd rather avoid having to dig into the GEM context down here next to
> the HW.
> 
> First thought would be to keep the stats local on the intel_context and
> for the client to chase collate them when the user reads the fd.
> 
> Hmm, didn't you structure it like so earlier? What made you change your
> mind?

Yes, it's in the cover letter - we must not have disappearing 
contributions - client can submit from one context for a bit, close it, 
and oops usage history lost.

ce->drm_client? :)


Regards,

Tvrtko
Chris Wilson Feb. 7, 2020, 5:02 p.m. UTC | #3
Quoting Tvrtko Ursulin (2020-02-07 16:49:17)
> 
> On 07/02/2020 16:33, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2020-02-07 16:13:30)
> >>   static inline void
> >> -__intel_context_stats_start(struct intel_context *ce, ktime_t now)
> >> +__intel_context_stats_start(struct intel_context *ce,
> >> +                           struct intel_engine_cs *engine,
> >> +                           ktime_t now)
> >>   {
> >>          struct intel_context_stats *stats = &ce->stats;
> >> -
> >> -       if (!stats->active) {
> >> -               stats->start = now;
> >> -               stats->active = true;
> >> +       struct i915_gem_context *ctx;
> >> +
> >> +       if (stats->active)
> >> +               return;
> >> +
> >> +       stats->start = now;
> >> +       stats->active = true;
> >> +
> >> +       rcu_read_lock();
> >> +       ctx = rcu_dereference(ce->gem_context);
> >> +       if (ctx && ctx->client) {
> > 
> > I'd rather avoid having to dig into the GEM context down here next to
> > the HW.
> > 
> > First thought would be to keep the stats local on the intel_context and
> > for the client to chase collate them when the user reads the fd.
> > 
> > Hmm, didn't you structure it like so earlier? What made you change your
> > mind?
> 
> Yes, it's in the cover letter - we must not have disappearing 
> contributions - client can submit from one context for a bit, close it, 
> and oops usage history lost.

If only we had a mechanism for accumulating contributions from stale
engines and then from stale contexts?
 
> ce->drm_client? :)

Yeah that is equally icky for swimming upstream.

Ok, I have an idea for capturing the contributions at close rather than
having to do global accumulations at runtime.
-Chris
Tvrtko Ursulin Feb. 7, 2020, 5:24 p.m. UTC | #4
On 07/02/2020 17:02, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-02-07 16:49:17)
>>
>> On 07/02/2020 16:33, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2020-02-07 16:13:30)
>>>>    static inline void
>>>> -__intel_context_stats_start(struct intel_context *ce, ktime_t now)
>>>> +__intel_context_stats_start(struct intel_context *ce,
>>>> +                           struct intel_engine_cs *engine,
>>>> +                           ktime_t now)
>>>>    {
>>>>           struct intel_context_stats *stats = &ce->stats;
>>>> -
>>>> -       if (!stats->active) {
>>>> -               stats->start = now;
>>>> -               stats->active = true;
>>>> +       struct i915_gem_context *ctx;
>>>> +
>>>> +       if (stats->active)
>>>> +               return;
>>>> +
>>>> +       stats->start = now;
>>>> +       stats->active = true;
>>>> +
>>>> +       rcu_read_lock();
>>>> +       ctx = rcu_dereference(ce->gem_context);
>>>> +       if (ctx && ctx->client) {
>>>
>>> I'd rather avoid having to dig into the GEM context down here next to
>>> the HW.
>>>
>>> First thought would be to keep the stats local on the intel_context and
>>> for the client to chase collate them when the user reads the fd.
>>>
>>> Hmm, didn't you structure it like so earlier? What made you change your
>>> mind?
>>
>> Yes, it's in the cover letter - we must not have disappearing
>> contributions - client can submit from one context for a bit, close it,
>> and oops usage history lost.
> 
> If only we had a mechanism for accumulating contributions from stale
> engines and then from stale contexts?
>   
>> ce->drm_client? :)
> 
> Yeah that is equally icky for swimming upstream.
> 
> Ok, I have an idea for capturing the contributions at close rather than
> having to do global accumulations at runtime.

It's certainly possible by accumulating over context runtimes when they 
are closed and adding to active runtime at readout. I contemplated that 
solution as well.

Regards,

Tvrtko
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 871196aa7d5a..a0a77a590566 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -12,6 +12,7 @@ 
 #include <linux/types.h>
 
 #include "i915_active.h"
+#include "i915_drm_client.h"
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
 #include "intel_ring_types.h"
@@ -228,14 +229,35 @@  intel_context_clear_nopreempt(struct intel_context *ce)
 }
 
 static inline void
-__intel_context_stats_start(struct intel_context *ce, ktime_t now)
+__intel_context_stats_start(struct intel_context *ce,
+			    struct intel_engine_cs *engine,
+			    ktime_t now)
 {
 	struct intel_context_stats *stats = &ce->stats;
-
-	if (!stats->active) {
-		stats->start = now;
-		stats->active = true;
+	struct i915_gem_context *ctx;
+
+	if (stats->active)
+		return;
+
+	stats->start = now;
+	stats->active = true;
+
+	rcu_read_lock();
+	ctx = rcu_dereference(ce->gem_context);
+	if (ctx && ctx->client) {
+		struct i915_drm_client_stats *cstats =
+			&ctx->client->stats[engine->uabi_class];
+		ktime_t now = ktime_get();
+		unsigned long flags;
+
+		write_seqlock_irqsave(&cstats->lock, flags);
+		cstats->busy += ktime_to_ns(ktime_sub(now, cstats->start)) *
+				cstats->active++;
+		GEM_WARN_ON(cstats->active == 0);
+		cstats->start = now;
+		write_sequnlock_irqrestore(&cstats->lock, flags);
 	}
+	rcu_read_unlock();
 }
 
 ktime_t intel_context_get_busy_time(struct intel_context *ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index a5344d2c7c10..a9f8c5af8f81 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1609,6 +1609,7 @@  int intel_enable_engine_stats(struct intel_engine_cs *engine)
 		rq = *port;
 		if (rq)
 			__intel_context_stats_start(rq->context,
+						    engine,
 						    engine->stats.enabled_at);
 
 		for (; (rq = *port); port++)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 1cd38b6eb189..7eefa7fe160b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1197,19 +1197,23 @@  static void reset_active(struct i915_request *rq,
 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
 }
 
-static void intel_context_stats_start(struct intel_context *ce)
+static void intel_context_stats_start(struct intel_context *ce,
+				      struct intel_engine_cs *engine)
 {
 	struct intel_context_stats *stats = &ce->stats;
 	unsigned long flags;
 
 	write_seqlock_irqsave(&stats->lock, flags);
-	__intel_context_stats_start(ce, ktime_get());
+	__intel_context_stats_start(ce, engine, ktime_get());
 	write_sequnlock_irqrestore(&stats->lock, flags);
 }
 
-static void intel_context_stats_stop(struct intel_context *ce)
+static void intel_context_stats_stop(struct intel_context *ce,
+				     struct intel_engine_cs *engine)
 {
 	struct intel_context_stats *stats = &ce->stats;
+	struct i915_gem_context *ctx;
+	ktime_t now, runtime;
 	unsigned long flags;
 
 	if (!READ_ONCE(stats->active))
@@ -1217,10 +1221,25 @@  static void intel_context_stats_stop(struct intel_context *ce)
 
 	write_seqlock_irqsave(&stats->lock, flags);
 	GEM_BUG_ON(!READ_ONCE(stats->active));
-	stats->total = ktime_add(stats->total,
-				 ktime_sub(ktime_get(), stats->start));
+	now = ktime_get();
+	runtime = ktime_sub(now, stats->start);
+	stats->total = ktime_add(stats->total, runtime);
 	stats->active = false;
 	write_sequnlock_irqrestore(&stats->lock, flags);
+
+	rcu_read_lock();
+	ctx = rcu_dereference(ce->gem_context);
+	if (ctx && ctx->client) {
+		struct i915_drm_client_stats *cstats =
+			&ctx->client->stats[engine->uabi_class];
+
+		write_seqlock_irqsave(&cstats->lock, flags);
+		GEM_WARN_ON(cstats->active == 0);
+		cstats->busy += ktime_to_ns(runtime) * cstats->active--;
+		cstats->start = now;
+		write_sequnlock_irqrestore(&cstats->lock, flags);
+	}
+	rcu_read_unlock();
 }
 
 static inline struct intel_engine_cs *
@@ -1307,7 +1326,7 @@  __execlists_schedule_out(struct i915_request *rq,
 		intel_engine_add_retire(engine, ce->timeline);
 
 	intel_engine_context_out(engine);
-	intel_context_stats_stop(ce);
+	intel_context_stats_stop(ce, engine);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 	intel_gt_pm_put_async(engine->gt);
 
@@ -2367,7 +2386,7 @@  static void process_csb(struct intel_engine_cs *engine)
 
 		rq = *execlists->active;
 		if (rq)
-			intel_context_stats_start(rq->context);
+			intel_context_stats_start(rq->context, engine);
 	} while (head != tail);
 
 	execlists->csb_head = head;
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 770094ef2e67..d26583d5825f 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -121,7 +121,7 @@  i915_drm_client_add(struct i915_drm_clients *clients, struct task_struct *task)
 {
 	struct i915_drm_client *client;
 	u32 next = 0;
-	int ret;
+	int i, ret;
 
 	client = kzalloc(sizeof(*client), GFP_KERNEL);
 	if (!client)
@@ -129,6 +129,8 @@  i915_drm_client_add(struct i915_drm_clients *clients, struct task_struct *task)
 
 	kref_init(&client->kref);
 	mutex_init(&client->update_lock);
+	for (i = 0; i < ARRAY_SIZE(client->stats); i++)
+		seqlock_init(&client->stats[i].lock);
 	client->clients = clients;
 
 	ret = xa_alloc_cyclic(&clients->xarray, &client->id, client,
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index a9d4d7396e43..6361976a9f05 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -14,8 +14,11 @@ 
 #include <linux/pid.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
+#include <linux/seqlock.h>
 #include <linux/xarray.h>
 
+#include "gt/intel_engine_types.h"
+
 struct i915_drm_clients {
 	struct xarray xarray;
 	struct kobject *root;
@@ -33,6 +36,13 @@  struct i915_drm_client {
 	char __rcu *name;
 	bool closed;
 
+	struct i915_drm_client_stats {
+		seqlock_t lock;
+		unsigned int active;
+		ktime_t start;
+		u64 busy;
+	} stats[MAX_ENGINE_CLASS + 1];
+
 	struct i915_drm_clients *clients;
 
 	struct kobject *root;