Message ID | 20200207161331.23447-6-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Per client engine busyness | expand |
Quoting Tvrtko Ursulin (2020-02-07 16:13:30) > static inline void > -__intel_context_stats_start(struct intel_context *ce, ktime_t now) > +__intel_context_stats_start(struct intel_context *ce, > + struct intel_engine_cs *engine, > + ktime_t now) > { > struct intel_context_stats *stats = &ce->stats; > - > - if (!stats->active) { > - stats->start = now; > - stats->active = true; > + struct i915_gem_context *ctx; > + > + if (stats->active) > + return; > + > + stats->start = now; > + stats->active = true; > + > + rcu_read_lock(); > + ctx = rcu_dereference(ce->gem_context); > + if (ctx && ctx->client) { I'd rather avoid having to dig into the GEM context down here next to the HW. First thought would be to keep the stats local on the intel_context and for the client to chase collate them when the user reads the fd. Hmm, didn't you structure it like so earlier? What made you change your mind? -Chris
On 07/02/2020 16:33, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2020-02-07 16:13:30) >> static inline void >> -__intel_context_stats_start(struct intel_context *ce, ktime_t now) >> +__intel_context_stats_start(struct intel_context *ce, >> + struct intel_engine_cs *engine, >> + ktime_t now) >> { >> struct intel_context_stats *stats = &ce->stats; >> - >> - if (!stats->active) { >> - stats->start = now; >> - stats->active = true; >> + struct i915_gem_context *ctx; >> + >> + if (stats->active) >> + return; >> + >> + stats->start = now; >> + stats->active = true; >> + >> + rcu_read_lock(); >> + ctx = rcu_dereference(ce->gem_context); >> + if (ctx && ctx->client) { > > I'd rather avoid having to dig into the GEM context down here next to > the HW. > > First thought would be to keep the stats local on the intel_context and > for the client to chase collate them when the user reads the fd. > > Hmm, didn't you structure it like so earlier? What made you change your > mind? Yes, it's in the cover letter - we must not have disappearing contributions - client can submit from one context for a bit, close it, and oops usage history lost. ce->drm_client? :) Regards, Tvrtko
Quoting Tvrtko Ursulin (2020-02-07 16:49:17) > > On 07/02/2020 16:33, Chris Wilson wrote: > > Quoting Tvrtko Ursulin (2020-02-07 16:13:30) > >> static inline void > >> -__intel_context_stats_start(struct intel_context *ce, ktime_t now) > >> +__intel_context_stats_start(struct intel_context *ce, > >> + struct intel_engine_cs *engine, > >> + ktime_t now) > >> { > >> struct intel_context_stats *stats = &ce->stats; > >> - > >> - if (!stats->active) { > >> - stats->start = now; > >> - stats->active = true; > >> + struct i915_gem_context *ctx; > >> + > >> + if (stats->active) > >> + return; > >> + > >> + stats->start = now; > >> + stats->active = true; > >> + > >> + rcu_read_lock(); > >> + ctx = rcu_dereference(ce->gem_context); > >> + if (ctx && ctx->client) { > > > > I'd rather avoid having to dig into the GEM context down here next to > > the HW. > > > > First thought would be to keep the stats local on the intel_context and > > for the client to chase collate them when the user reads the fd. > > > > Hmm, didn't you structure it like so earlier? What made you change your > > mind? > > Yes, it's in the cover letter - we must not have disappearing > contributions - client can submit from one context for a bit, close it, > and oops usage history lost. If only we had a mechanism for accumulating contributions from stale engines and then from stale contexts? > ce->drm_client? :) Yeah that is equally icky for swimming upstream. Ok, I have an idea for capturing the contributions at close rather than having to do global accumulations at runtime. -Chris
On 07/02/2020 17:02, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2020-02-07 16:49:17) >> >> On 07/02/2020 16:33, Chris Wilson wrote: >>> Quoting Tvrtko Ursulin (2020-02-07 16:13:30) >>>> static inline void >>>> -__intel_context_stats_start(struct intel_context *ce, ktime_t now) >>>> +__intel_context_stats_start(struct intel_context *ce, >>>> + struct intel_engine_cs *engine, >>>> + ktime_t now) >>>> { >>>> struct intel_context_stats *stats = &ce->stats; >>>> - >>>> - if (!stats->active) { >>>> - stats->start = now; >>>> - stats->active = true; >>>> + struct i915_gem_context *ctx; >>>> + >>>> + if (stats->active) >>>> + return; >>>> + >>>> + stats->start = now; >>>> + stats->active = true; >>>> + >>>> + rcu_read_lock(); >>>> + ctx = rcu_dereference(ce->gem_context); >>>> + if (ctx && ctx->client) { >>> >>> I'd rather avoid having to dig into the GEM context down here next to >>> the HW. >>> >>> First thought would be to keep the stats local on the intel_context and >>> for the client to chase collate them when the user reads the fd. >>> >>> Hmm, didn't you structure it like so earlier? What made you change your >>> mind? >> >> Yes, it's in the cover letter - we must not have disappearing >> contributions - client can submit from one context for a bit, close it, >> and oops usage history lost. > > If only we had a mechanism for accumulating contributions from stale > engines and then from stale contests? > >> ce->drm_client? :) > > Yeah that is equally icky for swimming upstream. > > Ok, I have an idea for capturing the contributions at close rather than > having to do global accumulations at runtime. It's certainly possible by accumulating over context runtimes when they are closed and adding to active runtime at readout. I contemplated that solution as well. Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 871196aa7d5a..a0a77a590566 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -12,6 +12,7 @@ #include <linux/types.h> #include "i915_active.h" +#include "i915_drm_client.h" #include "intel_context_types.h" #include "intel_engine_types.h" #include "intel_ring_types.h" @@ -228,14 +229,35 @@ intel_context_clear_nopreempt(struct intel_context *ce) } static inline void -__intel_context_stats_start(struct intel_context *ce, ktime_t now) +__intel_context_stats_start(struct intel_context *ce, + struct intel_engine_cs *engine, + ktime_t now) { struct intel_context_stats *stats = &ce->stats; - - if (!stats->active) { - stats->start = now; - stats->active = true; + struct i915_gem_context *ctx; + + if (stats->active) + return; + + stats->start = now; + stats->active = true; + + rcu_read_lock(); + ctx = rcu_dereference(ce->gem_context); + if (ctx && ctx->client) { + struct i915_drm_client_stats *cstats = + &ctx->client->stats[engine->uabi_class]; + ktime_t now = ktime_get(); + unsigned long flags; + + write_seqlock_irqsave(&cstats->lock, flags); + cstats->busy += ktime_to_ns(ktime_sub(now, cstats->start)) * + cstats->active++; + GEM_WARN_ON(cstats->active == 0); + cstats->start = now; + write_sequnlock_irqrestore(&cstats->lock, flags); } + rcu_read_unlock(); } ktime_t intel_context_get_busy_time(struct intel_context *ce); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index a5344d2c7c10..a9f8c5af8f81 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1609,6 +1609,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) rq = *port; if (rq) __intel_context_stats_start(rq->context, + engine, engine->stats.enabled_at); for (; (rq = *port); port++) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c index 1cd38b6eb189..7eefa7fe160b 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1197,19 +1197,23 @@ static void reset_active(struct i915_request *rq, ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; } -static void intel_context_stats_start(struct intel_context *ce) +static void intel_context_stats_start(struct intel_context *ce, + struct intel_engine_cs *engine) { struct intel_context_stats *stats = &ce->stats; unsigned long flags; write_seqlock_irqsave(&stats->lock, flags); - __intel_context_stats_start(ce, ktime_get()); + __intel_context_stats_start(ce, engine, ktime_get()); write_sequnlock_irqrestore(&stats->lock, flags); } -static void intel_context_stats_stop(struct intel_context *ce) +static void intel_context_stats_stop(struct intel_context *ce, + struct intel_engine_cs *engine) { struct intel_context_stats *stats = &ce->stats; + struct i915_gem_context *ctx; + ktime_t now, runtime; unsigned long flags; if (!READ_ONCE(stats->active)) @@ -1217,10 +1221,25 @@ static void intel_context_stats_stop(struct intel_context *ce) write_seqlock_irqsave(&stats->lock, flags); GEM_BUG_ON(!READ_ONCE(stats->active)); - stats->total = ktime_add(stats->total, - ktime_sub(ktime_get(), stats->start)); + now = ktime_get(); + runtime = ktime_sub(now, stats->start); + stats->total = ktime_add(stats->total, runtime); stats->active = false; write_sequnlock_irqrestore(&stats->lock, flags); + + rcu_read_lock(); + ctx = rcu_dereference(ce->gem_context); + if (ctx && ctx->client) { + struct i915_drm_client_stats *cstats = + &ctx->client->stats[engine->uabi_class]; + + write_seqlock_irqsave(&cstats->lock, flags); + GEM_WARN_ON(cstats->active == 0); + cstats->busy += ktime_to_ns(runtime) * cstats->active--; + cstats->start = now; + write_sequnlock_irqrestore(&cstats->lock, flags); + } + rcu_read_unlock(); } static inline struct intel_engine_cs * @@ -1307,7 +1326,7 @@ __execlists_schedule_out(struct i915_request 
*rq, intel_engine_add_retire(engine, ce->timeline); intel_engine_context_out(engine); - intel_context_stats_stop(ce); + intel_context_stats_stop(ce, engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); intel_gt_pm_put_async(engine->gt); @@ -2367,7 +2386,7 @@ static void process_csb(struct intel_engine_cs *engine) rq = *execlists->active; if (rq) - intel_context_stats_start(rq->context); + intel_context_stats_start(rq->context, engine); } while (head != tail); execlists->csb_head = head; diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c index 770094ef2e67..d26583d5825f 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.c +++ b/drivers/gpu/drm/i915/i915_drm_client.c @@ -121,7 +121,7 @@ i915_drm_client_add(struct i915_drm_clients *clients, struct task_struct *task) { struct i915_drm_client *client; u32 next = 0; - int ret; + int i, ret; client = kzalloc(sizeof(*client), GFP_KERNEL); if (!client) @@ -129,6 +129,8 @@ i915_drm_client_add(struct i915_drm_clients *clients, struct task_struct *task) kref_init(&client->kref); mutex_init(&client->update_lock); + for (i = 0; i < ARRAY_SIZE(client->stats); i++) + seqlock_init(&client->stats[i].lock); client->clients = clients; ret = xa_alloc_cyclic(&clients->xarray, &client->id, client, diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h index a9d4d7396e43..6361976a9f05 100644 --- a/drivers/gpu/drm/i915/i915_drm_client.h +++ b/drivers/gpu/drm/i915/i915_drm_client.h @@ -14,8 +14,11 @@ #include <linux/pid.h> #include <linux/rcupdate.h> #include <linux/sched.h> +#include <linux/seqlock.h> #include <linux/xarray.h> +#include "gt/intel_engine_types.h" + struct i915_drm_clients { struct xarray xarray; struct kobject *root; @@ -33,6 +36,13 @@ struct i915_drm_client { char __rcu *name; bool closed; + struct i915_drm_client_stats { + seqlock_t lock; + unsigned int active; + ktime_t start; + u64 busy; + } stats[MAX_ENGINE_CLASS + 1]; + 
struct i915_drm_clients *clients; struct kobject *root;