Message ID | 20200110133049.2705-8-tvrtko.ursulin@linux.intel.com (mailing list archive)
---|---
State | New, archived
Series | Per client engine busyness
Quoting Tvrtko Ursulin (2020-01-10 13:30:48)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> GPU saves accumulated context runtime (in CS timestamp units) in PPHWSP
> which will be useful for us in cases when we are not able to track context
> busyness ourselves (like with GuC). Keep a copy of this in struct
> intel_context from where it can be easily read even if the context is not
> pinned.
>
> QQQ: Do we want to make this accounting conditional / able to turn on/off?
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_context.h       | 7 +++++++
>  drivers/gpu/drm/i915/gt/intel_context_types.h | 5 +++++
>  drivers/gpu/drm/i915/gt/intel_lrc.c           | 9 +++++++++
>  drivers/gpu/drm/i915/intel_device_info.c      | 2 ++
>  drivers/gpu/drm/i915/intel_device_info.h      | 1 +
>  5 files changed, 24 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index 30f0268fcc9a..389a05736fc7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -12,6 +12,7 @@
>  #include <linux/types.h>
>
>  #include "i915_active.h"
> +#include "i915_drv.h"
>  #include "intel_context_types.h"
>  #include "intel_engine_types.h"
>  #include "intel_ring_types.h"
> @@ -235,4 +236,10 @@ __intel_context_stats_start(struct intel_context_stats *stats, ktime_t now)
>
>  ktime_t intel_context_get_busy_time(struct intel_context *ce);
>
> +static inline u64 intel_context_get_hw_runtime_ns(struct intel_context *ce)
> +{
> +	return ce->total_runtime *
> +	       RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;
> +}
> +
>  #endif /* __INTEL_CONTEXT_H__ */
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index 963d33dc5289..7b08bf87fb82 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -69,6 +69,11 @@ struct intel_context {
>  	u64 lrc_desc;
>  	u32 tag; /* cookie passed to HW to track this context on submission */
>
> +	/* Time on GPU as tracked by the hw. */
> +	u32 last_runtime;
> +	u64 total_runtime;
> +	u32 *pphwsp;

I wouldn't bother with keeping pphwsp, we know it's the page before the
reg state. At least for the foreseeable future.

> +
>  	unsigned int active_count; /* protected by timeline->mutex */
>
>  	atomic_t pin_count;
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index dd559547500f..26999b43e5a1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1289,6 +1289,7 @@ __execlists_schedule_out(struct i915_request *rq,
>  			 struct intel_engine_cs * const engine)
>  {
>  	struct intel_context *ce = rq->context;
> +	u32 old, new;
>
>  	/*
>  	 * NB process_csb() is not under the engine->active.lock and hence
> @@ -1309,6 +1310,13 @@ __execlists_schedule_out(struct i915_request *rq,
>  	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
>  	intel_gt_pm_put_async(engine->gt);
>
> +	old = ce->last_runtime;
> +	ce->last_runtime = new = ce->pphwsp[16];
> +	if (new > old)
> +		ce->total_runtime += new - old;
> +	else
> +		ce->total_runtime += (~0UL - old) + new + 1;

It's u32, unsigned wrap-around arithmetic is defined, so just

	ce->total_runtime += new - old;

> +
>  	/*
>  	 * If this is part of a virtual engine, its next request may
>  	 * have been blocked waiting for access to the active context.
> @@ -2608,6 +2616,7 @@ __execlists_context_pin(struct intel_context *ce,
>
>  	ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
>  	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
> +	ce->pphwsp = vaddr + LRC_PPHWSP_PN * PAGE_SIZE;
>  	__execlists_update_reg_state(ce, engine);
>
>  	return 0;
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index 6670a0763be2..7732748e1939 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -1042,6 +1042,8 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>
>  	/* Initialize command stream timestamp frequency */
>  	runtime->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
> +	runtime->cs_timestamp_period_ns =
> +		div_u64(1e6, runtime->cs_timestamp_frequency_khz);

	drm_debug(&dev_priv->drm, "CS timestamp wraparound in %lld\n",
		  div_u64(U32_MAX * runtime->cs_timestamp_period_ns,
			  NSEC_PER_SEC));
-Chris
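Two quick illustrations of the review points above. First, a sketch of the suggestion to drop the cached pphwsp pointer: assuming the LRC layout this series targets, where the register state page immediately follows the PPHWSP (LRC_STATE_PN == LRC_PPHWSP_PN + 1), the runtime dword can be derived from lrc_reg_state on demand. The helper name is hypothetical, not from the patch:

	/*
	 * Hypothetical helper, assuming the PPHWSP is the page directly
	 * before the register state, as Chris notes above. Dword 16 is
	 * where the patch reads the accumulated runtime from.
	 */
	static u32 lrc_pphwsp_runtime(const struct intel_context *ce)
	{
		const u32 *pphwsp =
			(const u32 *)((const char *)ce->lrc_reg_state - PAGE_SIZE);

		return pphwsp[16]; /* context runtime in CS timestamp ticks */
	}

Second, a standalone userspace demo (illustrative values, any C compiler) of why plain `new - old` is already wrap-safe for u32 counters: unsigned subtraction is defined modulo 2^32, so it yields the elapsed ticks even when the counter wraps between samples:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t old = 0xfffffff0;	/* sample just before the wrap */
		uint32_t new = 0x00000010;	/* sample just after the wrap */
		uint64_t total = 0;

		/* 0x10 - 0xfffffff0 == 0x20 modulo 2^32: 32 elapsed ticks. */
		total += (uint32_t)(new - old);

		printf("elapsed ticks: %llu\n", (unsigned long long)total);
		return 0;
	}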
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 30f0268fcc9a..389a05736fc7 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -12,6 +12,7 @@
 #include <linux/types.h>

 #include "i915_active.h"
+#include "i915_drv.h"
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
 #include "intel_ring_types.h"
@@ -235,4 +236,10 @@ __intel_context_stats_start(struct intel_context_stats *stats, ktime_t now)

 ktime_t intel_context_get_busy_time(struct intel_context *ce);

+static inline u64 intel_context_get_hw_runtime_ns(struct intel_context *ce)
+{
+	return ce->total_runtime *
+	       RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;
+}
+
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 963d33dc5289..7b08bf87fb82 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -69,6 +69,11 @@ struct intel_context {
 	u64 lrc_desc;
 	u32 tag; /* cookie passed to HW to track this context on submission */

+	/* Time on GPU as tracked by the hw. */
+	u32 last_runtime;
+	u64 total_runtime;
+	u32 *pphwsp;
+
 	unsigned int active_count; /* protected by timeline->mutex */

 	atomic_t pin_count;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index dd559547500f..26999b43e5a1 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1289,6 +1289,7 @@ __execlists_schedule_out(struct i915_request *rq,
 			 struct intel_engine_cs * const engine)
 {
 	struct intel_context *ce = rq->context;
+	u32 old, new;

 	/*
 	 * NB process_csb() is not under the engine->active.lock and hence
@@ -1309,6 +1310,13 @@ __execlists_schedule_out(struct i915_request *rq,
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 	intel_gt_pm_put_async(engine->gt);

+	old = ce->last_runtime;
+	ce->last_runtime = new = ce->pphwsp[16];
+	if (new > old)
+		ce->total_runtime += new - old;
+	else
+		ce->total_runtime += (~0UL - old) + new + 1;
+
 	/*
 	 * If this is part of a virtual engine, its next request may
 	 * have been blocked waiting for access to the active context.
@@ -2608,6 +2616,7 @@ __execlists_context_pin(struct intel_context *ce,

 	ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+	ce->pphwsp = vaddr + LRC_PPHWSP_PN * PAGE_SIZE;
 	__execlists_update_reg_state(ce, engine);

 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 6670a0763be2..7732748e1939 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -1042,6 +1042,8 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)

 	/* Initialize command stream timestamp frequency */
 	runtime->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
+	runtime->cs_timestamp_period_ns =
+		div_u64(1e6, runtime->cs_timestamp_frequency_khz);
 }

 void intel_driver_caps_print(const struct intel_driver_caps *caps,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 2725cb7fc169..9ec816dbc418 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -216,6 +216,7 @@ struct intel_runtime_info {
 	struct sseu_dev_info sseu;

 	u32 cs_timestamp_frequency_khz;
+	u32 cs_timestamp_period_ns;

 	/* Media engine access to SFC per instance */
 	u8 vdbox_sfc_access;
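To put concrete numbers on the new cs_timestamp_period_ns field and on the wraparound horizon the suggested drm_debug line would report: with the frequency in kHz, the period is 1e6 / f nanoseconds, and a u32 timestamp wraps after about 2^32 periods. A standalone sketch, using an assumed 12 MHz command stream clock purely for illustration:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t freq_khz = 12000;	/* assumed 12 MHz CS timestamp clock */
		uint32_t period_ns = 1000000 / freq_khz;	/* 1e6 / kHz -> 83 ns */
		uint64_t wrap_ns = (uint64_t)UINT32_MAX * period_ns;

		/* ~356 s, i.e. just under six minutes at 12 MHz. */
		printf("period %u ns, u32 timestamp wraps after ~%llu s\n",
		       period_ns, (unsigned long long)(wrap_ns / 1000000000ULL));
		return 0;
	}

That horizon also suggests why sampling at context schedule-out can be enough here: the u32 delta only mis-accounts if a single context stays resident for longer than one full wrap period between samples.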