Message ID | 20180405123923.22671-3-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Quoting Tvrtko Ursulin (2018-04-05 13:39:18) > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Keep a per-engine number of runnable (waiting for GPU time) requests. > > v2: > * Move queued increment from insert_request to execlist_submit_request to > avoid bumping when re-ordering for priority. > * Support the counter on the ringbuffer submission path as well, albeit > just notionally. (Chris Wilson) > > v3: > * Rebase. > > v4: > * Rename and move the stats into a container structure. (Chris Wilson) > > v5: > * Re-order fields in struct intel_engine_cs. (Chris Wilson) > > v6-v8: > * Rebases. > > v9: > * Fix accounting during wedging. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/gpu/drm/i915/i915_gem.c | 1 + > drivers/gpu/drm/i915/i915_request.c | 7 +++++++ > drivers/gpu/drm/i915/intel_engine_cs.c | 5 +++-- > drivers/gpu/drm/i915/intel_lrc.c | 1 + > drivers/gpu/drm/i915/intel_ringbuffer.h | 9 +++++++++ > 5 files changed, 21 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 9650a7b10c5f..63f334d5f7fd 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3211,6 +3211,7 @@ static void nop_complete_submit_request(struct i915_request *request) > dma_fence_set_error(&request->fence, -EIO); > > spin_lock_irqsave(&request->engine->timeline->lock, flags); > + request->engine->request_stats.runnable++; > __i915_request_submit(request); > intel_engine_init_global_seqno(request->engine, request->global_seqno); > spin_unlock_irqrestore(&request->engine->timeline->lock, flags); > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index 585242831974..5c01291ad1cc 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -540,6 +540,9 @@ void __i915_request_submit(struct i915_request *request) > /* Transfer from per-context onto the global per-engine timeline 
 */ > move_to_timeline(request, engine->timeline); > > + GEM_BUG_ON(engine->request_stats.runnable == 0); > + engine->request_stats.runnable--; > + > trace_i915_request_execute(request); > > wake_up_all(&request->execute); > @@ -553,6 +556,8 @@ void i915_request_submit(struct i915_request *request) > /* Will be called from irq-context when using foreign fences. */ > spin_lock_irqsave(&engine->timeline->lock, flags); > > + engine->request_stats.runnable++; Hmm, I was thinking this should be in submit_notify(), as you want to count from when all fences are signaled. But you are using the timeline lock as its guard? The only downside is having to repeat the inc in each path. And with the slight disparity for unsubmit. Not a blocker, just had to actually think about what you were doing, so maybe discuss that upfront in the commit msg. -Chris
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9650a7b10c5f..63f334d5f7fd 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3211,6 +3211,7 @@ static void nop_complete_submit_request(struct i915_request *request) dma_fence_set_error(&request->fence, -EIO); spin_lock_irqsave(&request->engine->timeline->lock, flags); + request->engine->request_stats.runnable++; __i915_request_submit(request); intel_engine_init_global_seqno(request->engine, request->global_seqno); spin_unlock_irqrestore(&request->engine->timeline->lock, flags); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 585242831974..5c01291ad1cc 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -540,6 +540,9 @@ void __i915_request_submit(struct i915_request *request) /* Transfer from per-context onto the global per-engine timeline */ move_to_timeline(request, engine->timeline); + GEM_BUG_ON(engine->request_stats.runnable == 0); + engine->request_stats.runnable--; + trace_i915_request_execute(request); wake_up_all(&request->execute); @@ -553,6 +556,8 @@ void i915_request_submit(struct i915_request *request) /* Will be called from irq-context when using foreign fences. 
*/ spin_lock_irqsave(&engine->timeline->lock, flags); + engine->request_stats.runnable++; + __i915_request_submit(request); spin_unlock_irqrestore(&engine->timeline->lock, flags); @@ -591,6 +596,8 @@ void __i915_request_unsubmit(struct i915_request *request) /* Transfer back from the global per-engine timeline to per-context */ move_to_timeline(request, request->timeline); + engine->request_stats.runnable++; + /* * We don't need to wake_up any waiters on request->execute, they * will get woken by any other event or us re-adding this request diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 12486d8f534b..98254ff92785 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1934,12 +1934,13 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (i915_terminally_wedged(&engine->i915->gpu_error)) drm_printf(m, "*** WEDGED ***\n"); - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d, runnable %u\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), engine->hangcheck.seqno, jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), - engine->timeline->inflight_seqnos); + engine->timeline->inflight_seqnos, + engine->request_stats.runnable); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), i915_reset_count(error)); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 3592288e4696..f6631ff11caf 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1113,6 +1113,7 @@ static void execlists_submit_request(struct i915_request *request) queue_request(engine, &request->priotree, rq_prio(request)); submit_queue(engine, rq_prio(request)); + engine->request_stats.runnable++; GEM_BUG_ON(!engine->execlists.first); 
GEM_BUG_ON(list_empty(&request->priotree.link)); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 0c548c400699..54d2ad1c8daa 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -338,6 +338,15 @@ struct intel_engine_cs { struct drm_i915_gem_object *default_state; + struct { + /** + * @runnable: Number of runnable requests sent to the backend. + * + * Count of requests waiting for the GPU to execute them. + */ + unsigned int runnable; + } request_stats; + atomic_t irq_count; unsigned long irq_posted; #define ENGINE_IRQ_BREADCRUMB 0