Message ID | 20180426174932.23127-1-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 26/04/2018 18:49, Chris Wilson wrote: > In commit 9b6586ae9f6b ("drm/i915: Keep a global seqno per-engine"), we > moved from a global inflight counter to per-engine counters in the > hope that will be easy to run concurrently in future. However, with the > advent of the desire to move requests between engines, we do need a > global counter to preserve the semantics that no engine wraps in the > middle of a submit. (Although this semantic is now only required for gen7 > semaphore support, which only supports greater-then comparisons!) > > v2: Keep a global counter of all requests ever submitted and force the > reset when it wraps. > > References: 9b6586ae9f6b ("drm/i915: Keep a global seqno per-engine") > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 5 ++-- > drivers/gpu/drm/i915/i915_drv.h | 1 + > drivers/gpu/drm/i915/i915_gem_timeline.h | 6 ----- > drivers/gpu/drm/i915/i915_request.c | 33 ++++++++++++------------ > drivers/gpu/drm/i915/intel_engine_cs.c | 5 ++-- > 5 files changed, 22 insertions(+), 28 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 1c88805d3354..83c86257fe1c 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -1340,10 +1340,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) > struct rb_node *rb; > > seq_printf(m, "%s:\n", engine->name); > - seq_printf(m, "\tseqno = %x [current %x, last %x], inflight %d\n", > + seq_printf(m, "\tseqno = %x [current %x, last %x]\n", > engine->hangcheck.seqno, seqno[id], > - intel_engine_last_submit(engine), > - engine->timeline->inflight_seqnos); > + intel_engine_last_submit(engine)); > seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? 
%s\n", > yesno(intel_engine_has_waiter(engine)), > yesno(test_bit(engine->id, > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 8444ca8d5aa3..8fd9fb6efba5 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2061,6 +2061,7 @@ struct drm_i915_private { > struct list_head timelines; > struct i915_gem_timeline global_timeline; > u32 active_requests; > + u32 request_serial; > > /** > * Is the GPU currently considered idle, or busy executing > diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h > index 33e01bf6aa36..6e82119e2cd8 100644 > --- a/drivers/gpu/drm/i915/i915_gem_timeline.h > +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h > @@ -37,12 +37,6 @@ struct intel_timeline { > u64 fence_context; > u32 seqno; > > - /** > - * Count of outstanding requests, from the time they are constructed > - * to the moment they are retired. Loosely coupled to hardware. > - */ > - u32 inflight_seqnos; > - > spinlock_t lock; > > /** > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index b692a9f7c357..b1993d4a1a53 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -241,6 +241,7 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) > sizeof(timeline->engine[id].global_sync)); > } > > + i915->gt.request_serial = seqno; > return 0; > } > > @@ -257,18 +258,22 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) > return reset_all_global_seqno(i915, seqno - 1); > } > > -static int reserve_engine(struct intel_engine_cs *engine) > +static int reserve_gt(struct drm_i915_private *i915) > { > - struct drm_i915_private *i915 = engine->i915; > - u32 active = ++engine->timeline->inflight_seqnos; > - u32 seqno = engine->timeline->seqno; > int ret; > > - /* Reservation is fine until we need to wrap around */ > - if (unlikely(add_overflows(seqno, 
active))) { > + /* > + * Reservation is fine until we may need to wrap around > + * > + * By incrementing the serial for every request, we know that no > + * individual engine may exceed that serial (as each is reset to 0 > + * on any wrap). This protects even the most pessimistic of migrations > + * of every request from all engines onto just one. > + */ > + while (unlikely(++i915->gt.request_serial == 0)) { > ret = reset_all_global_seqno(i915, 0); > if (ret) { > - engine->timeline->inflight_seqnos--; > + i915->gt.request_serial--; > return ret; > } > } > @@ -279,15 +284,10 @@ static int reserve_engine(struct intel_engine_cs *engine) > return 0; > } > > -static void unreserve_engine(struct intel_engine_cs *engine) > +static void unreserve_gt(struct drm_i915_private *i915) > { > - struct drm_i915_private *i915 = engine->i915; > - > if (!--i915->gt.active_requests) > i915_gem_park(i915); > - > - GEM_BUG_ON(!engine->timeline->inflight_seqnos); > - engine->timeline->inflight_seqnos--; > } > > void i915_gem_retire_noop(struct i915_gem_active *active, > @@ -362,7 +362,6 @@ static void i915_request_retire(struct i915_request *request) > list_del_init(&request->link); > spin_unlock_irq(&engine->timeline->lock); > > - unreserve_engine(request->engine); > advance_ring(request); > > free_capture_list(request); > @@ -424,6 +423,8 @@ static void i915_request_retire(struct i915_request *request) > } > spin_unlock_irq(&request->lock); > > + unreserve_gt(request->i915); > + > i915_sched_node_fini(request->i915, &request->sched); > i915_request_put(request); > } > @@ -642,7 +643,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) > return ERR_CAST(ring); > GEM_BUG_ON(!ring); > > - ret = reserve_engine(engine); > + ret = reserve_gt(i915); > if (ret) > goto err_unpin; > > @@ -784,7 +785,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) > > kmem_cache_free(i915->requests, rq); > err_unreserve: > - 
unreserve_engine(engine); > + unreserve_gt(i915); > err_unpin: > engine->context_unpin(engine, ctx); > return ERR_PTR(ret); > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c > index ac009f10c948..eba81d55dc3a 100644 > --- a/drivers/gpu/drm/i915/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c > @@ -1321,12 +1321,11 @@ void intel_engine_dump(struct intel_engine_cs *engine, > if (i915_terminally_wedged(&engine->i915->gpu_error)) > drm_printf(m, "*** WEDGED ***\n"); > > - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", > + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", > intel_engine_get_seqno(engine), > intel_engine_last_submit(engine), > engine->hangcheck.seqno, > - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), > - engine->timeline->inflight_seqnos); > + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); > drm_printf(m, "\tReset count: %d (global %d)\n", > i915_reset_engine_count(error, engine), > i915_reset_count(error)); > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1c88805d3354..83c86257fe1c 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1340,10 +1340,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) struct rb_node *rb; seq_printf(m, "%s:\n", engine->name); - seq_printf(m, "\tseqno = %x [current %x, last %x], inflight %d\n", + seq_printf(m, "\tseqno = %x [current %x, last %x]\n", engine->hangcheck.seqno, seqno[id], - intel_engine_last_submit(engine), - engine->timeline->inflight_seqnos); + intel_engine_last_submit(engine)); seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s\n", yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8444ca8d5aa3..8fd9fb6efba5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2061,6 +2061,7 @@ struct drm_i915_private { struct list_head timelines; struct i915_gem_timeline global_timeline; u32 active_requests; + u32 request_serial; /** * Is the GPU currently considered idle, or busy executing diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index 33e01bf6aa36..6e82119e2cd8 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -37,12 +37,6 @@ struct intel_timeline { u64 fence_context; u32 seqno; - /** - * Count of outstanding requests, from the time they are constructed - * to the moment they are retired. Loosely coupled to hardware. 
- */ - u32 inflight_seqnos; - spinlock_t lock; /** diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index b692a9f7c357..b1993d4a1a53 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -241,6 +241,7 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) sizeof(timeline->engine[id].global_sync)); } + i915->gt.request_serial = seqno; return 0; } @@ -257,18 +258,22 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) return reset_all_global_seqno(i915, seqno - 1); } -static int reserve_engine(struct intel_engine_cs *engine) +static int reserve_gt(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = engine->i915; - u32 active = ++engine->timeline->inflight_seqnos; - u32 seqno = engine->timeline->seqno; int ret; - /* Reservation is fine until we need to wrap around */ - if (unlikely(add_overflows(seqno, active))) { + /* + * Reservation is fine until we may need to wrap around + * + * By incrementing the serial for every request, we know that no + * individual engine may exceed that serial (as each is reset to 0 + * on any wrap). This protects even the most pessimistic of migrations + * of every request from all engines onto just one. 
+ */ + while (unlikely(++i915->gt.request_serial == 0)) { ret = reset_all_global_seqno(i915, 0); if (ret) { - engine->timeline->inflight_seqnos--; + i915->gt.request_serial--; return ret; } } @@ -279,15 +284,10 @@ static int reserve_engine(struct intel_engine_cs *engine) return 0; } -static void unreserve_engine(struct intel_engine_cs *engine) +static void unreserve_gt(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = engine->i915; - if (!--i915->gt.active_requests) i915_gem_park(i915); - - GEM_BUG_ON(!engine->timeline->inflight_seqnos); - engine->timeline->inflight_seqnos--; } void i915_gem_retire_noop(struct i915_gem_active *active, @@ -362,7 +362,6 @@ static void i915_request_retire(struct i915_request *request) list_del_init(&request->link); spin_unlock_irq(&engine->timeline->lock); - unreserve_engine(request->engine); advance_ring(request); free_capture_list(request); @@ -424,6 +423,8 @@ static void i915_request_retire(struct i915_request *request) } spin_unlock_irq(&request->lock); + unreserve_gt(request->i915); + i915_sched_node_fini(request->i915, &request->sched); i915_request_put(request); } @@ -642,7 +643,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) return ERR_CAST(ring); GEM_BUG_ON(!ring); - ret = reserve_engine(engine); + ret = reserve_gt(i915); if (ret) goto err_unpin; @@ -784,7 +785,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) kmem_cache_free(i915->requests, rq); err_unreserve: - unreserve_engine(engine); + unreserve_gt(i915); err_unpin: engine->context_unpin(engine, ctx); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ac009f10c948..eba81d55dc3a 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1321,12 +1321,11 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (i915_terminally_wedged(&engine->i915->gpu_error)) 
drm_printf(m, "*** WEDGED ***\n"); - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), engine->hangcheck.seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), - engine->timeline->inflight_seqnos); + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), i915_reset_count(error));
In commit 9b6586ae9f6b ("drm/i915: Keep a global seqno per-engine"), we moved from a global inflight counter to per-engine counters in the hope that it will be easy to run concurrently in future. However, with the advent of the desire to move requests between engines, we do need a global counter to preserve the semantics that no engine wraps in the middle of a submit. (Although this semantic is now only required for gen7 semaphore support, which only supports greater-than comparisons!) v2: Keep a global counter of all requests ever submitted and force the reset when it wraps. References: 9b6586ae9f6b ("drm/i915: Keep a global seqno per-engine") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> --- drivers/gpu/drm/i915/i915_debugfs.c | 5 ++-- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_timeline.h | 6 ----- drivers/gpu/drm/i915/i915_request.c | 33 ++++++++++++------------ drivers/gpu/drm/i915/intel_engine_cs.c | 5 ++-- 5 files changed, 22 insertions(+), 28 deletions(-)