Message ID | 20170222114610.5819-2-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 22/02/2017 11:45, Chris Wilson wrote: > Replace the global device seqno with one for each engine, and account > for in-flight seqno on each separately. This is consistent with > dma-fence as each timeline has separate fence-contexts for each engine > and a seqno is only ordered within a fence-context (i.e. seqno do not > need to be ordered wrt to other engines, just ordered within a single > engine). This is required to enable request rewinding for preemption on > individual engines (we have to rewind the global seqno to avoid > overflow, and we do not have to rewind all engines just to preempt one.) > > v2: Rename active_seqno to inflight_seqnos to more clearly indicate that > it is a counter and not equivalent to the existing seqno. Update > functions that operated on active_seqno similarly. Missed a couple details but never mind. Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 11 +---- > drivers/gpu/drm/i915/i915_gem_request.c | 83 +++++++++++++++++--------------- > drivers/gpu/drm/i915/i915_gem_request.h | 8 +-- > drivers/gpu/drm/i915/i915_gem_timeline.h | 9 +++- > drivers/gpu/drm/i915/intel_breadcrumbs.c | 33 ++++++------- > drivers/gpu/drm/i915/intel_engine_cs.c | 2 - > drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +- > 7 files changed, 70 insertions(+), 80 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 655e60d609c2..1a28b5279bec 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -1080,15 +1080,6 @@ static const struct file_operations i915_error_state_fops = { > #endif > > static int > -i915_next_seqno_get(void *data, u64 *val) > -{ > - struct drm_i915_private *dev_priv = data; > - > - *val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno); > - return 0; > -} > - > -static int > i915_next_seqno_set(void *data, u64 val) > { > struct drm_i915_private *dev_priv = data; > @@ -1106,7 +1097,7 @@ i915_next_seqno_set(void *data, u64 val) > } > > DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, > - i915_next_seqno_get, i915_next_seqno_set, > + NULL, i915_next_seqno_set, > "0x%llx\n"); > > static int i915_frequency_info(struct seq_file *m, void *unused) > diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c > index 51bc24d1d7c9..e8b354cf2f06 100644 > --- a/drivers/gpu/drm/i915/i915_gem_request.c > +++ b/drivers/gpu/drm/i915/i915_gem_request.c > @@ -198,6 +198,12 @@ i915_priotree_init(struct i915_priotree *pt) > pt->priority = INT_MIN; > } > > +static void unreserve_seqno(struct intel_engine_cs *engine) > +{ > + GEM_BUG_ON(!engine->timeline->inflight_seqnos); > + engine->timeline->inflight_seqnos--; > +} > + > void i915_gem_retire_noop(struct i915_gem_active *active, > struct drm_i915_gem_request *request) > { > @@ -237,6 +243,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) > &request->i915->gt.idle_work, > msecs_to_jiffies(100)); > } > + unreserve_seqno(request->engine); > > /* Walk through the active list, calling retire on each. This allows > * objects to track their GPU activity and mark themselves as idle > @@ -307,7 +314,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) > } while (tmp != req); > } > > -static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) > +static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) > { > struct i915_gem_timeline *timeline = &i915->gt.global_timeline; > struct intel_engine_cs *engine; > @@ -325,15 +332,19 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) > GEM_BUG_ON(i915->gt.active_requests > 1); > > /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ > - if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) { > - while (intel_breadcrumbs_busy(i915)) > - cond_resched(); /* spin until threads are complete */ > - } > - atomic_set(&timeline->seqno, seqno); > + for_each_engine(engine, i915, id) { > + struct intel_timeline *tl = &timeline->engine[id]; > > - /* Finally reset hw state */ > - for_each_engine(engine, i915, id) > + if (!i915_seqno_passed(seqno, tl->seqno)) { > + /* spin until threads are complete */ > + while (intel_breadcrumbs_busy(engine)) > + cond_resched(); > + } > + > + /* Finally reset hw state */ > + tl->seqno = seqno; > intel_engine_init_global_seqno(engine, seqno); > + } > > list_for_each_entry(timeline, &i915->gt.timelines, link) { > for_each_engine(engine, i915, id) { > @@ -358,37 +369,38 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) > /* HWS page needs to be set less than what we > * will inject to ring > */ > - return i915_gem_init_global_seqno(dev_priv, seqno - 1); > + return reset_all_global_seqno(dev_priv, seqno - 1); > } > > -static int reserve_global_seqno(struct drm_i915_private *i915) > +static int reserve_seqno(struct intel_engine_cs *engine) > { > - u32 active_requests = ++i915->gt.active_requests; > - u32 seqno = atomic_read(&i915->gt.global_timeline.seqno); > + u32 active = ++engine->timeline->inflight_seqnos; > + u32 seqno = engine->timeline->seqno; > int ret; > > /* Reservation is fine until we need to wrap around */ > - if (likely(seqno + active_requests > seqno)) > + if (likely(!add_overflows(seqno, active))) > return 0; > > - ret = i915_gem_init_global_seqno(i915, 0); > + /* Even though we are tracking inflight seqno individually on each > + * engine, other engines may be observing us using hw semaphores and > + * so we need to idle all engines before wrapping around this engine. > + * As all engines are then idle, we can reset the seqno on all, so > + * we don't stall in quick succession if each engine is being > + * similarly utilized. > + */ > + ret = reset_all_global_seqno(engine->i915, 0); > if (ret) { > - i915->gt.active_requests--; > + engine->timeline->inflight_seqnos--; > return ret; > } > > return 0; > } > > -static u32 __timeline_get_seqno(struct i915_gem_timeline *tl) > -{ > - /* seqno only incremented under a mutex */ > - return ++tl->seqno.counter; > -} > - > -static u32 timeline_get_seqno(struct i915_gem_timeline *tl) > +static u32 timeline_get_seqno(struct intel_timeline *tl) > { > - return atomic_inc_return(&tl->seqno); > + return ++tl->seqno; > } > > void __i915_gem_request_submit(struct drm_i915_gem_request *request) > @@ -402,14 +414,10 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) > GEM_BUG_ON(timeline == request->timeline); > assert_spin_locked(&timeline->lock); > > - seqno = timeline_get_seqno(timeline->common); > + seqno = timeline_get_seqno(timeline); > GEM_BUG_ON(!seqno); > GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); > > - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno)); > - request->previous_seqno = timeline->last_submitted_seqno; > - timeline->last_submitted_seqno = seqno; > - > /* We may be recursing from the signal callback of another i915 fence */ > spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); > request->global_seqno = seqno; > @@ -516,7 +524,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, > if (ret) > return ERR_PTR(ret); > > - ret = reserve_global_seqno(dev_priv); > + ret = reserve_seqno(engine); > if (ret) > goto err_unpin; > > @@ -568,7 +576,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, > &i915_fence_ops, > &req->lock, > req->timeline->fence_context, > - __timeline_get_seqno(req->timeline->common)); > + timeline_get_seqno(req->timeline)); > > /* We bump the ref for the fence chain */ > i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify); > @@ -613,6 +621,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, > */ > req->head = req->ring->tail; > > + /* Check that we didn't interrupt ourselves with a new request */ > + GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); > return req; > > err_ctx: > @@ -623,7 +633,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, > > kmem_cache_free(dev_priv->requests, req); > err_unreserve: > - dev_priv->gt.active_requests--; > + unreserve_seqno(engine); > err_unpin: > engine->context_unpin(engine, ctx); > return ERR_PTR(ret); > @@ -837,8 +847,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) > * our i915_gem_request_alloc() and called __i915_add_request() before > * us, the timeline will hold its seqno which is later than ours. > */ > - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, > - request->fence.seqno)); > + GEM_BUG_ON(timeline->seqno != request->fence.seqno); > > /* > * To ensure that this call will not fail, space for its emissions > @@ -892,16 +901,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) > list_add_tail(&request->link, &timeline->requests); > spin_unlock_irq(&timeline->lock); > > - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, > - request->fence.seqno)); > - > - timeline->last_submitted_seqno = request->fence.seqno; > + GEM_BUG_ON(timeline->seqno != request->fence.seqno); > i915_gem_active_set(&timeline->last_request, request); > > list_add_tail(&request->ring_link, &ring->request_list); > request->emitted_jiffies = jiffies; > > - i915_gem_mark_busy(engine); > + if (!request->i915->gt.active_requests++) > + i915_gem_mark_busy(engine); > > /* Let the backend know a new request has arrived that may need > * to adjust the existing execution schedule due to a high priority > diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h > index ea511f06efaf..9049936c571c 100644 > --- a/drivers/gpu/drm/i915/i915_gem_request.h > +++ b/drivers/gpu/drm/i915/i915_gem_request.h > @@ -145,12 +145,6 @@ struct drm_i915_gem_request { > > u32 global_seqno; > > - /** GEM sequence number associated with the previous request, > - * when the HWS breadcrumb is equal to this the GPU is processing > - * this request. > - */ > - u32 previous_seqno; > - > /** Position in the ring of the start of the request */ > u32 head; > > @@ -287,7 +281,7 @@ __i915_gem_request_started(const struct drm_i915_gem_request *req) > { > GEM_BUG_ON(!req->global_seqno); > return i915_seqno_passed(intel_engine_get_seqno(req->engine), > - req->previous_seqno); > + req->global_seqno - 1); > } > > static inline bool > diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h > index f2e51f42cc2f..6c53e14cab2a 100644 > --- a/drivers/gpu/drm/i915/i915_gem_timeline.h > +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h > @@ -33,7 +33,13 @@ struct i915_gem_timeline; > > struct intel_timeline { > u64 fence_context; > - u32 last_submitted_seqno; > + u32 seqno; > + > + /** > + * Count of outstanding requests, from the time they are constructed > + * to the moment they are retired. Loosely coupled to hardware. > + */ > + u32 inflight_seqnos; > > spinlock_t lock; > > @@ -56,7 +62,6 @@ struct intel_timeline { > > struct i915_gem_timeline { > struct list_head link; > - atomic_t seqno; > > struct drm_i915_private *i915; > const char *name; > diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c > index 5932e2dc0c6f..4f4e703d1b14 100644 > --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c > +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c > @@ -676,31 +676,26 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) > cancel_fake_irq(engine); > } > > -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915) > +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) > { > - struct intel_engine_cs *engine; > - enum intel_engine_id id; > - unsigned int mask = 0; > - > - for_each_engine(engine, i915, id) { > - struct intel_breadcrumbs *b = &engine->breadcrumbs; > - > - spin_lock_irq(&b->lock); > + struct intel_breadcrumbs *b = &engine->breadcrumbs; > + bool busy = false; > > - if (b->first_wait) { > - wake_up_process(b->first_wait->tsk); > - mask |= intel_engine_flag(engine); > - } > + spin_lock_irq(&b->lock); > > - if (b->first_signal) { > - wake_up_process(b->signaler); > - mask |= intel_engine_flag(engine); > - } > + if (b->first_wait) { > + wake_up_process(b->first_wait->tsk); > + busy |= intel_engine_flag(engine); > + } > > - spin_unlock_irq(&b->lock); > + if (b->first_signal) { > + wake_up_process(b->signaler); > + busy |= intel_engine_flag(engine); > } > > - return mask; > + spin_unlock_irq(&b->lock); > + > + return busy; > } > > #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c > index 46c94740be53..c4d4698f98e7 100644 > --- a/drivers/gpu/drm/i915/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c > @@ -252,8 +252,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) > engine->irq_seqno_barrier(engine); > > GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); > - engine->timeline->last_submitted_seqno = seqno; > - > engine->hangcheck.seqno = seqno; > > /* After manually advancing the seqno, fake the interrupt in case > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 4091c97be6ec..b91c2c7ef5a6 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -556,7 +556,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) > * wtih serialising this hint with anything, so document it as > * a hint and nothing more. > */ > - return READ_ONCE(engine->timeline->last_submitted_seqno); > + return READ_ONCE(engine->timeline->seqno); > } > > int init_workarounds_ring(struct intel_engine_cs *engine); > @@ -630,7 +630,7 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine) > > void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); > void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); > -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915); > +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine); > > static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) > { >
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 655e60d609c2..1a28b5279bec 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1080,15 +1080,6 @@ static const struct file_operations i915_error_state_fops = { #endif static int -i915_next_seqno_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - *val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno); - return 0; -} - -static int i915_next_seqno_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; @@ -1106,7 +1097,7 @@ i915_next_seqno_set(void *data, u64 val) } DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, - i915_next_seqno_get, i915_next_seqno_set, + NULL, i915_next_seqno_set, "0x%llx\n"); static int i915_frequency_info(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 51bc24d1d7c9..e8b354cf2f06 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -198,6 +198,12 @@ i915_priotree_init(struct i915_priotree *pt) pt->priority = INT_MIN; } +static void unreserve_seqno(struct intel_engine_cs *engine) +{ + GEM_BUG_ON(!engine->timeline->inflight_seqnos); + engine->timeline->inflight_seqnos--; +} + void i915_gem_retire_noop(struct i915_gem_active *active, struct drm_i915_gem_request *request) { @@ -237,6 +243,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) &request->i915->gt.idle_work, msecs_to_jiffies(100)); } + unreserve_seqno(request->engine); /* Walk through the active list, calling retire on each. This allows * objects to track their GPU activity and mark themselves as idle @@ -307,7 +314,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) } while (tmp != req); } -static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) +static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) { struct i915_gem_timeline *timeline = &i915->gt.global_timeline; struct intel_engine_cs *engine; @@ -325,15 +332,19 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) GEM_BUG_ON(i915->gt.active_requests > 1); /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) { - while (intel_breadcrumbs_busy(i915)) - cond_resched(); /* spin until threads are complete */ - } - atomic_set(&timeline->seqno, seqno); + for_each_engine(engine, i915, id) { + struct intel_timeline *tl = &timeline->engine[id]; - /* Finally reset hw state */ - for_each_engine(engine, i915, id) + if (!i915_seqno_passed(seqno, tl->seqno)) { + /* spin until threads are complete */ + while (intel_breadcrumbs_busy(engine)) + cond_resched(); + } + + /* Finally reset hw state */ + tl->seqno = seqno; intel_engine_init_global_seqno(engine, seqno); + } list_for_each_entry(timeline, &i915->gt.timelines, link) { for_each_engine(engine, i915, id) { @@ -358,37 +369,38 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) /* HWS page needs to be set less than what we * will inject to ring */ - return i915_gem_init_global_seqno(dev_priv, seqno - 1); + return reset_all_global_seqno(dev_priv, seqno - 1); } -static int reserve_global_seqno(struct drm_i915_private *i915) +static int reserve_seqno(struct intel_engine_cs *engine) { - u32 active_requests = ++i915->gt.active_requests; - u32 seqno = atomic_read(&i915->gt.global_timeline.seqno); + u32 active = ++engine->timeline->inflight_seqnos; + u32 seqno = engine->timeline->seqno; int ret; /* Reservation is fine until we need to wrap around */ - if (likely(seqno + active_requests > seqno)) + if (likely(!add_overflows(seqno, active))) return 0; - ret = i915_gem_init_global_seqno(i915, 0); + /* Even though we are tracking inflight seqno individually on each + * engine, other engines may be observing us using hw semaphores and + * so we need to idle all engines before wrapping around this engine. + * As all engines are then idle, we can reset the seqno on all, so + * we don't stall in quick succession if each engine is being + * similarly utilized. + */ + ret = reset_all_global_seqno(engine->i915, 0); if (ret) { - i915->gt.active_requests--; + engine->timeline->inflight_seqnos--; return ret; } return 0; } -static u32 __timeline_get_seqno(struct i915_gem_timeline *tl) -{ - /* seqno only incremented under a mutex */ - return ++tl->seqno.counter; -} - -static u32 timeline_get_seqno(struct i915_gem_timeline *tl) +static u32 timeline_get_seqno(struct intel_timeline *tl) { - return atomic_inc_return(&tl->seqno); + return ++tl->seqno; } void __i915_gem_request_submit(struct drm_i915_gem_request *request) @@ -402,14 +414,10 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) GEM_BUG_ON(timeline == request->timeline); assert_spin_locked(&timeline->lock); - seqno = timeline_get_seqno(timeline->common); + seqno = timeline_get_seqno(timeline); GEM_BUG_ON(!seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno)); - request->previous_seqno = timeline->last_submitted_seqno; - timeline->last_submitted_seqno = seqno; - /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); request->global_seqno = seqno; @@ -516,7 +524,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) return ERR_PTR(ret); - ret = reserve_global_seqno(dev_priv); + ret = reserve_seqno(engine); if (ret) goto err_unpin; @@ -568,7 +576,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, &i915_fence_ops, &req->lock, req->timeline->fence_context, - __timeline_get_seqno(req->timeline->common)); + timeline_get_seqno(req->timeline)); /* We bump the ref for the fence chain */ i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify); @@ -613,6 +621,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, */ req->head = req->ring->tail; + /* Check that we didn't interrupt ourselves with a new request */ + GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); return req; err_ctx: @@ -623,7 +633,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, kmem_cache_free(dev_priv->requests, req); err_unreserve: - dev_priv->gt.active_requests--; + unreserve_seqno(engine); err_unpin: engine->context_unpin(engine, ctx); return ERR_PTR(ret); @@ -837,8 +847,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) * our i915_gem_request_alloc() and called __i915_add_request() before * us, the timeline will hold its seqno which is later than ours. */ - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, - request->fence.seqno)); + GEM_BUG_ON(timeline->seqno != request->fence.seqno); /* * To ensure that this call will not fail, space for its emissions @@ -892,16 +901,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) list_add_tail(&request->link, &timeline->requests); spin_unlock_irq(&timeline->lock); - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, - request->fence.seqno)); - - timeline->last_submitted_seqno = request->fence.seqno; + GEM_BUG_ON(timeline->seqno != request->fence.seqno); i915_gem_active_set(&timeline->last_request, request); list_add_tail(&request->ring_link, &ring->request_list); request->emitted_jiffies = jiffies; - i915_gem_mark_busy(engine); + if (!request->i915->gt.active_requests++) + i915_gem_mark_busy(engine); /* Let the backend know a new request has arrived that may need * to adjust the existing execution schedule due to a high priority diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index ea511f06efaf..9049936c571c 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -145,12 +145,6 @@ struct drm_i915_gem_request { u32 global_seqno; - /** GEM sequence number associated with the previous request, - * when the HWS breadcrumb is equal to this the GPU is processing - * this request. - */ - u32 previous_seqno; - /** Position in the ring of the start of the request */ u32 head; @@ -287,7 +281,7 @@ __i915_gem_request_started(const struct drm_i915_gem_request *req) { GEM_BUG_ON(!req->global_seqno); return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->previous_seqno); + req->global_seqno - 1); } static inline bool diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index f2e51f42cc2f..6c53e14cab2a 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -33,7 +33,13 @@ struct i915_gem_timeline; struct intel_timeline { u64 fence_context; - u32 last_submitted_seqno; + u32 seqno; + + /** + * Count of outstanding requests, from the time they are constructed + * to the moment they are retired. Loosely coupled to hardware. + */ + u32 inflight_seqnos; spinlock_t lock; @@ -56,7 +62,6 @@ struct intel_timeline { struct i915_gem_timeline { struct list_head link; - atomic_t seqno; struct drm_i915_private *i915; const char *name; diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 5932e2dc0c6f..4f4e703d1b14 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -676,31 +676,26 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) cancel_fake_irq(engine); } -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915) +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - unsigned int mask = 0; - - for_each_engine(engine, i915, id) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - spin_lock_irq(&b->lock); + struct intel_breadcrumbs *b = &engine->breadcrumbs; + bool busy = false; - if (b->first_wait) { - wake_up_process(b->first_wait->tsk); - mask |= intel_engine_flag(engine); - } + spin_lock_irq(&b->lock); - if (b->first_signal) { - wake_up_process(b->signaler); - mask |= intel_engine_flag(engine); - } + if (b->first_wait) { + wake_up_process(b->first_wait->tsk); + busy |= intel_engine_flag(engine); + } - spin_unlock_irq(&b->lock); + if (b->first_signal) { + wake_up_process(b->signaler); + busy |= intel_engine_flag(engine); } - return mask; + spin_unlock_irq(&b->lock); + + return busy; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 46c94740be53..c4d4698f98e7 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -252,8 +252,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) engine->irq_seqno_barrier(engine); GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); - engine->timeline->last_submitted_seqno = seqno; - engine->hangcheck.seqno = seqno; /* After manually advancing the seqno, fake the interrupt in case diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4091c97be6ec..b91c2c7ef5a6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -556,7 +556,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) * wtih serialising this hint with anything, so document it as * a hint and nothing more. */ - return READ_ONCE(engine->timeline->last_submitted_seqno); + return READ_ONCE(engine->timeline->seqno); } int init_workarounds_ring(struct intel_engine_cs *engine); @@ -630,7 +630,7 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine) void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915); +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine); static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) {