Message ID | 20190107115509.12523-41-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/46] drm/i915: Return immediately if trylock fails for direct-reclaim | expand |
On 1/7/2019 03:55, Chris Wilson wrote: > Supplement the per-engine HWSP with a per-timeline HWSP. That is a > per-request pointer through which we can check a local seqno, > abstracting away the presumption of a global seqno. In this first step, > we point each request back into the engine's HWSP so everything > continues to work with the global timeline. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_request.c | 16 +++++++++++----- > drivers/gpu/drm/i915/i915_request.h | 16 +++++++++------- > drivers/gpu/drm/i915/intel_lrc.c | 9 ++++++--- > 3 files changed, 26 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index c467392f62d7..3b69c62d040f 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -182,10 +182,11 @@ static void free_capture_list(struct i915_request *request) > static void __retire_engine_request(struct intel_engine_cs *engine, > struct i915_request *rq) > { > - GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n", > + GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n", > __func__, engine->name, > rq->fence.context, rq->fence.seqno, > rq->global_seqno, > + i915_request_hwsp(rq), > intel_engine_get_seqno(engine)); > > GEM_BUG_ON(!i915_request_completed(rq)); > @@ -244,10 +245,11 @@ static void i915_request_retire(struct i915_request *request) > { > struct i915_gem_active *active, *next; > > - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", > + GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", > request->engine->name, > request->fence.context, request->fence.seqno, > request->global_seqno, > + i915_request_hwsp(request), > intel_engine_get_seqno(request->engine)); > > lockdep_assert_held(&request->i915->drm.struct_mutex); > @@ -307,10 +309,11 @@ void i915_request_retire_upto(struct i915_request *rq) > struct intel_ring *ring = rq->ring; > struct 
i915_request *tmp; > > - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", > + GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", > rq->engine->name, > rq->fence.context, rq->fence.seqno, > rq->global_seqno, > + i915_request_hwsp(rq), > intel_engine_get_seqno(rq->engine)); > > lockdep_assert_held(&rq->i915->drm.struct_mutex); > @@ -348,10 +351,11 @@ void __i915_request_submit(struct i915_request *request) > struct intel_engine_cs *engine = request->engine; > u32 seqno; > > - GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n", > + GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n", > engine->name, > request->fence.context, request->fence.seqno, > engine->timeline.seqno + 1, > + i915_request_hwsp(request), > intel_engine_get_seqno(engine)); > > GEM_BUG_ON(!irqs_disabled()); > @@ -398,10 +402,11 @@ void __i915_request_unsubmit(struct i915_request *request) > { > struct intel_engine_cs *engine = request->engine; > > - GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n", > + GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n", > engine->name, > request->fence.context, request->fence.seqno, > request->global_seqno, > + i915_request_hwsp(request), > intel_engine_get_seqno(engine)); > > GEM_BUG_ON(!irqs_disabled()); > @@ -585,6 +590,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) > rq->ring = ce->ring; > rq->timeline = ce->ring->timeline; > GEM_BUG_ON(rq->timeline == &engine->timeline); > + rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX]; > > spin_lock_init(&rq->lock); > dma_fence_init(&rq->fence, > diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h > index d014b0605445..e2b209a26a8e 100644 > --- a/drivers/gpu/drm/i915/i915_request.h > +++ b/drivers/gpu/drm/i915/i915_request.h > @@ -130,6 +130,8 @@ struct i915_request { > struct i915_sched_node sched; > struct i915_dependency dep; > > + const u32 *hwsp_seqno; > + > /** > * GEM sequence 
number associated with this request on the > * global execution timeline. It is zero when the request is not > @@ -280,11 +282,6 @@ long i915_request_wait(struct i915_request *rq, > #define I915_WAIT_ALL BIT(3) /* used by i915_gem_object_wait() */ > #define I915_WAIT_FOR_IDLE_BOOST BIT(4) > > -static inline bool intel_engine_has_started(struct intel_engine_cs *engine, > - u32 seqno); > -static inline bool intel_engine_has_completed(struct intel_engine_cs *engine, > - u32 seqno); > - > /** > * Returns true if seq1 is later than seq2. > */ > @@ -293,6 +290,11 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2) > return (s32)(seq1 - seq2) >= 0; > } > > +static inline u32 i915_request_hwsp(const struct i915_request *rq) > +{ > + return READ_ONCE(*rq->hwsp_seqno); > +} > + Shouldn't the function name have an _seqno as well? Just 'i915_request_hwsp()' is fairly ambiguous, there could be many different things stored in the HWSP. > /** > * i915_request_started - check if the request has begun being executed > * @rq: the request > @@ -310,14 +312,14 @@ static inline bool i915_request_started(const struct i915_request *rq) > if (!seqno) /* not yet submitted to HW */ > return false; > > - return intel_engine_has_started(rq->engine, seqno); > + return i915_seqno_passed(i915_request_hwsp(rq), seqno - 1); > } > > static inline bool > __i915_request_completed(const struct i915_request *rq, u32 seqno) > { > GEM_BUG_ON(!seqno); > - return intel_engine_has_completed(rq->engine, seqno) && > + return i915_seqno_passed(i915_request_hwsp(rq), seqno) && > seqno == i915_request_global_seqno(rq); > } > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 3b512a54aacb..1df2a1868622 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -445,11 +445,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) > desc = execlists_update_context(rq); > GEM_DEBUG_EXEC(port[n].context_id = 
upper_32_bits(desc)); > > - GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", > + GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", > engine->name, n, > port[n].context_id, count, > rq->global_seqno, > rq->fence.context, rq->fence.seqno, > + i915_request_hwsp(rq), > intel_engine_get_seqno(engine), > rq_prio(rq)); > } else { > @@ -738,11 +739,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) > while (num_ports-- && port_isset(port)) { > struct i915_request *rq = port_request(port); > > - GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n", > + GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n", > rq->engine->name, > (unsigned int)(port - execlists->port), > rq->global_seqno, > rq->fence.context, rq->fence.seqno, > + i915_request_hwsp(rq), > intel_engine_get_seqno(rq->engine)); > > GEM_BUG_ON(!execlists->active); > @@ -966,12 +968,13 @@ static void process_csb(struct intel_engine_cs *engine) > EXECLISTS_ACTIVE_USER)); > > rq = port_unpack(port, &count); > - GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", > + GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", > engine->name, > port->context_id, count, > rq ? rq->global_seqno : 0, > rq ? rq->fence.context : 0, > rq ? rq->fence.seqno : 0, > + rq ? i915_request_hwsp(rq) : 0, > intel_engine_get_seqno(engine), > rq ? rq_prio(rq) : 0); >
Quoting John Harrison (2019-01-15 00:55:39) > On 1/7/2019 03:55, Chris Wilson wrote: > > Supplement the per-engine HWSP with a per-timeline HWSP. That is a > > per-request pointer through which we can check a local seqno, > > abstracting away the presumption of a global seqno. In this first step, > > we point each request back into the engine's HWSP so everything > > continues to work with the global timeline. > > --- > > +static inline u32 i915_request_hwsp(const struct i915_request *rq) > > +{ > > + return READ_ONCE(*rq->hwsp_seqno); > > +} > > + > Shouldn't the function name have an _seqno as well? Just > 'i915_request_hwsp()' is fairly ambiguous, there could be many different > things stored in the HWSP. It's not even necessarily the HWSP! :) i915_request_hw_seqno() // dissatisfying -> i915_request_hwsp_seqno() // but rq only stores one element in HWSP! -> i915_request_hwsp() Was the evolution of names I chose. Of that mix, i915_request_hwsp_seqno(). hw_seqno just feels nondescript. i915_request_current_[hw]_seqno() maybe, but because we start with i915_request I find it confusing and expect the seqno to be tied to the request. So maybe just drop i915_request here, and go with something like hwsp_breadcrumb(), that just happens to take i915_request as a convenience. -Chris
Quoting Chris Wilson (2019-01-15 09:14:17) > Quoting John Harrison (2019-01-15 00:55:39) > > On 1/7/2019 03:55, Chris Wilson wrote: > > > Supplement the per-engine HWSP with a per-timeline HWSP. That is a > > > per-request pointer through which we can check a local seqno, > > > abstracting away the presumption of a global seqno. In this first step, > > > we point each request back into the engine's HWSP so everything > > > continues to work with the global timeline. > > > --- > > > +static inline u32 i915_request_hwsp(const struct i915_request *rq) > > > +{ > > > + return READ_ONCE(*rq->hwsp_seqno); > > > +} > > > + > > Shouldn't the function name have an _seqno as well? Just > > 'i915_request_hwsp()' is fairly ambiguous, there could be many different > > things stored in the HWSP. > > It's not even necessarily the HWSP! :) > > i915_request_hw_seqno() // dissatisfying > -> i915_request_hwsp_seqno() // but rq only stores one element in HWSP! > -> i915_request_hwsp() > > Was the evolution of names I chose. > > Of that mix, i915_request_hwsp_seqno(). hw_seqno just feels nondescript. > > i915_request_current_[hw]_seqno() maybe, but because we start with > i915_request I find it confusing and expect the seqno to be tied to the > request. So maybe just drop i915_request here, and go with something > like hwsp_breadcrumb(), that just happens to take i915_request as a > convenience. Alternatively, static inline u32 i915_request_hwsp(struct i915_request *rq, int index) { return READ_ONCE(rq->hwsp_seqno[index]); } And probably rename s/rq->hwsp_seqno/rq->hwsp/. That should compile away the argument, but you'll still probably want a static inline u32 i915_request_hwsp_seqno(struct i915_request *rq) { return i915_request_hwsp(rq, 0); } I can't win! But it does look more methodical. -Chris
On 1/15/2019 07:40, Chris Wilson wrote: > Quoting Chris Wilson (2019-01-15 09:14:17) >> Quoting John Harrison (2019-01-15 00:55:39) >>> On 1/7/2019 03:55, Chris Wilson wrote: >>>> Supplement the per-engine HWSP with a per-timeline HWSP. That is a >>>> per-request pointer through which we can check a local seqno, >>>> abstracting away the presumption of a global seqno. In this first step, >>>> we point each request back into the engine's HWSP so everything >>>> continues to work with the global timeline. >>>> --- >>>> +static inline u32 i915_request_hwsp(const struct i915_request *rq) >>>> +{ >>>> + return READ_ONCE(*rq->hwsp_seqno); >>>> +} >>>> + >>> Shouldn't the function name have an _seqno as well? Just >>> 'i915_request_hwsp()' is fairly ambiguous, there could be many different >>> things stored in the HWSP. >> It's not even necessarily the HWSP! :) >> >> i915_request_hw_seqno() // dissatisfying >> -> i915_request_hwsp_seqno() // but rq only stores one element in HWSP! >> -> i915_request_hwsp() >> >> Was the evolution of names I chose. >> >> Of that mix, i915_request_hwsp_seqno(). hw_seqno just feels nondescript. >> >> i915_request_current_[hw]_seqno() maybe, but because we start with >> i915_request I find it confusing and expect the seqno to be tied to the >> request. So maybe just drop i915_request here, and go with something >> like hwsp_breadcrumb(), that just happens to take i915_request as a >> convenience. My vote would be 'hwsp_breadcrumb()' or similar. As you say, the seqno in the HWSP isn't actually tied to the request. Quite the opposite in fact - you are generally comparing multiple requests' seqnos to the HWSP seqno to see which have or have not completed. It should really be tied to the timeline (or more accurately, to the context as that is what dictates the timeline). The code is generally starting from a request structure so it makes sense to have a shortcut via the request. But logically, it should be req->timeline->hwsp[SEQNO]. 
Maybe even something like i915_timeline_out_seqno(rq)? Or i915_timeline_done_seqno(rq)? > Alternatively, > > static inline u32 i915_request_hwsp(struct i915_request *rq, int index) > { > return READ_ONCE(rq->hwsp_seqno[index]); > } > > And probably rename s/rq->hwsp_seqno/rq->hwsp/. That should compile away > the argument, but you'll still probably want a > > static inline u32 i915_request_hwsp_seqno(struct i915_request *rq) > { > return i915_request_hwsp(rq, 0); > } Given that there is only a single per-context element in the HWSP at present, this version does seem like overkill. It might be useful to move to that later when there are more entries, if that ever happens. For now, I think we should keep things simple. > > I can't win! But it does look more methodical. > -Chris
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c467392f62d7..3b69c62d040f 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -182,10 +182,11 @@ static void free_capture_list(struct i915_request *request) static void __retire_engine_request(struct intel_engine_cs *engine, struct i915_request *rq) { - GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n", __func__, engine->name, rq->fence.context, rq->fence.seqno, rq->global_seqno, + i915_request_hwsp(rq), intel_engine_get_seqno(engine)); GEM_BUG_ON(!i915_request_completed(rq)); @@ -244,10 +245,11 @@ static void i915_request_retire(struct i915_request *request) { struct i915_gem_active *active, *next; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", request->engine->name, request->fence.context, request->fence.seqno, request->global_seqno, + i915_request_hwsp(request), intel_engine_get_seqno(request->engine)); lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -307,10 +309,11 @@ void i915_request_retire_upto(struct i915_request *rq) struct intel_ring *ring = rq->ring; struct i915_request *tmp; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", rq->engine->name, rq->fence.context, rq->fence.seqno, rq->global_seqno, + i915_request_hwsp(rq), intel_engine_get_seqno(rq->engine)); lockdep_assert_held(&rq->i915->drm.struct_mutex); @@ -348,10 +351,11 @@ void __i915_request_submit(struct i915_request *request) struct intel_engine_cs *engine = request->engine; u32 seqno; - GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n", engine->name, request->fence.context, request->fence.seqno, engine->timeline.seqno + 1, + i915_request_hwsp(request), 
intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); @@ -398,10 +402,11 @@ void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n", engine->name, request->fence.context, request->fence.seqno, request->global_seqno, + i915_request_hwsp(request), intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); @@ -585,6 +590,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) rq->ring = ce->ring; rq->timeline = ce->ring->timeline; GEM_BUG_ON(rq->timeline == &engine->timeline); + rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX]; spin_lock_init(&rq->lock); dma_fence_init(&rq->fence, diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index d014b0605445..e2b209a26a8e 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -130,6 +130,8 @@ struct i915_request { struct i915_sched_node sched; struct i915_dependency dep; + const u32 *hwsp_seqno; + /** * GEM sequence number associated with this request on the * global execution timeline. It is zero when the request is not @@ -280,11 +282,6 @@ long i915_request_wait(struct i915_request *rq, #define I915_WAIT_ALL BIT(3) /* used by i915_gem_object_wait() */ #define I915_WAIT_FOR_IDLE_BOOST BIT(4) -static inline bool intel_engine_has_started(struct intel_engine_cs *engine, - u32 seqno); -static inline bool intel_engine_has_completed(struct intel_engine_cs *engine, - u32 seqno); - /** * Returns true if seq1 is later than seq2. 
*/ @@ -293,6 +290,11 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2) return (s32)(seq1 - seq2) >= 0; } +static inline u32 i915_request_hwsp(const struct i915_request *rq) +{ + return READ_ONCE(*rq->hwsp_seqno); +} + /** * i915_request_started - check if the request has begun being executed * @rq: the request @@ -310,14 +312,14 @@ static inline bool i915_request_started(const struct i915_request *rq) if (!seqno) /* not yet submitted to HW */ return false; - return intel_engine_has_started(rq->engine, seqno); + return i915_seqno_passed(i915_request_hwsp(rq), seqno - 1); } static inline bool __i915_request_completed(const struct i915_request *rq, u32 seqno) { GEM_BUG_ON(!seqno); - return intel_engine_has_completed(rq->engine, seqno) && + return i915_seqno_passed(i915_request_hwsp(rq), seqno) && seqno == i915_request_global_seqno(rq); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 3b512a54aacb..1df2a1868622 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -445,11 +445,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", engine->name, n, port[n].context_id, count, rq->global_seqno, rq->fence.context, rq->fence.seqno, + i915_request_hwsp(rq), intel_engine_get_seqno(engine), rq_prio(rq)); } else { @@ -738,11 +739,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) while (num_ports-- && port_isset(port)) { struct i915_request *rq = port_request(port); - GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n", + GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n", rq->engine->name, (unsigned int)(port - execlists->port), 
rq->global_seqno, rq->fence.context, rq->fence.seqno, + i915_request_hwsp(rq), intel_engine_get_seqno(rq->engine)); GEM_BUG_ON(!execlists->active); @@ -966,12 +968,13 @@ static void process_csb(struct intel_engine_cs *engine) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", engine->name, port->context_id, count, rq ? rq->global_seqno : 0, rq ? rq->fence.context : 0, rq ? rq->fence.seqno : 0, + rq ? i915_request_hwsp(rq) : 0, intel_engine_get_seqno(engine), rq ? rq_prio(rq) : 0);
Supplement the per-engine HWSP with a per-timeline HWSP. That is a per-request pointer through which we can check a local seqno, abstracting away the presumption of a global seqno. In this first step, we point each request back into the engine's HWSP so everything continues to work with the global timeline. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_request.c | 16 +++++++++++----- drivers/gpu/drm/i915/i915_request.h | 16 +++++++++------- drivers/gpu/drm/i915/intel_lrc.c | 9 ++++++--- 3 files changed, 26 insertions(+), 15 deletions(-)