Message ID | 20190318095204.9913-9-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/22] drm/i915: Flush pages on acquisition | expand |
On 18/03/2019 09:51, Chris Wilson wrote: > If we use the STORE_DATA_INDEX function we can use a fixed offset and > avoid having to lookup up the engine HWS address. A step closer to being > able to emit the final breadcrumb during request_add rather than later > in the submission interrupt handler. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/intel_guc_submission.c | 3 ++- > drivers/gpu/drm/i915/intel_lrc.c | 17 +++++++---------- > drivers/gpu/drm/i915/intel_ringbuffer.c | 16 ++++++---------- > drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++-- > 4 files changed, 17 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c > index 4a5727233419..c4ad73980988 100644 > --- a/drivers/gpu/drm/i915/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/intel_guc_submission.c > @@ -583,7 +583,8 @@ static void inject_preempt_context(struct work_struct *work) > } else { > cs = gen8_emit_ggtt_write(cs, > GUC_PREEMPT_FINISHED, > - addr); > + addr, > + 0); > *cs++ = MI_NOOP; > *cs++ = MI_NOOP; > } > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index fbf67105f040..7e0c20a2d733 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -173,12 +173,6 @@ static void execlists_init_reg_state(u32 *reg_state, > struct intel_engine_cs *engine, > struct intel_ring *ring); > > -static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine) > -{ > - return (i915_ggtt_offset(engine->status_page.vma) + > - I915_GEM_HWS_HANGCHECK_ADDR); > -} > - > static inline struct i915_priolist *to_priolist(struct rb_node *rb) > { > return rb_entry(rb, struct i915_priolist, node); > @@ -2213,11 +2207,14 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) > { > cs = gen8_emit_ggtt_write(cs, > request->fence.seqno, > - request->timeline->hwsp_offset); > + 
request->timeline->hwsp_offset, > + 0); > > cs = gen8_emit_ggtt_write(cs, > intel_engine_next_hangcheck_seqno(request->engine), > - intel_hws_hangcheck_address(request->engine)); > + I915_GEM_HWS_HANGCHECK_ADDR, > + MI_FLUSH_DW_STORE_INDEX); > + > > *cs++ = MI_USER_INTERRUPT; > *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; > @@ -2241,8 +2238,8 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) > > cs = gen8_emit_ggtt_write_rcs(cs, > intel_engine_next_hangcheck_seqno(request->engine), > - intel_hws_hangcheck_address(request->engine), > - 0); > + I915_GEM_HWS_HANGCHECK_ADDR, > + PIPE_CONTROL_STORE_DATA_INDEX); > > *cs++ = MI_USER_INTERRUPT; > *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 35fdebd67e5f..0310d5d53bf9 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -43,12 +43,6 @@ > */ > #define LEGACY_REQUEST_SIZE 200 > > -static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine) > -{ > - return (i915_ggtt_offset(engine->status_page.vma) + > - I915_GEM_HWS_HANGCHECK_ADDR); > -} > - > unsigned int intel_ring_update_space(struct intel_ring *ring) > { > unsigned int space; > @@ -317,8 +311,8 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) > *cs++ = rq->fence.seqno; > > *cs++ = GFX_OP_PIPE_CONTROL(4); > - *cs++ = PIPE_CONTROL_QW_WRITE; > - *cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; > + *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_STORE_DATA_INDEX; > + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | PIPE_CONTROL_GLOBAL_GTT; > *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); > > *cs++ = MI_USER_INTERRUPT; > @@ -423,8 +417,10 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) > *cs++ = rq->fence.seqno; > > *cs++ = GFX_OP_PIPE_CONTROL(4); > - *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB; > - *cs++ = 
hws_hangcheck_address(rq->engine); > + *cs++ = (PIPE_CONTROL_QW_WRITE | > + PIPE_CONTROL_STORE_DATA_INDEX | > + PIPE_CONTROL_GLOBAL_GTT_IVB); > + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; > *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); > > *cs++ = MI_USER_INTERRUPT; > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index a57489fcb302..a02c92dac5da 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -419,14 +419,14 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) > } > > static inline u32 * > -gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset) > +gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) > { > /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ > GEM_BUG_ON(gtt_offset & (1 << 5)); > /* Offset should be aligned to 8 bytes for both (QW/DW) write types */ > GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); > > - *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; > + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags; > *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT; > *cs++ = 0; > *cs++ = value; > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 4a5727233419..c4ad73980988 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -583,7 +583,8 @@ static void inject_preempt_context(struct work_struct *work) } else { cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED, - addr); + addr, + 0); *cs++ = MI_NOOP; *cs++ = MI_NOOP; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index fbf67105f040..7e0c20a2d733 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -173,12 +173,6 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); -static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine) -{ - return (i915_ggtt_offset(engine->status_page.vma) + - I915_GEM_HWS_HANGCHECK_ADDR); -} - static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -2213,11 +2207,14 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) { cs = gen8_emit_ggtt_write(cs, request->fence.seqno, - request->timeline->hwsp_offset); + request->timeline->hwsp_offset, + 0); cs = gen8_emit_ggtt_write(cs, intel_engine_next_hangcheck_seqno(request->engine), - intel_hws_hangcheck_address(request->engine)); + I915_GEM_HWS_HANGCHECK_ADDR, + MI_FLUSH_DW_STORE_INDEX); + *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -2241,8 +2238,8 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) cs = gen8_emit_ggtt_write_rcs(cs, intel_engine_next_hangcheck_seqno(request->engine), - intel_hws_hangcheck_address(request->engine), - 0); + I915_GEM_HWS_HANGCHECK_ADDR, + PIPE_CONTROL_STORE_DATA_INDEX); *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c index 35fdebd67e5f..0310d5d53bf9 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -43,12 +43,6 @@ */ #define LEGACY_REQUEST_SIZE 200 -static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine) -{ - return (i915_ggtt_offset(engine->status_page.vma) + - I915_GEM_HWS_HANGCHECK_ADDR); -} - unsigned int intel_ring_update_space(struct intel_ring *ring) { unsigned int space; @@ -317,8 +311,8 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = rq->fence.seqno; *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_QW_WRITE; - *cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_STORE_DATA_INDEX; + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | PIPE_CONTROL_GLOBAL_GTT; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); *cs++ = MI_USER_INTERRUPT; @@ -423,8 +417,10 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = rq->fence.seqno; *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB; - *cs++ = hws_hangcheck_address(rq->engine); + *cs++ = (PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_STORE_DATA_INDEX | + PIPE_CONTROL_GLOBAL_GTT_IVB); + *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); *cs++ = MI_USER_INTERRUPT; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a57489fcb302..a02c92dac5da 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -419,14 +419,14 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) } static inline u32 * -gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset) +gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. 
*/ GEM_BUG_ON(gtt_offset & (1 << 5)); /* Offset should be aligned to 8 bytes for both (QW/DW) write types */ GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); - *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags; *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT; *cs++ = 0; *cs++ = value;
If we use the STORE_DATA_INDEX function we can use a fixed offset and avoid having to look up the engine HWS address. A step closer to being able to emit the final breadcrumb during request_add rather than later in the submission interrupt handler. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/intel_guc_submission.c | 3 ++- drivers/gpu/drm/i915/intel_lrc.c | 17 +++++++---------- drivers/gpu/drm/i915/intel_ringbuffer.c | 16 ++++++---------- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++-- 4 files changed, 17 insertions(+), 23 deletions(-)