Message ID | 20190225162357.7166-3-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/4] drm/i915: Replace global_seqno with a hangcheck heartbeat seqno | expand |
On 25/02/2019 16:23, Chris Wilson wrote: > Having weaned the interrupt handling off using a single global execution > queue, we no longer need to emit a global_seqno. Note that we still have > a few assumptions about execution order along engine timelines, but this > removes the most obvious artefact! > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_gpu_error.c | 34 ++----------- > drivers/gpu/drm/i915/i915_gpu_error.h | 2 - > drivers/gpu/drm/i915/i915_request.c | 34 ++----------- > drivers/gpu/drm/i915/i915_request.h | 32 ------------ > drivers/gpu/drm/i915/i915_trace.h | 25 +++------- > drivers/gpu/drm/i915/intel_engine_cs.c | 5 +- > drivers/gpu/drm/i915/intel_guc_submission.c | 2 +- > drivers/gpu/drm/i915/intel_lrc.c | 34 ++----------- > drivers/gpu/drm/i915/intel_ringbuffer.c | 50 +++---------------- > drivers/gpu/drm/i915/intel_ringbuffer.h | 2 - > .../gpu/drm/i915/selftests/intel_hangcheck.c | 5 +- > drivers/gpu/drm/i915/selftests/mock_engine.c | 1 - > 12 files changed, 32 insertions(+), 194 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c > index 061a767e3bed..fa86c60fb56c 100644 > --- a/drivers/gpu/drm/i915/i915_gpu_error.c > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c > @@ -380,19 +380,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, > err_printf(m, "%s [%d]:\n", name, count); > > while (count--) { > - err_printf(m, " %08x_%08x %8u %02x %02x %02x", > + err_printf(m, " %08x_%08x %8u %02x %02x", > upper_32_bits(err->gtt_offset), > lower_32_bits(err->gtt_offset), > err->size, > err->read_domains, > - err->write_domain, > - err->wseqno); > + err->write_domain); > err_puts(m, tiling_flag(err->tiling)); > err_puts(m, dirty_flag(err->dirty)); > err_puts(m, purgeable_flag(err->purgeable)); > err_puts(m, err->userptr ? " userptr" : ""); > - err_puts(m, err->engine != -1 ? " " : ""); > - err_puts(m, engine_name(m->i915, err->engine)); > err_puts(m, i915_cache_level_str(m->i915, err->cache_level)); > > if (err->name) > @@ -1048,27 +1045,6 @@ i915_error_object_create(struct drm_i915_private *i915, > return dst; > } > > -/* The error capture is special as tries to run underneath the normal > - * locking rules - so we use the raw version of the i915_active_request lookup. > - */ > -static inline u32 > -__active_get_seqno(struct i915_active_request *active) > -{ > - struct i915_request *request; > - > - request = __i915_active_request_peek(active); > - return request ? request->global_seqno : 0; > -} > - > -static inline int > -__active_get_engine_id(struct i915_active_request *active) > -{ > - struct i915_request *request; > - > - request = __i915_active_request_peek(active); > - return request ? request->engine->id : -1; > -} > - > static void capture_bo(struct drm_i915_error_buffer *err, > struct i915_vma *vma) > { > @@ -1077,9 +1053,6 @@ static void capture_bo(struct drm_i915_error_buffer *err, > err->size = obj->base.size; > err->name = obj->base.name; > > - err->wseqno = __active_get_seqno(&obj->frontbuffer_write); > - err->engine = __active_get_engine_id(&obj->frontbuffer_write); > - > err->gtt_offset = vma->node.start; > err->read_domains = obj->read_domains; > err->write_domain = obj->write_domain; > @@ -1284,7 +1257,8 @@ static void record_request(struct i915_request *request, > struct i915_gem_context *ctx = request->gem_context; > > erq->flags = request->fence.flags; > - erq->context = ctx->hw_id; > + erq->context = request->fence.context; > + erq->seqno = request->fence.seqno; > erq->sched_attr = request->sched.attr; > erq->jiffies = request->emitted_jiffies; > erq->start = i915_ggtt_offset(request->ring->vma); > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h > index 19ac102afaff..8c1569c1830d 100644 > --- a/drivers/gpu/drm/i915/i915_gpu_error.h > +++ b/drivers/gpu/drm/i915/i915_gpu_error.h > @@ -164,7 +164,6 @@ struct i915_gpu_state { > struct drm_i915_error_buffer { > u32 size; > u32 name; > - u32 wseqno; > u64 gtt_offset; > u32 read_domains; > u32 write_domain; > @@ -173,7 +172,6 @@ struct i915_gpu_state { > u32 dirty:1; > u32 purgeable:1; > u32 userptr:1; > - s32 engine:4; > u32 cache_level:3; > } *active_bo[I915_NUM_ENGINES], *pinned_bo; > u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count; > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index 596183f35b78..935db5548f80 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -179,10 +179,9 @@ static void free_capture_list(struct i915_request *request) > static void __retire_engine_request(struct intel_engine_cs *engine, > struct i915_request *rq) > { > - GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n", > + GEM_TRACE("%s(%s) fence %llx:%lld, current %d\n", > __func__, engine->name, > rq->fence.context, rq->fence.seqno, > - rq->global_seqno, > hwsp_seqno(rq)); > > GEM_BUG_ON(!i915_request_completed(rq)); > @@ -242,10 +241,9 @@ static void i915_request_retire(struct i915_request *request) > { > struct i915_active_request *active, *next; > > - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", > + GEM_TRACE("%s fence %llx:%lld, current %d\n", > request->engine->name, > request->fence.context, request->fence.seqno, > - request->global_seqno, > hwsp_seqno(request)); > > lockdep_assert_held(&request->i915->drm.struct_mutex); > @@ -303,10 +301,9 @@ void i915_request_retire_upto(struct i915_request *rq) > struct intel_ring *ring = rq->ring; > struct i915_request *tmp; > > - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", > + GEM_TRACE("%s fence %llx:%lld, current %d\n", > rq->engine->name, > rq->fence.context, rq->fence.seqno, > - rq->global_seqno, > hwsp_seqno(rq)); > > lockdep_assert_held(&rq->i915->drm.struct_mutex); > @@ -339,22 +336,13 @@ static void move_to_timeline(struct i915_request *request, > spin_unlock(&request->timeline->lock); > } > > -static u32 next_global_seqno(struct i915_timeline *tl) > -{ > - if (!++tl->seqno) > - ++tl->seqno; > - return tl->seqno; > -} > - > void __i915_request_submit(struct i915_request *request) > { > struct intel_engine_cs *engine = request->engine; > - u32 seqno; > > - GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n", > + GEM_TRACE("%s fence %llx:%lld -> current %d\n", > engine->name, > request->fence.context, request->fence.seqno, > - engine->timeline.seqno + 1, > hwsp_seqno(request)); > > GEM_BUG_ON(!irqs_disabled()); > @@ -363,16 +351,10 @@ void __i915_request_submit(struct i915_request *request) > if (i915_gem_context_is_banned(request->gem_context)) > i915_request_skip(request, -EIO); > > - GEM_BUG_ON(request->global_seqno); > - > - seqno = next_global_seqno(&engine->timeline); > - GEM_BUG_ON(!seqno); > - > /* We may be recursing from the signal callback of another i915 fence */ > spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); > GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); > set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); > - request->global_seqno = seqno; > if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && > !i915_request_enable_breadcrumb(request)) > intel_engine_queue_breadcrumbs(engine); > @@ -404,10 +386,9 @@ void __i915_request_unsubmit(struct i915_request *request) > { > struct intel_engine_cs *engine = request->engine; > > - GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n", > + GEM_TRACE("%s fence %llx:%lld, current %d\n", > engine->name, > request->fence.context, request->fence.seqno, > - request->global_seqno, > hwsp_seqno(request)); > > GEM_BUG_ON(!irqs_disabled()); > @@ -417,13 +398,9 @@ void __i915_request_unsubmit(struct i915_request *request) > * Only unwind in reverse order, required so that the per-context list > * is kept in seqno/ring order. > */ > - GEM_BUG_ON(!request->global_seqno); > - GEM_BUG_ON(request->global_seqno != engine->timeline.seqno); > - engine->timeline.seqno--; > > /* We may be recursing from the signal callback of another i915 fence */ > spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); > - request->global_seqno = 0; > if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) > i915_request_cancel_breadcrumb(request); > GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); > @@ -637,7 +614,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) > i915_sched_node_init(&rq->sched); > > /* No zalloc, must clear what we need by hand */ > - rq->global_seqno = 0; > rq->file_priv = NULL; > rq->batch = NULL; > rq->capture_list = NULL; > diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h > index 40f3e8dcbdd5..1e127c1c53fa 100644 > --- a/drivers/gpu/drm/i915/i915_request.h > +++ b/drivers/gpu/drm/i915/i915_request.h > @@ -147,14 +147,6 @@ struct i915_request { > */ > const u32 *hwsp_seqno; > > - /** > - * GEM sequence number associated with this request on the > - * global execution timeline. It is zero when the request is not > - * on the HW queue (i.e. not on the engine timeline list). > - * Its value is guarded by the timeline spinlock. > - */ > - u32 global_seqno; > - > /** Position in the ring of the start of the request */ > u32 head; > > @@ -247,30 +239,6 @@ i915_request_put(struct i915_request *rq) > dma_fence_put(&rq->fence); > } > > -/** > - * i915_request_global_seqno - report the current global seqno > - * @request - the request > - * > - * A request is assigned a global seqno only when it is on the hardware > - * execution queue. The global seqno can be used to maintain a list of > - * requests on the same engine in retirement order, for example for > - * constructing a priority queue for waiting. Prior to its execution, or > - * if it is subsequently removed in the event of preemption, its global > - * seqno is zero. As both insertion and removal from the execution queue > - * may operate in IRQ context, it is not guarded by the usual struct_mutex > - * BKL. Instead those relying on the global seqno must be prepared for its > - * value to change between reads. Only when the request is complete can > - * the global seqno be stable (due to the memory barriers on submitting > - * the commands to the hardware to write the breadcrumb, if the HWS shows > - * that it has passed the global seqno and the global seqno is unchanged > - * after the read, it is indeed complete). > - */ > -static inline u32 > -i915_request_global_seqno(const struct i915_request *request) > -{ > - return READ_ONCE(request->global_seqno); > -} > - > int i915_request_await_object(struct i915_request *to, > struct drm_i915_gem_object *obj, > bool write); > diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h > index 0d9cedb892b0..12893304c8f8 100644 > --- a/drivers/gpu/drm/i915/i915_trace.h > +++ b/drivers/gpu/drm/i915/i915_trace.h > @@ -708,7 +708,6 @@ DECLARE_EVENT_CLASS(i915_request, > __field(u16, class) > __field(u16, instance) > __field(u32, seqno) > - __field(u32, global) > ), > > TP_fast_assign( > @@ -718,13 +717,11 @@ DECLARE_EVENT_CLASS(i915_request, > __entry->instance = rq->engine->instance; > __entry->ctx = rq->fence.context; > __entry->seqno = rq->fence.seqno; > - __entry->global = rq->global_seqno; > ), > > - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u", > + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u", > __entry->dev, __entry->class, __entry->instance, > - __entry->hw_id, __entry->ctx, __entry->seqno, > - __entry->global) > + __entry->hw_id, __entry->ctx, __entry->seqno) > ); > > DEFINE_EVENT(i915_request, i915_request_add, > @@ -754,7 +751,6 @@ TRACE_EVENT(i915_request_in, > __field(u16, class) > __field(u16, instance) > __field(u32, seqno) > - __field(u32, global_seqno) > __field(u32, port) > __field(u32, prio) > ), > @@ -766,15 +762,14 @@ TRACE_EVENT(i915_request_in, > __entry->instance = rq->engine->instance; > __entry->ctx = rq->fence.context; > __entry->seqno = rq->fence.seqno; > - __entry->global_seqno = rq->global_seqno; > __entry->prio = rq->sched.attr.priority; > __entry->port = port; > ), > > - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, global=%u, port=%u", > + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u", > __entry->dev, __entry->class, __entry->instance, > __entry->hw_id, __entry->ctx, __entry->seqno, > - __entry->prio, __entry->global_seqno, __entry->port) > + __entry->prio, __entry->port) > ); > > TRACE_EVENT(i915_request_out, > @@ -788,7 +783,6 @@ TRACE_EVENT(i915_request_out, > __field(u16, class) > __field(u16, instance) > __field(u32, seqno) > - __field(u32, global_seqno) > __field(u32, completed) > ), > > @@ -799,14 +793,13 @@ TRACE_EVENT(i915_request_out, > __entry->instance = rq->engine->instance; > __entry->ctx = rq->fence.context; > __entry->seqno = rq->fence.seqno; > - __entry->global_seqno = rq->global_seqno; > __entry->completed = i915_request_completed(rq); > ), > > - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u, completed?=%u", > + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u", > __entry->dev, __entry->class, __entry->instance, > __entry->hw_id, __entry->ctx, __entry->seqno, > - __entry->global_seqno, __entry->completed) > + __entry->completed) > ); > > #else > @@ -849,7 +842,6 @@ TRACE_EVENT(i915_request_wait_begin, > __field(u16, class) > __field(u16, instance) > __field(u32, seqno) > - __field(u32, global) > __field(unsigned int, flags) > ), > > @@ -866,14 +858,13 @@ TRACE_EVENT(i915_request_wait_begin, > __entry->instance = rq->engine->instance; > __entry->ctx = rq->fence.context; > __entry->seqno = rq->fence.seqno; > - __entry->global = rq->global_seqno; > __entry->flags = flags; > ), > > - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u, blocking=%u, flags=0x%x", > + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, blocking=%u, flags=0x%x", > __entry->dev, __entry->class, __entry->instance, > __entry->hw_id, __entry->ctx, __entry->seqno, > - __entry->global, !!(__entry->flags & I915_WAIT_LOCKED), > + !!(__entry->flags & I915_WAIT_LOCKED), > __entry->flags) > ); > > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c > index ec859c7b8c7c..b7b626195eda 100644 > --- a/drivers/gpu/drm/i915/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c > @@ -1271,15 +1271,14 @@ static void print_request(struct drm_printer *m, > > x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); > > - drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n", > + drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n", > prefix, > - rq->global_seqno, > + rq->fence.context, rq->fence.seqno, > i915_request_completed(rq) ? "!" : > i915_request_started(rq) ? "*" : > "", > test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, > &rq->fence.flags) ? "+" : "", > - rq->fence.context, rq->fence.seqno, > buf, > jiffies_to_msecs(jiffies - rq->emitted_jiffies), > name); > diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c > index 8bc8aa54aa35..20cbceeabeae 100644 > --- a/drivers/gpu/drm/i915/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/intel_guc_submission.c > @@ -535,7 +535,7 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) > spin_lock(&client->wq_lock); > > guc_wq_item_append(client, engine->guc_id, ctx_desc, > - ring_tail, rq->global_seqno); > + ring_tail, rq->fence.seqno); > guc_ring_doorbell(client); > > client->submissions[engine->id] += 1; > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index a820f32f0d70..d638ce9de089 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -172,12 +172,6 @@ static void execlists_init_reg_state(u32 *reg_state, > struct intel_engine_cs *engine, > struct intel_ring *ring); > > -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) > -{ > - return (i915_ggtt_offset(engine->status_page.vma) + > - I915_GEM_HWS_INDEX_ADDR); > -} > - > static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine) > { > return (i915_ggtt_offset(engine->status_page.vma) + > @@ -528,10 +522,9 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) > desc = execlists_update_context(rq); > GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); > > - GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", > + GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", > engine->name, n, > port[n].context_id, count, > - rq->global_seqno, > rq->fence.context, rq->fence.seqno, > hwsp_seqno(rq), > rq_prio(rq)); > @@ -839,10 +832,9 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) > while (num_ports-- && port_isset(port)) { > struct i915_request *rq = port_request(port); > > - GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n", > + GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n", > rq->engine->name, > (unsigned int)(port - execlists->port), > - rq->global_seqno, > rq->fence.context, rq->fence.seqno, > hwsp_seqno(rq)); > > @@ -924,8 +916,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) > > /* Mark all executing requests as skipped. */ > list_for_each_entry(rq, &engine->timeline.requests, link) { > - GEM_BUG_ON(!rq->global_seqno); > - > if (!i915_request_signaled(rq)) > dma_fence_set_error(&rq->fence, -EIO); > > @@ -1064,10 +1054,9 @@ static void process_csb(struct intel_engine_cs *engine) > EXECLISTS_ACTIVE_USER)); > > rq = port_unpack(port, &count); > - GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", > + GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", > engine->name, > port->context_id, count, > - rq ? rq->global_seqno : 0, > rq ? rq->fence.context : 0, > rq ? rq->fence.seqno : 0, > rq ? hwsp_seqno(rq) : 0, > @@ -1938,10 +1927,7 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled) > /* Following the reset, we need to reload the CSB read/write pointers */ > reset_csb_pointers(&engine->execlists); > > - GEM_TRACE("%s seqno=%d, stalled? %s\n", > - engine->name, > - rq ? rq->global_seqno : 0, > - yesno(stalled)); > + GEM_TRACE("%s stalled? %s\n", engine->name, yesno(stalled)); > if (!rq) > goto out_unlock; > > @@ -2196,9 +2182,6 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) > > static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) > { > - /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ > - BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); > - > cs = gen8_emit_ggtt_write(cs, > request->fence.seqno, > request->timeline->hwsp_offset); > @@ -2207,10 +2190,6 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) > intel_engine_next_hangcheck_seqno(request->engine), > intel_hws_hangcheck_address(request->engine)); > > - cs = gen8_emit_ggtt_write(cs, > - request->global_seqno, > - intel_hws_seqno_address(request->engine)); > - > *cs++ = MI_USER_INTERRUPT; > *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; > > @@ -2236,11 +2215,6 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) > intel_hws_hangcheck_address(request->engine), > PIPE_CONTROL_CS_STALL); > > - cs = gen8_emit_ggtt_write_rcs(cs, > - request->global_seqno, > - intel_hws_seqno_address(request->engine), > - PIPE_CONTROL_CS_STALL); > - > *cs++ = MI_USER_INTERRUPT; > *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 2d59e2990448..1b96b0960adc 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -49,12 +49,6 @@ static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine) > I915_GEM_HWS_HANGCHECK_ADDR); > } > > -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) > -{ > - return (i915_ggtt_offset(engine->status_page.vma) + > - I915_GEM_HWS_INDEX_ADDR); > -} > - > unsigned int intel_ring_update_space(struct intel_ring *ring) > { > unsigned int space; > @@ -327,11 +321,6 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) > *cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; > *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); > > - *cs++ = GFX_OP_PIPE_CONTROL(4); > - *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; > - *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; > - *cs++ = rq->global_seqno; > - > *cs++ = MI_USER_INTERRUPT; > *cs++ = MI_NOOP; > > @@ -438,13 +427,6 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) > *cs++ = hws_hangcheck_address(rq->engine); > *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); > > - *cs++ = GFX_OP_PIPE_CONTROL(4); > - *cs++ = (PIPE_CONTROL_QW_WRITE | > - PIPE_CONTROL_GLOBAL_GTT_IVB | > - PIPE_CONTROL_CS_STALL); > - *cs++ = intel_hws_seqno_address(rq->engine); > - *cs++ = rq->global_seqno; > - > *cs++ = MI_USER_INTERRUPT; > *cs++ = MI_NOOP; > > @@ -467,11 +449,8 @@ static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) > *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; > *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); > > - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; > - *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; > - *cs++ = rq->global_seqno; > - > *cs++ = MI_USER_INTERRUPT; > + *cs++ = MI_NOOP; > > rq->tail = intel_ring_offset(rq, cs); > assert_ring_tail_valid(rq->ring, rq->tail); > @@ -495,10 +474,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) > *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; > *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); > > - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; > - *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; > - *cs++ = rq->global_seqno; > - > for (i = 0; i < GEN7_XCS_WA; i++) { > *cs++ = MI_STORE_DWORD_INDEX; > *cs++ = I915_GEM_HWS_SEQNO_ADDR; > @@ -510,7 +485,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) > *cs++ = 0; > > *cs++ = MI_USER_INTERRUPT; > - *cs++ = MI_NOOP; > > rq->tail = intel_ring_offset(rq, cs); > assert_ring_tail_valid(rq->ring, rq->tail); > @@ -782,10 +756,8 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled) > } > } > > - GEM_TRACE("%s seqno=%d, stalled? %s\n", > - engine->name, > - rq ? rq->global_seqno : 0, > - yesno(stalled)); > + GEM_TRACE("%s stalled? %s\n", engine->name, yesno(stalled)); > + > /* > * The guilty request will get skipped on a hung engine. > * > @@ -915,8 +887,6 @@ static void cancel_requests(struct intel_engine_cs *engine) > > /* Mark all submitted requests as skipped. */ > list_for_each_entry(request, &engine->timeline.requests, link) { > - GEM_BUG_ON(!request->global_seqno); > - > if (!i915_request_signaled(request)) > dma_fence_set_error(&request->fence, -EIO); > > @@ -953,12 +923,7 @@ static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) > *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; > *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); > > - *cs++ = MI_STORE_DWORD_INDEX; > - *cs++ = I915_GEM_HWS_INDEX_ADDR; > - *cs++ = rq->global_seqno; > - > *cs++ = MI_USER_INTERRUPT; > - *cs++ = MI_NOOP; > > rq->tail = intel_ring_offset(rq, cs); > assert_ring_tail_valid(rq->ring, rq->tail); > @@ -976,10 +941,6 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) > > *cs++ = MI_FLUSH; > > - *cs++ = MI_STORE_DWORD_INDEX; > - *cs++ = I915_GEM_HWS_SEQNO_ADDR; > - *cs++ = rq->fence.seqno; > - > *cs++ = MI_STORE_DWORD_INDEX; > *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; > *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); > @@ -987,11 +948,12 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) > BUILD_BUG_ON(GEN5_WA_STORES < 1); > for (i = 0; i < GEN5_WA_STORES; i++) { > *cs++ = MI_STORE_DWORD_INDEX; > - *cs++ = I915_GEM_HWS_INDEX_ADDR; > - *cs++ = rq->global_seqno; > + *cs++ = I915_GEM_HWS_SEQNO_ADDR; > + *cs++ = rq->fence.seqno; > } > > *cs++ = MI_USER_INTERRUPT; > + *cs++ = MI_NOOP; > > rq->tail = intel_ring_offset(rq, cs); > assert_ring_tail_valid(rq->ring, rq->tail); > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 551b3daa741c..de8dba7565b0 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -724,8 +724,6 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) > * > * The area from dword 0x30 to 0x3ff is available for driver usage. > */ > -#define I915_GEM_HWS_INDEX 0x30 > -#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32)) > #define I915_GEM_HWS_PREEMPT 0x32 > #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) > #define I915_GEM_HWS_HANGCHECK 0x34 > diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > index a77e4bf1ab55..fa02cf9ce0cf 100644 > --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c > @@ -571,11 +571,10 @@ static int active_request_put(struct i915_request *rq) > return 0; > > if (i915_request_wait(rq, 0, 5 * HZ) < 0) { > - GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld, seqno %d.\n", > + GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n", > rq->engine->name, > rq->fence.context, > - rq->fence.seqno, > - i915_request_global_seqno(rq)); > + rq->fence.seqno); > GEM_TRACE_DUMP(); > > i915_gem_set_wedged(rq->i915); > diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c > index 79bf27606ab8..6f3fb803c747 100644 > --- a/drivers/gpu/drm/i915/selftests/mock_engine.c > +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c > @@ -196,7 +196,6 @@ static void mock_submit_request(struct i915_request *request) > unsigned long flags; > > i915_request_submit(request); > - GEM_BUG_ON(!request->global_seqno); > > spin_lock_irqsave(&engine->hw_lock, flags); > list_add_tail(&mock->link, &engine->hw_queue); > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 061a767e3bed..fa86c60fb56c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -380,19 +380,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, err_printf(m, "%s [%d]:\n", name, count); while (count--) { - err_printf(m, " %08x_%08x %8u %02x %02x %02x", + err_printf(m, " %08x_%08x %8u %02x %02x", upper_32_bits(err->gtt_offset), lower_32_bits(err->gtt_offset), err->size, err->read_domains, - err->write_domain, - err->wseqno); + err->write_domain); err_puts(m, tiling_flag(err->tiling)); err_puts(m, dirty_flag(err->dirty)); err_puts(m, purgeable_flag(err->purgeable)); err_puts(m, err->userptr ? " userptr" : ""); - err_puts(m, err->engine != -1 ? " " : ""); - err_puts(m, engine_name(m->i915, err->engine)); err_puts(m, i915_cache_level_str(m->i915, err->cache_level)); if (err->name) @@ -1048,27 +1045,6 @@ i915_error_object_create(struct drm_i915_private *i915, return dst; } -/* The error capture is special as tries to run underneath the normal - * locking rules - so we use the raw version of the i915_active_request lookup. - */ -static inline u32 -__active_get_seqno(struct i915_active_request *active) -{ - struct i915_request *request; - - request = __i915_active_request_peek(active); - return request ? request->global_seqno : 0; -} - -static inline int -__active_get_engine_id(struct i915_active_request *active) -{ - struct i915_request *request; - - request = __i915_active_request_peek(active); - return request ? request->engine->id : -1; -} - static void capture_bo(struct drm_i915_error_buffer *err, struct i915_vma *vma) { @@ -1077,9 +1053,6 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->size = obj->base.size; err->name = obj->base.name; - err->wseqno = __active_get_seqno(&obj->frontbuffer_write); - err->engine = __active_get_engine_id(&obj->frontbuffer_write); - err->gtt_offset = vma->node.start; err->read_domains = obj->read_domains; err->write_domain = obj->write_domain; @@ -1284,7 +1257,8 @@ static void record_request(struct i915_request *request, struct i915_gem_context *ctx = request->gem_context; erq->flags = request->fence.flags; - erq->context = ctx->hw_id; + erq->context = request->fence.context; + erq->seqno = request->fence.seqno; erq->sched_attr = request->sched.attr; erq->jiffies = request->emitted_jiffies; erq->start = i915_ggtt_offset(request->ring->vma); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 19ac102afaff..8c1569c1830d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -164,7 +164,6 @@ struct i915_gpu_state { struct drm_i915_error_buffer { u32 size; u32 name; - u32 wseqno; u64 gtt_offset; u32 read_domains; u32 write_domain; @@ -173,7 +172,6 @@ struct i915_gpu_state { u32 dirty:1; u32 purgeable:1; u32 userptr:1; - s32 engine:4; u32 cache_level:3; } *active_bo[I915_NUM_ENGINES], *pinned_bo; u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 596183f35b78..935db5548f80 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -179,10 +179,9 @@ static void free_capture_list(struct i915_request *request) static void __retire_engine_request(struct intel_engine_cs *engine, struct i915_request *rq) { - GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s(%s) fence %llx:%lld, current %d\n", __func__, engine->name, rq->fence.context, rq->fence.seqno, - rq->global_seqno, hwsp_seqno(rq)); GEM_BUG_ON(!i915_request_completed(rq)); @@ -242,10 +241,9 @@ static void i915_request_retire(struct i915_request *request) { struct i915_active_request *active, *next; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, current %d\n", request->engine->name, request->fence.context, request->fence.seqno, - request->global_seqno, hwsp_seqno(request)); lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -303,10 +301,9 @@ void i915_request_retire_upto(struct i915_request *rq) struct intel_ring *ring = rq->ring; struct i915_request *tmp; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, current %d\n", rq->engine->name, rq->fence.context, rq->fence.seqno, - rq->global_seqno, hwsp_seqno(rq)); lockdep_assert_held(&rq->i915->drm.struct_mutex); @@ -339,22 +336,13 @@ static void move_to_timeline(struct i915_request *request, spin_unlock(&request->timeline->lock); } -static u32 next_global_seqno(struct i915_timeline *tl) -{ - if (!++tl->seqno) - ++tl->seqno; - return tl->seqno; -} - void __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - u32 seqno; - GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld -> current %d\n", engine->name, request->fence.context, request->fence.seqno, - engine->timeline.seqno + 1, hwsp_seqno(request)); GEM_BUG_ON(!irqs_disabled()); @@ -363,16 +351,10 @@ void __i915_request_submit(struct i915_request *request) if (i915_gem_context_is_banned(request->gem_context)) i915_request_skip(request, -EIO); - GEM_BUG_ON(request->global_seqno); - - seqno = next_global_seqno(&engine->timeline); - GEM_BUG_ON(!seqno); - /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); - request->global_seqno = seqno; if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && !i915_request_enable_breadcrumb(request)) intel_engine_queue_breadcrumbs(engine); @@ -404,10 +386,9 @@ void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n", + GEM_TRACE("%s fence %llx:%lld, current %d\n", engine->name, request->fence.context, request->fence.seqno, - request->global_seqno, hwsp_seqno(request)); GEM_BUG_ON(!irqs_disabled()); @@ -417,13 +398,9 @@ void __i915_request_unsubmit(struct i915_request *request) * Only unwind in reverse order, required so that the per-context list * is kept in seqno/ring order. */ - GEM_BUG_ON(!request->global_seqno); - GEM_BUG_ON(request->global_seqno != engine->timeline.seqno); - engine->timeline.seqno--; /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - request->global_seqno = 0; if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) i915_request_cancel_breadcrumb(request); GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); @@ -637,7 +614,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) i915_sched_node_init(&rq->sched); /* No zalloc, must clear what we need by hand */ - rq->global_seqno = 0; rq->file_priv = NULL; rq->batch = NULL; rq->capture_list = NULL; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 40f3e8dcbdd5..1e127c1c53fa 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -147,14 +147,6 @@ struct i915_request { */ const u32 *hwsp_seqno; - /** - * GEM sequence number associated with this request on the - * global execution timeline. It is zero when the request is not - * on the HW queue (i.e. not on the engine timeline list). - * Its value is guarded by the timeline spinlock. - */ - u32 global_seqno; - /** Position in the ring of the start of the request */ u32 head; @@ -247,30 +239,6 @@ i915_request_put(struct i915_request *rq) dma_fence_put(&rq->fence); } -/** - * i915_request_global_seqno - report the current global seqno - * @request - the request - * - * A request is assigned a global seqno only when it is on the hardware - * execution queue. The global seqno can be used to maintain a list of - * requests on the same engine in retirement order, for example for - * constructing a priority queue for waiting. Prior to its execution, or - * if it is subsequently removed in the event of preemption, its global - * seqno is zero. As both insertion and removal from the execution queue - * may operate in IRQ context, it is not guarded by the usual struct_mutex - * BKL. Instead those relying on the global seqno must be prepared for its - * value to change between reads. Only when the request is complete can - * the global seqno be stable (due to the memory barriers on submitting - * the commands to the hardware to write the breadcrumb, if the HWS shows - * that it has passed the global seqno and the global seqno is unchanged - * after the read, it is indeed complete). - */ -static inline u32 -i915_request_global_seqno(const struct i915_request *request) -{ - return READ_ONCE(request->global_seqno); -} - int i915_request_await_object(struct i915_request *to, struct drm_i915_gem_object *obj, bool write); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 0d9cedb892b0..12893304c8f8 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -708,7 +708,6 @@ DECLARE_EVENT_CLASS(i915_request, __field(u16, class) __field(u16, instance) __field(u32, seqno) - __field(u32, global) ), TP_fast_assign( @@ -718,13 +717,11 @@ DECLARE_EVENT_CLASS(i915_request, __entry->instance = rq->engine->instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; - __entry->global = rq->global_seqno; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u", + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->global) + __entry->hw_id, __entry->ctx, __entry->seqno) ); DEFINE_EVENT(i915_request, i915_request_add, @@ -754,7 +751,6 @@ TRACE_EVENT(i915_request_in, __field(u16, class) __field(u16, instance) __field(u32, seqno) - __field(u32, global_seqno) __field(u32, port) __field(u32, prio) ), @@ -766,15 +762,14 @@ TRACE_EVENT(i915_request_in, __entry->instance = rq->engine->instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; - __entry->global_seqno = rq->global_seqno; __entry->prio = rq->sched.attr.priority; __entry->port = port; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, global=%u, port=%u", + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u", __entry->dev, __entry->class, __entry->instance, __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->prio, __entry->global_seqno, __entry->port) + __entry->prio, __entry->port) ); TRACE_EVENT(i915_request_out, @@ -788,7 +783,6 @@ TRACE_EVENT(i915_request_out, __field(u16, class) __field(u16, instance) __field(u32, seqno) - __field(u32, global_seqno) __field(u32, completed) ), @@ -799,14 +793,13 @@ TRACE_EVENT(i915_request_out, __entry->instance = rq->engine->instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; - __entry->global_seqno = rq->global_seqno; __entry->completed = i915_request_completed(rq); ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u, completed?=%u", + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u", __entry->dev, __entry->class, __entry->instance, __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->global_seqno, __entry->completed) + __entry->completed) ); #else @@ -849,7 +842,6 @@ TRACE_EVENT(i915_request_wait_begin, __field(u16, class) __field(u16, instance) __field(u32, seqno) - __field(u32, global) __field(unsigned int, flags) ), @@ -866,14 +858,13 @@ TRACE_EVENT(i915_request_wait_begin, __entry->instance = rq->engine->instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; - __entry->global = rq->global_seqno; __entry->flags = flags; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u, blocking=%u, flags=0x%x", + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, blocking=%u, flags=0x%x", __entry->dev, __entry->class, __entry->instance, __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->global, !!(__entry->flags & I915_WAIT_LOCKED), + !!(__entry->flags & I915_WAIT_LOCKED), __entry->flags) ); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ec859c7b8c7c..b7b626195eda 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1271,15 +1271,14 @@ static void print_request(struct drm_printer *m, x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); - drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n", + drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n", prefix, - rq->global_seqno, + rq->fence.context, rq->fence.seqno, i915_request_completed(rq) ? "!" : i915_request_started(rq) ? "*" : "", test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags) ? "+" : "", - rq->fence.context, rq->fence.seqno, buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), name); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 8bc8aa54aa35..20cbceeabeae 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -535,7 +535,7 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) spin_lock(&client->wq_lock); guc_wq_item_append(client, engine->guc_id, ctx_desc, - ring_tail, rq->global_seqno); + ring_tail, rq->fence.seqno); guc_ring_doorbell(client); client->submissions[engine->id] += 1; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a820f32f0d70..d638ce9de089 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -172,12 +172,6 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) -{ - return (i915_ggtt_offset(engine->status_page.vma) + - I915_GEM_HWS_INDEX_ADDR); -} - static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine) { return (i915_ggtt_offset(engine->status_page.vma) + @@ -528,10 +522,9 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", engine->name, n, port[n].context_id, count, - rq->global_seqno, rq->fence.context, rq->fence.seqno, hwsp_seqno(rq), rq_prio(rq)); @@ -839,10 +832,9 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) while (num_ports-- && port_isset(port)) { struct i915_request *rq = port_request(port); - GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n", + GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n", rq->engine->name, (unsigned int)(port - execlists->port), - rq->global_seqno, rq->fence.context, rq->fence.seqno, hwsp_seqno(rq)); @@ -924,8 +916,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Mark all executing requests as skipped. */ list_for_each_entry(rq, &engine->timeline.requests, link) { - GEM_BUG_ON(!rq->global_seqno); - if (!i915_request_signaled(rq)) dma_fence_set_error(&rq->fence, -EIO); @@ -1064,10 +1054,9 @@ static void process_csb(struct intel_engine_cs *engine) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n", engine->name, port->context_id, count, - rq ? rq->global_seqno : 0, rq ? rq->fence.context : 0, rq ? rq->fence.seqno : 0, rq ? hwsp_seqno(rq) : 0, @@ -1938,10 +1927,7 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled) /* Following the reset, we need to reload the CSB read/write pointers */ reset_csb_pointers(&engine->execlists); - GEM_TRACE("%s seqno=%d, stalled? %s\n", - engine->name, - rq ? rq->global_seqno : 0, - yesno(stalled)); + GEM_TRACE("%s stalled? %s\n", engine->name, yesno(stalled)); if (!rq) goto out_unlock; @@ -2196,9 +2182,6 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) { - /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ - BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - cs = gen8_emit_ggtt_write(cs, request->fence.seqno, request->timeline->hwsp_offset); @@ -2207,10 +2190,6 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) intel_engine_next_hangcheck_seqno(request->engine), intel_hws_hangcheck_address(request->engine)); - cs = gen8_emit_ggtt_write(cs, - request->global_seqno, - intel_hws_seqno_address(request->engine)); - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -2236,11 +2215,6 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) intel_hws_hangcheck_address(request->engine), PIPE_CONTROL_CS_STALL); - cs = gen8_emit_ggtt_write_rcs(cs, - request->global_seqno, - intel_hws_seqno_address(request->engine), - PIPE_CONTROL_CS_STALL); - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2d59e2990448..1b96b0960adc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -49,12 +49,6 @@ static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine) I915_GEM_HWS_HANGCHECK_ADDR); } -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) -{ - return (i915_ggtt_offset(engine->status_page.vma) + - I915_GEM_HWS_INDEX_ADDR); -} - unsigned int intel_ring_update_space(struct intel_ring *ring) { unsigned int space; @@ -327,11 +321,6 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; - *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = rq->global_seqno; - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; @@ -438,13 +427,6 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = hws_hangcheck_address(rq->engine); *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = (PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL); - *cs++ = intel_hws_seqno_address(rq->engine); - *cs++ = rq->global_seqno; - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; @@ -467,11 +449,8 @@ static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = rq->global_seqno; - *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -495,10 +474,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = rq->global_seqno; - for (i = 0; i < GEN7_XCS_WA; i++) { *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR; @@ -510,7 +485,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = 0; *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -782,10 +756,8 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled) } } - GEM_TRACE("%s seqno=%d, stalled? %s\n", - engine->name, - rq ? rq->global_seqno : 0, - yesno(stalled)); + GEM_TRACE("%s stalled? %s\n", engine->name, yesno(stalled)); + /* * The guilty request will get skipped on a hung engine. * @@ -915,8 +887,6 @@ static void cancel_requests(struct intel_engine_cs *engine) /* Mark all submitted requests as skipped. */ list_for_each_entry(request, &engine->timeline.requests, link) { - GEM_BUG_ON(!request->global_seqno); - if (!i915_request_signaled(request)) dma_fence_set_error(&request->fence, -EIO); @@ -953,12 +923,7 @@ static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR; - *cs++ = rq->global_seqno; - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -976,10 +941,6 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = MI_FLUSH; - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR; - *cs++ = rq->fence.seqno; - *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); @@ -987,11 +948,12 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) BUILD_BUG_ON(GEN5_WA_STORES < 1); for (i = 0; i < GEN5_WA_STORES; i++) { *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR; - *cs++ = rq->global_seqno; + *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = rq->fence.seqno; } *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 551b3daa741c..de8dba7565b0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -724,8 +724,6 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) * * The area from dword 0x30 to 0x3ff is available for driver usage. */ -#define I915_GEM_HWS_INDEX 0x30 -#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32)) #define I915_GEM_HWS_PREEMPT 0x32 #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) #define I915_GEM_HWS_HANGCHECK 0x34 diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index a77e4bf1ab55..fa02cf9ce0cf 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -571,11 +571,10 @@ static int active_request_put(struct i915_request *rq) return 0; if (i915_request_wait(rq, 0, 5 * HZ) < 0) { - GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld, seqno %d.\n", + GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n", rq->engine->name, rq->fence.context, - rq->fence.seqno, - i915_request_global_seqno(rq)); + rq->fence.seqno); GEM_TRACE_DUMP(); i915_gem_set_wedged(rq->i915); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 79bf27606ab8..6f3fb803c747 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -196,7 +196,6 @@ static void mock_submit_request(struct i915_request *request) unsigned long flags; i915_request_submit(request); - GEM_BUG_ON(!request->global_seqno); spin_lock_irqsave(&engine->hw_lock, flags); list_add_tail(&mock->link, &engine->hw_queue);
Having weaned the interrupt handling off using a single global execution queue, we no longer need to emit a global_seqno. Note that we still have a few assumptions about execution order along engine timelines, but this removes the most obvious artefact! Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_gpu_error.c | 34 ++----------- drivers/gpu/drm/i915/i915_gpu_error.h | 2 - drivers/gpu/drm/i915/i915_request.c | 34 ++----------- drivers/gpu/drm/i915/i915_request.h | 32 ------------ drivers/gpu/drm/i915/i915_trace.h | 25 +++------- drivers/gpu/drm/i915/intel_engine_cs.c | 5 +- drivers/gpu/drm/i915/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 34 ++----------- drivers/gpu/drm/i915/intel_ringbuffer.c | 50 +++---------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 - .../gpu/drm/i915/selftests/intel_hangcheck.c | 5 +- drivers/gpu/drm/i915/selftests/mock_engine.c | 1 - 12 files changed, 32 insertions(+), 194 deletions(-)