| Message ID | 20170202103548.3406-1-chris@chris-wilson.co.uk (mailing list archive) |
|---|---|
| State | New, archived |
Thanks Chris. This is Xiao from the GVT-g team. GVT-g has been waiting for this patch for when GuC submission is enabled on the host/native side. GVT-g requires single submission to the GuC firmware (only one request in the GuC FW at any time) because we need to do additional setup/cleanup for the VM switch.

One thing worth mentioning: we need an 'execlists_context_status_change' notification before i915_guc_submit(), just as is done for the ELSP port write (see the sketch after this message). In any case, we can submit another patch on top of this one.

The question is: when will this patch be merged upstream?

> -----Original Message-----
> From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf Of Chris Wilson
> Sent: Thursday, February 2, 2017 6:36 PM
> To: intel-gfx@lists.freedesktop.org
> Subject: [Intel-gfx] [PATCH v8] drm/i915/scheduler: emulate a scheduler for guc
>
> This emulates execlists on top of the GuC in order to defer submission of
> requests to the hardware. This deferral allows time for high priority
> requests to gazump their way to the head of the queue, however it nerfs
> the GuC by converting it back into a simple execlist (where the CPU has
> to wake up after every request to feed new commands into the GuC).
>
> v2: Drop hack status - though iirc there is still a lockdep inversion
> between fence and engine->timeline->lock (which is impossible as the
> nesting only occurs on different fences - hopefully just requires some
> judicious lockdep annotation)
> v3: Apply lockdep nesting to enabling signaling on the request, using the
> pattern we already have in __i915_gem_request_submit();
> v4: Replaying requests after a hang also now needs the timeline spinlock,
> to disable the interrupts at least
> v5: Hold wq lock for completeness, and emit a tracepoint for enabling signal
> v6: Reorder interrupt checking for a happier gcc.
> v7: Only signal the tasklet after a user-interrupt if using guc scheduling
> v8: Restore lost update of rq through the i915_guc_irq_handler (Tvrtko)
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_guc_submission.c | 110 +++++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_irq.c            |  13 +++-
>  drivers/gpu/drm/i915/intel_lrc.c           |   5 +-
>  3 files changed, 114 insertions(+), 14 deletions(-)
>
> [diff snipped]
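For illustration only, here is a minimal sketch of the notification hook suggested above. It is not part of the posted patch: it assumes that execlists_context_status_change() (a static helper on the ELSP path in intel_lrc.c at this point) or an equivalent notifier were made visible to the GuC submission path, and the wrapper name i915_guc_submit_and_notify() is purely hypothetical.

```c
/* Hypothetical sketch, not part of this patch: notify context-status
 * listeners (e.g. GVT-g) before the request is handed to the GuC firmware,
 * mirroring what the execlists code does around the ELSP port write.
 * Assumes execlists_context_status_change() is made callable from here.
 */
static void i915_guc_submit_and_notify(struct drm_i915_gem_request *rq)
{
        /* Announce that this context is about to be scheduled in. */
        execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);

        __i915_gem_request_submit(rq);
        __i915_guc_submit(rq);
}
```

The matching INTEL_CONTEXT_SCHEDULE_OUT notification would presumably be issued from i915_guc_irq_handler() once a completed request is retired from the execlist port.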
Hi Chris,

Just checking with you on whether this patch will be merged, as it is essential for GVT-g once GuC submission is enabled.

> -----Original Message-----
> From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf Of Zheng, Xiao
> Sent: Monday, February 13, 2017 4:06 PM
> To: Chris Wilson <chris@chris-wilson.co.uk>; intel-gfx@lists.freedesktop.org
> Cc: Wang, Zhenyu Z <zhenyu.z.wang@intel.com>
> Subject: Re: [Intel-gfx] [PATCH v8] drm/i915/scheduler: emulate a scheduler for guc
>
> Thanks Chris.
> This is Xiao from the GVT-g team. GVT-g has been waiting for this patch
> for when GuC submission is enabled on the host/native side. GVT-g requires
> single submission to the GuC firmware (only one request in the GuC FW at
> any time) because we need to do additional setup/cleanup for the VM switch.
> One thing worth mentioning: we need an 'execlists_context_status_change'
> notification before i915_guc_submit(), just as is done for the ELSP port
> write. In any case, we can submit another patch on top of this one.
> The question is: when will this patch be merged upstream?
```diff
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 8ced9e26f075..d99185aafe58 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -25,6 +25,8 @@
 #include "i915_drv.h"
 #include "intel_uc.h"
 
+#include <trace/events/dma_fence.h>
+
 /**
  * DOC: GuC-based command submission
  *
@@ -348,7 +350,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
 	u32 freespace;
 	int ret;
 
-	spin_lock(&client->wq_lock);
+	spin_lock_irq(&client->wq_lock);
 	freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
 	freespace -= client->wq_rsvd;
 	if (likely(freespace >= wqi_size)) {
@@ -358,7 +360,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
 		client->no_wq_space++;
 		ret = -EAGAIN;
 	}
-	spin_unlock(&client->wq_lock);
+	spin_unlock_irq(&client->wq_lock);
 
 	return ret;
 }
@@ -370,9 +372,9 @@ void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
 
 	GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
 
-	spin_lock(&client->wq_lock);
+	spin_lock_irq(&client->wq_lock);
 	client->wq_rsvd -= wqi_size;
-	spin_unlock(&client->wq_lock);
+	spin_unlock_irq(&client->wq_lock);
 }
 
 /* Construct a Work Item and append it to the GuC's Work Queue */
@@ -532,10 +534,97 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
 
 static void i915_guc_submit(struct drm_i915_gem_request *rq)
 {
-	i915_gem_request_submit(rq);
+	__i915_gem_request_submit(rq);
 	__i915_guc_submit(rq);
 }
 
+static void nested_enable_signaling(struct drm_i915_gem_request *rq)
+{
+	/* If we use dma_fence_enable_sw_signaling() directly, lockdep
+	 * detects an ordering issue between the fence lockclass and the
+	 * global_timeline. This circular dependency can only occur via 2
+	 * different fences (but same fence lockclass), so we use the nesting
+	 * annotation here to prevent the warn, equivalent to the nesting
+	 * inside i915_gem_request_submit() for when we also enable the
+	 * signaler.
+	 */
+
+	if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+			     &rq->fence.flags))
+		return;
+
+	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
+	trace_dma_fence_enable_signal(&rq->fence);
+
+	spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
+	intel_engine_enable_signaling(rq);
+	spin_unlock(&rq->lock);
+}
+
+static bool i915_guc_dequeue(struct intel_engine_cs *engine)
+{
+	struct execlist_port *port = engine->execlist_port;
+	struct drm_i915_gem_request *last = port[0].request;
+	unsigned long flags;
+	struct rb_node *rb;
+	bool submit = false;
+
+	spin_lock_irqsave(&engine->timeline->lock, flags);
+	rb = engine->execlist_first;
+	while (rb) {
+		struct drm_i915_gem_request *cursor =
+			rb_entry(rb, typeof(*cursor), priotree.node);
+
+		if (last && cursor->ctx != last->ctx) {
+			if (port != engine->execlist_port)
+				break;
+
+			i915_gem_request_assign(&port->request, last);
+			nested_enable_signaling(last);
+			port++;
+		}
+
+		rb = rb_next(rb);
+		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
+		RB_CLEAR_NODE(&cursor->priotree.node);
+		cursor->priotree.priority = INT_MAX;
+
+		i915_guc_submit(cursor);
+		last = cursor;
+		submit = true;
+	}
+	if (submit) {
+		i915_gem_request_assign(&port->request, last);
+		nested_enable_signaling(last);
+		engine->execlist_first = rb;
+	}
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+
+	return submit;
+}
+
+static void i915_guc_irq_handler(unsigned long data)
+{
+	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+	struct execlist_port *port = engine->execlist_port;
+	struct drm_i915_gem_request *rq;
+	bool submit;
+
+	do {
+		rq = port[0].request;
+		while (rq && i915_gem_request_completed(rq)) {
+			i915_gem_request_put(rq);
+			port[0].request = port[1].request;
+			port[1].request = NULL;
+			rq = port[0].request;
+		}
+
+		submit = false;
+		if (!port[1].request)
+			submit = i915_guc_dequeue(engine);
+	} while (submit);
+}
+
 /*
  * Everything below here is concerned with setup & teardown, and is
  * therefore not part of the somewhat time-critical batch-submission
@@ -944,15 +1033,22 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 	/* Take over from manual control of ELSP (execlists) */
 	for_each_engine(engine, dev_priv, id) {
 		struct drm_i915_gem_request *rq;
+		unsigned long flags;
 
-		engine->submit_request = i915_guc_submit;
-		engine->schedule = NULL;
+		tasklet_init(&engine->irq_tasklet,
+			     i915_guc_irq_handler,
+			     (unsigned long)engine);
 
 		/* Replay the current set of previously submitted requests */
+		spin_lock_irqsave(&engine->timeline->lock, flags);
 		list_for_each_entry(rq, &engine->timeline->requests, link) {
+			spin_lock(&client->wq_lock);
 			client->wq_rsvd += sizeof(struct guc_wq_item);
+			spin_unlock(&client->wq_lock);
+
 			__i915_guc_submit(rq);
 		}
+		spin_unlock_irqrestore(&engine->timeline->lock, flags);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 436b5baf38c2..72f9497518bc 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1350,13 +1350,20 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 static __always_inline void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 {
-	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
-		notify_ring(engine);
+	bool tasklet = false;
 
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
 		set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-		tasklet_hi_schedule(&engine->irq_tasklet);
+		tasklet = true;
 	}
+
+	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
+		notify_ring(engine);
+		tasklet |= i915.enable_guc_submission;
+	}
+
+	if (tasklet)
+		tasklet_hi_schedule(&engine->irq_tasklet);
 }
 
 static irqreturn_t gen8_gt_irq_ack(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 44a92ea464ba..c81d86afb52f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1278,7 +1278,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 
 	/* After a GPU reset, we may have requests to replay */
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-	if (!execlists_elsp_idle(engine)) {
+	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
 		engine->execlist_port[0].count = 0;
 		engine->execlist_port[1].count = 0;
 		execlists_submit_ports(engine);
@@ -1345,9 +1345,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	request->ring->last_retired_head = -1;
 	intel_ring_update_space(request->ring);
 
-	if (i915.enable_guc_submission)
-		return;
-
 	/* Catch up with any missed context-switch interrupts */
 	if (request->ctx != port[0].request->ctx) {
 		i915_gem_request_put(port[0].request);
```