Message ID | 20180320001848.4405-5-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
On Tue, Mar 20, 2018 at 12:18:48AM +0000, Chris Wilson wrote: > Catch up with the inflight CSB events, after disabling the tasklet > before deciding which request was truly guilty of hanging the GPU. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Michał Winiarski <michal.winiarski@intel.com> > CC: Michel Thierry <michel.thierry@intel.com> > Cc: Jeff McGee <jeff.mcgee@intel.com> > --- > drivers/gpu/drm/i915/intel_lrc.c | 355 ++++++++++++++++++++++----------------- > 1 file changed, 197 insertions(+), 158 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index e5a3ffbc273a..beb81f13a3cc 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -828,186 +828,192 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) > local_irq_restore(flags); > } > > -/* > - * Check the unread Context Status Buffers and manage the submission of new > - * contexts to the ELSP accordingly. > - */ > -static void execlists_submission_tasklet(unsigned long data) > +static void process_csb(struct intel_engine_cs *engine) > { > - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; > struct intel_engine_execlists * const execlists = &engine->execlists; > struct execlist_port * const port = execlists->port; > - struct drm_i915_private *dev_priv = engine->i915; > + struct drm_i915_private *i915 = engine->i915; > + unsigned int head, tail; > + const u32 *buf; > bool fw = false; > > - /* We can skip acquiring intel_runtime_pm_get() here as it was taken > - * on our behalf by the request (see i915_gem_mark_busy()) and it will > - * not be relinquished until the device is idle (see > - * i915_gem_idle_work_handler()). As a precaution, we make sure > - * that all ELSP are drained i.e. we have processed the CSB, > - * before allowing ourselves to idle and calling intel_runtime_pm_put(). > - */ > - GEM_BUG_ON(!dev_priv->gt.awake); > + if (unlikely(execlists->csb_use_mmio)) { > + buf = (u32 * __force) > + (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); > + execlists->csb_head = -1; /* force mmio read of CSB ptrs */ > + } else { > + /* The HWSP contains a (cacheable) mirror of the CSB */ > + buf = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; > + } > > - /* Prefer doing test_and_clear_bit() as a two stage operation to avoid > - * imposing the cost of a locked atomic transaction when submitting a > - * new request (outside of the context-switch interrupt). > + /* > + * The write will be ordered by the uncached read (itself > + * a memory barrier), so we do not need another in the form > + * of a locked instruction. The race between the interrupt > + * handler and the split test/clear is harmless as we order > + * our clear before the CSB read. If the interrupt arrived > + * first between the test and the clear, we read the updated > + * CSB and clear the bit. If the interrupt arrives as we read > + * the CSB or later (i.e. after we had cleared the bit) the bit > + * is set and we do a new loop. 
> */ > - while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { > - /* The HWSP contains a (cacheable) mirror of the CSB */ > - const u32 *buf = > - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; > - unsigned int head, tail; > - > - if (unlikely(execlists->csb_use_mmio)) { > - buf = (u32 * __force) > - (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); > - execlists->csb_head = -1; /* force mmio read of CSB ptrs */ > - } > + __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > + if (unlikely(execlists->csb_head == -1)) { /* following a reset */ > + intel_uncore_forcewake_get(i915, execlists->fw_domains); > + fw = true; > + > + head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > + tail = GEN8_CSB_WRITE_PTR(head); > + head = GEN8_CSB_READ_PTR(head); > + execlists->csb_head = head; > + } else { > + const int write_idx = > + intel_hws_csb_write_index(i915) - > + I915_HWS_CSB_BUF0_INDEX; > + > + head = execlists->csb_head; > + tail = READ_ONCE(buf[write_idx]); > + } > + GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", > + engine->name, > + head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?", > + tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?"); > + > + while (head != tail) { > + struct i915_request *rq; > + unsigned int status; > + unsigned int count; > + > + if (++head == GEN8_CSB_ENTRIES) > + head = 0; > > - /* The write will be ordered by the uncached read (itself > - * a memory barrier), so we do not need another in the form > - * of a locked instruction. The race between the interrupt > - * handler and the split test/clear is harmless as we order > - * our clear before the CSB read. If the interrupt arrived > - * first between the test and the clear, we read the updated > - * CSB and clear the bit. If the interrupt arrives as we read > - * the CSB or later (i.e. after we had cleared the bit) the bit > - * is set and we do a new loop. > + /* > + * We are flying near dragons again. > + * > + * We hold a reference to the request in execlist_port[] > + * but no more than that. We are operating in softirq > + * context and so cannot hold any mutex or sleep. That > + * prevents us stopping the requests we are processing > + * in port[] from being retired simultaneously (the > + * breadcrumb will be complete before we see the > + * context-switch). As we only hold the reference to the > + * request, any pointer chasing underneath the request > + * is subject to a potential use-after-free. Thus we > + * store all of the bookkeeping within port[] as > + * required, and avoid using unguarded pointers beneath > + * request itself. The same applies to the atomic > + * status notifier. > */ > - __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > - if (unlikely(execlists->csb_head == -1)) { /* following a reset */ > - if (!fw) { > - intel_uncore_forcewake_get(dev_priv, > - execlists->fw_domains); > - fw = true; > - } > > - head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > - tail = GEN8_CSB_WRITE_PTR(head); > - head = GEN8_CSB_READ_PTR(head); > - execlists->csb_head = head; > - } else { > - const int write_idx = > - intel_hws_csb_write_index(dev_priv) - > - I915_HWS_CSB_BUF0_INDEX; > + status = READ_ONCE(buf[2 * head]); /* maybe mmio! 
*/ > + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", > + engine->name, head, > + status, buf[2*head + 1], > + execlists->active); > + > + if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | > + GEN8_CTX_STATUS_PREEMPTED)) > + execlists_set_active(execlists, > + EXECLISTS_ACTIVE_HWACK); > + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE) > + execlists_clear_active(execlists, > + EXECLISTS_ACTIVE_HWACK); > + > + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) > + continue; > > - head = execlists->csb_head; > - tail = READ_ONCE(buf[write_idx]); > - } > - GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", > - engine->name, > - head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?", > - tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?"); > + /* We should never get a COMPLETED | IDLE_ACTIVE! */ > + GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); > > - while (head != tail) { > - struct i915_request *rq; > - unsigned int status; > - unsigned int count; > + if (status & GEN8_CTX_STATUS_COMPLETE && > + buf[2*head + 1] == execlists->preempt_complete_status) { > + GEM_TRACE("%s preempt-idle\n", engine->name); > + complete_preempt_context(execlists); > + continue; > + } > > - if (++head == GEN8_CSB_ENTRIES) > - head = 0; > + if (status & GEN8_CTX_STATUS_PREEMPTED && > + execlists_is_active(execlists, > + EXECLISTS_ACTIVE_PREEMPT)) > + continue; > > - /* We are flying near dragons again. > - * > - * We hold a reference to the request in execlist_port[] > - * but no more than that. We are operating in softirq > - * context and so cannot hold any mutex or sleep. That > - * prevents us stopping the requests we are processing > - * in port[] from being retired simultaneously (the > - * breadcrumb will be complete before we see the > - * context-switch). As we only hold the reference to the > - * request, any pointer chasing underneath the request > - * is subject to a potential use-after-free. Thus we > - * store all of the bookkeeping within port[] as > - * required, and avoid using unguarded pointers beneath > - * request itself. The same applies to the atomic > - * status notifier. > - */ > + GEM_BUG_ON(!execlists_is_active(execlists, > + EXECLISTS_ACTIVE_USER)); > > - status = READ_ONCE(buf[2 * head]); /* maybe mmio! */ > - GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", > - engine->name, head, > - status, buf[2*head + 1], > - execlists->active); > - > - if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | > - GEN8_CTX_STATUS_PREEMPTED)) > - execlists_set_active(execlists, > - EXECLISTS_ACTIVE_HWACK); > - if (status & GEN8_CTX_STATUS_ACTIVE_IDLE) > - execlists_clear_active(execlists, > - EXECLISTS_ACTIVE_HWACK); > - > - if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) > - continue; > + rq = port_unpack(port, &count); > + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n", > + engine->name, > + port->context_id, count, > + rq ? rq->global_seqno : 0, > + rq ? 
rq_prio(rq) : 0); > + > + /* Check the context/desc id for this event matches */ > + GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); > + > + GEM_BUG_ON(count == 0); > + if (--count == 0) { > + GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); > + GEM_BUG_ON(port_isset(&port[1]) && > + !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); > + GEM_BUG_ON(!i915_request_completed(rq)); > + execlists_context_schedule_out(rq); > + trace_i915_request_out(rq); > + i915_request_put(rq); > + > + GEM_TRACE("%s completed ctx=%d\n", > + engine->name, port->context_id); > + > + execlists_port_complete(execlists, port); > + } else { > + port_set(port, port_pack(rq, count)); > + } > > - /* We should never get a COMPLETED | IDLE_ACTIVE! */ > - GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); > + /* After the final element, the hw should be idle */ > + GEM_BUG_ON(port_count(port) == 0 && > + !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); > + if (port_count(port) == 0) > + execlists_clear_active(execlists, > + EXECLISTS_ACTIVE_USER); > + } > > - if (status & GEN8_CTX_STATUS_COMPLETE && > - buf[2*head + 1] == execlists->preempt_complete_status) { > - GEM_TRACE("%s preempt-idle\n", engine->name); > - complete_preempt_context(execlists); > - continue; > - } > + if (head != execlists->csb_head) { > + execlists->csb_head = head; > + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), > + i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > + } > > - if (status & GEN8_CTX_STATUS_PREEMPTED && > - execlists_is_active(execlists, > - EXECLISTS_ACTIVE_PREEMPT)) > - continue; > + if (unlikely(fw)) > + intel_uncore_forcewake_put(i915, execlists->fw_domains); > +} > > - GEM_BUG_ON(!execlists_is_active(execlists, > - EXECLISTS_ACTIVE_USER)); > - > - rq = port_unpack(port, &count); > - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n", > - engine->name, > - port->context_id, count, > - rq ? rq->global_seqno : 0, > - rq ? rq_prio(rq) : 0); > - > - /* Check the context/desc id for this event matches */ > - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); > - > - GEM_BUG_ON(count == 0); > - if (--count == 0) { > - GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); > - GEM_BUG_ON(port_isset(&port[1]) && > - !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); > - GEM_BUG_ON(!i915_request_completed(rq)); > - execlists_context_schedule_out(rq); > - trace_i915_request_out(rq); > - i915_request_put(rq); > - > - GEM_TRACE("%s completed ctx=%d\n", > - engine->name, port->context_id); > - > - execlists_port_complete(execlists, port); > - } else { > - port_set(port, port_pack(rq, count)); > - } > +/* > + * Check the unread Context Status Buffers and manage the submission of new > + * contexts to the ELSP accordingly. > + */ > +static void execlists_submission_tasklet(unsigned long data) > +{ > + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; > > - /* After the final element, the hw should be idle */ > - GEM_BUG_ON(port_count(port) == 0 && > - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); > - if (port_count(port) == 0) > - execlists_clear_active(execlists, > - EXECLISTS_ACTIVE_USER); > - } > + /* > + * We can skip acquiring intel_runtime_pm_get() here as it was taken > + * on our behalf by the request (see i915_gem_mark_busy()) and it will > + * not be relinquished until the device is idle (see > + * i915_gem_idle_work_handler()). As a precaution, we make sure > + * that all ELSP are drained i.e. we have processed the CSB, > + * before allowing ourselves to idle and calling intel_runtime_pm_put(). 
> + */ > + GEM_BUG_ON(!engine->i915->gt.awake); > > - if (head != execlists->csb_head) { > - execlists->csb_head = head; > - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), > - dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > - } > - } > + /* > + * Prefer doing test_and_clear_bit() as a two stage operation to avoid > + * imposing the cost of a locked atomic transaction when submitting a > + * new request (outside of the context-switch interrupt). > + */ > + if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) This was a 'while' before refactoring. Shouldn't it still be in order to catch new irq_posted during processing? BTW, I have revised and rebased the force preemption patches on drm-tip with this and the other related patches you have proposed. I just started running my IGT test that covers the innocent context on port[1] scenario that we discussed on IRC. Getting a sporadic failure but need more time to root cause. Will update soon. > + process_csb(engine); > > - if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) > + if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT)) > execlists_dequeue(engine); > - > - if (fw) > - intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); > } > > static void queue_request(struct intel_engine_cs *engine, > @@ -1667,6 +1673,7 @@ static struct i915_request * > execlists_reset_prepare(struct intel_engine_cs *engine) > { > struct intel_engine_execlists * const execlists = &engine->execlists; > + struct i915_request *request, *active; > > /* > * Prevent request submission to the hardware until we have > @@ -1688,7 +1695,39 @@ execlists_reset_prepare(struct intel_engine_cs *engine) > tasklet_kill(&execlists->tasklet); > tasklet_disable(&execlists->tasklet); > > - return i915_gem_find_active_request(engine); > + /* > + * We want to flush the pending context switches, having disabled > + * the tasklet above, we can assume exclusive access to the execlists. > + * For this allows us to catch up with an inflight preemption event, > + * and avoid blaming an innocent request if the stall was due to the > + * preemption itself. > + */ > + process_csb(engine); > + > + /* > + * The last active request can then be no later than the last request > + * now in ELSP[0]. So search backwards from there, so that is the GPU > + * has advanced beyond the last CSB update, it will be pardoned. > + */ > + active = NULL; > + request = port_request(execlists->port); > + if (request) { > + unsigned long flags; > + > + spin_lock_irqsave(&engine->timeline->lock, flags); > + list_for_each_entry_from_reverse(request, > + &engine->timeline->requests, > + link) { > + if (__i915_request_completed(request, > + request->global_seqno)) > + break; > + > + active = request; > + } > + spin_unlock_irqrestore(&engine->timeline->lock, flags); > + } > + > + return active; > } > > static void reset_irq(struct intel_engine_cs *engine) > -- > 2.16.2 >
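For illustration, a minimal sketch of the alternative being asked about above (keeping the 'while' by looping on irq_posted around the extracted helper instead of testing it once) could look like the following. It only reuses names that appear in the quoted patch, such as process_csb() and ENGINE_IRQ_EXECLIST, and is a sketch of the question rather than code from the series.

```c
static void execlists_submission_tasklet(unsigned long data)
{
	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;

	GEM_BUG_ON(!engine->i915->gt.awake);

	/*
	 * Re-test irq_posted after each pass so that a context-switch
	 * interrupt arriving while we drain the CSB is handled now rather
	 * than waiting for the tasklet to be scheduled again.
	 */
	while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
		process_csb(engine);

	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
		execlists_dequeue(engine);
}
```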
On Tue, Mar 20, 2018 at 05:17:34PM -0700, Jeff McGee wrote: > On Tue, Mar 20, 2018 at 12:18:48AM +0000, Chris Wilson wrote: > > Catch up with the inflight CSB events, after disabling the tasklet > > before deciding which request was truly guilty of hanging the GPU. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Cc: Michał Winiarski <michal.winiarski@intel.com> > > CC: Michel Thierry <michel.thierry@intel.com> > > Cc: Jeff McGee <jeff.mcgee@intel.com> > > --- > > drivers/gpu/drm/i915/intel_lrc.c | 355 ++++++++++++++++++++++----------------- > > 1 file changed, 197 insertions(+), 158 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > > index e5a3ffbc273a..beb81f13a3cc 100644 > > --- a/drivers/gpu/drm/i915/intel_lrc.c > > +++ b/drivers/gpu/drm/i915/intel_lrc.c > > @@ -828,186 +828,192 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) > > local_irq_restore(flags); > > } > > > > -/* > > - * Check the unread Context Status Buffers and manage the submission of new > > - * contexts to the ELSP accordingly. > > - */ > > -static void execlists_submission_tasklet(unsigned long data) > > +static void process_csb(struct intel_engine_cs *engine) > > { > > - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; > > struct intel_engine_execlists * const execlists = &engine->execlists; > > struct execlist_port * const port = execlists->port; > > - struct drm_i915_private *dev_priv = engine->i915; > > + struct drm_i915_private *i915 = engine->i915; > > + unsigned int head, tail; > > + const u32 *buf; > > bool fw = false; > > > > - /* We can skip acquiring intel_runtime_pm_get() here as it was taken > > - * on our behalf by the request (see i915_gem_mark_busy()) and it will > > - * not be relinquished until the device is idle (see > > - * i915_gem_idle_work_handler()). As a precaution, we make sure > > - * that all ELSP are drained i.e. we have processed the CSB, > > - * before allowing ourselves to idle and calling intel_runtime_pm_put(). > > - */ > > - GEM_BUG_ON(!dev_priv->gt.awake); > > + if (unlikely(execlists->csb_use_mmio)) { > > + buf = (u32 * __force) > > + (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); > > + execlists->csb_head = -1; /* force mmio read of CSB ptrs */ > > + } else { > > + /* The HWSP contains a (cacheable) mirror of the CSB */ > > + buf = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; > > + } > > > > - /* Prefer doing test_and_clear_bit() as a two stage operation to avoid > > - * imposing the cost of a locked atomic transaction when submitting a > > - * new request (outside of the context-switch interrupt). > > + /* > > + * The write will be ordered by the uncached read (itself > > + * a memory barrier), so we do not need another in the form > > + * of a locked instruction. The race between the interrupt > > + * handler and the split test/clear is harmless as we order > > + * our clear before the CSB read. If the interrupt arrived > > + * first between the test and the clear, we read the updated > > + * CSB and clear the bit. If the interrupt arrives as we read > > + * the CSB or later (i.e. after we had cleared the bit) the bit > > + * is set and we do a new loop. 
> > */ > > - while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { > > - /* The HWSP contains a (cacheable) mirror of the CSB */ > > - const u32 *buf = > > - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; > > - unsigned int head, tail; > > - > > - if (unlikely(execlists->csb_use_mmio)) { > > - buf = (u32 * __force) > > - (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); > > - execlists->csb_head = -1; /* force mmio read of CSB ptrs */ > > - } > > + __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > > + if (unlikely(execlists->csb_head == -1)) { /* following a reset */ > > + intel_uncore_forcewake_get(i915, execlists->fw_domains); > > + fw = true; > > + > > + head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > > + tail = GEN8_CSB_WRITE_PTR(head); > > + head = GEN8_CSB_READ_PTR(head); > > + execlists->csb_head = head; > > + } else { > > + const int write_idx = > > + intel_hws_csb_write_index(i915) - > > + I915_HWS_CSB_BUF0_INDEX; > > + > > + head = execlists->csb_head; > > + tail = READ_ONCE(buf[write_idx]); > > + } > > + GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", > > + engine->name, > > + head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?", > > + tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?"); > > + > > + while (head != tail) { > > + struct i915_request *rq; > > + unsigned int status; > > + unsigned int count; > > + > > + if (++head == GEN8_CSB_ENTRIES) > > + head = 0; > > > > - /* The write will be ordered by the uncached read (itself > > - * a memory barrier), so we do not need another in the form > > - * of a locked instruction. The race between the interrupt > > - * handler and the split test/clear is harmless as we order > > - * our clear before the CSB read. If the interrupt arrived > > - * first between the test and the clear, we read the updated > > - * CSB and clear the bit. If the interrupt arrives as we read > > - * the CSB or later (i.e. after we had cleared the bit) the bit > > - * is set and we do a new loop. > > + /* > > + * We are flying near dragons again. > > + * > > + * We hold a reference to the request in execlist_port[] > > + * but no more than that. We are operating in softirq > > + * context and so cannot hold any mutex or sleep. That > > + * prevents us stopping the requests we are processing > > + * in port[] from being retired simultaneously (the > > + * breadcrumb will be complete before we see the > > + * context-switch). As we only hold the reference to the > > + * request, any pointer chasing underneath the request > > + * is subject to a potential use-after-free. Thus we > > + * store all of the bookkeeping within port[] as > > + * required, and avoid using unguarded pointers beneath > > + * request itself. The same applies to the atomic > > + * status notifier. 
> > */ > > - __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > > - if (unlikely(execlists->csb_head == -1)) { /* following a reset */ > > - if (!fw) { > > - intel_uncore_forcewake_get(dev_priv, > > - execlists->fw_domains); > > - fw = true; > > - } > > > > - head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > > - tail = GEN8_CSB_WRITE_PTR(head); > > - head = GEN8_CSB_READ_PTR(head); > > - execlists->csb_head = head; > > - } else { > > - const int write_idx = > > - intel_hws_csb_write_index(dev_priv) - > > - I915_HWS_CSB_BUF0_INDEX; > > + status = READ_ONCE(buf[2 * head]); /* maybe mmio! */ > > + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", > > + engine->name, head, > > + status, buf[2*head + 1], > > + execlists->active); > > + > > + if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | > > + GEN8_CTX_STATUS_PREEMPTED)) > > + execlists_set_active(execlists, > > + EXECLISTS_ACTIVE_HWACK); > > + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE) > > + execlists_clear_active(execlists, > > + EXECLISTS_ACTIVE_HWACK); > > + > > + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) > > + continue; > > > > - head = execlists->csb_head; > > - tail = READ_ONCE(buf[write_idx]); > > - } > > - GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", > > - engine->name, > > - head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?", > > - tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?"); > > + /* We should never get a COMPLETED | IDLE_ACTIVE! */ > > + GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); > > > > - while (head != tail) { > > - struct i915_request *rq; > > - unsigned int status; > > - unsigned int count; > > + if (status & GEN8_CTX_STATUS_COMPLETE && > > + buf[2*head + 1] == execlists->preempt_complete_status) { > > + GEM_TRACE("%s preempt-idle\n", engine->name); > > + complete_preempt_context(execlists); > > + continue; > > + } > > > > - if (++head == GEN8_CSB_ENTRIES) > > - head = 0; > > + if (status & GEN8_CTX_STATUS_PREEMPTED && > > + execlists_is_active(execlists, > > + EXECLISTS_ACTIVE_PREEMPT)) > > + continue; > > > > - /* We are flying near dragons again. > > - * > > - * We hold a reference to the request in execlist_port[] > > - * but no more than that. We are operating in softirq > > - * context and so cannot hold any mutex or sleep. That > > - * prevents us stopping the requests we are processing > > - * in port[] from being retired simultaneously (the > > - * breadcrumb will be complete before we see the > > - * context-switch). As we only hold the reference to the > > - * request, any pointer chasing underneath the request > > - * is subject to a potential use-after-free. Thus we > > - * store all of the bookkeeping within port[] as > > - * required, and avoid using unguarded pointers beneath > > - * request itself. The same applies to the atomic > > - * status notifier. > > - */ > > + GEM_BUG_ON(!execlists_is_active(execlists, > > + EXECLISTS_ACTIVE_USER)); > > > > - status = READ_ONCE(buf[2 * head]); /* maybe mmio! 
*/ > > - GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", > > - engine->name, head, > > - status, buf[2*head + 1], > > - execlists->active); > > - > > - if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | > > - GEN8_CTX_STATUS_PREEMPTED)) > > - execlists_set_active(execlists, > > - EXECLISTS_ACTIVE_HWACK); > > - if (status & GEN8_CTX_STATUS_ACTIVE_IDLE) > > - execlists_clear_active(execlists, > > - EXECLISTS_ACTIVE_HWACK); > > - > > - if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) > > - continue; > > + rq = port_unpack(port, &count); > > + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n", > > + engine->name, > > + port->context_id, count, > > + rq ? rq->global_seqno : 0, > > + rq ? rq_prio(rq) : 0); > > + > > + /* Check the context/desc id for this event matches */ > > + GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); > > + > > + GEM_BUG_ON(count == 0); > > + if (--count == 0) { > > + GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); > > + GEM_BUG_ON(port_isset(&port[1]) && > > + !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); > > + GEM_BUG_ON(!i915_request_completed(rq)); > > + execlists_context_schedule_out(rq); > > + trace_i915_request_out(rq); > > + i915_request_put(rq); > > + > > + GEM_TRACE("%s completed ctx=%d\n", > > + engine->name, port->context_id); > > + > > + execlists_port_complete(execlists, port); > > + } else { > > + port_set(port, port_pack(rq, count)); > > + } > > > > - /* We should never get a COMPLETED | IDLE_ACTIVE! */ > > - GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); > > + /* After the final element, the hw should be idle */ > > + GEM_BUG_ON(port_count(port) == 0 && > > + !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); > > + if (port_count(port) == 0) > > + execlists_clear_active(execlists, > > + EXECLISTS_ACTIVE_USER); > > + } > > > > - if (status & GEN8_CTX_STATUS_COMPLETE && > > - buf[2*head + 1] == execlists->preempt_complete_status) { > > - GEM_TRACE("%s preempt-idle\n", engine->name); > > - complete_preempt_context(execlists); > > - continue; > > - } > > + if (head != execlists->csb_head) { > > + execlists->csb_head = head; > > + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), > > + i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > > + } > > > > - if (status & GEN8_CTX_STATUS_PREEMPTED && > > - execlists_is_active(execlists, > > - EXECLISTS_ACTIVE_PREEMPT)) > > - continue; > > + if (unlikely(fw)) > > + intel_uncore_forcewake_put(i915, execlists->fw_domains); > > +} > > > > - GEM_BUG_ON(!execlists_is_active(execlists, > > - EXECLISTS_ACTIVE_USER)); > > - > > - rq = port_unpack(port, &count); > > - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n", > > - engine->name, > > - port->context_id, count, > > - rq ? rq->global_seqno : 0, > > - rq ? 
rq_prio(rq) : 0); > > - > > - /* Check the context/desc id for this event matches */ > > - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); > > - > > - GEM_BUG_ON(count == 0); > > - if (--count == 0) { > > - GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); > > - GEM_BUG_ON(port_isset(&port[1]) && > > - !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); > > - GEM_BUG_ON(!i915_request_completed(rq)); > > - execlists_context_schedule_out(rq); > > - trace_i915_request_out(rq); > > - i915_request_put(rq); > > - > > - GEM_TRACE("%s completed ctx=%d\n", > > - engine->name, port->context_id); > > - > > - execlists_port_complete(execlists, port); > > - } else { > > - port_set(port, port_pack(rq, count)); > > - } > > +/* > > + * Check the unread Context Status Buffers and manage the submission of new > > + * contexts to the ELSP accordingly. > > + */ > > +static void execlists_submission_tasklet(unsigned long data) > > +{ > > + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; > > > > - /* After the final element, the hw should be idle */ > > - GEM_BUG_ON(port_count(port) == 0 && > > - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); > > - if (port_count(port) == 0) > > - execlists_clear_active(execlists, > > - EXECLISTS_ACTIVE_USER); > > - } > > + /* > > + * We can skip acquiring intel_runtime_pm_get() here as it was taken > > + * on our behalf by the request (see i915_gem_mark_busy()) and it will > > + * not be relinquished until the device is idle (see > > + * i915_gem_idle_work_handler()). As a precaution, we make sure > > + * that all ELSP are drained i.e. we have processed the CSB, > > + * before allowing ourselves to idle and calling intel_runtime_pm_put(). > > + */ > > + GEM_BUG_ON(!engine->i915->gt.awake); > > > > - if (head != execlists->csb_head) { > > - execlists->csb_head = head; > > - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), > > - dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > > - } > > - } > > + /* > > + * Prefer doing test_and_clear_bit() as a two stage operation to avoid > > + * imposing the cost of a locked atomic transaction when submitting a > > + * new request (outside of the context-switch interrupt). > > + */ > > + if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) > This was a 'while' before refactoring. Shouldn't it still be in order to catch > new irq_posted during processing? > > BTW, I have revised and rebased the force preemption patches on drm-tip with > this and the other related patches you have proposed. I just started running > my IGT test that covers the innocent context on port[1] scenario that we > discussed on IRC. Getting a sporadic failure but need more time to root cause. > Will update soon. 
> > > + process_csb(engine); > > > > - if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) > > + if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT)) > > execlists_dequeue(engine); > > - > > - if (fw) > > - intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); > > } > > > > static void queue_request(struct intel_engine_cs *engine, > > @@ -1667,6 +1673,7 @@ static struct i915_request * > > execlists_reset_prepare(struct intel_engine_cs *engine) > > { > > struct intel_engine_execlists * const execlists = &engine->execlists; > > + struct i915_request *request, *active; > > > > /* > > * Prevent request submission to the hardware until we have > > @@ -1688,7 +1695,39 @@ execlists_reset_prepare(struct intel_engine_cs *engine) > > tasklet_kill(&execlists->tasklet); > > tasklet_disable(&execlists->tasklet); > > > > - return i915_gem_find_active_request(engine); > > + /* > > + * We want to flush the pending context switches, having disabled > > + * the tasklet above, we can assume exclusive access to the execlists. > > + * For this allows us to catch up with an inflight preemption event, > > + * and avoid blaming an innocent request if the stall was due to the > > + * preemption itself. > > + */ > > + process_csb(engine); > > + > > + /* > > + * The last active request can then be no later than the last request > > + * now in ELSP[0]. So search backwards from there, so that is the GPU > > + * has advanced beyond the last CSB update, it will be pardoned. > > + */ > > + active = NULL; > > + request = port_request(execlists->port); > > + if (request) { > > + unsigned long flags; > > + > > + spin_lock_irqsave(&engine->timeline->lock, flags); > > + list_for_each_entry_from_reverse(request, > > + &engine->timeline->requests, > > + link) { > > + if (__i915_request_completed(request, > > + request->global_seqno)) > > + break; > > + > > + active = request; > > + } > > + spin_unlock_irqrestore(&engine->timeline->lock, flags); > > + } > > + > > + return active; Now we can see an abort of the reset if process_csb() processes a completed preemption. So we need to kick the tasklet to get a dequeue of the waiting request to happen. Currently we only kick if needed in gen8_init_common_ring which is skipped if we abort the reset. Otherwise this is working well in my force preemption tests. -Jeff > > } > > > > static void reset_irq(struct intel_engine_cs *engine) > > -- > > 2.16.2 > > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
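For illustration, one way to get the missing dequeue after an aborted reset would be to kick the tasklet when the reset is unwound, for example from a reset_finish-style hook once the tasklet is re-enabled. The hook name and its placement are assumptions; only tasklet_enable()/tasklet_hi_schedule() and the execlists fields used in the quoted patch are taken as given.

```c
static void execlists_reset_finish(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;

	tasklet_enable(&execlists->tasklet);

	/*
	 * If process_csb() in execlists_reset_prepare() retired a completed
	 * preemption and the reset was then skipped, nothing else will
	 * resubmit the queued requests, so run the tasklet at least once.
	 */
	tasklet_hi_schedule(&execlists->tasklet);
}
```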
Quoting Jeff McGee (2018-03-22 15:14:01) > On Tue, Mar 20, 2018 at 05:17:34PM -0700, Jeff McGee wrote: > > On Tue, Mar 20, 2018 at 12:18:48AM +0000, Chris Wilson wrote: > > > Catch up with the inflight CSB events, after disabling the tasklet > > > before deciding which request was truly guilty of hanging the GPU. > > > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > > Cc: Michał Winiarski <michal.winiarski@intel.com> > > > CC: Michel Thierry <michel.thierry@intel.com> > > > Cc: Jeff McGee <jeff.mcgee@intel.com> > > > --- > > > drivers/gpu/drm/i915/intel_lrc.c | 355 ++++++++++++++++++++++----------------- > > > 1 file changed, 197 insertions(+), 158 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > > > index e5a3ffbc273a..beb81f13a3cc 100644 > > > --- a/drivers/gpu/drm/i915/intel_lrc.c > > > +++ b/drivers/gpu/drm/i915/intel_lrc.c > > > @@ -828,186 +828,192 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) > > > local_irq_restore(flags); > > > } > > > > > > -/* > > > - * Check the unread Context Status Buffers and manage the submission of new > > > - * contexts to the ELSP accordingly. > > > - */ > > > -static void execlists_submission_tasklet(unsigned long data) > > > +static void process_csb(struct intel_engine_cs *engine) > > > { > > > - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; > > > struct intel_engine_execlists * const execlists = &engine->execlists; > > > struct execlist_port * const port = execlists->port; > > > - struct drm_i915_private *dev_priv = engine->i915; > > > + struct drm_i915_private *i915 = engine->i915; > > > + unsigned int head, tail; > > > + const u32 *buf; > > > bool fw = false; > > > > > > - /* We can skip acquiring intel_runtime_pm_get() here as it was taken > > > - * on our behalf by the request (see i915_gem_mark_busy()) and it will > > > - * not be relinquished until the device is idle (see > > > - * i915_gem_idle_work_handler()). As a precaution, we make sure > > > - * that all ELSP are drained i.e. we have processed the CSB, > > > - * before allowing ourselves to idle and calling intel_runtime_pm_put(). > > > - */ > > > - GEM_BUG_ON(!dev_priv->gt.awake); > > > + if (unlikely(execlists->csb_use_mmio)) { > > > + buf = (u32 * __force) > > > + (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); > > > + execlists->csb_head = -1; /* force mmio read of CSB ptrs */ > > > + } else { > > > + /* The HWSP contains a (cacheable) mirror of the CSB */ > > > + buf = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; > > > + } > > > > > > - /* Prefer doing test_and_clear_bit() as a two stage operation to avoid > > > - * imposing the cost of a locked atomic transaction when submitting a > > > - * new request (outside of the context-switch interrupt). > > > + /* > > > + * The write will be ordered by the uncached read (itself > > > + * a memory barrier), so we do not need another in the form > > > + * of a locked instruction. The race between the interrupt > > > + * handler and the split test/clear is harmless as we order > > > + * our clear before the CSB read. If the interrupt arrived > > > + * first between the test and the clear, we read the updated > > > + * CSB and clear the bit. If the interrupt arrives as we read > > > + * the CSB or later (i.e. after we had cleared the bit) the bit > > > + * is set and we do a new loop. 
> > > */ > > > - while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { > > > - /* The HWSP contains a (cacheable) mirror of the CSB */ > > > - const u32 *buf = > > > - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; > > > - unsigned int head, tail; > > > - > > > - if (unlikely(execlists->csb_use_mmio)) { > > > - buf = (u32 * __force) > > > - (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); > > > - execlists->csb_head = -1; /* force mmio read of CSB ptrs */ > > > - } > > > + __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > > > + if (unlikely(execlists->csb_head == -1)) { /* following a reset */ > > > + intel_uncore_forcewake_get(i915, execlists->fw_domains); > > > + fw = true; > > > + > > > + head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > > > + tail = GEN8_CSB_WRITE_PTR(head); > > > + head = GEN8_CSB_READ_PTR(head); > > > + execlists->csb_head = head; > > > + } else { > > > + const int write_idx = > > > + intel_hws_csb_write_index(i915) - > > > + I915_HWS_CSB_BUF0_INDEX; > > > + > > > + head = execlists->csb_head; > > > + tail = READ_ONCE(buf[write_idx]); > > > + } > > > + GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", > > > + engine->name, > > > + head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?", > > > + tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?"); > > > + > > > + while (head != tail) { > > > + struct i915_request *rq; > > > + unsigned int status; > > > + unsigned int count; > > > + > > > + if (++head == GEN8_CSB_ENTRIES) > > > + head = 0; > > > > > > - /* The write will be ordered by the uncached read (itself > > > - * a memory barrier), so we do not need another in the form > > > - * of a locked instruction. The race between the interrupt > > > - * handler and the split test/clear is harmless as we order > > > - * our clear before the CSB read. If the interrupt arrived > > > - * first between the test and the clear, we read the updated > > > - * CSB and clear the bit. If the interrupt arrives as we read > > > - * the CSB or later (i.e. after we had cleared the bit) the bit > > > - * is set and we do a new loop. > > > + /* > > > + * We are flying near dragons again. > > > + * > > > + * We hold a reference to the request in execlist_port[] > > > + * but no more than that. We are operating in softirq > > > + * context and so cannot hold any mutex or sleep. That > > > + * prevents us stopping the requests we are processing > > > + * in port[] from being retired simultaneously (the > > > + * breadcrumb will be complete before we see the > > > + * context-switch). As we only hold the reference to the > > > + * request, any pointer chasing underneath the request > > > + * is subject to a potential use-after-free. Thus we > > > + * store all of the bookkeeping within port[] as > > > + * required, and avoid using unguarded pointers beneath > > > + * request itself. The same applies to the atomic > > > + * status notifier. 
> > > */ > > > - __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > > > - if (unlikely(execlists->csb_head == -1)) { /* following a reset */ > > > - if (!fw) { > > > - intel_uncore_forcewake_get(dev_priv, > > > - execlists->fw_domains); > > > - fw = true; > > > - } > > > > > > - head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > > > - tail = GEN8_CSB_WRITE_PTR(head); > > > - head = GEN8_CSB_READ_PTR(head); > > > - execlists->csb_head = head; > > > - } else { > > > - const int write_idx = > > > - intel_hws_csb_write_index(dev_priv) - > > > - I915_HWS_CSB_BUF0_INDEX; > > > + status = READ_ONCE(buf[2 * head]); /* maybe mmio! */ > > > + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", > > > + engine->name, head, > > > + status, buf[2*head + 1], > > > + execlists->active); > > > + > > > + if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | > > > + GEN8_CTX_STATUS_PREEMPTED)) > > > + execlists_set_active(execlists, > > > + EXECLISTS_ACTIVE_HWACK); > > > + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE) > > > + execlists_clear_active(execlists, > > > + EXECLISTS_ACTIVE_HWACK); > > > + > > > + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) > > > + continue; > > > > > > - head = execlists->csb_head; > > > - tail = READ_ONCE(buf[write_idx]); > > > - } > > > - GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", > > > - engine->name, > > > - head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?", > > > - tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?"); > > > + /* We should never get a COMPLETED | IDLE_ACTIVE! */ > > > + GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); > > > > > > - while (head != tail) { > > > - struct i915_request *rq; > > > - unsigned int status; > > > - unsigned int count; > > > + if (status & GEN8_CTX_STATUS_COMPLETE && > > > + buf[2*head + 1] == execlists->preempt_complete_status) { > > > + GEM_TRACE("%s preempt-idle\n", engine->name); > > > + complete_preempt_context(execlists); > > > + continue; > > > + } > > > > > > - if (++head == GEN8_CSB_ENTRIES) > > > - head = 0; > > > + if (status & GEN8_CTX_STATUS_PREEMPTED && > > > + execlists_is_active(execlists, > > > + EXECLISTS_ACTIVE_PREEMPT)) > > > + continue; > > > > > > - /* We are flying near dragons again. > > > - * > > > - * We hold a reference to the request in execlist_port[] > > > - * but no more than that. We are operating in softirq > > > - * context and so cannot hold any mutex or sleep. That > > > - * prevents us stopping the requests we are processing > > > - * in port[] from being retired simultaneously (the > > > - * breadcrumb will be complete before we see the > > > - * context-switch). As we only hold the reference to the > > > - * request, any pointer chasing underneath the request > > > - * is subject to a potential use-after-free. Thus we > > > - * store all of the bookkeeping within port[] as > > > - * required, and avoid using unguarded pointers beneath > > > - * request itself. The same applies to the atomic > > > - * status notifier. > > > - */ > > > + GEM_BUG_ON(!execlists_is_active(execlists, > > > + EXECLISTS_ACTIVE_USER)); > > > > > > - status = READ_ONCE(buf[2 * head]); /* maybe mmio! 
*/ > > > - GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", > > > - engine->name, head, > > > - status, buf[2*head + 1], > > > - execlists->active); > > > - > > > - if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | > > > - GEN8_CTX_STATUS_PREEMPTED)) > > > - execlists_set_active(execlists, > > > - EXECLISTS_ACTIVE_HWACK); > > > - if (status & GEN8_CTX_STATUS_ACTIVE_IDLE) > > > - execlists_clear_active(execlists, > > > - EXECLISTS_ACTIVE_HWACK); > > > - > > > - if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) > > > - continue; > > > + rq = port_unpack(port, &count); > > > + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n", > > > + engine->name, > > > + port->context_id, count, > > > + rq ? rq->global_seqno : 0, > > > + rq ? rq_prio(rq) : 0); > > > + > > > + /* Check the context/desc id for this event matches */ > > > + GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); > > > + > > > + GEM_BUG_ON(count == 0); > > > + if (--count == 0) { > > > + GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); > > > + GEM_BUG_ON(port_isset(&port[1]) && > > > + !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); > > > + GEM_BUG_ON(!i915_request_completed(rq)); > > > + execlists_context_schedule_out(rq); > > > + trace_i915_request_out(rq); > > > + i915_request_put(rq); > > > + > > > + GEM_TRACE("%s completed ctx=%d\n", > > > + engine->name, port->context_id); > > > + > > > + execlists_port_complete(execlists, port); > > > + } else { > > > + port_set(port, port_pack(rq, count)); > > > + } > > > > > > - /* We should never get a COMPLETED | IDLE_ACTIVE! */ > > > - GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); > > > + /* After the final element, the hw should be idle */ > > > + GEM_BUG_ON(port_count(port) == 0 && > > > + !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); > > > + if (port_count(port) == 0) > > > + execlists_clear_active(execlists, > > > + EXECLISTS_ACTIVE_USER); > > > + } > > > > > > - if (status & GEN8_CTX_STATUS_COMPLETE && > > > - buf[2*head + 1] == execlists->preempt_complete_status) { > > > - GEM_TRACE("%s preempt-idle\n", engine->name); > > > - complete_preempt_context(execlists); > > > - continue; > > > - } > > > + if (head != execlists->csb_head) { > > > + execlists->csb_head = head; > > > + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), > > > + i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > > > + } > > > > > > - if (status & GEN8_CTX_STATUS_PREEMPTED && > > > - execlists_is_active(execlists, > > > - EXECLISTS_ACTIVE_PREEMPT)) > > > - continue; > > > + if (unlikely(fw)) > > > + intel_uncore_forcewake_put(i915, execlists->fw_domains); > > > +} > > > > > > - GEM_BUG_ON(!execlists_is_active(execlists, > > > - EXECLISTS_ACTIVE_USER)); > > > - > > > - rq = port_unpack(port, &count); > > > - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n", > > > - engine->name, > > > - port->context_id, count, > > > - rq ? rq->global_seqno : 0, > > > - rq ? 
rq_prio(rq) : 0); > > > - > > > - /* Check the context/desc id for this event matches */ > > > - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); > > > - > > > - GEM_BUG_ON(count == 0); > > > - if (--count == 0) { > > > - GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); > > > - GEM_BUG_ON(port_isset(&port[1]) && > > > - !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); > > > - GEM_BUG_ON(!i915_request_completed(rq)); > > > - execlists_context_schedule_out(rq); > > > - trace_i915_request_out(rq); > > > - i915_request_put(rq); > > > - > > > - GEM_TRACE("%s completed ctx=%d\n", > > > - engine->name, port->context_id); > > > - > > > - execlists_port_complete(execlists, port); > > > - } else { > > > - port_set(port, port_pack(rq, count)); > > > - } > > > +/* > > > + * Check the unread Context Status Buffers and manage the submission of new > > > + * contexts to the ELSP accordingly. > > > + */ > > > +static void execlists_submission_tasklet(unsigned long data) > > > +{ > > > + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; > > > > > > - /* After the final element, the hw should be idle */ > > > - GEM_BUG_ON(port_count(port) == 0 && > > > - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); > > > - if (port_count(port) == 0) > > > - execlists_clear_active(execlists, > > > - EXECLISTS_ACTIVE_USER); > > > - } > > > + /* > > > + * We can skip acquiring intel_runtime_pm_get() here as it was taken > > > + * on our behalf by the request (see i915_gem_mark_busy()) and it will > > > + * not be relinquished until the device is idle (see > > > + * i915_gem_idle_work_handler()). As a precaution, we make sure > > > + * that all ELSP are drained i.e. we have processed the CSB, > > > + * before allowing ourselves to idle and calling intel_runtime_pm_put(). > > > + */ > > > + GEM_BUG_ON(!engine->i915->gt.awake); > > > > > > - if (head != execlists->csb_head) { > > > - execlists->csb_head = head; > > > - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), > > > - dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > > > - } > > > - } > > > + /* > > > + * Prefer doing test_and_clear_bit() as a two stage operation to avoid > > > + * imposing the cost of a locked atomic transaction when submitting a > > > + * new request (outside of the context-switch interrupt). > > > + */ > > > + if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) > > This was a 'while' before refactoring. Shouldn't it still be in order to catch > > new irq_posted during processing? > > > > BTW, I have revised and rebased the force preemption patches on drm-tip with > > this and the other related patches you have proposed. I just started running > > my IGT test that covers the innocent context on port[1] scenario that we > > discussed on IRC. Getting a sporadic failure but need more time to root cause. > > Will update soon. 
> > > > > + process_csb(engine); > > > > > > - if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) > > > + if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT)) > > > execlists_dequeue(engine); > > > - > > > - if (fw) > > > - intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); > > > } > > > > > > static void queue_request(struct intel_engine_cs *engine, > > > @@ -1667,6 +1673,7 @@ static struct i915_request * > > > execlists_reset_prepare(struct intel_engine_cs *engine) > > > { > > > struct intel_engine_execlists * const execlists = &engine->execlists; > > > + struct i915_request *request, *active; > > > > > > /* > > > * Prevent request submission to the hardware until we have > > > @@ -1688,7 +1695,39 @@ execlists_reset_prepare(struct intel_engine_cs *engine) > > > tasklet_kill(&execlists->tasklet); > > > tasklet_disable(&execlists->tasklet); > > > > > > - return i915_gem_find_active_request(engine); > > > + /* > > > + * We want to flush the pending context switches, having disabled > > > + * the tasklet above, we can assume exclusive access to the execlists. > > > + * For this allows us to catch up with an inflight preemption event, > > > + * and avoid blaming an innocent request if the stall was due to the > > > + * preemption itself. > > > + */ > > > + process_csb(engine); > > > + > > > + /* > > > + * The last active request can then be no later than the last request > > > + * now in ELSP[0]. So search backwards from there, so that is the GPU > > > + * has advanced beyond the last CSB update, it will be pardoned. > > > + */ > > > + active = NULL; > > > + request = port_request(execlists->port); > > > + if (request) { > > > + unsigned long flags; > > > + > > > + spin_lock_irqsave(&engine->timeline->lock, flags); > > > + list_for_each_entry_from_reverse(request, > > > + &engine->timeline->requests, > > > + link) { > > > + if (__i915_request_completed(request, > > > + request->global_seqno)) > > > + break; > > > + > > > + active = request; > > > + } > > > + spin_unlock_irqrestore(&engine->timeline->lock, flags); > > > + } > > > + > > > + return active; > > Now we can see an abort of the reset if process_csb() processes a completed > preemption. So we need to kick the tasklet to get a dequeue of the waiting > request to happen. Currently we only kick if needed in gen8_init_common_ring > which is skipped if we abort the reset. Yes, it is imperative that the tasklet be disabled and process_csb() be manually flushed in the preemption timeout (because of ksortirqd we may not have run the submission tasklet at all before the timer expires). Hence the desire to split out process_csb(). -Chris
*/ - GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); + /* After the final element, the hw should be idle */ + GEM_BUG_ON(port_count(port) == 0 && + !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); + if (port_count(port) == 0) + execlists_clear_active(execlists, + EXECLISTS_ACTIVE_USER); + } - if (status & GEN8_CTX_STATUS_COMPLETE && - buf[2*head + 1] == execlists->preempt_complete_status) { - GEM_TRACE("%s preempt-idle\n", engine->name); - complete_preempt_context(execlists); - continue; - } + if (head != execlists->csb_head) { + execlists->csb_head = head; + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), + i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); + } - if (status & GEN8_CTX_STATUS_PREEMPTED && - execlists_is_active(execlists, - EXECLISTS_ACTIVE_PREEMPT)) - continue; + if (unlikely(fw)) + intel_uncore_forcewake_put(i915, execlists->fw_domains); +} - GEM_BUG_ON(!execlists_is_active(execlists, - EXECLISTS_ACTIVE_USER)); - - rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n", - engine->name, - port->context_id, count, - rq ? rq->global_seqno : 0, - rq ? rq_prio(rq) : 0); - - /* Check the context/desc id for this event matches */ - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); - - GEM_BUG_ON(count == 0); - if (--count == 0) { - GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); - GEM_BUG_ON(port_isset(&port[1]) && - !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); - GEM_BUG_ON(!i915_request_completed(rq)); - execlists_context_schedule_out(rq); - trace_i915_request_out(rq); - i915_request_put(rq); - - GEM_TRACE("%s completed ctx=%d\n", - engine->name, port->context_id); - - execlists_port_complete(execlists, port); - } else { - port_set(port, port_pack(rq, count)); - } +/* + * Check the unread Context Status Buffers and manage the submission of new + * contexts to the ELSP accordingly. + */ +static void execlists_submission_tasklet(unsigned long data) +{ + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - /* After the final element, the hw should be idle */ - GEM_BUG_ON(port_count(port) == 0 && - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); - if (port_count(port) == 0) - execlists_clear_active(execlists, - EXECLISTS_ACTIVE_USER); - } + /* + * We can skip acquiring intel_runtime_pm_get() here as it was taken + * on our behalf by the request (see i915_gem_mark_busy()) and it will + * not be relinquished until the device is idle (see + * i915_gem_idle_work_handler()). As a precaution, we make sure + * that all ELSP are drained i.e. we have processed the CSB, + * before allowing ourselves to idle and calling intel_runtime_pm_put(). + */ + GEM_BUG_ON(!engine->i915->gt.awake); - if (head != execlists->csb_head) { - execlists->csb_head = head; - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), - dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); - } - } + /* + * Prefer doing test_and_clear_bit() as a two stage operation to avoid + * imposing the cost of a locked atomic transaction when submitting a + * new request (outside of the context-switch interrupt). 
+	 */
+	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
+		process_csb(engine);
 
-	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
+	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
 		execlists_dequeue(engine);
-
-	if (fw)
-		intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
 }
 
 static void queue_request(struct intel_engine_cs *engine,
@@ -1667,6 +1673,7 @@ static struct i915_request *
 execlists_reset_prepare(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct i915_request *request, *active;
 
 	/*
 	 * Prevent request submission to the hardware until we have
@@ -1688,7 +1695,39 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
 	tasklet_kill(&execlists->tasklet);
 	tasklet_disable(&execlists->tasklet);
 
-	return i915_gem_find_active_request(engine);
+	/*
+	 * We want to flush the pending context switches: having disabled
+	 * the tasklet above, we can assume exclusive access to the execlists.
+	 * This allows us to catch up with an inflight preemption event,
+	 * and avoid blaming an innocent request if the stall was due to the
+	 * preemption itself.
+	 */
+	process_csb(engine);
+
+	/*
+	 * The last active request can then be no later than the last request
+	 * now in ELSP[0]. So search backwards from there, so that if the GPU
+	 * has advanced beyond the last CSB update, it will be pardoned.
+	 */
+	active = NULL;
+	request = port_request(execlists->port);
+	if (request) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&engine->timeline->lock, flags);
+		list_for_each_entry_from_reverse(request,
+						 &engine->timeline->requests,
+						 link) {
+			if (__i915_request_completed(request,
+						     request->global_seqno))
+				break;
+
+			active = request;
+		}
+		spin_unlock_irqrestore(&engine->timeline->lock, flags);
+	}
+
+	return active;
 }
 
 static void reset_irq(struct intel_engine_cs *engine)
Catch up with the inflight CSB events, after disabling the tasklet
before deciding which request was truly guilty of hanging the GPU.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
CC: Michel Thierry <michel.thierry@intel.com>
Cc: Jeff McGee <jeff.mcgee@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 355 ++++++++++++++++++++++-----------------
 1 file changed, 197 insertions(+), 158 deletions(-)