[6/7] drm/i915/execlists: Flush pending preemption events during reset

Message ID: 20180516064741.30912-6-chris@chris-wilson.co.uk (mailing list archive)
State: New, archived

Commit Message

Chris Wilson May 16, 2018, 6:47 a.m. UTC
Catch up with the inflight CSB events after disabling the tasklet,
before deciding which request was truly guilty of hanging the GPU.

v2: Restore checking of csb_use_mmio on every loop; don't forget old
vgpu.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Jeff McGee <jeff.mcgee@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 127 +++++++++++++++++++++----------
 1 file changed, 87 insertions(+), 40 deletions(-)

Comments

Tvrtko Ursulin May 16, 2018, 2:24 p.m. UTC | #1
On 16/05/2018 07:47, Chris Wilson wrote:
> Catch up with the inflight CSB events after disabling the tasklet,
> before deciding which request was truly guilty of hanging the GPU.
> 
> v2: Restore checking of csb_use_mmio on every loop; don't forget old
> vgpu.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Jeff McGee <jeff.mcgee@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c | 127 +++++++++++++++++++++----------
>   1 file changed, 87 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 7afe52fa615d..cae10ebf9432 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -957,34 +957,14 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>   	local_irq_restore(flags);
>   }
>   
> -/*
> - * Check the unread Context Status Buffers and manage the submission of new
> - * contexts to the ELSP accordingly.
> - */
> -static void execlists_submission_tasklet(unsigned long data)
> +static void process_csb(struct intel_engine_cs *engine)
>   {
> -	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct execlist_port *port = execlists->port;
> -	struct drm_i915_private *dev_priv = engine->i915;
> +	struct drm_i915_private *i915 = engine->i915;
>   	bool fw = false;
>   
> -	/*
> -	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
> -	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
> -	 * not be relinquished until the device is idle (see
> -	 * i915_gem_idle_work_handler()). As a precaution, we make sure
> -	 * that all ELSP are drained i.e. we have processed the CSB,
> -	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
> -	 */
> -	GEM_BUG_ON(!dev_priv->gt.awake);
> -
> -	/*
> -	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
> -	 * imposing the cost of a locked atomic transaction when submitting a
> -	 * new request (outside of the context-switch interrupt).
> -	 */
> -	while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
> +	do {
>   		/* The HWSP contains a (cacheable) mirror of the CSB */
>   		const u32 *buf =
>   			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> @@ -992,28 +972,27 @@ static void execlists_submission_tasklet(unsigned long data)
>   
>   		if (unlikely(execlists->csb_use_mmio)) {
>   			buf = (u32 * __force)
> -				(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> -			execlists->csb_head = -1; /* force mmio read of CSB ptrs */
> +				(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> +			execlists->csb_head = -1; /* force mmio read of CSB */
>   		}
>   
>   		/* Clear before reading to catch new interrupts */
>   		clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>   		smp_mb__after_atomic();
>   
> -		if (unlikely(execlists->csb_head == -1)) { /* following a reset */
> +		if (unlikely(execlists->csb_head == -1)) { /* after a reset */
>   			if (!fw) {
> -				intel_uncore_forcewake_get(dev_priv,
> -							   execlists->fw_domains);
> +				intel_uncore_forcewake_get(i915, execlists->fw_domains);
>   				fw = true;
>   			}
>   
> -			head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> +			head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
>   			tail = GEN8_CSB_WRITE_PTR(head);
>   			head = GEN8_CSB_READ_PTR(head);
>   			execlists->csb_head = head;
>   		} else {
>   			const int write_idx =
> -				intel_hws_csb_write_index(dev_priv) -
> +				intel_hws_csb_write_index(i915) -
>   				I915_HWS_CSB_BUF0_INDEX;
>   
>   			head = execlists->csb_head;
> @@ -1022,8 +1001,8 @@ static void execlists_submission_tasklet(unsigned long data)
>   		}
>   		GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
>   			  engine->name,
> -			  head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> -			  tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> +			  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> +			  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
>   
>   		while (head != tail) {
>   			struct i915_request *rq;
> @@ -1033,7 +1012,8 @@ static void execlists_submission_tasklet(unsigned long data)
>   			if (++head == GEN8_CSB_ENTRIES)
>   				head = 0;
>   
> -			/* We are flying near dragons again.
> +			/*
> +			 * We are flying near dragons again.
>   			 *
>   			 * We hold a reference to the request in execlist_port[]
>   			 * but no more than that. We are operating in softirq
> @@ -1142,15 +1122,48 @@ static void execlists_submission_tasklet(unsigned long data)
>   		if (head != execlists->csb_head) {
>   			execlists->csb_head = head;
>   			writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> -			       dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> +			       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
>   		}
> -	}
> +	} while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));

Ideally it should be three patches, or at least two:

1. Extract out CSB processing.
2. Move the ENGINE_IRQ_EXECLIST check to the caller / end of do-while loop.
3. Reset changes.

>   
> -	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
> -		execlists_dequeue(engine);
> +	if (unlikely(fw))
> +		intel_uncore_forcewake_put(i915, execlists->fw_domains);
> +}
> +
> +/*
> + * Check the unread Context Status Buffers and manage the submission of new
> + * contexts to the ELSP accordingly.
> + */
> +static void execlists_submission_tasklet(unsigned long data)
> +{
> +	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>   
> -	if (fw)
> -		intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
> +	GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n",
> +		  engine->name,
> +		  engine->i915->gt.awake,
> +		  engine->execlists.active,
> +		  test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
> +
> +	/*
> +	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
> +	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
> +	 * not be relinquished until the device is idle (see
> +	 * i915_gem_idle_work_handler()). As a precaution, we make sure
> +	 * that all ELSP are drained i.e. we have processed the CSB,
> +	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
> +	 */
> +	GEM_BUG_ON(!engine->i915->gt.awake);
> +
> +	/*
> +	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
> +	 * imposing the cost of a locked atomic transaction when submitting a
> +	 * new request (outside of the context-switch interrupt).
> +	 */
> +	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> +		process_csb(engine);
> +
> +	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> +		execlists_dequeue(engine);
>   
>   	/* If the engine is now idle, so should be the flag; and vice versa. */
>   	GEM_BUG_ON(execlists_is_active(&engine->execlists,
> @@ -1830,6 +1843,7 @@ static struct i915_request *
>   execlists_reset_prepare(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> +	struct i915_request *request, *active;
>   
>   	GEM_TRACE("%s\n", engine->name);
>   
> @@ -1844,7 +1858,40 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
>   	 */
>   	__tasklet_disable_once(&execlists->tasklet);
>   
> -	return i915_gem_find_active_request(engine);
> +	/*
> +	 * We want to flush the pending context switches; having disabled
> +	 * the tasklet above, we can assume exclusive access to the execlists.
> +	 * This allows us to catch up with an inflight preemption event,
> +	 * and avoid blaming an innocent request if the stall was due to the
> +	 * preemption itself.
> +	 */
> +	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> +		process_csb(engine);
> +
> +	/*
> +	 * The last active request can then be no later than the last request
> +	 * now in ELSP[0]. So search backwards from there, so that if the GPU
> +	 * has advanced beyond the last CSB update, it will be pardoned.
> +	 */
> +	active = NULL;
> +	request = port_request(execlists->port);
> +	if (request) {

Assignment to request is for nothing it seems.

> +		unsigned long flags;
> +
> +		spin_lock_irqsave(&engine->timeline.lock, flags);
> +		list_for_each_entry_from_reverse(request,
> +						 &engine->timeline.requests,
> +						 link) {
> +			if (__i915_request_completed(request,
> +						     request->global_seqno))
> +				break;
> +
> +			active = request;
> +		}
> +		spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	}
> +
> +	return active;
>   }
>   
>   static void execlists_reset(struct intel_engine_cs *engine,
> 

No other complaints and I could be bribed to look past the request to 
split it. :)

Regards,

Tvrtko
Chris Wilson May 16, 2018, 3:01 p.m. UTC | #2
Quoting Tvrtko Ursulin (2018-05-16 15:24:33)
> > +     /*
> > +      * The last active request can then be no later than the last request
> > +      * now in ELSP[0]. So search backwards from there, so that if the GPU
> > +      * has advanced beyond the last CSB update, it will be pardoned.
> > +      */
> > +     active = NULL;
> > +     request = port_request(execlists->port);
> > +     if (request) {
> 
> Assignment to request is for nothing it seems.
> 
> > +             unsigned long flags;
> > +
> > +             spin_lock_irqsave(&engine->timeline.lock, flags);
> > +             list_for_each_entry_from_reverse(request,

It's a 'from' list_for_each variant.
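The 'from' variants start the walk at the entry the cursor already
points to, rather than at the list head, so the assignment from
port_request() is what seeds the iteration. For reference, a sketch of
the macro as it appears in include/linux/list.h around this kernel
version (note the empty init clause, which is why the cursor must be
primed beforehand):

	/* iterate backwards over a list, from the current position */
	#define list_for_each_entry_from_reverse(pos, head, member)	\
		for (; &pos->member != (head);				\
		     pos = list_prev_entry(pos, member))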

> > +                                              &engine->timeline.requests,
> > +                                              link) {
> > +                     if (__i915_request_completed(request,
> > +                                                  request->global_seqno))
> > +                             break;
> > +
> > +                     active = request;
> > +             }
> > +             spin_unlock_irqrestore(&engine->timeline.lock, flags);
> > +     }
> > +
> > +     return active;
> >   }
> >   
> >   static void execlists_reset(struct intel_engine_cs *engine,
> > 
> 
> No other complaints and I could be bribed to look past the request to 
> split it. :)

Is not clearing the backlog so we can get onto features not enough
incentive? Chocolate? Beer? Chocolate-flavoured beer?
-Chris

Patch

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7afe52fa615d..cae10ebf9432 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -957,34 +957,14 @@  static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	local_irq_restore(flags);
 }
 
-/*
- * Check the unread Context Status Buffers and manage the submission of new
- * contexts to the ELSP accordingly.
- */
-static void execlists_submission_tasklet(unsigned long data)
+static void process_csb(struct intel_engine_cs *engine)
 {
-	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
-	struct drm_i915_private *dev_priv = engine->i915;
+	struct drm_i915_private *i915 = engine->i915;
 	bool fw = false;
 
-	/*
-	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
-	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
-	 * not be relinquished until the device is idle (see
-	 * i915_gem_idle_work_handler()). As a precaution, we make sure
-	 * that all ELSP are drained i.e. we have processed the CSB,
-	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
-	 */
-	GEM_BUG_ON(!dev_priv->gt.awake);
-
-	/*
-	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
-	 * imposing the cost of a locked atomic transaction when submitting a
-	 * new request (outside of the context-switch interrupt).
-	 */
-	while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
+	do {
 		/* The HWSP contains a (cacheable) mirror of the CSB */
 		const u32 *buf =
 			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
@@ -992,28 +972,27 @@  static void execlists_submission_tasklet(unsigned long data)
 
 		if (unlikely(execlists->csb_use_mmio)) {
 			buf = (u32 * __force)
-				(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
-			execlists->csb_head = -1; /* force mmio read of CSB ptrs */
+				(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+			execlists->csb_head = -1; /* force mmio read of CSB */
 		}
 
 		/* Clear before reading to catch new interrupts */
 		clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 		smp_mb__after_atomic();
 
-		if (unlikely(execlists->csb_head == -1)) { /* following a reset */
+		if (unlikely(execlists->csb_head == -1)) { /* after a reset */
 			if (!fw) {
-				intel_uncore_forcewake_get(dev_priv,
-							   execlists->fw_domains);
+				intel_uncore_forcewake_get(i915, execlists->fw_domains);
 				fw = true;
 			}
 
-			head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+			head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
 			tail = GEN8_CSB_WRITE_PTR(head);
 			head = GEN8_CSB_READ_PTR(head);
 			execlists->csb_head = head;
 		} else {
 			const int write_idx =
-				intel_hws_csb_write_index(dev_priv) -
+				intel_hws_csb_write_index(i915) -
 				I915_HWS_CSB_BUF0_INDEX;
 
 			head = execlists->csb_head;
@@ -1022,8 +1001,8 @@  static void execlists_submission_tasklet(unsigned long data)
 		}
 		GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
 			  engine->name,
-			  head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
-			  tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+			  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
+			  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
 
 		while (head != tail) {
 			struct i915_request *rq;
@@ -1033,7 +1012,8 @@  static void execlists_submission_tasklet(unsigned long data)
 			if (++head == GEN8_CSB_ENTRIES)
 				head = 0;
 
-			/* We are flying near dragons again.
+			/*
+			 * We are flying near dragons again.
 			 *
 			 * We hold a reference to the request in execlist_port[]
 			 * but no more than that. We are operating in softirq
@@ -1142,15 +1122,48 @@  static void execlists_submission_tasklet(unsigned long data)
 		if (head != execlists->csb_head) {
 			execlists->csb_head = head;
 			writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
-			       dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+			       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
 		}
-	}
+	} while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
 
-	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
-		execlists_dequeue(engine);
+	if (unlikely(fw))
+		intel_uncore_forcewake_put(i915, execlists->fw_domains);
+}
+
+/*
+ * Check the unread Context Status Buffers and manage the submission of new
+ * contexts to the ELSP accordingly.
+ */
+static void execlists_submission_tasklet(unsigned long data)
+{
+	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 
-	if (fw)
-		intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
+	GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n",
+		  engine->name,
+		  engine->i915->gt.awake,
+		  engine->execlists.active,
+		  test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
+
+	/*
+	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
+	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
+	 * not be relinquished until the device is idle (see
+	 * i915_gem_idle_work_handler()). As a precaution, we make sure
+	 * that all ELSP are drained i.e. we have processed the CSB,
+	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
+	 */
+	GEM_BUG_ON(!engine->i915->gt.awake);
+
+	/*
+	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
+	 * imposing the cost of a locked atomic transaction when submitting a
+	 * new request (outside of the context-switch interrupt).
+	 */
+	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
+		process_csb(engine);
+
+	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
+		execlists_dequeue(engine);
 
 	/* If the engine is now idle, so should be the flag; and vice versa. */
 	GEM_BUG_ON(execlists_is_active(&engine->execlists,
@@ -1830,6 +1843,7 @@  static struct i915_request *
 execlists_reset_prepare(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct i915_request *request, *active;
 
 	GEM_TRACE("%s\n", engine->name);
 
@@ -1844,7 +1858,40 @@  execlists_reset_prepare(struct intel_engine_cs *engine)
 	 */
 	__tasklet_disable_once(&execlists->tasklet);
 
-	return i915_gem_find_active_request(engine);
+	/*
+	 * We want to flush the pending context switches; having disabled
+	 * the tasklet above, we can assume exclusive access to the execlists.
+	 * This allows us to catch up with an inflight preemption event,
+	 * and avoid blaming an innocent request if the stall was due to the
+	 * and avoid blaming an innocent request if the stall was due to the
+	 * preemption itself.
+	 */
+	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
+		process_csb(engine);
+
+	/*
+	 * The last active request can then be no later than the last request
+	 * now in ELSP[0]. So search backwards from there, so that if the GPU
+	 * has advanced beyond the last CSB update, it will be pardoned.
+	 */
+	active = NULL;
+	request = port_request(execlists->port);
+	if (request) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&engine->timeline.lock, flags);
+		list_for_each_entry_from_reverse(request,
+						 &engine->timeline.requests,
+						 link) {
+			if (__i915_request_completed(request,
+						     request->global_seqno))
+				break;
+
+			active = request;
+		}
+		spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	}
+
+	return active;
 }
 
 static void execlists_reset(struct intel_engine_cs *engine,
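To recap the resulting control flow (a condensed sketch, not the
literal driver code: tracing, forcewake and the GEM_BUG_ONs are
elided, and find_active_request_backwards() is a hypothetical name
standing in for the open-coded list walk shown above):

	static void execlists_submission_tasklet(unsigned long data)
	{
		struct intel_engine_cs * const engine =
			(struct intel_engine_cs *)data;

		/* two-stage test/clear to keep submission cheap */
		if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
			process_csb(engine);	/* drain pending CSB events */

		if (!execlists_is_active(&engine->execlists,
					 EXECLISTS_ACTIVE_PREEMPT))
			execlists_dequeue(engine);
	}

	static struct i915_request *
	execlists_reset_prepare(struct intel_engine_cs *engine)
	{
		__tasklet_disable_once(&engine->execlists.tasklet);

		/* flush any inflight preemption before assigning blame */
		if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
			process_csb(engine);

		/* hypothetical helper: walks back from ELSP[0] to find
		 * the oldest not-yet-completed request, as in the diff */
		return find_active_request_backwards(engine);
	}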