[8/9] drm/i915/execlists: Trust the CSB

Message ID	20180628123351.31240-8-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Thu, 28 Jun 2018 13:33:50 +0100 Message-Id: <20180628123351.31240-8-chris@chris-wilson.co.uk> In-Reply-To: <20180628123351.31240-1-chris@chris-wilson.co.uk> References: <20180628123351.31240-1-chris@chris-wilson.co.uk> Subject: [Intel-gfx] [PATCH 8/9] drm/i915/execlists: Trust the CSB Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Message ID

20180628123351.31240-8-chris@chris-wilson.co.uk (mailing list archive)

State

New, archived

Headers

From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Date: Thu, 28 Jun 2018 13:33:50 +0100
Message-Id: <20180628123351.31240-8-chris@chris-wilson.co.uk>
In-Reply-To: <20180628123351.31240-1-chris@chris-wilson.co.uk>
References: <20180628123351.31240-1-chris@chris-wilson.co.uk>
Subject: [Intel-gfx] [PATCH 8/9] drm/i915/execlists: Trust the CSB
Precedence: list
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Commit Message

Chris Wilson June 28, 2018, 12:33 p.m. UTC

Now that we use the CSB stored in the CPU friendly HWSP, we do not need
to track interrupts for when the mmio CSB registers are valid and can
just check where we read up to last from the cached HWSP. This means we
can forgo the atomic bit tracking from interrupt, and in the next patch
it means we can check the CSB at any time.

v2: Change the splitting inside reset_prepare, we only want to lose
testing the interrupt in this patch, the next patch requires the change
in locking

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c         | 11 +++-------
 drivers/gpu/drm/i915/intel_engine_cs.c  |  8 ++-----
 drivers/gpu/drm/i915/intel_lrc.c        | 29 ++++---------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
 4 files changed, 9 insertions(+), 40 deletions(-)

Comments

Tvrtko Ursulin June 28, 2018, 1:04 p.m. UTC | #1

On 28/06/2018 13:33, Chris Wilson wrote:
> Now that we use the CSB stored in the CPU friendly HWSP, we do not need
> to track interrupts for when the mmio CSB registers are valid and can
> just check where we read up to last from the cached HWSP. This means we
> can forgo the atomic bit tracking from interrupt, and in the next patch
> it means we can check the CSB at any time.
> 
> v2: Change the splitting inside reset_prepare, we only want to lose
> testing the interrupt in this patch, the next patch requires the change
> in locking
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_irq.c         | 11 +++-------
>   drivers/gpu/drm/i915/intel_engine_cs.c  |  8 ++-----
>   drivers/gpu/drm/i915/intel_lrc.c        | 29 ++++---------------------
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
>   4 files changed, 9 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 97418efec719..cb91b213aa67 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1478,15 +1478,10 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
>   static void
>   gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
>   {
> -	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	bool tasklet = false;
>   
> -	if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
> -		if (READ_ONCE(engine->execlists.active)) {
> -			set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> -			tasklet = true;
> -		}
> -	}
> +	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
> +		tasklet = true;
>   
>   	if (iir & GT_RENDER_USER_INTERRUPT) {
>   		notify_ring(engine);
> @@ -1494,7 +1489,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
>   	}
>   
>   	if (tasklet)
> -		tasklet_hi_schedule(&execlists->tasklet);
> +		tasklet_hi_schedule(&engine->execlists.tasklet);
>   }
>   
>   static void gen8_gt_irq_ack(struct drm_i915_private *i915,
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 7209c22798e6..ace93958689e 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1353,12 +1353,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
>   		ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
>   		read = GEN8_CSB_READ_PTR(ptr);
>   		write = GEN8_CSB_WRITE_PTR(ptr);
> -		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n",
> +		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], tasklet queued? %s (%s)\n",
>   			   read, execlists->csb_head,
>   			   write,
>   			   intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
> -			   yesno(test_bit(ENGINE_IRQ_EXECLIST,
> -					  &engine->irq_posted)),
>   			   yesno(test_bit(TASKLET_STATE_SCHED,
>   					  &engine->execlists.tasklet.state)),
>   			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
> @@ -1570,11 +1568,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   	spin_unlock(&b->rb_lock);
>   	local_irq_restore(flags);
>   
> -	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
> +	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
>   		   engine->irq_posted,
>   		   yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
> -				  &engine->irq_posted)),
> -		   yesno(test_bit(ENGINE_IRQ_EXECLIST,
>   				  &engine->irq_posted)));
>   
>   	drm_printf(m, "HWSP:\n");
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 14be53035610..7f8b29684d9d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -874,14 +874,6 @@ static void reset_irq(struct intel_engine_cs *engine)
>   	smp_store_mb(engine->execlists.active, 0);
>   
>   	clear_gtiir(engine);
> -
> -	/*
> -	 * The port is checked prior to scheduling a tasklet, but
> -	 * just in case we have suspended the tasklet to do the
> -	 * wedging make sure that when it wakes, it decides there
> -	 * is no work to do by clearing the irq_posted bit.
> -	 */
> -	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>   }
>   
>   static void reset_csb_pointers(struct intel_engine_execlists *execlists)
> @@ -972,10 +964,6 @@ static void process_csb(struct intel_engine_cs *engine)
>   	const u32 * const buf = execlists->csb_status;
>   	u8 head, tail;
>   
> -	/* Clear before reading to catch new interrupts */
> -	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> -	smp_mb__after_atomic();
> -
>   	/*
>   	 * Note that csb_write, csb_status may be either in HWSP or mmio.
>   	 * When reading from the csb_write mmio register, we have to be
> @@ -1128,11 +1116,10 @@ static void execlists_submission_tasklet(unsigned long data)
>   {
>   	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>   
> -	GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n",
> +	GEM_TRACE("%s awake?=%d, active=%x\n",
>   		  engine->name,
>   		  engine->i915->gt.awake,
> -		  engine->execlists.active,
> -		  test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
> +		  engine->execlists.active);
>   
>   	/*
>   	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
> @@ -1144,14 +1131,7 @@ static void execlists_submission_tasklet(unsigned long data)
>   	 */
>   	GEM_BUG_ON(!engine->i915->gt.awake);
>   
> -	/*
> -	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
> -	 * imposing the cost of a locked atomic transaction when submitting a
> -	 * new request (outside of the context-switch interrupt).
> -	 */
> -	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> -		process_csb(engine);
> -
> +	process_csb(engine);

I'd probably add a newline here.

>   	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
>   		execlists_dequeue(engine);
>   }
> @@ -1919,8 +1899,7 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
>   	 * and avoid blaming an innocent request if the stall was due to the
>   	 * preemption itself.
>   	 */
> -	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> -		process_csb(engine);
> +	process_csb(engine);
>   
>   	/*
>   	 * The last active request can then be no later than the last request
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 62e3db6f6f8d..e1ee1ca9ac16 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -368,7 +368,6 @@ struct intel_engine_cs {
>   	atomic_t irq_count;
>   	unsigned long irq_posted;
>   #define ENGINE_IRQ_BREADCRUMB 0
> -#define ENGINE_IRQ_EXECLIST 1
>   
>   	/* Rather than have every client wait upon all user interrupts,
>   	 * with the herd waking after every interrupt and each doing the
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 97418efec719..cb91b213aa67 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1478,15 +1478,10 @@  static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 static void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 {
-	struct intel_engine_execlists * const execlists = &engine->execlists;
 	bool tasklet = false;
 
-	if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
-		if (READ_ONCE(engine->execlists.active)) {
-			set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-			tasklet = true;
-		}
-	}
+	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
+		tasklet = true;
 
 	if (iir & GT_RENDER_USER_INTERRUPT) {
 		notify_ring(engine);
@@ -1494,7 +1489,7 @@  gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 	}
 
 	if (tasklet)
-		tasklet_hi_schedule(&execlists->tasklet);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
 static void gen8_gt_irq_ack(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 7209c22798e6..ace93958689e 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1353,12 +1353,10 @@  static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 		ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
 		read = GEN8_CSB_READ_PTR(ptr);
 		write = GEN8_CSB_WRITE_PTR(ptr);
-		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n",
+		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], tasklet queued? %s (%s)\n",
 			   read, execlists->csb_head,
 			   write,
 			   intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
-			   yesno(test_bit(ENGINE_IRQ_EXECLIST,
-					  &engine->irq_posted)),
 			   yesno(test_bit(TASKLET_STATE_SCHED,
 					  &engine->execlists.tasklet.state)),
 			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
@@ -1570,11 +1568,9 @@  void intel_engine_dump(struct intel_engine_cs *engine,
 	spin_unlock(&b->rb_lock);
 	local_irq_restore(flags);
 
-	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
+	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
 		   engine->irq_posted,
 		   yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
-				  &engine->irq_posted)),
-		   yesno(test_bit(ENGINE_IRQ_EXECLIST,
 				  &engine->irq_posted)));
 
 	drm_printf(m, "HWSP:\n");
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 14be53035610..7f8b29684d9d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -874,14 +874,6 @@  static void reset_irq(struct intel_engine_cs *engine)
 	smp_store_mb(engine->execlists.active, 0);
 
 	clear_gtiir(engine);
-
-	/*
-	 * The port is checked prior to scheduling a tasklet, but
-	 * just in case we have suspended the tasklet to do the
-	 * wedging make sure that when it wakes, it decides there
-	 * is no work to do by clearing the irq_posted bit.
-	 */
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 }
 
 static void reset_csb_pointers(struct intel_engine_execlists *execlists)
@@ -972,10 +964,6 @@  static void process_csb(struct intel_engine_cs *engine)
 	const u32 * const buf = execlists->csb_status;
 	u8 head, tail;
 
-	/* Clear before reading to catch new interrupts */
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-	smp_mb__after_atomic();
-
 	/*
 	 * Note that csb_write, csb_status may be either in HWSP or mmio.
 	 * When reading from the csb_write mmio register, we have to be
@@ -1128,11 +1116,10 @@  static void execlists_submission_tasklet(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 
-	GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n",
+	GEM_TRACE("%s awake?=%d, active=%x\n",
 		  engine->name,
 		  engine->i915->gt.awake,
-		  engine->execlists.active,
-		  test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
+		  engine->execlists.active);
 
 	/*
 	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -1144,14 +1131,7 @@  static void execlists_submission_tasklet(unsigned long data)
 	 */
 	GEM_BUG_ON(!engine->i915->gt.awake);
 
-	/*
-	 * Prefer doing test_and_clear_bit() as a two stage operation to avoid
-	 * imposing the cost of a locked atomic transaction when submitting a
-	 * new request (outside of the context-switch interrupt).
-	 */
-	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
-		process_csb(engine);
-
+	process_csb(engine);
 	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
 		execlists_dequeue(engine);
 }
@@ -1919,8 +1899,7 @@  execlists_reset_prepare(struct intel_engine_cs *engine)
 	 * and avoid blaming an innocent request if the stall was due to the
 	 * preemption itself.
 	 */
-	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
-		process_csb(engine);
+	process_csb(engine);
 
 	/*
 	 * The last active request can then be no later than the last request
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 62e3db6f6f8d..e1ee1ca9ac16 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -368,7 +368,6 @@  struct intel_engine_cs {
 	atomic_t irq_count;
 	unsigned long irq_posted;
 #define ENGINE_IRQ_BREADCRUMB 0
-#define ENGINE_IRQ_EXECLIST 1
 
 	/* Rather than have every client wait upon all user interrupts,
 	 * with the herd waking after every interrupt and each doing the

[8/9] drm/i915/execlists: Trust the CSB

Commit Message

Comments

Patch