diff mbox

drm/i915/execlists: Trim irq handler

Message ID 20170325201053.21306-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson March 25, 2017, 8:10 p.m. UTC
I noticed that gcc was spilling the CSB to the stack, so rearrange the
code to be more compact. Spilling in this function is slightly more
interesting due to the mmio reads acting as memory barriers and so
end up flushing the stack spills. Still miniscule to having to do at
least the pair of uncached reads :(

function                                     old     new   delta
intel_lrc_irq_handler                       1039     878    -161

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

Comments

Tvrtko Ursulin March 27, 2017, 9:10 a.m. UTC | #1
On 25/03/2017 20:10, Chris Wilson wrote:
> I noticed that gcc was spilling the CSB to the stack, so rearrange the
> code to be more compact. Spilling in this function is slightly more
> interesting due to the mmio reads acting as memory barriers and so
> end up flushing the stack spills. Still miniscule to having to do at
> least the pair of uncached reads :(
>
> function                                     old     new   delta
> intel_lrc_irq_handler                       1039     878    -161
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 27 ++++++++++++---------------
>  1 file changed, 12 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index d45e6d13545a..e9822b0b308d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -506,7 +506,7 @@ static void intel_lrc_irq_handler(unsigned long data)
>  			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
>  		u32 __iomem *buf =
>  			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
> -		unsigned int csb, head, tail;
> +		unsigned int head, tail;
>
>  		/* The write will be ordered by the uncached read (itself
>  		 * a memory barrier), so we do not need another in the form
> @@ -519,17 +519,14 @@ static void intel_lrc_irq_handler(unsigned long data)
>  		 * is set and we do a new loop.
>  		 */
>  		__clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> -		csb = readl(csb_mmio);
> -		head = GEN8_CSB_READ_PTR(csb);
> -		tail = GEN8_CSB_WRITE_PTR(csb);
> -		if (head == tail)
> -			break;
> +		head = readl(csb_mmio);
> +		tail = GEN8_CSB_WRITE_PTR(head);
> +		head = GEN8_CSB_READ_PTR(head);
> +		while (head != tail) {
> +			unsigned int status;
>
> -		if (tail < head)
> -			tail += GEN8_CSB_ENTRIES;
> -		do {
> -			unsigned int idx = ++head % GEN8_CSB_ENTRIES;
> -			unsigned int status = readl(buf + 2 * idx);
> +			if (++head == GEN8_CSB_ENTRIES)
> +				head = 0;
>
>  			/* We are flying near dragons again.
>  			 *
> @@ -548,11 +545,12 @@ static void intel_lrc_irq_handler(unsigned long data)
>  			 * status notifier.
>  			 */
>
> +			status = readl(buf + 2 * head);
>  			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>  				continue;
>
>  			/* Check the context/desc id for this event matches */
> -			GEM_DEBUG_BUG_ON(readl(buf + 2 * idx + 1) !=
> +			GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
>  					 port[0].context_id);
>
>  			GEM_BUG_ON(port[0].count == 0);
> @@ -570,10 +568,9 @@ static void intel_lrc_irq_handler(unsigned long data)
>
>  			GEM_BUG_ON(port[0].count == 0 &&
>  				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
> -		} while (head < tail);
> +		}
>
> -		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
> -				     GEN8_CSB_WRITE_PTR(csb) << 8),
> +		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
>  		       csb_mmio);
>  	}
>
>

Looks correct, even I think a bit easier to understand.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Chris Wilson March 27, 2017, 12:26 p.m. UTC | #2
On Mon, Mar 27, 2017 at 10:10:54AM +0100, Tvrtko Ursulin wrote:
> 
> On 25/03/2017 20:10, Chris Wilson wrote:
> >I noticed that gcc was spilling the CSB to the stack, so rearrange the
> >code to be more compact. Spilling in this function is slightly more
> >interesting due to the mmio reads acting as memory barriers and so
> >end up flushing the stack spills. Still miniscule to having to do at
> >least the pair of uncached reads :(
> >
> >function                                     old     new   delta
> >intel_lrc_irq_handler                       1039     878    -161
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> Looks correct, even I think a bit easier to understand.
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Thanks for the review, it does feel like a step in the right direction
with regards the dance between csb, head, tail and index. Pushed,
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d45e6d13545a..e9822b0b308d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -506,7 +506,7 @@  static void intel_lrc_irq_handler(unsigned long data)
 			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
 		u32 __iomem *buf =
 			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
-		unsigned int csb, head, tail;
+		unsigned int head, tail;
 
 		/* The write will be ordered by the uncached read (itself
 		 * a memory barrier), so we do not need another in the form
@@ -519,17 +519,14 @@  static void intel_lrc_irq_handler(unsigned long data)
 		 * is set and we do a new loop.
 		 */
 		__clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-		csb = readl(csb_mmio);
-		head = GEN8_CSB_READ_PTR(csb);
-		tail = GEN8_CSB_WRITE_PTR(csb);
-		if (head == tail)
-			break;
+		head = readl(csb_mmio);
+		tail = GEN8_CSB_WRITE_PTR(head);
+		head = GEN8_CSB_READ_PTR(head);
+		while (head != tail) {
+			unsigned int status;
 
-		if (tail < head)
-			tail += GEN8_CSB_ENTRIES;
-		do {
-			unsigned int idx = ++head % GEN8_CSB_ENTRIES;
-			unsigned int status = readl(buf + 2 * idx);
+			if (++head == GEN8_CSB_ENTRIES)
+				head = 0;
 
 			/* We are flying near dragons again.
 			 *
@@ -548,11 +545,12 @@  static void intel_lrc_irq_handler(unsigned long data)
 			 * status notifier.
 			 */
 
+			status = readl(buf + 2 * head);
 			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
 				continue;
 
 			/* Check the context/desc id for this event matches */
-			GEM_DEBUG_BUG_ON(readl(buf + 2 * idx + 1) !=
+			GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
 					 port[0].context_id);
 
 			GEM_BUG_ON(port[0].count == 0);
@@ -570,10 +568,9 @@  static void intel_lrc_irq_handler(unsigned long data)
 
 			GEM_BUG_ON(port[0].count == 0 &&
 				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
-		} while (head < tail);
+		}
 
-		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
-				     GEN8_CSB_WRITE_PTR(csb) << 8),
+		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
 		       csb_mmio);
 	}