diff mbox

[v2] drm/i915: Trim gen11_gt_irq_handler

Message ID 20180309013308.12157-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson March 9, 2018, 1:33 a.m. UTC
Give the compiler a helping hand in mapping (bank,bit) to our struct
intel_engine_cs by trading object code size for data cache:

add/remove: 2/0 grow/shrink: 0/1 up/down: 48/-135 (-87)
Function                                     old     new   delta
bank1_map                                      -      32     +32
bank0_map                                      -      16     +16
gen11_irq_handler                            706     571    -135

v2: u8 arrays for tighter packing

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 53 +++++++++++------------------------------
 1 file changed, 14 insertions(+), 39 deletions(-)

Comments

Chris Wilson March 9, 2018, 1:38 a.m. UTC | #1
Quoting Chris Wilson (2018-03-09 01:33:08)
>  gen11_gt_engine_intr(struct drm_i915_private * const i915,
>                      const unsigned int bank, const unsigned int bit)
> @@ -2836,10 +2798,23 @@ static void
>  gen11_gt_irq_handler(struct drm_i915_private * const i915,
>                      const u32 master_ctl)
>  {
> +       static const u8 bank0_map[] = {
> +               [GEN11_RCS0] = RCS,
> +               [GEN11_BCS]  = BCS,

Is there a reason why its RCS0 but BCS? And the multi-instance classes
actually use VCS(x)?
-Chris
Tvrtko Ursulin March 9, 2018, 10:06 a.m. UTC | #2
On 09/03/2018 01:38, Chris Wilson wrote:
> Quoting Chris Wilson (2018-03-09 01:33:08)
>>   gen11_gt_engine_intr(struct drm_i915_private * const i915,
>>                       const unsigned int bank, const unsigned int bit)
>> @@ -2836,10 +2798,23 @@ static void
>>   gen11_gt_irq_handler(struct drm_i915_private * const i915,
>>                       const u32 master_ctl)
>>   {
>> +       static const u8 bank0_map[] = {
>> +               [GEN11_RCS0] = RCS,
>> +               [GEN11_BCS]  = BCS,

> Is there a reason why its RCS0 but BCS? And the multi-instance classes
> actually use VCS(x)?

I am pretty sure that naming came from the spec.

Side note - one thing I dislike a bit about the current code and this 
patch is that all engines have to be enumerated explicitly in the 
interrupt handler. I kind of thought it was handy to handle the 
multi-class engines from a loop, and so have one place less to remember 
to update after adding a new engine instance.

And in general I think too many bike-sheds on this area of code before 
we are even running it on real hw. :(

Regards,

Tvrtko
Chris Wilson March 9, 2018, 10:17 a.m. UTC | #3
Quoting Tvrtko Ursulin (2018-03-09 10:06:48)
> 
> On 09/03/2018 01:38, Chris Wilson wrote:
> And in general I think too many bike-sheds on this area of code before 
> we are even running it on real hw. :(

Come on now, it's not a proper bikeshed if the discussion is meaningful!
-Chris
Daniele Ceraolo Spurio March 9, 2018, 7:14 p.m. UTC | #4
On 08/03/18 17:33, Chris Wilson wrote:
> Give the compiler a helping hand in mapping (bank,bit) to our struct
> intel_engine_cs by trading object code size for data cache:
> 
> add/remove: 2/0 grow/shrink: 0/1 up/down: 48/-135 (-87)
> Function                                     old     new   delta
> bank1_map                                      -      32     +32
> bank0_map                                      -      16     +16
> gen11_irq_handler                            706     571    -135
> 
> v2: u8 arrays for tighter packing
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_irq.c | 53 +++++++++++------------------------------
>   1 file changed, 14 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index c8c29d8ecbab..e423ec58e5d2 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2762,44 +2762,6 @@ static void __fini_wedge(struct wedge_me *w)
>   	     (W)->i915;							\
>   	     __fini_wedge((W)))
>   
> -static void
> -gen11_gt_engine_irq_handler(struct drm_i915_private * const i915,
> -			    const unsigned int bank,
> -			    const unsigned int engine_n,
> -			    const u16 iir)
> -{
> -	struct intel_engine_cs ** const engine = i915->engine;
> -
> -	switch (bank) {
> -	case 0:
> -		switch (engine_n) {
> -
> -		case GEN11_RCS0:
> -			return gen8_cs_irq_handler(engine[RCS], iir);
> -
> -		case GEN11_BCS:
> -			return gen8_cs_irq_handler(engine[BCS], iir);
> -		}
> -	case 1:
> -		switch (engine_n) {
> -
> -		case GEN11_VCS(0):
> -			return gen8_cs_irq_handler(engine[_VCS(0)], iir);
> -		case GEN11_VCS(1):
> -			return gen8_cs_irq_handler(engine[_VCS(1)], iir);
> -		case GEN11_VCS(2):
> -			return gen8_cs_irq_handler(engine[_VCS(2)], iir);
> -		case GEN11_VCS(3):
> -			return gen8_cs_irq_handler(engine[_VCS(3)], iir);
> -
> -		case GEN11_VECS(0):
> -			return gen8_cs_irq_handler(engine[_VECS(0)], iir);
> -		case GEN11_VECS(1):
> -			return gen8_cs_irq_handler(engine[_VECS(1)], iir);
> -		}
> -	}
> -}
> -
>   static u32
>   gen11_gt_engine_intr(struct drm_i915_private * const i915,
>   		     const unsigned int bank, const unsigned int bit)
> @@ -2836,10 +2798,23 @@ static void
>   gen11_gt_irq_handler(struct drm_i915_private * const i915,
>   		     const u32 master_ctl)
>   {
> +	static const u8 bank0_map[] = {
> +		[GEN11_RCS0] = RCS,
> +		[GEN11_BCS]  = BCS,
> +	};
> +	static const u8 bank1_map[] = {
> +		[GEN11_VCS(0)]  = _VCS(0),
> +		[GEN11_VCS(1)]  = _VCS(1),
> +		[GEN11_VCS(2)]  = _VCS(2),
> +		[GEN11_VCS(3)]  = _VCS(3),
> +		[GEN11_VECS(0)] = _VECS(0),
> +		[GEN11_VECS(1)] = _VECS(1),
> +	};
>   	void __iomem * const regs = i915->regs;
>   	unsigned int bank;
>   
>   	for (bank = 0; bank < 2; bank++) {
> +		const u8 *map = bank ? bank1_map : bank0_map;
>   		unsigned long intr_dw;
>   		unsigned int bit;
>   
> @@ -2859,7 +2834,7 @@ gen11_gt_irq_handler(struct drm_i915_private * const i915,
>   			if (unlikely(!iir))
>   				continue;
>   
> -			gen11_gt_engine_irq_handler(i915, bank, bit, iir);
> +			gen8_cs_irq_handler(i915->engine[map[bit]], iir);

With guc and PM interrupts we get a couple of non-engine interrupts on 
bank0, so this may not scale very well depending how we approach it.

My vote still goes to deriving class and instance from the register 
(bits 16-18 and 20-25 respectively). If we return the full register 
value from gen11_gt_engine_intr, we can then do something like:

for_each_set_bit(bit, &intr_dw, 32) {
         const u32 ident = gen11_gt_engine_intr(i915, bank, bit);
         u16 iir = ident & GEN11_INTR_ENGINE_MASK;
         u8 class, instance;

         if (unlikely(!iir))
                 continue;

         class = (ident >> 16) & (BIT(3) - 1);
         instance = (ident >> 20) &
                 (BIT(GEN11_ENGINE_INSTANCE_WIDTH) - 1);

         gen8_cs_irq_handler(i915->engine_class[class][instance], iir);
}

Which saves a bit more code and is more adaptable to non-engine 
interrupts IMHO, since we have the class and all non-engine interrupts 
come under class 4.

add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-137 (-137)
Function                                     old     new   delta
gen11_irq_handler                            711     574    -137
Total: Before=1487912, After=1487775, chg -0.01%

Downside is that we have a few more instructions in an hot path.

Thanks,
Daniele

>   		}
>   
>   		/* Clear must be after shared has been served for engine */
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c8c29d8ecbab..e423ec58e5d2 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2762,44 +2762,6 @@  static void __fini_wedge(struct wedge_me *w)
 	     (W)->i915;							\
 	     __fini_wedge((W)))
 
-static void
-gen11_gt_engine_irq_handler(struct drm_i915_private * const i915,
-			    const unsigned int bank,
-			    const unsigned int engine_n,
-			    const u16 iir)
-{
-	struct intel_engine_cs ** const engine = i915->engine;
-
-	switch (bank) {
-	case 0:
-		switch (engine_n) {
-
-		case GEN11_RCS0:
-			return gen8_cs_irq_handler(engine[RCS], iir);
-
-		case GEN11_BCS:
-			return gen8_cs_irq_handler(engine[BCS], iir);
-		}
-	case 1:
-		switch (engine_n) {
-
-		case GEN11_VCS(0):
-			return gen8_cs_irq_handler(engine[_VCS(0)], iir);
-		case GEN11_VCS(1):
-			return gen8_cs_irq_handler(engine[_VCS(1)], iir);
-		case GEN11_VCS(2):
-			return gen8_cs_irq_handler(engine[_VCS(2)], iir);
-		case GEN11_VCS(3):
-			return gen8_cs_irq_handler(engine[_VCS(3)], iir);
-
-		case GEN11_VECS(0):
-			return gen8_cs_irq_handler(engine[_VECS(0)], iir);
-		case GEN11_VECS(1):
-			return gen8_cs_irq_handler(engine[_VECS(1)], iir);
-		}
-	}
-}
-
 static u32
 gen11_gt_engine_intr(struct drm_i915_private * const i915,
 		     const unsigned int bank, const unsigned int bit)
@@ -2836,10 +2798,23 @@  static void
 gen11_gt_irq_handler(struct drm_i915_private * const i915,
 		     const u32 master_ctl)
 {
+	static const u8 bank0_map[] = {
+		[GEN11_RCS0] = RCS,
+		[GEN11_BCS]  = BCS,
+	};
+	static const u8 bank1_map[] = {
+		[GEN11_VCS(0)]  = _VCS(0),
+		[GEN11_VCS(1)]  = _VCS(1),
+		[GEN11_VCS(2)]  = _VCS(2),
+		[GEN11_VCS(3)]  = _VCS(3),
+		[GEN11_VECS(0)] = _VECS(0),
+		[GEN11_VECS(1)] = _VECS(1),
+	};
 	void __iomem * const regs = i915->regs;
 	unsigned int bank;
 
 	for (bank = 0; bank < 2; bank++) {
+		const u8 *map = bank ? bank1_map : bank0_map;
 		unsigned long intr_dw;
 		unsigned int bit;
 
@@ -2859,7 +2834,7 @@  gen11_gt_irq_handler(struct drm_i915_private * const i915,
 			if (unlikely(!iir))
 				continue;
 
-			gen11_gt_engine_irq_handler(i915, bank, bit, iir);
+			gen8_cs_irq_handler(i915->engine[map[bit]], iir);
 		}
 
 		/* Clear must be after shared has been served for engine */