diff mbox

[10/19] drm/i915: Remove USES_GUC_SUBMISSION() pointer chasing from gen8_cs_irq_handler

Message ID 20180517074055.14638-10-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson May 17, 2018, 7:40 a.m. UTC
Store whether or not we need to kick the guc's execlists emulation on
the engine itself to avoid chasing the device info.

gen8_cs_irq_handler                          512     428     -84

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c             | 4 +++-
 drivers/gpu/drm/i915/intel_guc_submission.c | 1 +
 drivers/gpu/drm/i915/intel_lrc.c            | 1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h     | 7 +++++++
 4 files changed, 12 insertions(+), 1 deletion(-)

Comments

Tvrtko Ursulin May 17, 2018, 10:58 a.m. UTC | #1
On 17/05/2018 08:40, Chris Wilson wrote:
> Store whether or not we need to kick the guc's execlists emulation on
> the engine itself to avoid chasing the device info.

We do not chase device info but modparams in this case.

> gen8_cs_irq_handler                          512     428     -84

I guess my point from before, (unfortunately I forgot to reply), was how 
much of the saving remains if GEM_BUG_ON is compiled out?

If nothing or almost nothing, I don't see a need to fiddle with this now.

Regards,

Tvrtko

> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_irq.c             | 4 +++-
>   drivers/gpu/drm/i915/intel_guc_submission.c | 1 +
>   drivers/gpu/drm/i915/intel_lrc.c            | 1 +
>   drivers/gpu/drm/i915/intel_ringbuffer.h     | 7 +++++++
>   4 files changed, 12 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index f9bc3aaa90d0..460878572515 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1472,8 +1472,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
>   	}
>   
>   	if (iir & GT_RENDER_USER_INTERRUPT) {
> +		if (intel_engine_uses_guc(engine))
> +			tasklet = true;
> +
>   		notify_ring(engine);
> -		tasklet |= USES_GUC_SUBMISSION(engine->i915);
>   	}
>   
>   	if (tasklet)
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 133367a17863..d9fcd5db4ea4 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -1312,6 +1312,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
>   		engine->unpark = guc_submission_unpark;
>   
>   		engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
> +		engine->flags |= I915_ENGINE_USES_GUC;
>   	}
>   
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 857ab04452f0..4928e9ad7826 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2305,6 +2305,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
>   	engine->park = NULL;
>   	engine->unpark = NULL;
>   
> +	engine->flags &= ~I915_ENGINE_USES_GUC;
>   	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
>   	if (engine->i915->preempt_context)
>   		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index acef385c4c80..4ad9c5842575 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -574,6 +574,7 @@ struct intel_engine_cs {
>   #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
>   #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
>   #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
> +#define I915_ENGINE_USES_GUC         BIT(3)
>   	unsigned int flags;
>   
>   	/*
> @@ -651,6 +652,12 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine)
>   	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
>   }
>   
> +static inline bool
> +intel_engine_uses_guc(const struct intel_engine_cs *engine)
> +{
> +	return engine->flags & I915_ENGINE_USES_GUC;
> +}
> +
>   static inline bool __execlists_need_preempt(int prio, int last)
>   {
>   	return prio > max(0, last);
>
Chris Wilson May 17, 2018, 11:24 a.m. UTC | #2
Quoting Tvrtko Ursulin (2018-05-17 11:58:21)
> 
> On 17/05/2018 08:40, Chris Wilson wrote:
> > Store whether or not we need to kick the guc's execlists emulation on
> > the engine itself to avoid chasing the device info.
> 
> We do not chase device info but modparams in this case.
> 
> > gen8_cs_irq_handler                          512     428     -84
> 
> I guess my point from before, (unfortunately I forgot to reply), was how 
> much of the saving remains if GEM_BUG_ON is compiled out?

Remember the motto of killing off checking globals and only checking
derived state? :) (I'm definitely not in favour of sprinkling more global
checking CAPS over the code.)

Out of the debug build

gen8_cs_irq_handler                          170     185     +15
gen8_cs_irq_handler                          170     128     -42 (later)
gen8_cs_irq_handler                          170     128     -42 (+ USES_GUC_SUBMISSION)

Hmm,

With USES_GUC_SUBMISSION:

   0x000000000000a780 <+0>:	callq  0xa785 <gen8_cs_irq_handler+5>
   0x000000000000a785 <+5>:	push   %r12
   0x000000000000a787 <+7>:	mov    %esi,%r12d
   0x000000000000a78a <+10>:	and    $0x1,%r12d
   0x000000000000a78e <+14>:	and    $0x100,%esi
   0x000000000000a794 <+20>:	push   %rbp
   0x000000000000a795 <+21>:	push   %rbx
   0x000000000000a796 <+22>:	mov    %rdi,%rbx
   0x000000000000a799 <+25>:	je     0xa7c6 <gen8_cs_irq_handler+70>
   0x000000000000a79b <+27>:	lea    0x458(%rdi),%rdi
   0x000000000000a7a2 <+34>:	callq  0xa7a7 <gen8_cs_irq_handler+39>
   0x000000000000a7a7 <+39>:	mov    0x458(%rbx),%eax
   0x000000000000a7ad <+45>:	test   %eax,%eax
   0x000000000000a7af <+47>:	je     0xa7c6 <gen8_cs_irq_handler+70>
   0x000000000000a7b1 <+49>:	lock btsq $0x1,0x108(%rbx)
   0x000000000000a7bb <+59>:	setae  %bpl
   0x000000000000a7bf <+63>:	test   %r12d,%r12d
   0x000000000000a7c2 <+66>:	je     0xa7fa <gen8_cs_irq_handler+122>
   0x000000000000a7c4 <+68>:	jmp    0xa7cd <gen8_cs_irq_handler+77>
   0x000000000000a7c6 <+70>:	test   %r12d,%r12d
   0x000000000000a7c9 <+73>:	je     0xa812 <gen8_cs_irq_handler+146>
   0x000000000000a7cb <+75>:	xor    %ebp,%ebp
   0x000000000000a7cd <+77>:	lea    0x240(%rbx),%rdi
   0x000000000000a7d4 <+84>:	callq  0xa7d9 <gen8_cs_irq_handler+89>
   0x000000000000a7d9 <+89>:	testb  $0x1,0x240(%rbx)
   0x000000000000a7e0 <+96>:	jne    0xa820 <gen8_cs_irq_handler+160>
   0x000000000000a7e2 <+98>:	mov    $0x0,%rdi
   0x000000000000a7e9 <+105>:	callq  0xa7ee <gen8_cs_irq_handler+110>
   0x000000000000a7ee <+110>:	movzbl 0x0(%rip),%eax        # 0xa7f5 <gen8_cs_irq_handler+117>
   0x000000000000a7f5 <+117>:	and    $0x1,%eax
   0x000000000000a7f8 <+120>:	or     %eax,%ebp
   0x000000000000a7fa <+122>:	test   %bpl,%bpl
   0x000000000000a7fd <+125>:	je     0xa812 <gen8_cs_irq_handler+146>
   0x000000000000a7ff <+127>:	lea    0x3d8(%rbx),%rdi
   0x000000000000a806 <+134>:	lock btsq $0x0,0x3e0(%rbx)
   0x000000000000a810 <+144>:	jae    0xa817 <gen8_cs_irq_handler+151>
   0x000000000000a812 <+146>:	pop    %rbx
   0x000000000000a813 <+147>:	pop    %rbp
   0x000000000000a814 <+148>:	pop    %r12
   0x000000000000a816 <+150>:	retq   
   0x000000000000a817 <+151>:	pop    %rbx
   0x000000000000a818 <+152>:	pop    %rbp
   0x000000000000a819 <+153>:	pop    %r12
   0x000000000000a81b <+155>:	jmpq   0xa820 <gen8_cs_irq_handler+160>
   0x000000000000a820 <+160>:	mov    %rbx,%rdi
   0x000000000000a823 <+163>:	callq  0xa4a0 <notify_ring>
   0x000000000000a828 <+168>:	jmp    0xa7e2 <gen8_cs_irq_handler+98>

to

   0x000000000000a780 <+0>:	callq  0xa785 <gen8_cs_irq_handler+5>
   0x000000000000a785 <+5>:	push   %r12
   0x000000000000a787 <+7>:	push   %rbp
   0x000000000000a788 <+8>:	mov    %esi,%ebp
   0x000000000000a78a <+10>:	and    $0x1,%ebp
   0x000000000000a78d <+13>:	and    $0x100,%esi
   0x000000000000a793 <+19>:	push   %rbx
   0x000000000000a794 <+20>:	mov    %rdi,%rbx
   0x000000000000a797 <+23>:	je     0xa7cb <gen8_cs_irq_handler+75>
   0x000000000000a799 <+25>:	lea    0x458(%rdi),%rdi
   0x000000000000a7a0 <+32>:	callq  0xa7a5 <gen8_cs_irq_handler+37>
   0x000000000000a7a5 <+37>:	mov    0x458(%rbx),%eax
   0x000000000000a7ab <+43>:	test   %eax,%eax
   0x000000000000a7ad <+45>:	je     0xa7cb <gen8_cs_irq_handler+75>
   0x000000000000a7af <+47>:	lock btsq $0x1,0x108(%rbx)
   0x000000000000a7b9 <+57>:	setae  %r12b
   0x000000000000a7bd <+61>:	test   %ebp,%ebp
   0x000000000000a7bf <+63>:	jne    0xa7d2 <gen8_cs_irq_handler+82>
   0x000000000000a7c1 <+65>:	test   %r12b,%r12b
   0x000000000000a7c4 <+68>:	jne    0xa805 <gen8_cs_irq_handler+133>
   0x000000000000a7c6 <+70>:	pop    %rbx
   0x000000000000a7c7 <+71>:	pop    %rbp
   0x000000000000a7c8 <+72>:	pop    %r12
   0x000000000000a7ca <+74>:	retq   
   0x000000000000a7cb <+75>:	test   %ebp,%ebp
   0x000000000000a7cd <+77>:	je     0xa7c6 <gen8_cs_irq_handler+70>
   0x000000000000a7cf <+79>:	xor    %r12d,%r12d
   0x000000000000a7d2 <+82>:	lea    0x5d8(%rbx),%rdi
   0x000000000000a7d9 <+89>:	callq  0xa7de <gen8_cs_irq_handler+94>
   0x000000000000a7de <+94>:	mov    0x5d8(%rbx),%ebp
   0x000000000000a7e4 <+100>:	lea    0x240(%rbx),%rdi
   0x000000000000a7eb <+107>:	callq  0xa7f0 <gen8_cs_irq_handler+112>
   0x000000000000a7f0 <+112>:	movzbl 0x240(%rbx),%eax
   0x000000000000a7f7 <+119>:	and    $0x8,%ebp
   0x000000000000a7fa <+122>:	and    $0x1,%eax
   0x000000000000a7fd <+125>:	test   %ebp,%ebp
   0x000000000000a7ff <+127>:	je     0xa821 <gen8_cs_irq_handler+161>
   0x000000000000a801 <+129>:	test   %al,%al
   0x000000000000a803 <+131>:	jne    0xa82f <gen8_cs_irq_handler+175>
   0x000000000000a805 <+133>:	lea    0x3d8(%rbx),%rdi
   0x000000000000a80c <+140>:	lock btsq $0x0,0x3e0(%rbx)
   0x000000000000a816 <+150>:	jb     0xa7c6 <gen8_cs_irq_handler+70>
   0x000000000000a818 <+152>:	pop    %rbx
   0x000000000000a819 <+153>:	pop    %rbp
   0x000000000000a81a <+154>:	pop    %r12
   0x000000000000a81c <+156>:	jmpq   0xa821 <gen8_cs_irq_handler+161>
   0x000000000000a821 <+161>:	test   %al,%al
   0x000000000000a823 <+163>:	je     0xa7c1 <gen8_cs_irq_handler+65>
   0x000000000000a825 <+165>:	mov    %rbx,%rdi
   0x000000000000a828 <+168>:	callq  0xa4a0 <notify_ring>
   0x000000000000a82d <+173>:	jmp    0xa7c1 <gen8_cs_irq_handler+65>
   0x000000000000a82f <+175>:	mov    %rbx,%rdi
   0x000000000000a832 <+178>:	callq  0xa4a0 <notify_ring>
   0x000000000000a837 <+183>:	jmp    0xa805 <gen8_cs_irq_handler+133>

And then onwards to

   0x000000000000a680 <+0>:	callq  0xa685 <gen8_cs_irq_handler+5>
   0x000000000000a685 <+5>:	push   %rbx
   0x000000000000a686 <+6>:	mov    %rdi,%rbx
   0x000000000000a689 <+9>:	sub    $0x8,%rsp
   0x000000000000a68d <+13>:	test   $0x100,%esi
   0x000000000000a693 <+19>:	jne    0xa6ca <gen8_cs_irq_handler+74>
   0x000000000000a695 <+21>:	and    $0x1,%esi
   0x000000000000a698 <+24>:	je     0xa6c4 <gen8_cs_irq_handler+68>
   0x000000000000a69a <+26>:	lea    0x5f0(%rbx),%rdi
   0x000000000000a6a1 <+33>:	callq  0xa6a6 <gen8_cs_irq_handler+38>
   0x000000000000a6a6 <+38>:	testb  $0x8,0x5f0(%rbx)
   0x000000000000a6ad <+45>:	jne    0xa6e6 <gen8_cs_irq_handler+102>
   0x000000000000a6af <+47>:	lea    0x240(%rbx),%rdi
   0x000000000000a6b6 <+54>:	callq  0xa6bb <gen8_cs_irq_handler+59>
   0x000000000000a6bb <+59>:	testb  $0x1,0x240(%rbx)
   0x000000000000a6c2 <+66>:	jne    0xa6d9 <gen8_cs_irq_handler+89>
   0x000000000000a6c4 <+68>:	add    $0x8,%rsp
   0x000000000000a6c8 <+72>:	pop    %rbx
   0x000000000000a6c9 <+73>:	retq   
   0x000000000000a6ca <+74>:	mov    %esi,0x4(%rsp)
   0x000000000000a6ce <+78>:	callq  0xa6d3 <gen8_cs_irq_handler+83>
   0x000000000000a6d3 <+83>:	mov    0x4(%rsp),%esi
   0x000000000000a6d7 <+87>:	jmp    0xa695 <gen8_cs_irq_handler+21>
   0x000000000000a6d9 <+89>:	add    $0x8,%rsp
   0x000000000000a6dd <+93>:	mov    %rbx,%rdi
   0x000000000000a6e0 <+96>:	pop    %rbx
   0x000000000000a6e1 <+97>:	jmpq   0xa3a0 <notify_ring>
   0x000000000000a6e6 <+102>:	lea    0x3d8(%rbx),%rdi
   0x000000000000a6ed <+109>:	lock btsq $0x0,0x3e0(%rbx)
   0x000000000000a6f7 <+119>:	jb     0xa6af <gen8_cs_irq_handler+47>
   0x000000000000a6f9 <+121>:	callq  0xa6fe <gen8_cs_irq_handler+126>
   0x000000000000a6fe <+126>:	jmp    0xa6af <gen8_cs_irq_handler+47>

> If nothing or almost nothing, I don't see a need to fiddle with this now.

Also please consider later patches which also need conditionals as
execlists is unfortunately gaining new tricks that are harder to pull
off with the current guc submission :|
(Might be time to stop mixing the two backends?)
-Chris
Tvrtko Ursulin May 17, 2018, 1:13 p.m. UTC | #3
On 17/05/2018 12:24, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-05-17 11:58:21)
>>
>> On 17/05/2018 08:40, Chris Wilson wrote:
>>> Store whether or not we need to kick the guc's execlists emulation on
>>> the engine itself to avoid chasing the device info.
>>
>> We do not chase device info but modparams in this case.
>>
>>> gen8_cs_irq_handler                          512     428     -84
>>
>> I guess my point from before, (unfortunately I forgot to reply), was how
>> much of the saving remains if GEM_BUG_ON is compiled out?
> 
> Remember the motto of killing off checking globals and only checking
> derived state? :) (I'm definitely not in favour of sprinkling more global
> checking CAPS over the code.)
> 
> Out of the debug build
> 
> gen8_cs_irq_handler                          170     185     +15
> gen8_cs_irq_handler                          170     128     -42 (later)
> gen8_cs_irq_handler                          170     128     -42 (+ USES_GUC_SUBMISSION)

I don't get which is which.

[snip]

>> If nothing or almost nothing, I don't see a need to fiddle with this now.
> 
> Also please consider later patches which also need conditionals as
> execlists is unfortunately gaining new tricks that are harder to pull
> off with the current guc submission :|
> (Might be time to stop mixing the two backends?)

I might say fine if the commit message contained truth. :)

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index f9bc3aaa90d0..460878572515 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1472,8 +1472,10 @@  gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 	}
 
 	if (iir & GT_RENDER_USER_INTERRUPT) {
+		if (intel_engine_uses_guc(engine))
+			tasklet = true;
+
 		notify_ring(engine);
-		tasklet |= USES_GUC_SUBMISSION(engine->i915);
 	}
 
 	if (tasklet)
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 133367a17863..d9fcd5db4ea4 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -1312,6 +1312,7 @@  int intel_guc_submission_enable(struct intel_guc *guc)
 		engine->unpark = guc_submission_unpark;
 
 		engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
+		engine->flags |= I915_ENGINE_USES_GUC;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 857ab04452f0..4928e9ad7826 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2305,6 +2305,7 @@  static void execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->park = NULL;
 	engine->unpark = NULL;
 
+	engine->flags &= ~I915_ENGINE_USES_GUC;
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 	if (engine->i915->preempt_context)
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index acef385c4c80..4ad9c5842575 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -574,6 +574,7 @@  struct intel_engine_cs {
 #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
+#define I915_ENGINE_USES_GUC         BIT(3)
 	unsigned int flags;
 
 	/*
@@ -651,6 +652,12 @@  intel_engine_has_preemption(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
 }
 
+static inline bool
+intel_engine_uses_guc(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_USES_GUC;
+}
+
 static inline bool __execlists_need_preempt(int prio, int last)
 {
 	return prio > max(0, last);