Message ID | 20180517074055.14638-10-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 17/05/2018 08:40, Chris Wilson wrote: > Store whether or not we need to kick the guc's execlists emulation on > the engine itself to avoid chasing the device info. We do not chase device info but modparams in this case. > gen8_cs_irq_handler 512 428 -84 I guess my point from before, (unfortunately I forgot to reply), was how much of the saving remains if GEM_BUG_ON is compiled out? If nothing or almost nothing, I don't see a need to fiddle with this now. Regards, Tvrtko > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_irq.c | 4 +++- > drivers/gpu/drm/i915/intel_guc_submission.c | 1 + > drivers/gpu/drm/i915/intel_lrc.c | 1 + > drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +++++++ > 4 files changed, 12 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index f9bc3aaa90d0..460878572515 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1472,8 +1472,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) > } > > if (iir & GT_RENDER_USER_INTERRUPT) { > + if (intel_engine_uses_guc(engine)) > + tasklet = true; > + > notify_ring(engine); > - tasklet |= USES_GUC_SUBMISSION(engine->i915); > } > > if (tasklet) > diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c > index 133367a17863..d9fcd5db4ea4 100644 > --- a/drivers/gpu/drm/i915/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/intel_guc_submission.c > @@ -1312,6 +1312,7 @@ int intel_guc_submission_enable(struct intel_guc *guc) > engine->unpark = guc_submission_unpark; > > engine->flags &= ~I915_ENGINE_SUPPORTS_STATS; > + engine->flags |= I915_ENGINE_USES_GUC; > } > > return 0; > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 857ab04452f0..4928e9ad7826 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -2305,6 +2305,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) > engine->park = NULL; > engine->unpark = NULL; > > + engine->flags &= ~I915_ENGINE_USES_GUC; > engine->flags |= I915_ENGINE_SUPPORTS_STATS; > if (engine->i915->preempt_context) > engine->flags |= I915_ENGINE_HAS_PREEMPTION; > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index acef385c4c80..4ad9c5842575 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -574,6 +574,7 @@ struct intel_engine_cs { > #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) > #define I915_ENGINE_SUPPORTS_STATS BIT(1) > #define I915_ENGINE_HAS_PREEMPTION BIT(2) > +#define I915_ENGINE_USES_GUC BIT(3) > unsigned int flags; > > /* > @@ -651,6 +652,12 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine) > return engine->flags & I915_ENGINE_HAS_PREEMPTION; > } > > +static inline bool > +intel_engine_uses_guc(const struct intel_engine_cs *engine) > +{ > + return engine->flags & I915_ENGINE_USES_GUC; > +} > + > static inline bool __execlists_need_preempt(int prio, int last) > { > return prio > max(0, last); >
Quoting Tvrtko Ursulin (2018-05-17 11:58:21) > > On 17/05/2018 08:40, Chris Wilson wrote: > > Store whether or not we need to kick the guc's execlists emulation on > > the engine itself to avoid chasing the device info. > > We do not chase device info but modparams in this case. > > > gen8_cs_irq_handler 512 428 -84 > > I guess my point from before, (unfortunately I forgot to reply), was how > much of the saving remains if GEM_BUG_ON is compiled out? Remember the motto of killing off checking globals and only checking derived state? :) (I'm definitely not in favour of sprinkling more global checking CAPS over the code.) Out of the debug build gen8_cs_irq_handler 170 185 +15 gen8_cs_irq_handler 170 128 -42 (later) gen8_cs_irq_handler 170 128 -42 (+ USES_GUC_SUBMISSION) Hmm, With USES_GUC_SUBMISSION: 0x000000000000a780 <+0>: callq 0xa785 <gen8_cs_irq_handler+5> 0x000000000000a785 <+5>: push %r12 0x000000000000a787 <+7>: mov %esi,%r12d 0x000000000000a78a <+10>: and $0x1,%r12d 0x000000000000a78e <+14>: and $0x100,%esi 0x000000000000a794 <+20>: push %rbp 0x000000000000a795 <+21>: push %rbx 0x000000000000a796 <+22>: mov %rdi,%rbx 0x000000000000a799 <+25>: je 0xa7c6 <gen8_cs_irq_handler+70> 0x000000000000a79b <+27>: lea 0x458(%rdi),%rdi 0x000000000000a7a2 <+34>: callq 0xa7a7 <gen8_cs_irq_handler+39> 0x000000000000a7a7 <+39>: mov 0x458(%rbx),%eax 0x000000000000a7ad <+45>: test %eax,%eax 0x000000000000a7af <+47>: je 0xa7c6 <gen8_cs_irq_handler+70> 0x000000000000a7b1 <+49>: lock btsq $0x1,0x108(%rbx) 0x000000000000a7bb <+59>: setae %bpl 0x000000000000a7bf <+63>: test %r12d,%r12d 0x000000000000a7c2 <+66>: je 0xa7fa <gen8_cs_irq_handler+122> 0x000000000000a7c4 <+68>: jmp 0xa7cd <gen8_cs_irq_handler+77> 0x000000000000a7c6 <+70>: test %r12d,%r12d 0x000000000000a7c9 <+73>: je 0xa812 <gen8_cs_irq_handler+146> 0x000000000000a7cb <+75>: xor %ebp,%ebp 0x000000000000a7cd <+77>: lea 0x240(%rbx),%rdi 0x000000000000a7d4 <+84>: callq 0xa7d9 <gen8_cs_irq_handler+89> 0x000000000000a7d9 <+89>: testb $0x1,0x240(%rbx) 0x000000000000a7e0 <+96>: jne 0xa820 <gen8_cs_irq_handler+160> 0x000000000000a7e2 <+98>: mov $0x0,%rdi 0x000000000000a7e9 <+105>: callq 0xa7ee <gen8_cs_irq_handler+110> 0x000000000000a7ee <+110>: movzbl 0x0(%rip),%eax # 0xa7f5 <gen8_cs_irq_handler+117> 0x000000000000a7f5 <+117>: and $0x1,%eax 0x000000000000a7f8 <+120>: or %eax,%ebp 0x000000000000a7fa <+122>: test %bpl,%bpl 0x000000000000a7fd <+125>: je 0xa812 <gen8_cs_irq_handler+146> 0x000000000000a7ff <+127>: lea 0x3d8(%rbx),%rdi 0x000000000000a806 <+134>: lock btsq $0x0,0x3e0(%rbx) 0x000000000000a810 <+144>: jae 0xa817 <gen8_cs_irq_handler+151> 0x000000000000a812 <+146>: pop %rbx 0x000000000000a813 <+147>: pop %rbp 0x000000000000a814 <+148>: pop %r12 0x000000000000a816 <+150>: retq 0x000000000000a817 <+151>: pop %rbx 0x000000000000a818 <+152>: pop %rbp 0x000000000000a819 <+153>: pop %r12 0x000000000000a81b <+155>: jmpq 0xa820 <gen8_cs_irq_handler+160> 0x000000000000a820 <+160>: mov %rbx,%rdi 0x000000000000a823 <+163>: callq 0xa4a0 <notify_ring> 0x000000000000a828 <+168>: jmp 0xa7e2 <gen8_cs_irq_handler+98> to 0x000000000000a780 <+0>: callq 0xa785 <gen8_cs_irq_handler+5> 0x000000000000a785 <+5>: push %r12 0x000000000000a787 <+7>: push %rbp 0x000000000000a788 <+8>: mov %esi,%ebp 0x000000000000a78a <+10>: and $0x1,%ebp 0x000000000000a78d <+13>: and $0x100,%esi 0x000000000000a793 <+19>: push %rbx 0x000000000000a794 <+20>: mov %rdi,%rbx 0x000000000000a797 <+23>: je 0xa7cb <gen8_cs_irq_handler+75> 0x000000000000a799 <+25>: lea 0x458(%rdi),%rdi 0x000000000000a7a0 <+32>: callq 0xa7a5 <gen8_cs_irq_handler+37> 0x000000000000a7a5 <+37>: mov 0x458(%rbx),%eax 0x000000000000a7ab <+43>: test %eax,%eax 0x000000000000a7ad <+45>: je 0xa7cb <gen8_cs_irq_handler+75> 0x000000000000a7af <+47>: lock btsq $0x1,0x108(%rbx) 0x000000000000a7b9 <+57>: setae %r12b 0x000000000000a7bd <+61>: test %ebp,%ebp 0x000000000000a7bf <+63>: jne 0xa7d2 <gen8_cs_irq_handler+82> 0x000000000000a7c1 <+65>: test %r12b,%r12b 0x000000000000a7c4 <+68>: jne 0xa805 <gen8_cs_irq_handler+133> 0x000000000000a7c6 <+70>: pop %rbx 0x000000000000a7c7 <+71>: pop %rbp 0x000000000000a7c8 <+72>: pop %r12 0x000000000000a7ca <+74>: retq 0x000000000000a7cb <+75>: test %ebp,%ebp 0x000000000000a7cd <+77>: je 0xa7c6 <gen8_cs_irq_handler+70> 0x000000000000a7cf <+79>: xor %r12d,%r12d 0x000000000000a7d2 <+82>: lea 0x5d8(%rbx),%rdi 0x000000000000a7d9 <+89>: callq 0xa7de <gen8_cs_irq_handler+94> 0x000000000000a7de <+94>: mov 0x5d8(%rbx),%ebp 0x000000000000a7e4 <+100>: lea 0x240(%rbx),%rdi 0x000000000000a7eb <+107>: callq 0xa7f0 <gen8_cs_irq_handler+112> 0x000000000000a7f0 <+112>: movzbl 0x240(%rbx),%eax 0x000000000000a7f7 <+119>: and $0x8,%ebp 0x000000000000a7fa <+122>: and $0x1,%eax 0x000000000000a7fd <+125>: test %ebp,%ebp 0x000000000000a7ff <+127>: je 0xa821 <gen8_cs_irq_handler+161> 0x000000000000a801 <+129>: test %al,%al 0x000000000000a803 <+131>: jne 0xa82f <gen8_cs_irq_handler+175> 0x000000000000a805 <+133>: lea 0x3d8(%rbx),%rdi 0x000000000000a80c <+140>: lock btsq $0x0,0x3e0(%rbx) 0x000000000000a816 <+150>: jb 0xa7c6 <gen8_cs_irq_handler+70> 0x000000000000a818 <+152>: pop %rbx 0x000000000000a819 <+153>: pop %rbp 0x000000000000a81a <+154>: pop %r12 0x000000000000a81c <+156>: jmpq 0xa821 <gen8_cs_irq_handler+161> 0x000000000000a821 <+161>: test %al,%al 0x000000000000a823 <+163>: je 0xa7c1 <gen8_cs_irq_handler+65> 0x000000000000a825 <+165>: mov %rbx,%rdi 0x000000000000a828 <+168>: callq 0xa4a0 <notify_ring> 0x000000000000a82d <+173>: jmp 0xa7c1 <gen8_cs_irq_handler+65> 0x000000000000a82f <+175>: mov %rbx,%rdi 0x000000000000a832 <+178>: callq 0xa4a0 <notify_ring> 0x000000000000a837 <+183>: jmp 0xa805 <gen8_cs_irq_handler+133> And then onwards to 0x000000000000a680 <+0>: callq 0xa685 <gen8_cs_irq_handler+5> 0x000000000000a685 <+5>: push %rbx 0x000000000000a686 <+6>: mov %rdi,%rbx 0x000000000000a689 <+9>: sub $0x8,%rsp 0x000000000000a68d <+13>: test $0x100,%esi 0x000000000000a693 <+19>: jne 0xa6ca <gen8_cs_irq_handler+74> 0x000000000000a695 <+21>: and $0x1,%esi 0x000000000000a698 <+24>: je 0xa6c4 <gen8_cs_irq_handler+68> 0x000000000000a69a <+26>: lea 0x5f0(%rbx),%rdi 0x000000000000a6a1 <+33>: callq 0xa6a6 <gen8_cs_irq_handler+38> 0x000000000000a6a6 <+38>: testb $0x8,0x5f0(%rbx) 0x000000000000a6ad <+45>: jne 0xa6e6 <gen8_cs_irq_handler+102> 0x000000000000a6af <+47>: lea 0x240(%rbx),%rdi 0x000000000000a6b6 <+54>: callq 0xa6bb <gen8_cs_irq_handler+59> 0x000000000000a6bb <+59>: testb $0x1,0x240(%rbx) 0x000000000000a6c2 <+66>: jne 0xa6d9 <gen8_cs_irq_handler+89> 0x000000000000a6c4 <+68>: add $0x8,%rsp 0x000000000000a6c8 <+72>: pop %rbx 0x000000000000a6c9 <+73>: retq 0x000000000000a6ca <+74>: mov %esi,0x4(%rsp) 0x000000000000a6ce <+78>: callq 0xa6d3 <gen8_cs_irq_handler+83> 0x000000000000a6d3 <+83>: mov 0x4(%rsp),%esi 0x000000000000a6d7 <+87>: jmp 0xa695 <gen8_cs_irq_handler+21> 0x000000000000a6d9 <+89>: add $0x8,%rsp 0x000000000000a6dd <+93>: mov %rbx,%rdi 0x000000000000a6e0 <+96>: pop %rbx 0x000000000000a6e1 <+97>: jmpq 0xa3a0 <notify_ring> 0x000000000000a6e6 <+102>: lea 0x3d8(%rbx),%rdi 0x000000000000a6ed <+109>: lock btsq $0x0,0x3e0(%rbx) 0x000000000000a6f7 <+119>: jb 0xa6af <gen8_cs_irq_handler+47> 0x000000000000a6f9 <+121>: callq 0xa6fe <gen8_cs_irq_handler+126> 0x000000000000a6fe <+126>: jmp 0xa6af <gen8_cs_irq_handler+47> > If nothing or almost nothing, I don't see a need to fiddle with this now. Also please consider later patches which also need conditionals as execlists is unfortunately gaining new tricks that are harder to pull off with the current guc submission :| (Might be time to stop mixing the two backends?) -Chris
On 17/05/2018 12:24, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2018-05-17 11:58:21) >> >> On 17/05/2018 08:40, Chris Wilson wrote: >>> Store whether or not we need to kick the guc's execlists emulation on >>> the engine itself to avoid chasing the device info. >> >> We do not chase device info but modparams in this case. >> >>> gen8_cs_irq_handler 512 428 -84 >> >> I guess my point from before, (unfortunately I forgot to reply), was how >> much of the saving remains if GEM_BUG_ON is compiled out? > > Remember the motto of killing off checking globals and only checking > derived state? :) (I'm definitely not in favour of sprinkling more global > checking CAPS over the code.) > > Out of the debug build > > gen8_cs_irq_handler 170 185 +15 > gen8_cs_irq_handler 170 128 -42 (later) > gen8_cs_irq_handler 170 128 -42 (+ USES_GUC_SUBMISSION) I don't get which is which. [snip] >> If nothing or almost nothing, I don't see a need to fiddle with this now. > > Also please consider later patches which also need conditionals as > execlists is unfortunately gaining new tricks that are harder to pull > off with the current guc submission :| > (Might be time to stop mixing the two backends?) I might say fine if the commit message contained truth. :) Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f9bc3aaa90d0..460878572515 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1472,8 +1472,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) } if (iir & GT_RENDER_USER_INTERRUPT) { + if (intel_engine_uses_guc(engine)) + tasklet = true; + notify_ring(engine); - tasklet |= USES_GUC_SUBMISSION(engine->i915); } if (tasklet) diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 133367a17863..d9fcd5db4ea4 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -1312,6 +1312,7 @@ int intel_guc_submission_enable(struct intel_guc *guc) engine->unpark = guc_submission_unpark; engine->flags &= ~I915_ENGINE_SUPPORTS_STATS; + engine->flags |= I915_ENGINE_USES_GUC; } return 0; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 857ab04452f0..4928e9ad7826 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2305,6 +2305,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) engine->park = NULL; engine->unpark = NULL; + engine->flags &= ~I915_ENGINE_USES_GUC; engine->flags |= I915_ENGINE_SUPPORTS_STATS; if (engine->i915->preempt_context) engine->flags |= I915_ENGINE_HAS_PREEMPTION; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index acef385c4c80..4ad9c5842575 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -574,6 +574,7 @@ struct intel_engine_cs { #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) #define I915_ENGINE_SUPPORTS_STATS BIT(1) #define I915_ENGINE_HAS_PREEMPTION BIT(2) +#define I915_ENGINE_USES_GUC BIT(3) unsigned int flags; /* @@ -651,6 +652,12 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_HAS_PREEMPTION; } +static inline bool +intel_engine_uses_guc(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_USES_GUC; +} + static inline bool __execlists_need_preempt(int prio, int last) { return prio > max(0, last);
Store whether or not we need to kick the guc's execlists emulation on the engine itself to avoid chasing the device info. gen8_cs_irq_handler 512 428 -84 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_irq.c | 4 +++- drivers/gpu/drm/i915/intel_guc_submission.c | 1 + drivers/gpu/drm/i915/intel_lrc.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +++++++ 4 files changed, 12 insertions(+), 1 deletion(-)