Message ID | 20180813080218.28994-3-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Reviewed perf cleanups | expand |
Quoting Tvrtko Ursulin (2018-08-13 09:02:18) > From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > > Abstract the context image access a bit. > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/gpu/drm/i915/i915_perf.c | 34 +++++++++++++++----------------- > 1 file changed, 16 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 49597cf31707..ccb20230df2c 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -210,6 +210,7 @@ > #include "i915_oa_cflgt3.h" > #include "i915_oa_cnl.h" > #include "i915_oa_icl.h" > +#include "intel_lrc_reg.h" > > /* HW requires this to be a power of two, between 128k and 16M, though driver > * is currently generally designed assuming the largest 16M size is used such > @@ -1636,27 +1637,25 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, > u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; > u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; > /* The MMIO offsets for Flex EU registers aren't contiguous */ > - u32 flex_mmio[] = { > - i915_mmio_reg_offset(EU_PERF_CNTL0), > - i915_mmio_reg_offset(EU_PERF_CNTL1), > - i915_mmio_reg_offset(EU_PERF_CNTL2), > - i915_mmio_reg_offset(EU_PERF_CNTL3), > - i915_mmio_reg_offset(EU_PERF_CNTL4), > - i915_mmio_reg_offset(EU_PERF_CNTL5), > - i915_mmio_reg_offset(EU_PERF_CNTL6), > + i915_reg_t flex_regs[] = { > + EU_PERF_CNTL0, > + EU_PERF_CNTL1, > + EU_PERF_CNTL2, > + EU_PERF_CNTL3, > + EU_PERF_CNTL4, > + EU_PERF_CNTL5, > + EU_PERF_CNTL6, > }; > int i; > > - reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL); > - reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent << > - GEN8_OA_TIMER_PERIOD_SHIFT) | > - (dev_priv->perf.oa.periodic ? > - GEN8_OA_TIMER_ENABLE : 0) | > - GEN8_OA_COUNTER_RESUME; > + CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, > + (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | > + (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) | > + GEN8_OA_COUNTER_RESUME); I'll be honest but, I don't think it's CTX_REG() that helps improve the readability here. The really odd part is that this sticks itself into a bare part of the register state not surrounded by any LRI and after a BB_END. This routine can only work for established contexts, it should not work for execlists_init_reg_state. -Chris
On 13/08/2018 09:16, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2018-08-13 09:02:18) >> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >> >> Abstract the context image access a bit. >> >> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> --- >> drivers/gpu/drm/i915/i915_perf.c | 34 +++++++++++++++----------------- >> 1 file changed, 16 insertions(+), 18 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >> index 49597cf31707..ccb20230df2c 100644 >> --- a/drivers/gpu/drm/i915/i915_perf.c >> +++ b/drivers/gpu/drm/i915/i915_perf.c >> @@ -210,6 +210,7 @@ >> #include "i915_oa_cflgt3.h" >> #include "i915_oa_cnl.h" >> #include "i915_oa_icl.h" >> +#include "intel_lrc_reg.h" >> >> /* HW requires this to be a power of two, between 128k and 16M, though driver >> * is currently generally designed assuming the largest 16M size is used such >> @@ -1636,27 +1637,25 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, >> u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; >> u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; >> /* The MMIO offsets for Flex EU registers aren't contiguous */ >> - u32 flex_mmio[] = { >> - i915_mmio_reg_offset(EU_PERF_CNTL0), >> - i915_mmio_reg_offset(EU_PERF_CNTL1), >> - i915_mmio_reg_offset(EU_PERF_CNTL2), >> - i915_mmio_reg_offset(EU_PERF_CNTL3), >> - i915_mmio_reg_offset(EU_PERF_CNTL4), >> - i915_mmio_reg_offset(EU_PERF_CNTL5), >> - i915_mmio_reg_offset(EU_PERF_CNTL6), >> + i915_reg_t flex_regs[] = { >> + EU_PERF_CNTL0, >> + EU_PERF_CNTL1, >> + EU_PERF_CNTL2, >> + EU_PERF_CNTL3, >> + EU_PERF_CNTL4, >> + EU_PERF_CNTL5, >> + EU_PERF_CNTL6, >> }; >> int i; >> >> - reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL); >> - reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent << >> - GEN8_OA_TIMER_PERIOD_SHIFT) | >> - (dev_priv->perf.oa.periodic ? >> - GEN8_OA_TIMER_ENABLE : 0) | >> - GEN8_OA_COUNTER_RESUME; >> + CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, >> + (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | >> + (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) | >> + GEN8_OA_COUNTER_RESUME); > > I'll be honest but, I don't think it's CTX_REG() that helps improve the > readability here. > > The really odd part is that this sticks itself into a bare part of the > register state not surrounded by any LRI and after a BB_END. This > routine can only work for established contexts, it should not work for > execlists_init_reg_state. Unless I am missing something change is completely mechanical, so any question marks you have are already there, right? What do you suggest is the action here? Regards, Tvrtko
Quoting Tvrtko Ursulin (2018-08-13 10:11:44) > > On 13/08/2018 09:16, Chris Wilson wrote: > > Quoting Tvrtko Ursulin (2018-08-13 09:02:18) > >> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > >> > >> Abstract the context image access a bit. > >> > >> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > >> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >> --- > >> drivers/gpu/drm/i915/i915_perf.c | 34 +++++++++++++++----------------- > >> 1 file changed, 16 insertions(+), 18 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > >> index 49597cf31707..ccb20230df2c 100644 > >> --- a/drivers/gpu/drm/i915/i915_perf.c > >> +++ b/drivers/gpu/drm/i915/i915_perf.c > >> @@ -210,6 +210,7 @@ > >> #include "i915_oa_cflgt3.h" > >> #include "i915_oa_cnl.h" > >> #include "i915_oa_icl.h" > >> +#include "intel_lrc_reg.h" > >> > >> /* HW requires this to be a power of two, between 128k and 16M, though driver > >> * is currently generally designed assuming the largest 16M size is used such > >> @@ -1636,27 +1637,25 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, > >> u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; > >> u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; > >> /* The MMIO offsets for Flex EU registers aren't contiguous */ > >> - u32 flex_mmio[] = { > >> - i915_mmio_reg_offset(EU_PERF_CNTL0), > >> - i915_mmio_reg_offset(EU_PERF_CNTL1), > >> - i915_mmio_reg_offset(EU_PERF_CNTL2), > >> - i915_mmio_reg_offset(EU_PERF_CNTL3), > >> - i915_mmio_reg_offset(EU_PERF_CNTL4), > >> - i915_mmio_reg_offset(EU_PERF_CNTL5), > >> - i915_mmio_reg_offset(EU_PERF_CNTL6), > >> + i915_reg_t flex_regs[] = { > >> + EU_PERF_CNTL0, > >> + EU_PERF_CNTL1, > >> + EU_PERF_CNTL2, > >> + EU_PERF_CNTL3, > >> + EU_PERF_CNTL4, > >> + EU_PERF_CNTL5, > >> + EU_PERF_CNTL6, > >> }; > >> int i; > >> > >> - reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL); > >> - reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent << > >> - GEN8_OA_TIMER_PERIOD_SHIFT) | > >> - (dev_priv->perf.oa.periodic ? > >> - GEN8_OA_TIMER_ENABLE : 0) | > >> - GEN8_OA_COUNTER_RESUME; > >> + CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, > >> + (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | > >> + (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) | > >> + GEN8_OA_COUNTER_RESUME); > > > > I'll be honest but, I don't think it's CTX_REG() that helps improve the > > readability here. > > > > The really odd part is that this sticks itself into a bare part of the > > register state not surrounded by any LRI and after a BB_END. This > > routine can only work for established contexts, it should not work for > > execlists_init_reg_state. > > Unless I am missing something change is completely mechanical, so any > question marks you have are already there, right? What do you suggest is > the action here? Sure, the only thing I question of this patch itself is whether CTX_REG() is simply too much horrible obfuscating magic. -Chris
On 13/08/2018 10:16, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2018-08-13 10:11:44) >> >> On 13/08/2018 09:16, Chris Wilson wrote: >>> Quoting Tvrtko Ursulin (2018-08-13 09:02:18) >>>> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >>>> >>>> Abstract the context image access a bit. >>>> >>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >>>> --- >>>> drivers/gpu/drm/i915/i915_perf.c | 34 +++++++++++++++----------------- >>>> 1 file changed, 16 insertions(+), 18 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >>>> index 49597cf31707..ccb20230df2c 100644 >>>> --- a/drivers/gpu/drm/i915/i915_perf.c >>>> +++ b/drivers/gpu/drm/i915/i915_perf.c >>>> @@ -210,6 +210,7 @@ >>>> #include "i915_oa_cflgt3.h" >>>> #include "i915_oa_cnl.h" >>>> #include "i915_oa_icl.h" >>>> +#include "intel_lrc_reg.h" >>>> >>>> /* HW requires this to be a power of two, between 128k and 16M, though driver >>>> * is currently generally designed assuming the largest 16M size is used such >>>> @@ -1636,27 +1637,25 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, >>>> u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; >>>> u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; >>>> /* The MMIO offsets for Flex EU registers aren't contiguous */ >>>> - u32 flex_mmio[] = { >>>> - i915_mmio_reg_offset(EU_PERF_CNTL0), >>>> - i915_mmio_reg_offset(EU_PERF_CNTL1), >>>> - i915_mmio_reg_offset(EU_PERF_CNTL2), >>>> - i915_mmio_reg_offset(EU_PERF_CNTL3), >>>> - i915_mmio_reg_offset(EU_PERF_CNTL4), >>>> - i915_mmio_reg_offset(EU_PERF_CNTL5), >>>> - i915_mmio_reg_offset(EU_PERF_CNTL6), >>>> + i915_reg_t flex_regs[] = { >>>> + EU_PERF_CNTL0, >>>> + EU_PERF_CNTL1, >>>> + EU_PERF_CNTL2, >>>> + EU_PERF_CNTL3, >>>> + EU_PERF_CNTL4, >>>> + EU_PERF_CNTL5, >>>> + EU_PERF_CNTL6, >>>> }; >>>> int i; >>>> >>>> - reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL); >>>> - reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent << >>>> - GEN8_OA_TIMER_PERIOD_SHIFT) | >>>> - (dev_priv->perf.oa.periodic ? >>>> - GEN8_OA_TIMER_ENABLE : 0) | >>>> - GEN8_OA_COUNTER_RESUME; >>>> + CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, >>>> + (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | >>>> + (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) | >>>> + GEN8_OA_COUNTER_RESUME); >>> >>> I'll be honest but, I don't think it's CTX_REG() that helps improve the >>> readability here. >>> >>> The really odd part is that this sticks itself into a bare part of the >>> register state not surrounded by any LRI and after a BB_END. This >>> routine can only work for established contexts, it should not work for >>> execlists_init_reg_state. >> >> Unless I am missing something change is completely mechanical, so any >> question marks you have are already there, right? What do you suggest is >> the action here? > > Sure, the only thing I question of this patch itself is whether > CTX_REG() is simply too much horrible obfuscating magic. Turn a blind eye if the perceived badness factor is below some threshold? Following patch depends on this one, so if I have to drop this one, then I have to rework the next one etc.. well, it's not the worst problem, so yeah, whatever. Make a call and let me know. Regards, Tvrtko
Quoting Tvrtko Ursulin (2018-08-14 19:49:46) > > On 13/08/2018 10:16, Chris Wilson wrote: > > Quoting Tvrtko Ursulin (2018-08-13 10:11:44) > >> > >> On 13/08/2018 09:16, Chris Wilson wrote: > >>> Quoting Tvrtko Ursulin (2018-08-13 09:02:18) > >>>> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > >>>> > >>>> Abstract the context image access a bit. > >>>> > >>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > >>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >>>> --- > >>>> drivers/gpu/drm/i915/i915_perf.c | 34 +++++++++++++++----------------- > >>>> 1 file changed, 16 insertions(+), 18 deletions(-) > >>>> > >>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > >>>> index 49597cf31707..ccb20230df2c 100644 > >>>> --- a/drivers/gpu/drm/i915/i915_perf.c > >>>> +++ b/drivers/gpu/drm/i915/i915_perf.c > >>>> @@ -210,6 +210,7 @@ > >>>> #include "i915_oa_cflgt3.h" > >>>> #include "i915_oa_cnl.h" > >>>> #include "i915_oa_icl.h" > >>>> +#include "intel_lrc_reg.h" > >>>> > >>>> /* HW requires this to be a power of two, between 128k and 16M, though driver > >>>> * is currently generally designed assuming the largest 16M size is used such > >>>> @@ -1636,27 +1637,25 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, > >>>> u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; > >>>> u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; > >>>> /* The MMIO offsets for Flex EU registers aren't contiguous */ > >>>> - u32 flex_mmio[] = { > >>>> - i915_mmio_reg_offset(EU_PERF_CNTL0), > >>>> - i915_mmio_reg_offset(EU_PERF_CNTL1), > >>>> - i915_mmio_reg_offset(EU_PERF_CNTL2), > >>>> - i915_mmio_reg_offset(EU_PERF_CNTL3), > >>>> - i915_mmio_reg_offset(EU_PERF_CNTL4), > >>>> - i915_mmio_reg_offset(EU_PERF_CNTL5), > >>>> - i915_mmio_reg_offset(EU_PERF_CNTL6), > >>>> + i915_reg_t flex_regs[] = { > >>>> + EU_PERF_CNTL0, > >>>> + EU_PERF_CNTL1, > >>>> + EU_PERF_CNTL2, > >>>> + EU_PERF_CNTL3, > >>>> + EU_PERF_CNTL4, > >>>> + EU_PERF_CNTL5, > >>>> + EU_PERF_CNTL6, > >>>> }; > >>>> int i; > >>>> > >>>> - reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL); > >>>> - reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent << > >>>> - GEN8_OA_TIMER_PERIOD_SHIFT) | > >>>> - (dev_priv->perf.oa.periodic ? > >>>> - GEN8_OA_TIMER_ENABLE : 0) | > >>>> - GEN8_OA_COUNTER_RESUME; > >>>> + CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, > >>>> + (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | > >>>> + (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) | > >>>> + GEN8_OA_COUNTER_RESUME); > >>> > >>> I'll be honest but, I don't think it's CTX_REG() that helps improve the > >>> readability here. > >>> > >>> The really odd part is that this sticks itself into a bare part of the > >>> register state not surrounded by any LRI and after a BB_END. This > >>> routine can only work for established contexts, it should not work for > >>> execlists_init_reg_state. > >> > >> Unless I am missing something change is completely mechanical, so any > >> question marks you have are already there, right? What do you suggest is > >> the action here? > > > > Sure, the only thing I question of this patch itself is whether > > CTX_REG() is simply too much horrible obfuscating magic. > > Turn a blind eye if the perceived badness factor is below some > threshold? Following patch depends on this one, so if I have to drop > this one, then I have to rework the next one etc.. well, it's not the > worst problem, so yeah, whatever. Make a call and let me know. The patch was fine, just worrying about the surrounding code. -Chris
On 14/08/2018 19:57, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2018-08-14 19:49:46) >> >> On 13/08/2018 10:16, Chris Wilson wrote: >>> Quoting Tvrtko Ursulin (2018-08-13 10:11:44) >>>> >>>> On 13/08/2018 09:16, Chris Wilson wrote: >>>>> Quoting Tvrtko Ursulin (2018-08-13 09:02:18) >>>>>> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >>>>>> >>>>>> Abstract the context image access a bit. >>>>>> >>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >>>>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >>>>>> --- >>>>>> drivers/gpu/drm/i915/i915_perf.c | 34 +++++++++++++++----------------- >>>>>> 1 file changed, 16 insertions(+), 18 deletions(-) >>>>>> >>>>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >>>>>> index 49597cf31707..ccb20230df2c 100644 >>>>>> --- a/drivers/gpu/drm/i915/i915_perf.c >>>>>> +++ b/drivers/gpu/drm/i915/i915_perf.c >>>>>> @@ -210,6 +210,7 @@ >>>>>> #include "i915_oa_cflgt3.h" >>>>>> #include "i915_oa_cnl.h" >>>>>> #include "i915_oa_icl.h" >>>>>> +#include "intel_lrc_reg.h" >>>>>> >>>>>> /* HW requires this to be a power of two, between 128k and 16M, though driver >>>>>> * is currently generally designed assuming the largest 16M size is used such >>>>>> @@ -1636,27 +1637,25 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, >>>>>> u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; >>>>>> u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; >>>>>> /* The MMIO offsets for Flex EU registers aren't contiguous */ >>>>>> - u32 flex_mmio[] = { >>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL0), >>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL1), >>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL2), >>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL3), >>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL4), >>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL5), >>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL6), >>>>>> + i915_reg_t flex_regs[] = { >>>>>> + EU_PERF_CNTL0, >>>>>> + EU_PERF_CNTL1, >>>>>> + EU_PERF_CNTL2, >>>>>> + EU_PERF_CNTL3, >>>>>> + EU_PERF_CNTL4, >>>>>> + EU_PERF_CNTL5, >>>>>> + EU_PERF_CNTL6, >>>>>> }; >>>>>> int i; >>>>>> >>>>>> - reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL); >>>>>> - reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent << >>>>>> - GEN8_OA_TIMER_PERIOD_SHIFT) | >>>>>> - (dev_priv->perf.oa.periodic ? >>>>>> - GEN8_OA_TIMER_ENABLE : 0) | >>>>>> - GEN8_OA_COUNTER_RESUME; >>>>>> + CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, >>>>>> + (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | >>>>>> + (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) | >>>>>> + GEN8_OA_COUNTER_RESUME); >>>>> >>>>> I'll be honest but, I don't think it's CTX_REG() that helps improve the >>>>> readability here. >>>>> >>>>> The really odd part is that this sticks itself into a bare part of the >>>>> register state not surrounded by any LRI and after a BB_END. This >>>>> routine can only work for established contexts, it should not work for >>>>> execlists_init_reg_state. >>>> >>>> Unless I am missing something change is completely mechanical, so any >>>> question marks you have are already there, right? What do you suggest is >>>> the action here? >>> >>> Sure, the only thing I question of this patch itself is whether >>> CTX_REG() is simply too much horrible obfuscating magic. >> >> Turn a blind eye if the perceived badness factor is below some >> threshold? Following patch depends on this one, so if I have to drop >> this one, then I have to rework the next one etc.. well, it's not the >> worst problem, so yeah, whatever. Make a call and let me know. > > The patch was fine, just worrying about the surrounding code. I misunderstood. So only about ctx_oactxctrl_offset and ctx_flexeu0_offset from i915_perf.c? Maybe that is some OA magic, I have not idea. CC-ing Lionel in case he can shed some light on it. Regards, Tvrtko
On 15/08/18 09:49, Tvrtko Ursulin wrote: > > On 14/08/2018 19:57, Chris Wilson wrote: >> Quoting Tvrtko Ursulin (2018-08-14 19:49:46) >>> >>> On 13/08/2018 10:16, Chris Wilson wrote: >>>> Quoting Tvrtko Ursulin (2018-08-13 10:11:44) >>>>> >>>>> On 13/08/2018 09:16, Chris Wilson wrote: >>>>>> Quoting Tvrtko Ursulin (2018-08-13 09:02:18) >>>>>>> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >>>>>>> >>>>>>> Abstract the context image access a bit. >>>>>>> >>>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >>>>>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >>>>>>> --- >>>>>>> drivers/gpu/drm/i915/i915_perf.c | 34 >>>>>>> +++++++++++++++----------------- >>>>>>> 1 file changed, 16 insertions(+), 18 deletions(-) >>>>>>> >>>>>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c >>>>>>> b/drivers/gpu/drm/i915/i915_perf.c >>>>>>> index 49597cf31707..ccb20230df2c 100644 >>>>>>> --- a/drivers/gpu/drm/i915/i915_perf.c >>>>>>> +++ b/drivers/gpu/drm/i915/i915_perf.c >>>>>>> @@ -210,6 +210,7 @@ >>>>>>> #include "i915_oa_cflgt3.h" >>>>>>> #include "i915_oa_cnl.h" >>>>>>> #include "i915_oa_icl.h" >>>>>>> +#include "intel_lrc_reg.h" >>>>>>> /* HW requires this to be a power of two, between 128k >>>>>>> and 16M, though driver >>>>>>> * is currently generally designed assuming the largest 16M >>>>>>> size is used such >>>>>>> @@ -1636,27 +1637,25 @@ static void >>>>>>> gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, >>>>>>> u32 ctx_oactxctrl = >>>>>>> dev_priv->perf.oa.ctx_oactxctrl_offset; >>>>>>> u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; >>>>>>> /* The MMIO offsets for Flex EU registers aren't >>>>>>> contiguous */ >>>>>>> - u32 flex_mmio[] = { >>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL0), >>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL1), >>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL2), >>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL3), >>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL4), >>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL5), >>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL6), >>>>>>> + i915_reg_t flex_regs[] = { >>>>>>> + EU_PERF_CNTL0, >>>>>>> + EU_PERF_CNTL1, >>>>>>> + EU_PERF_CNTL2, >>>>>>> + EU_PERF_CNTL3, >>>>>>> + EU_PERF_CNTL4, >>>>>>> + EU_PERF_CNTL5, >>>>>>> + EU_PERF_CNTL6, >>>>>>> }; >>>>>>> int i; >>>>>>> - reg_state[ctx_oactxctrl] = >>>>>>> i915_mmio_reg_offset(GEN8_OACTXCONTROL); >>>>>>> - reg_state[ctx_oactxctrl+1] = >>>>>>> (dev_priv->perf.oa.period_exponent << >>>>>>> - GEN8_OA_TIMER_PERIOD_SHIFT) | >>>>>>> - (dev_priv->perf.oa.periodic ? >>>>>>> - GEN8_OA_TIMER_ENABLE : 0) | >>>>>>> - GEN8_OA_COUNTER_RESUME; >>>>>>> + CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, >>>>>>> + (dev_priv->perf.oa.period_exponent << >>>>>>> GEN8_OA_TIMER_PERIOD_SHIFT) | >>>>>>> + (dev_priv->perf.oa.periodic ? >>>>>>> GEN8_OA_TIMER_ENABLE : 0) | >>>>>>> + GEN8_OA_COUNTER_RESUME); >>>>>> >>>>>> I'll be honest but, I don't think it's CTX_REG() that helps >>>>>> improve the >>>>>> readability here. >>>>>> >>>>>> The really odd part is that this sticks itself into a bare part >>>>>> of the >>>>>> register state not surrounded by any LRI and after a BB_END. This >>>>>> routine can only work for established contexts, it should not >>>>>> work for >>>>>> execlists_init_reg_state. >>>>> >>>>> Unless I am missing something change is completely mechanical, so any >>>>> question marks you have are already there, right? What do you >>>>> suggest is >>>>> the action here? >>>> >>>> Sure, the only thing I question of this patch itself is whether >>>> CTX_REG() is simply too much horrible obfuscating magic. >>> >>> Turn a blind eye if the perceived badness factor is below some >>> threshold? Following patch depends on this one, so if I have to drop >>> this one, then I have to rework the next one etc.. well, it's not the >>> worst problem, so yeah, whatever. Make a call and let me know. >> >> The patch was fine, just worrying about the surrounding code. > > I misunderstood. So only about ctx_oactxctrl_offset and > ctx_flexeu0_offset from i915_perf.c? Maybe that is some OA magic, I > have not idea. CC-ing Lionel in case he can shed some light on it. Those are the offsets at which the hardware will store the OA_CTXCTRL/FLEX_EU registers values as documented. I can see that's it's a bit odd not to have the MI_LRI written before we do the first restore. I'm 99% sure I've verified in practice that application started after i915/perf is opened have the right values programmed. Not completely sure that the IGT tests cover that case though. So maybe there is a problem with the first restore... What's the value set into most register that aren't explicitly programmed in intel_lrc.c ? - Lionel > > Regards, > > Tvrtko >
Quoting Lionel Landwerlin (2018-08-15 11:50:55) > On 15/08/18 09:49, Tvrtko Ursulin wrote: > > > > On 14/08/2018 19:57, Chris Wilson wrote: > >> Quoting Tvrtko Ursulin (2018-08-14 19:49:46) > >>> > >>> On 13/08/2018 10:16, Chris Wilson wrote: > >>>> Quoting Tvrtko Ursulin (2018-08-13 10:11:44) > >>>>> > >>>>> On 13/08/2018 09:16, Chris Wilson wrote: > >>>>>> Quoting Tvrtko Ursulin (2018-08-13 09:02:18) > >>>>>>> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > >>>>>>> > >>>>>>> Abstract the context image access a bit. > >>>>>>> > >>>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > >>>>>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >>>>>>> --- > >>>>>>> drivers/gpu/drm/i915/i915_perf.c | 34 > >>>>>>> +++++++++++++++----------------- > >>>>>>> 1 file changed, 16 insertions(+), 18 deletions(-) > >>>>>>> > >>>>>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c > >>>>>>> b/drivers/gpu/drm/i915/i915_perf.c > >>>>>>> index 49597cf31707..ccb20230df2c 100644 > >>>>>>> --- a/drivers/gpu/drm/i915/i915_perf.c > >>>>>>> +++ b/drivers/gpu/drm/i915/i915_perf.c > >>>>>>> @@ -210,6 +210,7 @@ > >>>>>>> #include "i915_oa_cflgt3.h" > >>>>>>> #include "i915_oa_cnl.h" > >>>>>>> #include "i915_oa_icl.h" > >>>>>>> +#include "intel_lrc_reg.h" > >>>>>>> /* HW requires this to be a power of two, between 128k > >>>>>>> and 16M, though driver > >>>>>>> * is currently generally designed assuming the largest 16M > >>>>>>> size is used such > >>>>>>> @@ -1636,27 +1637,25 @@ static void > >>>>>>> gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, > >>>>>>> u32 ctx_oactxctrl = > >>>>>>> dev_priv->perf.oa.ctx_oactxctrl_offset; > >>>>>>> u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; > >>>>>>> /* The MMIO offsets for Flex EU registers aren't > >>>>>>> contiguous */ > >>>>>>> - u32 flex_mmio[] = { > >>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL0), > >>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL1), > >>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL2), > >>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL3), > >>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL4), > >>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL5), > >>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL6), > >>>>>>> + i915_reg_t flex_regs[] = { > >>>>>>> + EU_PERF_CNTL0, > >>>>>>> + EU_PERF_CNTL1, > >>>>>>> + EU_PERF_CNTL2, > >>>>>>> + EU_PERF_CNTL3, > >>>>>>> + EU_PERF_CNTL4, > >>>>>>> + EU_PERF_CNTL5, > >>>>>>> + EU_PERF_CNTL6, > >>>>>>> }; > >>>>>>> int i; > >>>>>>> - reg_state[ctx_oactxctrl] = > >>>>>>> i915_mmio_reg_offset(GEN8_OACTXCONTROL); > >>>>>>> - reg_state[ctx_oactxctrl+1] = > >>>>>>> (dev_priv->perf.oa.period_exponent << > >>>>>>> - GEN8_OA_TIMER_PERIOD_SHIFT) | > >>>>>>> - (dev_priv->perf.oa.periodic ? > >>>>>>> - GEN8_OA_TIMER_ENABLE : 0) | > >>>>>>> - GEN8_OA_COUNTER_RESUME; > >>>>>>> + CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, > >>>>>>> + (dev_priv->perf.oa.period_exponent << > >>>>>>> GEN8_OA_TIMER_PERIOD_SHIFT) | > >>>>>>> + (dev_priv->perf.oa.periodic ? > >>>>>>> GEN8_OA_TIMER_ENABLE : 0) | > >>>>>>> + GEN8_OA_COUNTER_RESUME); > >>>>>> > >>>>>> I'll be honest but, I don't think it's CTX_REG() that helps > >>>>>> improve the > >>>>>> readability here. > >>>>>> > >>>>>> The really odd part is that this sticks itself into a bare part > >>>>>> of the > >>>>>> register state not surrounded by any LRI and after a BB_END. This > >>>>>> routine can only work for established contexts, it should not > >>>>>> work for > >>>>>> execlists_init_reg_state. > >>>>> > >>>>> Unless I am missing something change is completely mechanical, so any > >>>>> question marks you have are already there, right? What do you > >>>>> suggest is > >>>>> the action here? > >>>> > >>>> Sure, the only thing I question of this patch itself is whether > >>>> CTX_REG() is simply too much horrible obfuscating magic. > >>> > >>> Turn a blind eye if the perceived badness factor is below some > >>> threshold? Following patch depends on this one, so if I have to drop > >>> this one, then I have to rework the next one etc.. well, it's not the > >>> worst problem, so yeah, whatever. Make a call and let me know. > >> > >> The patch was fine, just worrying about the surrounding code. > > > > I misunderstood. So only about ctx_oactxctrl_offset and > > ctx_flexeu0_offset from i915_perf.c? Maybe that is some OA magic, I > > have not idea. CC-ing Lionel in case he can shed some light on it. > > Those are the offsets at which the hardware will store the > OA_CTXCTRL/FLEX_EU registers values as documented. > I can see that's it's a bit odd not to have the MI_LRI written before we > do the first restore. > > I'm 99% sure I've verified in practice that application started after > i915/perf is opened have the right values programmed. > Not completely sure that the IGT tests cover that case though. > So maybe there is a problem with the first restore... > > What's the value set into most register that aren't explicitly > programmed in intel_lrc.c ? That would be whatever was previously there In practice, since we now initialise all real contexts from the default state, the LRI will be there. execlists_init_reg_state() is wonderfully, perhaps dangerously, misleading atm. -Chris
On 15/08/18 11:56, Chris Wilson wrote: > Quoting Lionel Landwerlin (2018-08-15 11:50:55) >> On 15/08/18 09:49, Tvrtko Ursulin wrote: >>> On 14/08/2018 19:57, Chris Wilson wrote: >>>> Quoting Tvrtko Ursulin (2018-08-14 19:49:46) >>>>> On 13/08/2018 10:16, Chris Wilson wrote: >>>>>> Quoting Tvrtko Ursulin (2018-08-13 10:11:44) >>>>>>> On 13/08/2018 09:16, Chris Wilson wrote: >>>>>>>> Quoting Tvrtko Ursulin (2018-08-13 09:02:18) >>>>>>>>> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >>>>>>>>> >>>>>>>>> Abstract the context image access a bit. >>>>>>>>> >>>>>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >>>>>>>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >>>>>>>>> --- >>>>>>>>> drivers/gpu/drm/i915/i915_perf.c | 34 >>>>>>>>> +++++++++++++++----------------- >>>>>>>>> 1 file changed, 16 insertions(+), 18 deletions(-) >>>>>>>>> >>>>>>>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c >>>>>>>>> b/drivers/gpu/drm/i915/i915_perf.c >>>>>>>>> index 49597cf31707..ccb20230df2c 100644 >>>>>>>>> --- a/drivers/gpu/drm/i915/i915_perf.c >>>>>>>>> +++ b/drivers/gpu/drm/i915/i915_perf.c >>>>>>>>> @@ -210,6 +210,7 @@ >>>>>>>>> #include "i915_oa_cflgt3.h" >>>>>>>>> #include "i915_oa_cnl.h" >>>>>>>>> #include "i915_oa_icl.h" >>>>>>>>> +#include "intel_lrc_reg.h" >>>>>>>>> /* HW requires this to be a power of two, between 128k >>>>>>>>> and 16M, though driver >>>>>>>>> * is currently generally designed assuming the largest 16M >>>>>>>>> size is used such >>>>>>>>> @@ -1636,27 +1637,25 @@ static void >>>>>>>>> gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, >>>>>>>>> u32 ctx_oactxctrl = >>>>>>>>> dev_priv->perf.oa.ctx_oactxctrl_offset; >>>>>>>>> u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; >>>>>>>>> /* The MMIO offsets for Flex EU registers aren't >>>>>>>>> contiguous */ >>>>>>>>> - u32 flex_mmio[] = { >>>>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL0), >>>>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL1), >>>>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL2), >>>>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL3), >>>>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL4), >>>>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL5), >>>>>>>>> - i915_mmio_reg_offset(EU_PERF_CNTL6), >>>>>>>>> + i915_reg_t flex_regs[] = { >>>>>>>>> + EU_PERF_CNTL0, >>>>>>>>> + EU_PERF_CNTL1, >>>>>>>>> + EU_PERF_CNTL2, >>>>>>>>> + EU_PERF_CNTL3, >>>>>>>>> + EU_PERF_CNTL4, >>>>>>>>> + EU_PERF_CNTL5, >>>>>>>>> + EU_PERF_CNTL6, >>>>>>>>> }; >>>>>>>>> int i; >>>>>>>>> - reg_state[ctx_oactxctrl] = >>>>>>>>> i915_mmio_reg_offset(GEN8_OACTXCONTROL); >>>>>>>>> - reg_state[ctx_oactxctrl+1] = >>>>>>>>> (dev_priv->perf.oa.period_exponent << >>>>>>>>> - GEN8_OA_TIMER_PERIOD_SHIFT) | >>>>>>>>> - (dev_priv->perf.oa.periodic ? >>>>>>>>> - GEN8_OA_TIMER_ENABLE : 0) | >>>>>>>>> - GEN8_OA_COUNTER_RESUME; >>>>>>>>> + CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, >>>>>>>>> + (dev_priv->perf.oa.period_exponent << >>>>>>>>> GEN8_OA_TIMER_PERIOD_SHIFT) | >>>>>>>>> + (dev_priv->perf.oa.periodic ? >>>>>>>>> GEN8_OA_TIMER_ENABLE : 0) | >>>>>>>>> + GEN8_OA_COUNTER_RESUME); >>>>>>>> I'll be honest but, I don't think it's CTX_REG() that helps >>>>>>>> improve the >>>>>>>> readability here. >>>>>>>> >>>>>>>> The really odd part is that this sticks itself into a bare part >>>>>>>> of the >>>>>>>> register state not surrounded by any LRI and after a BB_END. This >>>>>>>> routine can only work for established contexts, it should not >>>>>>>> work for >>>>>>>> execlists_init_reg_state. >>>>>>> Unless I am missing something change is completely mechanical, so any >>>>>>> question marks you have are already there, right? What do you >>>>>>> suggest is >>>>>>> the action here? >>>>>> Sure, the only thing I question of this patch itself is whether >>>>>> CTX_REG() is simply too much horrible obfuscating magic. >>>>> Turn a blind eye if the perceived badness factor is below some >>>>> threshold? Following patch depends on this one, so if I have to drop >>>>> this one, then I have to rework the next one etc.. well, it's not the >>>>> worst problem, so yeah, whatever. Make a call and let me know. >>>> The patch was fine, just worrying about the surrounding code. >>> I misunderstood. So only about ctx_oactxctrl_offset and >>> ctx_flexeu0_offset from i915_perf.c? Maybe that is some OA magic, I >>> have not idea. CC-ing Lionel in case he can shed some light on it. >> Those are the offsets at which the hardware will store the >> OA_CTXCTRL/FLEX_EU registers values as documented. >> I can see that's it's a bit odd not to have the MI_LRI written before we >> do the first restore. >> >> I'm 99% sure I've verified in practice that application started after >> i915/perf is opened have the right values programmed. >> Not completely sure that the IGT tests cover that case though. >> So maybe there is a problem with the first restore... >> >> What's the value set into most register that aren't explicitly >> programmed in intel_lrc.c ? > That would be whatever was previously there > > In practice, since we now initialise all real contexts from the default > state, the LRI will be there. execlists_init_reg_state() is wonderfully, > perhaps dangerously, misleading atm. > -Chris > Then I guess it's all fine :) Thanks, - Lionel
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 49597cf31707..ccb20230df2c 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -210,6 +210,7 @@ #include "i915_oa_cflgt3.h" #include "i915_oa_cnl.h" #include "i915_oa_icl.h" +#include "intel_lrc_reg.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -1636,27 +1637,25 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; /* The MMIO offsets for Flex EU registers aren't contiguous */ - u32 flex_mmio[] = { - i915_mmio_reg_offset(EU_PERF_CNTL0), - i915_mmio_reg_offset(EU_PERF_CNTL1), - i915_mmio_reg_offset(EU_PERF_CNTL2), - i915_mmio_reg_offset(EU_PERF_CNTL3), - i915_mmio_reg_offset(EU_PERF_CNTL4), - i915_mmio_reg_offset(EU_PERF_CNTL5), - i915_mmio_reg_offset(EU_PERF_CNTL6), + i915_reg_t flex_regs[] = { + EU_PERF_CNTL0, + EU_PERF_CNTL1, + EU_PERF_CNTL2, + EU_PERF_CNTL3, + EU_PERF_CNTL4, + EU_PERF_CNTL5, + EU_PERF_CNTL6, }; int i; - reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL); - reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent << - GEN8_OA_TIMER_PERIOD_SHIFT) | - (dev_priv->perf.oa.periodic ? - GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME; + CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, + (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME); - for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) { + for (i = 0; i < ARRAY_SIZE(flex_regs); i++) { u32 state_offset = ctx_flexeu0 + i * 2; - u32 mmio = flex_mmio[i]; + u32 mmio = i915_mmio_reg_offset(flex_regs[i]); /* * This arbitrary default will select the 'EU FPU0 Pipeline @@ -1676,8 +1675,7 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, } } - reg_state[state_offset] = mmio; - reg_state[state_offset+1] = value; + CTX_REG(reg_state, state_offset, flex_regs[i], value); } }