diff mbox series

[CI,2/2] drm/i915/perf: reuse intel_lrc ctx regs macro

Message ID 20180813080218.28994-3-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Reviewed perf cleanups | expand

Commit Message

Tvrtko Ursulin Aug. 13, 2018, 8:02 a.m. UTC
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

Abstract the context image access a bit.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 34 +++++++++++++++-----------------
 1 file changed, 16 insertions(+), 18 deletions(-)

Comments

Chris Wilson Aug. 13, 2018, 8:16 a.m. UTC | #1
Quoting Tvrtko Ursulin (2018-08-13 09:02:18)
> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> 
> Abstract the context image access a bit.
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_perf.c | 34 +++++++++++++++-----------------
>  1 file changed, 16 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 49597cf31707..ccb20230df2c 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -210,6 +210,7 @@
>  #include "i915_oa_cflgt3.h"
>  #include "i915_oa_cnl.h"
>  #include "i915_oa_icl.h"
> +#include "intel_lrc_reg.h"
>  
>  /* HW requires this to be a power of two, between 128k and 16M, though driver
>   * is currently generally designed assuming the largest 16M size is used such
> @@ -1636,27 +1637,25 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
>         u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
>         u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
>         /* The MMIO offsets for Flex EU registers aren't contiguous */
> -       u32 flex_mmio[] = {
> -               i915_mmio_reg_offset(EU_PERF_CNTL0),
> -               i915_mmio_reg_offset(EU_PERF_CNTL1),
> -               i915_mmio_reg_offset(EU_PERF_CNTL2),
> -               i915_mmio_reg_offset(EU_PERF_CNTL3),
> -               i915_mmio_reg_offset(EU_PERF_CNTL4),
> -               i915_mmio_reg_offset(EU_PERF_CNTL5),
> -               i915_mmio_reg_offset(EU_PERF_CNTL6),
> +       i915_reg_t flex_regs[] = {
> +               EU_PERF_CNTL0,
> +               EU_PERF_CNTL1,
> +               EU_PERF_CNTL2,
> +               EU_PERF_CNTL3,
> +               EU_PERF_CNTL4,
> +               EU_PERF_CNTL5,
> +               EU_PERF_CNTL6,
>         };
>         int i;
>  
> -       reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
> -       reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent <<
> -                                     GEN8_OA_TIMER_PERIOD_SHIFT) |
> -                                    (dev_priv->perf.oa.periodic ?
> -                                     GEN8_OA_TIMER_ENABLE : 0) |
> -                                    GEN8_OA_COUNTER_RESUME;
> +       CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
> +               (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> +               (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> +               GEN8_OA_COUNTER_RESUME);

I'll be honest but, I don't think it's CTX_REG() that helps improve the
readability here. 

The really odd part is that this sticks itself into a bare part of the
register state not surrounded by any LRI and after a BB_END. This
routine can only work for established contexts, it should not work for
execlists_init_reg_state.
-Chris
Tvrtko Ursulin Aug. 13, 2018, 9:11 a.m. UTC | #2
On 13/08/2018 09:16, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-08-13 09:02:18)
>> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>
>> Abstract the context image access a bit.
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_perf.c | 34 +++++++++++++++-----------------
>>   1 file changed, 16 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> index 49597cf31707..ccb20230df2c 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -210,6 +210,7 @@
>>   #include "i915_oa_cflgt3.h"
>>   #include "i915_oa_cnl.h"
>>   #include "i915_oa_icl.h"
>> +#include "intel_lrc_reg.h"
>>   
>>   /* HW requires this to be a power of two, between 128k and 16M, though driver
>>    * is currently generally designed assuming the largest 16M size is used such
>> @@ -1636,27 +1637,25 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
>>          u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
>>          u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
>>          /* The MMIO offsets for Flex EU registers aren't contiguous */
>> -       u32 flex_mmio[] = {
>> -               i915_mmio_reg_offset(EU_PERF_CNTL0),
>> -               i915_mmio_reg_offset(EU_PERF_CNTL1),
>> -               i915_mmio_reg_offset(EU_PERF_CNTL2),
>> -               i915_mmio_reg_offset(EU_PERF_CNTL3),
>> -               i915_mmio_reg_offset(EU_PERF_CNTL4),
>> -               i915_mmio_reg_offset(EU_PERF_CNTL5),
>> -               i915_mmio_reg_offset(EU_PERF_CNTL6),
>> +       i915_reg_t flex_regs[] = {
>> +               EU_PERF_CNTL0,
>> +               EU_PERF_CNTL1,
>> +               EU_PERF_CNTL2,
>> +               EU_PERF_CNTL3,
>> +               EU_PERF_CNTL4,
>> +               EU_PERF_CNTL5,
>> +               EU_PERF_CNTL6,
>>          };
>>          int i;
>>   
>> -       reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
>> -       reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent <<
>> -                                     GEN8_OA_TIMER_PERIOD_SHIFT) |
>> -                                    (dev_priv->perf.oa.periodic ?
>> -                                     GEN8_OA_TIMER_ENABLE : 0) |
>> -                                    GEN8_OA_COUNTER_RESUME;
>> +       CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
>> +               (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>> +               (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>> +               GEN8_OA_COUNTER_RESUME);
> 
> I'll be honest but, I don't think it's CTX_REG() that helps improve the
> readability here.
> 
> The really odd part is that this sticks itself into a bare part of the
> register state not surrounded by any LRI and after a BB_END. This
> routine can only work for established contexts, it should not work for
> execlists_init_reg_state.

Unless I am missing something change is completely mechanical, so any 
question marks you have are already there, right? What do you suggest is 
the action here?

Regards,

Tvrtko
Chris Wilson Aug. 13, 2018, 9:16 a.m. UTC | #3
Quoting Tvrtko Ursulin (2018-08-13 10:11:44)
> 
> On 13/08/2018 09:16, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-08-13 09:02:18)
> >> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> >>
> >> Abstract the context image access a bit.
> >>
> >> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> >> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >> ---
> >>   drivers/gpu/drm/i915/i915_perf.c | 34 +++++++++++++++-----------------
> >>   1 file changed, 16 insertions(+), 18 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> >> index 49597cf31707..ccb20230df2c 100644
> >> --- a/drivers/gpu/drm/i915/i915_perf.c
> >> +++ b/drivers/gpu/drm/i915/i915_perf.c
> >> @@ -210,6 +210,7 @@
> >>   #include "i915_oa_cflgt3.h"
> >>   #include "i915_oa_cnl.h"
> >>   #include "i915_oa_icl.h"
> >> +#include "intel_lrc_reg.h"
> >>   
> >>   /* HW requires this to be a power of two, between 128k and 16M, though driver
> >>    * is currently generally designed assuming the largest 16M size is used such
> >> @@ -1636,27 +1637,25 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
> >>          u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
> >>          u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
> >>          /* The MMIO offsets for Flex EU registers aren't contiguous */
> >> -       u32 flex_mmio[] = {
> >> -               i915_mmio_reg_offset(EU_PERF_CNTL0),
> >> -               i915_mmio_reg_offset(EU_PERF_CNTL1),
> >> -               i915_mmio_reg_offset(EU_PERF_CNTL2),
> >> -               i915_mmio_reg_offset(EU_PERF_CNTL3),
> >> -               i915_mmio_reg_offset(EU_PERF_CNTL4),
> >> -               i915_mmio_reg_offset(EU_PERF_CNTL5),
> >> -               i915_mmio_reg_offset(EU_PERF_CNTL6),
> >> +       i915_reg_t flex_regs[] = {
> >> +               EU_PERF_CNTL0,
> >> +               EU_PERF_CNTL1,
> >> +               EU_PERF_CNTL2,
> >> +               EU_PERF_CNTL3,
> >> +               EU_PERF_CNTL4,
> >> +               EU_PERF_CNTL5,
> >> +               EU_PERF_CNTL6,
> >>          };
> >>          int i;
> >>   
> >> -       reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
> >> -       reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent <<
> >> -                                     GEN8_OA_TIMER_PERIOD_SHIFT) |
> >> -                                    (dev_priv->perf.oa.periodic ?
> >> -                                     GEN8_OA_TIMER_ENABLE : 0) |
> >> -                                    GEN8_OA_COUNTER_RESUME;
> >> +       CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
> >> +               (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> >> +               (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> >> +               GEN8_OA_COUNTER_RESUME);
> > 
> > I'll be honest but, I don't think it's CTX_REG() that helps improve the
> > readability here.
> > 
> > The really odd part is that this sticks itself into a bare part of the
> > register state not surrounded by any LRI and after a BB_END. This
> > routine can only work for established contexts, it should not work for
> > execlists_init_reg_state.
> 
> Unless I am missing something change is completely mechanical, so any 
> question marks you have are already there, right? What do you suggest is 
> the action here?

Sure, the only thing I question of this patch itself is whether
CTX_REG() is simply too much horrible obfuscating magic.
-Chris
Tvrtko Ursulin Aug. 14, 2018, 6:49 p.m. UTC | #4
On 13/08/2018 10:16, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-08-13 10:11:44)
>>
>> On 13/08/2018 09:16, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-08-13 09:02:18)
>>>> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>>
>>>> Abstract the context image access a bit.
>>>>
>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>> ---
>>>>    drivers/gpu/drm/i915/i915_perf.c | 34 +++++++++++++++-----------------
>>>>    1 file changed, 16 insertions(+), 18 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>>>> index 49597cf31707..ccb20230df2c 100644
>>>> --- a/drivers/gpu/drm/i915/i915_perf.c
>>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>>>> @@ -210,6 +210,7 @@
>>>>    #include "i915_oa_cflgt3.h"
>>>>    #include "i915_oa_cnl.h"
>>>>    #include "i915_oa_icl.h"
>>>> +#include "intel_lrc_reg.h"
>>>>    
>>>>    /* HW requires this to be a power of two, between 128k and 16M, though driver
>>>>     * is currently generally designed assuming the largest 16M size is used such
>>>> @@ -1636,27 +1637,25 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
>>>>           u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
>>>>           u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
>>>>           /* The MMIO offsets for Flex EU registers aren't contiguous */
>>>> -       u32 flex_mmio[] = {
>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL0),
>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL1),
>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL2),
>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL3),
>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL4),
>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL5),
>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL6),
>>>> +       i915_reg_t flex_regs[] = {
>>>> +               EU_PERF_CNTL0,
>>>> +               EU_PERF_CNTL1,
>>>> +               EU_PERF_CNTL2,
>>>> +               EU_PERF_CNTL3,
>>>> +               EU_PERF_CNTL4,
>>>> +               EU_PERF_CNTL5,
>>>> +               EU_PERF_CNTL6,
>>>>           };
>>>>           int i;
>>>>    
>>>> -       reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
>>>> -       reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent <<
>>>> -                                     GEN8_OA_TIMER_PERIOD_SHIFT) |
>>>> -                                    (dev_priv->perf.oa.periodic ?
>>>> -                                     GEN8_OA_TIMER_ENABLE : 0) |
>>>> -                                    GEN8_OA_COUNTER_RESUME;
>>>> +       CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
>>>> +               (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>>>> +               (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>>>> +               GEN8_OA_COUNTER_RESUME);
>>>
>>> I'll be honest but, I don't think it's CTX_REG() that helps improve the
>>> readability here.
>>>
>>> The really odd part is that this sticks itself into a bare part of the
>>> register state not surrounded by any LRI and after a BB_END. This
>>> routine can only work for established contexts, it should not work for
>>> execlists_init_reg_state.
>>
>> Unless I am missing something change is completely mechanical, so any
>> question marks you have are already there, right? What do you suggest is
>> the action here?
> 
> Sure, the only thing I question of this patch itself is whether
> CTX_REG() is simply too much horrible obfuscating magic.

Turn a blind eye if the perceived badness factor is below some 
threshold? Following patch depends on this one, so if I have to drop 
this one, then I have to rework the next one etc.. well, it's not the 
worst problem, so yeah, whatever. Make a call and let me know.

Regards,

Tvrtko
Chris Wilson Aug. 14, 2018, 6:57 p.m. UTC | #5
Quoting Tvrtko Ursulin (2018-08-14 19:49:46)
> 
> On 13/08/2018 10:16, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-08-13 10:11:44)
> >>
> >> On 13/08/2018 09:16, Chris Wilson wrote:
> >>> Quoting Tvrtko Ursulin (2018-08-13 09:02:18)
> >>>> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> >>>>
> >>>> Abstract the context image access a bit.
> >>>>
> >>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> >>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>> ---
> >>>>    drivers/gpu/drm/i915/i915_perf.c | 34 +++++++++++++++-----------------
> >>>>    1 file changed, 16 insertions(+), 18 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> >>>> index 49597cf31707..ccb20230df2c 100644
> >>>> --- a/drivers/gpu/drm/i915/i915_perf.c
> >>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
> >>>> @@ -210,6 +210,7 @@
> >>>>    #include "i915_oa_cflgt3.h"
> >>>>    #include "i915_oa_cnl.h"
> >>>>    #include "i915_oa_icl.h"
> >>>> +#include "intel_lrc_reg.h"
> >>>>    
> >>>>    /* HW requires this to be a power of two, between 128k and 16M, though driver
> >>>>     * is currently generally designed assuming the largest 16M size is used such
> >>>> @@ -1636,27 +1637,25 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
> >>>>           u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
> >>>>           u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
> >>>>           /* The MMIO offsets for Flex EU registers aren't contiguous */
> >>>> -       u32 flex_mmio[] = {
> >>>> -               i915_mmio_reg_offset(EU_PERF_CNTL0),
> >>>> -               i915_mmio_reg_offset(EU_PERF_CNTL1),
> >>>> -               i915_mmio_reg_offset(EU_PERF_CNTL2),
> >>>> -               i915_mmio_reg_offset(EU_PERF_CNTL3),
> >>>> -               i915_mmio_reg_offset(EU_PERF_CNTL4),
> >>>> -               i915_mmio_reg_offset(EU_PERF_CNTL5),
> >>>> -               i915_mmio_reg_offset(EU_PERF_CNTL6),
> >>>> +       i915_reg_t flex_regs[] = {
> >>>> +               EU_PERF_CNTL0,
> >>>> +               EU_PERF_CNTL1,
> >>>> +               EU_PERF_CNTL2,
> >>>> +               EU_PERF_CNTL3,
> >>>> +               EU_PERF_CNTL4,
> >>>> +               EU_PERF_CNTL5,
> >>>> +               EU_PERF_CNTL6,
> >>>>           };
> >>>>           int i;
> >>>>    
> >>>> -       reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
> >>>> -       reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent <<
> >>>> -                                     GEN8_OA_TIMER_PERIOD_SHIFT) |
> >>>> -                                    (dev_priv->perf.oa.periodic ?
> >>>> -                                     GEN8_OA_TIMER_ENABLE : 0) |
> >>>> -                                    GEN8_OA_COUNTER_RESUME;
> >>>> +       CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
> >>>> +               (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> >>>> +               (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> >>>> +               GEN8_OA_COUNTER_RESUME);
> >>>
> >>> I'll be honest but, I don't think it's CTX_REG() that helps improve the
> >>> readability here.
> >>>
> >>> The really odd part is that this sticks itself into a bare part of the
> >>> register state not surrounded by any LRI and after a BB_END. This
> >>> routine can only work for established contexts, it should not work for
> >>> execlists_init_reg_state.
> >>
> >> Unless I am missing something change is completely mechanical, so any
> >> question marks you have are already there, right? What do you suggest is
> >> the action here?
> > 
> > Sure, the only thing I question of this patch itself is whether
> > CTX_REG() is simply too much horrible obfuscating magic.
> 
> Turn a blind eye if the perceived badness factor is below some 
> threshold? Following patch depends on this one, so if I have to drop 
> this one, then I have to rework the next one etc.. well, it's not the 
> worst problem, so yeah, whatever. Make a call and let me know.

The patch was fine, just worrying about the surrounding code.
-Chris
Tvrtko Ursulin Aug. 15, 2018, 8:49 a.m. UTC | #6
On 14/08/2018 19:57, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-08-14 19:49:46)
>>
>> On 13/08/2018 10:16, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2018-08-13 10:11:44)
>>>>
>>>> On 13/08/2018 09:16, Chris Wilson wrote:
>>>>> Quoting Tvrtko Ursulin (2018-08-13 09:02:18)
>>>>>> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>>>>
>>>>>> Abstract the context image access a bit.
>>>>>>
>>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>> ---
>>>>>>     drivers/gpu/drm/i915/i915_perf.c | 34 +++++++++++++++-----------------
>>>>>>     1 file changed, 16 insertions(+), 18 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>>>>>> index 49597cf31707..ccb20230df2c 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_perf.c
>>>>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>>>>>> @@ -210,6 +210,7 @@
>>>>>>     #include "i915_oa_cflgt3.h"
>>>>>>     #include "i915_oa_cnl.h"
>>>>>>     #include "i915_oa_icl.h"
>>>>>> +#include "intel_lrc_reg.h"
>>>>>>     
>>>>>>     /* HW requires this to be a power of two, between 128k and 16M, though driver
>>>>>>      * is currently generally designed assuming the largest 16M size is used such
>>>>>> @@ -1636,27 +1637,25 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
>>>>>>            u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
>>>>>>            u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
>>>>>>            /* The MMIO offsets for Flex EU registers aren't contiguous */
>>>>>> -       u32 flex_mmio[] = {
>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL0),
>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL1),
>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL2),
>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL3),
>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL4),
>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL5),
>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL6),
>>>>>> +       i915_reg_t flex_regs[] = {
>>>>>> +               EU_PERF_CNTL0,
>>>>>> +               EU_PERF_CNTL1,
>>>>>> +               EU_PERF_CNTL2,
>>>>>> +               EU_PERF_CNTL3,
>>>>>> +               EU_PERF_CNTL4,
>>>>>> +               EU_PERF_CNTL5,
>>>>>> +               EU_PERF_CNTL6,
>>>>>>            };
>>>>>>            int i;
>>>>>>     
>>>>>> -       reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
>>>>>> -       reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent <<
>>>>>> -                                     GEN8_OA_TIMER_PERIOD_SHIFT) |
>>>>>> -                                    (dev_priv->perf.oa.periodic ?
>>>>>> -                                     GEN8_OA_TIMER_ENABLE : 0) |
>>>>>> -                                    GEN8_OA_COUNTER_RESUME;
>>>>>> +       CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
>>>>>> +               (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>>>>>> +               (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>>>>>> +               GEN8_OA_COUNTER_RESUME);
>>>>>
>>>>> I'll be honest but, I don't think it's CTX_REG() that helps improve the
>>>>> readability here.
>>>>>
>>>>> The really odd part is that this sticks itself into a bare part of the
>>>>> register state not surrounded by any LRI and after a BB_END. This
>>>>> routine can only work for established contexts, it should not work for
>>>>> execlists_init_reg_state.
>>>>
>>>> Unless I am missing something change is completely mechanical, so any
>>>> question marks you have are already there, right? What do you suggest is
>>>> the action here?
>>>
>>> Sure, the only thing I question of this patch itself is whether
>>> CTX_REG() is simply too much horrible obfuscating magic.
>>
>> Turn a blind eye if the perceived badness factor is below some
>> threshold? Following patch depends on this one, so if I have to drop
>> this one, then I have to rework the next one etc.. well, it's not the
>> worst problem, so yeah, whatever. Make a call and let me know.
> 
> The patch was fine, just worrying about the surrounding code.

I misunderstood. So only about ctx_oactxctrl_offset and 
ctx_flexeu0_offset from i915_perf.c? Maybe that is some OA magic, I have 
not idea. CC-ing Lionel in case he can shed some light on it.

Regards,

Tvrtko
Lionel Landwerlin Aug. 15, 2018, 10:50 a.m. UTC | #7
On 15/08/18 09:49, Tvrtko Ursulin wrote:
>
> On 14/08/2018 19:57, Chris Wilson wrote:
>> Quoting Tvrtko Ursulin (2018-08-14 19:49:46)
>>>
>>> On 13/08/2018 10:16, Chris Wilson wrote:
>>>> Quoting Tvrtko Ursulin (2018-08-13 10:11:44)
>>>>>
>>>>> On 13/08/2018 09:16, Chris Wilson wrote:
>>>>>> Quoting Tvrtko Ursulin (2018-08-13 09:02:18)
>>>>>>> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>>>>>
>>>>>>> Abstract the context image access a bit.
>>>>>>>
>>>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>>>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>>> ---
>>>>>>>     drivers/gpu/drm/i915/i915_perf.c | 34 
>>>>>>> +++++++++++++++-----------------
>>>>>>>     1 file changed, 16 insertions(+), 18 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
>>>>>>> b/drivers/gpu/drm/i915/i915_perf.c
>>>>>>> index 49597cf31707..ccb20230df2c 100644
>>>>>>> --- a/drivers/gpu/drm/i915/i915_perf.c
>>>>>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>>>>>>> @@ -210,6 +210,7 @@
>>>>>>>     #include "i915_oa_cflgt3.h"
>>>>>>>     #include "i915_oa_cnl.h"
>>>>>>>     #include "i915_oa_icl.h"
>>>>>>> +#include "intel_lrc_reg.h"
>>>>>>>         /* HW requires this to be a power of two, between 128k 
>>>>>>> and 16M, though driver
>>>>>>>      * is currently generally designed assuming the largest 16M 
>>>>>>> size is used such
>>>>>>> @@ -1636,27 +1637,25 @@ static void 
>>>>>>> gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
>>>>>>>            u32 ctx_oactxctrl = 
>>>>>>> dev_priv->perf.oa.ctx_oactxctrl_offset;
>>>>>>>            u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
>>>>>>>            /* The MMIO offsets for Flex EU registers aren't 
>>>>>>> contiguous */
>>>>>>> -       u32 flex_mmio[] = {
>>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL0),
>>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL1),
>>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL2),
>>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL3),
>>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL4),
>>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL5),
>>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL6),
>>>>>>> +       i915_reg_t flex_regs[] = {
>>>>>>> +               EU_PERF_CNTL0,
>>>>>>> +               EU_PERF_CNTL1,
>>>>>>> +               EU_PERF_CNTL2,
>>>>>>> +               EU_PERF_CNTL3,
>>>>>>> +               EU_PERF_CNTL4,
>>>>>>> +               EU_PERF_CNTL5,
>>>>>>> +               EU_PERF_CNTL6,
>>>>>>>            };
>>>>>>>            int i;
>>>>>>>     -       reg_state[ctx_oactxctrl] = 
>>>>>>> i915_mmio_reg_offset(GEN8_OACTXCONTROL);
>>>>>>> -       reg_state[ctx_oactxctrl+1] = 
>>>>>>> (dev_priv->perf.oa.period_exponent <<
>>>>>>> - GEN8_OA_TIMER_PERIOD_SHIFT) |
>>>>>>> - (dev_priv->perf.oa.periodic ?
>>>>>>> - GEN8_OA_TIMER_ENABLE : 0) |
>>>>>>> - GEN8_OA_COUNTER_RESUME;
>>>>>>> +       CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
>>>>>>> +               (dev_priv->perf.oa.period_exponent << 
>>>>>>> GEN8_OA_TIMER_PERIOD_SHIFT) |
>>>>>>> +               (dev_priv->perf.oa.periodic ? 
>>>>>>> GEN8_OA_TIMER_ENABLE : 0) |
>>>>>>> +               GEN8_OA_COUNTER_RESUME);
>>>>>>
>>>>>> I'll be honest but, I don't think it's CTX_REG() that helps 
>>>>>> improve the
>>>>>> readability here.
>>>>>>
>>>>>> The really odd part is that this sticks itself into a bare part 
>>>>>> of the
>>>>>> register state not surrounded by any LRI and after a BB_END. This
>>>>>> routine can only work for established contexts, it should not 
>>>>>> work for
>>>>>> execlists_init_reg_state.
>>>>>
>>>>> Unless I am missing something change is completely mechanical, so any
>>>>> question marks you have are already there, right? What do you 
>>>>> suggest is
>>>>> the action here?
>>>>
>>>> Sure, the only thing I question of this patch itself is whether
>>>> CTX_REG() is simply too much horrible obfuscating magic.
>>>
>>> Turn a blind eye if the perceived badness factor is below some
>>> threshold? Following patch depends on this one, so if I have to drop
>>> this one, then I have to rework the next one etc.. well, it's not the
>>> worst problem, so yeah, whatever. Make a call and let me know.
>>
>> The patch was fine, just worrying about the surrounding code.
>
> I misunderstood. So only about ctx_oactxctrl_offset and 
> ctx_flexeu0_offset from i915_perf.c? Maybe that is some OA magic, I 
> have not idea. CC-ing Lionel in case he can shed some light on it.

Those are the offsets at which the hardware will store the 
OA_CTXCTRL/FLEX_EU registers values as documented.
I can see that's it's a bit odd not to have the MI_LRI written before we 
do the first restore.

I'm 99% sure I've verified in practice that application started after 
i915/perf is opened have the right values programmed.
Not completely sure that the IGT tests cover that case though.
So maybe there is a problem with the first restore...

What's the value set into most register that aren't explicitly 
programmed in intel_lrc.c ?

-
Lionel

>
> Regards,
>
> Tvrtko
>
Chris Wilson Aug. 15, 2018, 10:56 a.m. UTC | #8
Quoting Lionel Landwerlin (2018-08-15 11:50:55)
> On 15/08/18 09:49, Tvrtko Ursulin wrote:
> >
> > On 14/08/2018 19:57, Chris Wilson wrote:
> >> Quoting Tvrtko Ursulin (2018-08-14 19:49:46)
> >>>
> >>> On 13/08/2018 10:16, Chris Wilson wrote:
> >>>> Quoting Tvrtko Ursulin (2018-08-13 10:11:44)
> >>>>>
> >>>>> On 13/08/2018 09:16, Chris Wilson wrote:
> >>>>>> Quoting Tvrtko Ursulin (2018-08-13 09:02:18)
> >>>>>>> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> >>>>>>>
> >>>>>>> Abstract the context image access a bit.
> >>>>>>>
> >>>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> >>>>>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>>>>> ---
> >>>>>>>     drivers/gpu/drm/i915/i915_perf.c | 34 
> >>>>>>> +++++++++++++++-----------------
> >>>>>>>     1 file changed, 16 insertions(+), 18 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> >>>>>>> b/drivers/gpu/drm/i915/i915_perf.c
> >>>>>>> index 49597cf31707..ccb20230df2c 100644
> >>>>>>> --- a/drivers/gpu/drm/i915/i915_perf.c
> >>>>>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
> >>>>>>> @@ -210,6 +210,7 @@
> >>>>>>>     #include "i915_oa_cflgt3.h"
> >>>>>>>     #include "i915_oa_cnl.h"
> >>>>>>>     #include "i915_oa_icl.h"
> >>>>>>> +#include "intel_lrc_reg.h"
> >>>>>>>         /* HW requires this to be a power of two, between 128k 
> >>>>>>> and 16M, though driver
> >>>>>>>      * is currently generally designed assuming the largest 16M 
> >>>>>>> size is used such
> >>>>>>> @@ -1636,27 +1637,25 @@ static void 
> >>>>>>> gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
> >>>>>>>            u32 ctx_oactxctrl = 
> >>>>>>> dev_priv->perf.oa.ctx_oactxctrl_offset;
> >>>>>>>            u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
> >>>>>>>            /* The MMIO offsets for Flex EU registers aren't 
> >>>>>>> contiguous */
> >>>>>>> -       u32 flex_mmio[] = {
> >>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL0),
> >>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL1),
> >>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL2),
> >>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL3),
> >>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL4),
> >>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL5),
> >>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL6),
> >>>>>>> +       i915_reg_t flex_regs[] = {
> >>>>>>> +               EU_PERF_CNTL0,
> >>>>>>> +               EU_PERF_CNTL1,
> >>>>>>> +               EU_PERF_CNTL2,
> >>>>>>> +               EU_PERF_CNTL3,
> >>>>>>> +               EU_PERF_CNTL4,
> >>>>>>> +               EU_PERF_CNTL5,
> >>>>>>> +               EU_PERF_CNTL6,
> >>>>>>>            };
> >>>>>>>            int i;
> >>>>>>>     -       reg_state[ctx_oactxctrl] = 
> >>>>>>> i915_mmio_reg_offset(GEN8_OACTXCONTROL);
> >>>>>>> -       reg_state[ctx_oactxctrl+1] = 
> >>>>>>> (dev_priv->perf.oa.period_exponent <<
> >>>>>>> - GEN8_OA_TIMER_PERIOD_SHIFT) |
> >>>>>>> - (dev_priv->perf.oa.periodic ?
> >>>>>>> - GEN8_OA_TIMER_ENABLE : 0) |
> >>>>>>> - GEN8_OA_COUNTER_RESUME;
> >>>>>>> +       CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
> >>>>>>> +               (dev_priv->perf.oa.period_exponent << 
> >>>>>>> GEN8_OA_TIMER_PERIOD_SHIFT) |
> >>>>>>> +               (dev_priv->perf.oa.periodic ? 
> >>>>>>> GEN8_OA_TIMER_ENABLE : 0) |
> >>>>>>> +               GEN8_OA_COUNTER_RESUME);
> >>>>>>
> >>>>>> I'll be honest but, I don't think it's CTX_REG() that helps 
> >>>>>> improve the
> >>>>>> readability here.
> >>>>>>
> >>>>>> The really odd part is that this sticks itself into a bare part 
> >>>>>> of the
> >>>>>> register state not surrounded by any LRI and after a BB_END. This
> >>>>>> routine can only work for established contexts, it should not 
> >>>>>> work for
> >>>>>> execlists_init_reg_state.
> >>>>>
> >>>>> Unless I am missing something change is completely mechanical, so any
> >>>>> question marks you have are already there, right? What do you 
> >>>>> suggest is
> >>>>> the action here?
> >>>>
> >>>> Sure, the only thing I question of this patch itself is whether
> >>>> CTX_REG() is simply too much horrible obfuscating magic.
> >>>
> >>> Turn a blind eye if the perceived badness factor is below some
> >>> threshold? Following patch depends on this one, so if I have to drop
> >>> this one, then I have to rework the next one etc.. well, it's not the
> >>> worst problem, so yeah, whatever. Make a call and let me know.
> >>
> >> The patch was fine, just worrying about the surrounding code.
> >
> > I misunderstood. So only about ctx_oactxctrl_offset and 
> > ctx_flexeu0_offset from i915_perf.c? Maybe that is some OA magic, I 
> > have not idea. CC-ing Lionel in case he can shed some light on it.
> 
> Those are the offsets at which the hardware will store the 
> OA_CTXCTRL/FLEX_EU registers values as documented.
> I can see that's it's a bit odd not to have the MI_LRI written before we 
> do the first restore.
> 
> I'm 99% sure I've verified in practice that application started after 
> i915/perf is opened have the right values programmed.
> Not completely sure that the IGT tests cover that case though.
> So maybe there is a problem with the first restore...
> 
> What's the value set into most register that aren't explicitly 
> programmed in intel_lrc.c ?

That would be whatever was previously there

In practice, since we now initialise all real contexts from the default
state, the LRI will be there. execlists_init_reg_state() is wonderfully,
perhaps dangerously, misleading atm.
-Chris
Lionel Landwerlin Aug. 15, 2018, 11:15 a.m. UTC | #9
On 15/08/18 11:56, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2018-08-15 11:50:55)
>> On 15/08/18 09:49, Tvrtko Ursulin wrote:
>>> On 14/08/2018 19:57, Chris Wilson wrote:
>>>> Quoting Tvrtko Ursulin (2018-08-14 19:49:46)
>>>>> On 13/08/2018 10:16, Chris Wilson wrote:
>>>>>> Quoting Tvrtko Ursulin (2018-08-13 10:11:44)
>>>>>>> On 13/08/2018 09:16, Chris Wilson wrote:
>>>>>>>> Quoting Tvrtko Ursulin (2018-08-13 09:02:18)
>>>>>>>>> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>>>>>>>
>>>>>>>>> Abstract the context image access a bit.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>>>>>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>>>>> ---
>>>>>>>>>      drivers/gpu/drm/i915/i915_perf.c | 34
>>>>>>>>> +++++++++++++++-----------------
>>>>>>>>>      1 file changed, 16 insertions(+), 18 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c
>>>>>>>>> b/drivers/gpu/drm/i915/i915_perf.c
>>>>>>>>> index 49597cf31707..ccb20230df2c 100644
>>>>>>>>> --- a/drivers/gpu/drm/i915/i915_perf.c
>>>>>>>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>>>>>>>>> @@ -210,6 +210,7 @@
>>>>>>>>>      #include "i915_oa_cflgt3.h"
>>>>>>>>>      #include "i915_oa_cnl.h"
>>>>>>>>>      #include "i915_oa_icl.h"
>>>>>>>>> +#include "intel_lrc_reg.h"
>>>>>>>>>          /* HW requires this to be a power of two, between 128k
>>>>>>>>> and 16M, though driver
>>>>>>>>>       * is currently generally designed assuming the largest 16M
>>>>>>>>> size is used such
>>>>>>>>> @@ -1636,27 +1637,25 @@ static void
>>>>>>>>> gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
>>>>>>>>>             u32 ctx_oactxctrl =
>>>>>>>>> dev_priv->perf.oa.ctx_oactxctrl_offset;
>>>>>>>>>             u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
>>>>>>>>>             /* The MMIO offsets for Flex EU registers aren't
>>>>>>>>> contiguous */
>>>>>>>>> -       u32 flex_mmio[] = {
>>>>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL0),
>>>>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL1),
>>>>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL2),
>>>>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL3),
>>>>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL4),
>>>>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL5),
>>>>>>>>> -               i915_mmio_reg_offset(EU_PERF_CNTL6),
>>>>>>>>> +       i915_reg_t flex_regs[] = {
>>>>>>>>> +               EU_PERF_CNTL0,
>>>>>>>>> +               EU_PERF_CNTL1,
>>>>>>>>> +               EU_PERF_CNTL2,
>>>>>>>>> +               EU_PERF_CNTL3,
>>>>>>>>> +               EU_PERF_CNTL4,
>>>>>>>>> +               EU_PERF_CNTL5,
>>>>>>>>> +               EU_PERF_CNTL6,
>>>>>>>>>             };
>>>>>>>>>             int i;
>>>>>>>>>      -       reg_state[ctx_oactxctrl] =
>>>>>>>>> i915_mmio_reg_offset(GEN8_OACTXCONTROL);
>>>>>>>>> -       reg_state[ctx_oactxctrl+1] =
>>>>>>>>> (dev_priv->perf.oa.period_exponent <<
>>>>>>>>> - GEN8_OA_TIMER_PERIOD_SHIFT) |
>>>>>>>>> - (dev_priv->perf.oa.periodic ?
>>>>>>>>> - GEN8_OA_TIMER_ENABLE : 0) |
>>>>>>>>> - GEN8_OA_COUNTER_RESUME;
>>>>>>>>> +       CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
>>>>>>>>> +               (dev_priv->perf.oa.period_exponent <<
>>>>>>>>> GEN8_OA_TIMER_PERIOD_SHIFT) |
>>>>>>>>> +               (dev_priv->perf.oa.periodic ?
>>>>>>>>> GEN8_OA_TIMER_ENABLE : 0) |
>>>>>>>>> +               GEN8_OA_COUNTER_RESUME);
>>>>>>>> I'll be honest but, I don't think it's CTX_REG() that helps
>>>>>>>> improve the
>>>>>>>> readability here.
>>>>>>>>
>>>>>>>> The really odd part is that this sticks itself into a bare part
>>>>>>>> of the
>>>>>>>> register state not surrounded by any LRI and after a BB_END. This
>>>>>>>> routine can only work for established contexts, it should not
>>>>>>>> work for
>>>>>>>> execlists_init_reg_state.
>>>>>>> Unless I am missing something change is completely mechanical, so any
>>>>>>> question marks you have are already there, right? What do you
>>>>>>> suggest is
>>>>>>> the action here?
>>>>>> Sure, the only thing I question of this patch itself is whether
>>>>>> CTX_REG() is simply too much horrible obfuscating magic.
>>>>> Turn a blind eye if the perceived badness factor is below some
>>>>> threshold? Following patch depends on this one, so if I have to drop
>>>>> this one, then I have to rework the next one etc.. well, it's not the
>>>>> worst problem, so yeah, whatever. Make a call and let me know.
>>>> The patch was fine, just worrying about the surrounding code.
>>> I misunderstood. So only about ctx_oactxctrl_offset and
>>> ctx_flexeu0_offset from i915_perf.c? Maybe that is some OA magic, I
>>> have not idea. CC-ing Lionel in case he can shed some light on it.
>> Those are the offsets at which the hardware will store the
>> OA_CTXCTRL/FLEX_EU registers values as documented.
>> I can see that's it's a bit odd not to have the MI_LRI written before we
>> do the first restore.
>>
>> I'm 99% sure I've verified in practice that application started after
>> i915/perf is opened have the right values programmed.
>> Not completely sure that the IGT tests cover that case though.
>> So maybe there is a problem with the first restore...
>>
>> What's the value set into most register that aren't explicitly
>> programmed in intel_lrc.c ?
> That would be whatever was previously there
>
> In practice, since we now initialise all real contexts from the default
> state, the LRI will be there. execlists_init_reg_state() is wonderfully,
> perhaps dangerously, misleading atm.
> -Chris
>
Then I guess it's all fine :)
Thanks,

-
Lionel
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 49597cf31707..ccb20230df2c 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -210,6 +210,7 @@ 
 #include "i915_oa_cflgt3.h"
 #include "i915_oa_cnl.h"
 #include "i915_oa_icl.h"
+#include "intel_lrc_reg.h"
 
 /* HW requires this to be a power of two, between 128k and 16M, though driver
  * is currently generally designed assuming the largest 16M size is used such
@@ -1636,27 +1637,25 @@  static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
 	u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
 	u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
 	/* The MMIO offsets for Flex EU registers aren't contiguous */
-	u32 flex_mmio[] = {
-		i915_mmio_reg_offset(EU_PERF_CNTL0),
-		i915_mmio_reg_offset(EU_PERF_CNTL1),
-		i915_mmio_reg_offset(EU_PERF_CNTL2),
-		i915_mmio_reg_offset(EU_PERF_CNTL3),
-		i915_mmio_reg_offset(EU_PERF_CNTL4),
-		i915_mmio_reg_offset(EU_PERF_CNTL5),
-		i915_mmio_reg_offset(EU_PERF_CNTL6),
+	i915_reg_t flex_regs[] = {
+		EU_PERF_CNTL0,
+		EU_PERF_CNTL1,
+		EU_PERF_CNTL2,
+		EU_PERF_CNTL3,
+		EU_PERF_CNTL4,
+		EU_PERF_CNTL5,
+		EU_PERF_CNTL6,
 	};
 	int i;
 
-	reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
-	reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent <<
-				      GEN8_OA_TIMER_PERIOD_SHIFT) |
-				     (dev_priv->perf.oa.periodic ?
-				      GEN8_OA_TIMER_ENABLE : 0) |
-				     GEN8_OA_COUNTER_RESUME;
+	CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
+		(dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+		(dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+		GEN8_OA_COUNTER_RESUME);
 
-	for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) {
+	for (i = 0; i < ARRAY_SIZE(flex_regs); i++) {
 		u32 state_offset = ctx_flexeu0 + i * 2;
-		u32 mmio = flex_mmio[i];
+		u32 mmio = i915_mmio_reg_offset(flex_regs[i]);
 
 		/*
 		 * This arbitrary default will select the 'EU FPU0 Pipeline
@@ -1676,8 +1675,7 @@  static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
 			}
 		}
 
-		reg_state[state_offset] = mmio;
-		reg_state[state_offset+1] = value;
+		CTX_REG(reg_state, state_offset, flex_regs[i], value);
 	}
 }