diff mbox series

[v2,6/9] drm/msm/A6xx: Use postamble to reset counters on preemption

Message ID 20240830-preemption-a750-t-v2-6-86aeead2cd80@gmail.com (mailing list archive)
State New, archived
Headers show
Series Preemption support for A7XX | expand

Commit Message

Antonino Maniscalco Aug. 30, 2024, 3:32 p.m. UTC
Use the postamble to reset perf counters when switching between rings,
except when sysprof is enabled, analogously to how they are reset
between submissions when switching pagetables.

Signed-off-by: Antonino Maniscalco <antomani103@gmail.com>
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c     | 14 +++++++++++++-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h     |  6 ++++++
 drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 26 +++++++++++++++++++++++++-
 drivers/gpu/drm/msm/adreno/adreno_gpu.h   |  7 +++++--
 4 files changed, 49 insertions(+), 4 deletions(-)

Comments

Rob Clark Aug. 30, 2024, 6:32 p.m. UTC | #1
On Fri, Aug 30, 2024 at 8:33 AM Antonino Maniscalco
<antomani103@gmail.com> wrote:
>
> Use the postamble to reset perf counters when switching between rings,
> except when sysprof is enabled, analogously to how they are reset
> between submissions when switching pagetables.
>
> Signed-off-by: Antonino Maniscalco <antomani103@gmail.com>
> ---
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c     | 14 +++++++++++++-
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.h     |  6 ++++++
>  drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 26 +++++++++++++++++++++++++-
>  drivers/gpu/drm/msm/adreno/adreno_gpu.h   |  7 +++++--
>  4 files changed, 49 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index 1a90db5759b8..3528ecbbc1ab 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -366,7 +366,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
>  static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
>                 struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue)
>  {
> -       u64 preempt_offset_priv_secure;
> +       bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
> +       u64 preempt_offset_priv_secure, preempt_postamble;
>
>         OUT_PKT7(ring, CP_SET_PSEUDO_REG, 15);
>
> @@ -403,6 +404,17 @@ static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
>         /* seems OK to set to 0 to disable it */
>         OUT_RING(ring, 0);
>         OUT_RING(ring, 0);
> +
> +       if (!sysprof && a6xx_gpu->preempt_postamble_len) {
> +               preempt_postamble = SCRATCH_PREEMPT_POSTAMBLE_IOVA(a6xx_gpu);
> +
> +               OUT_PKT7(ring, CP_SET_AMBLE, 3);
> +               OUT_RING(ring, lower_32_bits(preempt_postamble));
> +               OUT_RING(ring, upper_32_bits(preempt_postamble));
> +               OUT_RING(ring, CP_SET_AMBLE_2_DWORDS(
> +                                       a6xx_gpu->preempt_postamble_len) |
> +                               CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE));
> +       }

Hmm, ok, we set this in the submit path.. but do we need to clear it
somehow when transitioning from !sysprof to sysprof?

Also, how does this interact with UMD perfctr queries, I would expect
they would prefer save/restore?

BR,
-R

>  }
>
>  static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> index 652e49f01428..2338e36c8f47 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> @@ -66,6 +66,7 @@ struct a6xx_gpu {
>         atomic_t preempt_state;
>         spinlock_t eval_lock;
>         struct timer_list preempt_timer;
> +       uint64_t preempt_postamble_len;
>
>         unsigned int preempt_level;
>         bool uses_gmem;
> @@ -99,6 +100,11 @@ struct a6xx_gpu {
>  #define SCRATCH_USER_CTX_IOVA(ring_id, a6xx_gpu) \
>         (a6xx_gpu->scratch_iova + (ring_id * sizeof(uint64_t)))
>
> +#define SCRATCH_PREEMPT_POSTAMBLE_OFFSET (100 * sizeof(u64))
> +
> +#define SCRATCH_PREEMPT_POSTAMBLE_IOVA(a6xx_gpu) \
> +       (a6xx_gpu->scratch_iova + SCRATCH_PREEMPT_POSTAMBLE_OFFSET)
> +
>  /*
>   * In order to do lockless preemption we use a simple state machine to progress
>   * through the process.
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
> index 4b61b993f75f..f586615db97e 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
> @@ -351,6 +351,28 @@ static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu,
>         return 0;
>  }
>
> +static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu)
> +{
> +       u32 *postamble = a6xx_gpu->scratch_ptr + SCRATCH_PREEMPT_POSTAMBLE_OFFSET;
> +       u32 count = 0;
> +
> +       postamble[count++] = PKT7(CP_REG_RMW, 3);
> +       postamble[count++] = REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD;
> +       postamble[count++] = 0;
> +       postamble[count++] = 1;
> +
> +       postamble[count++] = PKT7(CP_WAIT_REG_MEM, 6);
> +       postamble[count++] = CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ);
> +       postamble[count++] = CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
> +                               REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS);
> +       postamble[count++] = CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0);
> +       postamble[count++] = CP_WAIT_REG_MEM_3_REF(0x1);
> +       postamble[count++] = CP_WAIT_REG_MEM_4_MASK(0x1);
> +       postamble[count++] = CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0);
> +
> +       a6xx_gpu->preempt_postamble_len = count;
> +}
> +
>  void a6xx_preempt_fini(struct msm_gpu *gpu)
>  {
>         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> @@ -382,10 +404,12 @@ void a6xx_preempt_init(struct msm_gpu *gpu)
>         a6xx_gpu->skip_save_restore = 1;
>
>         a6xx_gpu->scratch_ptr  = msm_gem_kernel_new(gpu->dev,
> -                       gpu->nr_rings * sizeof(uint64_t), MSM_BO_WC,
> +                       PAGE_SIZE, MSM_BO_WC,
>                         gpu->aspace, &a6xx_gpu->scratch_bo,
>                         &a6xx_gpu->scratch_iova);
>
> +       preempt_prepare_postamble(a6xx_gpu);
> +
>         if (IS_ERR(a6xx_gpu->scratch_ptr))
>                 goto fail;
>
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> index 6b1888280a83..87098567483b 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> @@ -610,12 +610,15 @@ OUT_PKT4(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
>         OUT_RING(ring, PKT4(regindx, cnt));
>  }
>
> +#define PKT7(opcode, cnt) \
> +       (CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) | \
> +               ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23))
> +
>  static inline void
>  OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
>  {
>         adreno_wait_ring(ring, cnt + 1);
> -       OUT_RING(ring, CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) |
> -               ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23));
> +       OUT_RING(ring, PKT7(opcode, cnt));
>  }
>
>  struct msm_gpu *a2xx_gpu_init(struct drm_device *dev);
>
> --
> 2.46.0
>
Antonino Maniscalco Sept. 4, 2024, 1:38 p.m. UTC | #2
On 8/30/24 8:32 PM, Rob Clark wrote:
> On Fri, Aug 30, 2024 at 8:33 AM Antonino Maniscalco
> <antomani103@gmail.com> wrote:
>>
>> Use the postamble to reset perf counters when switching between rings,
>> except when sysprof is enabled, analogously to how they are reset
>> between submissions when switching pagetables.
>>
>> Signed-off-by: Antonino Maniscalco <antomani103@gmail.com>
>> ---
>>   drivers/gpu/drm/msm/adreno/a6xx_gpu.c     | 14 +++++++++++++-
>>   drivers/gpu/drm/msm/adreno/a6xx_gpu.h     |  6 ++++++
>>   drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 26 +++++++++++++++++++++++++-
>>   drivers/gpu/drm/msm/adreno/adreno_gpu.h   |  7 +++++--
>>   4 files changed, 49 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> index 1a90db5759b8..3528ecbbc1ab 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> @@ -366,7 +366,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
>>   static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
>>                  struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue)
>>   {
>> -       u64 preempt_offset_priv_secure;
>> +       bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
>> +       u64 preempt_offset_priv_secure, preempt_postamble;
>>
>>          OUT_PKT7(ring, CP_SET_PSEUDO_REG, 15);
>>
>> @@ -403,6 +404,17 @@ static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
>>          /* seems OK to set to 0 to disable it */
>>          OUT_RING(ring, 0);
>>          OUT_RING(ring, 0);
>> +
>> +       if (!sysprof && a6xx_gpu->preempt_postamble_len) {
>> +               preempt_postamble = SCRATCH_PREEMPT_POSTAMBLE_IOVA(a6xx_gpu);
>> +
>> +               OUT_PKT7(ring, CP_SET_AMBLE, 3);
>> +               OUT_RING(ring, lower_32_bits(preempt_postamble));
>> +               OUT_RING(ring, upper_32_bits(preempt_postamble));
>> +               OUT_RING(ring, CP_SET_AMBLE_2_DWORDS(
>> +                                       a6xx_gpu->preempt_postamble_len) |
>> +                               CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE));
>> +       }
> 
> Hmm, ok, we set this in the submit path.. but do we need to clear it
> somehow when transitioning from !sysprof to sysprof?
> 

We can always emit the packet and zero its fields out when sysprof is
enabled. Would that be OK for you? Emitting it only when needed might be
nontrivial given that there are multiple rings, and we would be paying
the overhead of emitting it in the more common case (not profiling) anyway.

> Also, how does this interact with UMD perfctr queries, I would expect
> they would prefer save/restore?

Right, so my understanding, given previous discussions, is that we want to
disable preemption from userspace in that case? The Vulkan extension
requires acquiring and releasing a lock, so we could perhaps use that to
emit the packets that enable and disable preemption.

> 
> BR,
> -R
> 
>>   }
>>
>>   static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> index 652e49f01428..2338e36c8f47 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> @@ -66,6 +66,7 @@ struct a6xx_gpu {
>>          atomic_t preempt_state;
>>          spinlock_t eval_lock;
>>          struct timer_list preempt_timer;
>> +       uint64_t preempt_postamble_len;
>>
>>          unsigned int preempt_level;
>>          bool uses_gmem;
>> @@ -99,6 +100,11 @@ struct a6xx_gpu {
>>   #define SCRATCH_USER_CTX_IOVA(ring_id, a6xx_gpu) \
>>          (a6xx_gpu->scratch_iova + (ring_id * sizeof(uint64_t)))
>>
>> +#define SCRATCH_PREEMPT_POSTAMBLE_OFFSET (100 * sizeof(u64))
>> +
>> +#define SCRATCH_PREEMPT_POSTAMBLE_IOVA(a6xx_gpu) \
>> +       (a6xx_gpu->scratch_iova + SCRATCH_PREEMPT_POSTAMBLE_OFFSET)
>> +
>>   /*
>>    * In order to do lockless preemption we use a simple state machine to progress
>>    * through the process.
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
>> index 4b61b993f75f..f586615db97e 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
>> @@ -351,6 +351,28 @@ static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu,
>>          return 0;
>>   }
>>
>> +static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu)
>> +{
>> +       u32 *postamble = a6xx_gpu->scratch_ptr + SCRATCH_PREEMPT_POSTAMBLE_OFFSET;
>> +       u32 count = 0;
>> +
>> +       postamble[count++] = PKT7(CP_REG_RMW, 3);
>> +       postamble[count++] = REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD;
>> +       postamble[count++] = 0;
>> +       postamble[count++] = 1;
>> +
>> +       postamble[count++] = PKT7(CP_WAIT_REG_MEM, 6);
>> +       postamble[count++] = CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ);
>> +       postamble[count++] = CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
>> +                               REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS);
>> +       postamble[count++] = CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0);
>> +       postamble[count++] = CP_WAIT_REG_MEM_3_REF(0x1);
>> +       postamble[count++] = CP_WAIT_REG_MEM_4_MASK(0x1);
>> +       postamble[count++] = CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0);
>> +
>> +       a6xx_gpu->preempt_postamble_len = count;
>> +}
>> +
>>   void a6xx_preempt_fini(struct msm_gpu *gpu)
>>   {
>>          struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>> @@ -382,10 +404,12 @@ void a6xx_preempt_init(struct msm_gpu *gpu)
>>          a6xx_gpu->skip_save_restore = 1;
>>
>>          a6xx_gpu->scratch_ptr  = msm_gem_kernel_new(gpu->dev,
>> -                       gpu->nr_rings * sizeof(uint64_t), MSM_BO_WC,
>> +                       PAGE_SIZE, MSM_BO_WC,
>>                          gpu->aspace, &a6xx_gpu->scratch_bo,
>>                          &a6xx_gpu->scratch_iova);
>>
>> +       preempt_prepare_postamble(a6xx_gpu);
>> +
>>          if (IS_ERR(a6xx_gpu->scratch_ptr))
>>                  goto fail;
>>
>> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
>> index 6b1888280a83..87098567483b 100644
>> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
>> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
>> @@ -610,12 +610,15 @@ OUT_PKT4(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
>>          OUT_RING(ring, PKT4(regindx, cnt));
>>   }
>>
>> +#define PKT7(opcode, cnt) \
>> +       (CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) | \
>> +               ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23))
>> +
>>   static inline void
>>   OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
>>   {
>>          adreno_wait_ring(ring, cnt + 1);
>> -       OUT_RING(ring, CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) |
>> -               ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23));
>> +       OUT_RING(ring, PKT7(opcode, cnt));
>>   }
>>
>>   struct msm_gpu *a2xx_gpu_init(struct drm_device *dev);
>>
>> --
>> 2.46.0
>>

Best regards,
--
Antonino Maniscalco <antomani103@gmail.com>

Rob Clark Sept. 4, 2024, 2:30 p.m. UTC | #3
On Wed, Sep 4, 2024 at 6:39 AM Antonino Maniscalco
<antomani103@gmail.com> wrote:
>
> On 8/30/24 8:32 PM, Rob Clark wrote:
> > On Fri, Aug 30, 2024 at 8:33 AM Antonino Maniscalco
> > <antomani103@gmail.com> wrote:
> >>
> >> Use the postamble to reset perf counters when switching between rings,
> >> except when sysprof is enabled, analogously to how they are reset
> >> between submissions when switching pagetables.
> >>
> >> Signed-off-by: Antonino Maniscalco <antomani103@gmail.com>
> >> ---
> >>   drivers/gpu/drm/msm/adreno/a6xx_gpu.c     | 14 +++++++++++++-
> >>   drivers/gpu/drm/msm/adreno/a6xx_gpu.h     |  6 ++++++
> >>   drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 26 +++++++++++++++++++++++++-
> >>   drivers/gpu/drm/msm/adreno/adreno_gpu.h   |  7 +++++--
> >>   4 files changed, 49 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> >> index 1a90db5759b8..3528ecbbc1ab 100644
> >> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> >> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> >> @@ -366,7 +366,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
> >>   static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
> >>                  struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue)
> >>   {
> >> -       u64 preempt_offset_priv_secure;
> >> +       bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
> >> +       u64 preempt_offset_priv_secure, preempt_postamble;
> >>
> >>          OUT_PKT7(ring, CP_SET_PSEUDO_REG, 15);
> >>
> >> @@ -403,6 +404,17 @@ static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
> >>          /* seems OK to set to 0 to disable it */
> >>          OUT_RING(ring, 0);
> >>          OUT_RING(ring, 0);
> >> +
> >> +       if (!sysprof && a6xx_gpu->preempt_postamble_len) {
> >> +               preempt_postamble = SCRATCH_PREEMPT_POSTAMBLE_IOVA(a6xx_gpu);
> >> +
> >> +               OUT_PKT7(ring, CP_SET_AMBLE, 3);
> >> +               OUT_RING(ring, lower_32_bits(preempt_postamble));
> >> +               OUT_RING(ring, upper_32_bits(preempt_postamble));
> >> +               OUT_RING(ring, CP_SET_AMBLE_2_DWORDS(
> >> +                                       a6xx_gpu->preempt_postamble_len) |
> >> +                               CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE));
> >> +       }
> >
> > Hmm, ok, we set this in the submit path.. but do we need to clear it
> > somehow when transitioning from !sysprof to sysprof?
> >
>
> We can always emit the packet and 0 fields out when sysprof is enabled.
> Would that be ok for you? Only emitting it when needed might be
> nontrivial given that there are multiple rings and we would be paying
> the overhead for emitting it in the more common case (not profiling) anyway.

That sounds like it would work

> > Also, how does this interact with UMD perfctr queries, I would expect
> > they would prefer save/restore?
>
> Right so my understanding given previous discussions is that we want to
> disable preemption from userspace in that case? The vulkan extension
> requires to acquire and release a lock so we could use that to emit the
> packets that enable and disable preemption perhaps.

ack

BR,
-R

> >
> > BR,
> > -R
> >
> >>   }
> >>
> >>   static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
> >> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> >> index 652e49f01428..2338e36c8f47 100644
> >> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> >> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> >> @@ -66,6 +66,7 @@ struct a6xx_gpu {
> >>          atomic_t preempt_state;
> >>          spinlock_t eval_lock;
> >>          struct timer_list preempt_timer;
> >> +       uint64_t preempt_postamble_len;
> >>
> >>          unsigned int preempt_level;
> >>          bool uses_gmem;
> >> @@ -99,6 +100,11 @@ struct a6xx_gpu {
> >>   #define SCRATCH_USER_CTX_IOVA(ring_id, a6xx_gpu) \
> >>          (a6xx_gpu->scratch_iova + (ring_id * sizeof(uint64_t)))
> >>
> >> +#define SCRATCH_PREEMPT_POSTAMBLE_OFFSET (100 * sizeof(u64))
> >> +
> >> +#define SCRATCH_PREEMPT_POSTAMBLE_IOVA(a6xx_gpu) \
> >> +       (a6xx_gpu->scratch_iova + SCRATCH_PREEMPT_POSTAMBLE_OFFSET)
> >> +
> >>   /*
> >>    * In order to do lockless preemption we use a simple state machine to progress
> >>    * through the process.
> >> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
> >> index 4b61b993f75f..f586615db97e 100644
> >> --- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
> >> +++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
> >> @@ -351,6 +351,28 @@ static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu,
> >>          return 0;
> >>   }
> >>
> >> +static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu)
> >> +{
> >> +       u32 *postamble = a6xx_gpu->scratch_ptr + SCRATCH_PREEMPT_POSTAMBLE_OFFSET;
> >> +       u32 count = 0;
> >> +
> >> +       postamble[count++] = PKT7(CP_REG_RMW, 3);
> >> +       postamble[count++] = REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD;
> >> +       postamble[count++] = 0;
> >> +       postamble[count++] = 1;
> >> +
> >> +       postamble[count++] = PKT7(CP_WAIT_REG_MEM, 6);
> >> +       postamble[count++] = CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ);
> >> +       postamble[count++] = CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
> >> +                               REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS);
> >> +       postamble[count++] = CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0);
> >> +       postamble[count++] = CP_WAIT_REG_MEM_3_REF(0x1);
> >> +       postamble[count++] = CP_WAIT_REG_MEM_4_MASK(0x1);
> >> +       postamble[count++] = CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0);
> >> +
> >> +       a6xx_gpu->preempt_postamble_len = count;
> >> +}
> >> +
> >>   void a6xx_preempt_fini(struct msm_gpu *gpu)
> >>   {
> >>          struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> >> @@ -382,10 +404,12 @@ void a6xx_preempt_init(struct msm_gpu *gpu)
> >>          a6xx_gpu->skip_save_restore = 1;
> >>
> >>          a6xx_gpu->scratch_ptr  = msm_gem_kernel_new(gpu->dev,
> >> -                       gpu->nr_rings * sizeof(uint64_t), MSM_BO_WC,
> >> +                       PAGE_SIZE, MSM_BO_WC,
> >>                          gpu->aspace, &a6xx_gpu->scratch_bo,
> >>                          &a6xx_gpu->scratch_iova);
> >>
> >> +       preempt_prepare_postamble(a6xx_gpu);
> >> +
> >>          if (IS_ERR(a6xx_gpu->scratch_ptr))
> >>                  goto fail;
> >>
> >> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> >> index 6b1888280a83..87098567483b 100644
> >> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> >> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> >> @@ -610,12 +610,15 @@ OUT_PKT4(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
> >>          OUT_RING(ring, PKT4(regindx, cnt));
> >>   }
> >>
> >> +#define PKT7(opcode, cnt) \
> >> +       (CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) | \
> >> +               ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23))
> >> +
> >>   static inline void
> >>   OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
> >>   {
> >>          adreno_wait_ring(ring, cnt + 1);
> >> -       OUT_RING(ring, CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) |
> >> -               ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23));
> >> +       OUT_RING(ring, PKT7(opcode, cnt));
> >>   }
> >>
> >>   struct msm_gpu *a2xx_gpu_init(struct drm_device *dev);
> >>
> >> --
> >> 2.46.0
> >>
>
> Best regards,
> --
> Antonino Maniscalco <antomani103@gmail.com>
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 1a90db5759b8..3528ecbbc1ab 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -366,7 +366,8 @@  static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
 		struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue)
 {
-	u64 preempt_offset_priv_secure;
+	bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
+	u64 preempt_offset_priv_secure, preempt_postamble;
 
 	OUT_PKT7(ring, CP_SET_PSEUDO_REG, 15);
 
@@ -403,6 +404,17 @@  static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
 	/* seems OK to set to 0 to disable it */
 	OUT_RING(ring, 0);
 	OUT_RING(ring, 0);
+
+	if (!sysprof && a6xx_gpu->preempt_postamble_len) {
+		preempt_postamble = SCRATCH_PREEMPT_POSTAMBLE_IOVA(a6xx_gpu);
+
+		OUT_PKT7(ring, CP_SET_AMBLE, 3);
+		OUT_RING(ring, lower_32_bits(preempt_postamble));
+		OUT_RING(ring, upper_32_bits(preempt_postamble));
+		OUT_RING(ring, CP_SET_AMBLE_2_DWORDS(
+					a6xx_gpu->preempt_postamble_len) |
+				CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE));
+	}
 }
 
 static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index 652e49f01428..2338e36c8f47 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -66,6 +66,7 @@  struct a6xx_gpu {
 	atomic_t preempt_state;
 	spinlock_t eval_lock;
 	struct timer_list preempt_timer;
+	uint64_t preempt_postamble_len;
 
 	unsigned int preempt_level;
 	bool uses_gmem;
@@ -99,6 +100,11 @@  struct a6xx_gpu {
 #define SCRATCH_USER_CTX_IOVA(ring_id, a6xx_gpu) \
 	(a6xx_gpu->scratch_iova + (ring_id * sizeof(uint64_t)))
 
+#define SCRATCH_PREEMPT_POSTAMBLE_OFFSET (100 * sizeof(u64)) /* byte offset from the scratch base */
+
+#define SCRATCH_PREEMPT_POSTAMBLE_IOVA(a6xx_gpu) \
+	((a6xx_gpu)->scratch_iova + SCRATCH_PREEMPT_POSTAMBLE_OFFSET)
+
 /*
  * In order to do lockless preemption we use a simple state machine to progress
  * through the process.
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
index 4b61b993f75f..f586615db97e 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
@@ -351,6 +351,28 @@  static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu,
 	return 0;
 }
 
+static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu)
+{
+	/* NOTE: scratch_ptr must not be an ERR_PTR here; the offset below is a byte offset. */
+	u32 *postamble = (void *)a6xx_gpu->scratch_ptr + SCRATCH_PREEMPT_POSTAMBLE_OFFSET;
+	u32 count = 0;
+
+	postamble[count++] = PKT7(CP_REG_RMW, 3);	/* RMW on the counter SRAM init cmd reg */
+	postamble[count++] = REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD;
+	postamble[count++] = 0;
+	postamble[count++] = 1;
+
+	postamble[count++] = PKT7(CP_WAIT_REG_MEM, 6);	/* poll the SRAM init status reg */
+	postamble[count++] = CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ);
+	postamble[count++] = CP_WAIT_REG_MEM_1_POLL_ADDR_LO(REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS);
+	postamble[count++] = CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0);
+	postamble[count++] = CP_WAIT_REG_MEM_3_REF(0x1);
+	postamble[count++] = CP_WAIT_REG_MEM_4_MASK(0x1);
+	postamble[count++] = CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0);
+
+	a6xx_gpu->preempt_postamble_len = count;	/* dword count for CP_SET_AMBLE */
+}
+
 void a6xx_preempt_fini(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -382,10 +404,12 @@  void a6xx_preempt_init(struct msm_gpu *gpu)
 	a6xx_gpu->skip_save_restore = 1;
 
 	a6xx_gpu->scratch_ptr  = msm_gem_kernel_new(gpu->dev,
-			gpu->nr_rings * sizeof(uint64_t), MSM_BO_WC,
+			PAGE_SIZE, MSM_BO_WC,
 			gpu->aspace, &a6xx_gpu->scratch_bo,
 			&a6xx_gpu->scratch_iova);
 
+	preempt_prepare_postamble(a6xx_gpu);
+
 	if (IS_ERR(a6xx_gpu->scratch_ptr))
 		goto fail;
 
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 6b1888280a83..87098567483b 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -610,12 +610,15 @@  OUT_PKT4(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
 	OUT_RING(ring, PKT4(regindx, cnt));
 }
 
+#define PKT7(opcode, cnt) /* args are parenthesized but each is evaluated twice */ \
+	(CP_TYPE7_PKT | ((cnt) << 0) | (PM4_PARITY(cnt) << 15) | \
+		(((opcode) & 0x7F) << 16) | (PM4_PARITY(opcode) << 23))
+
 static inline void
 OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
 {
 	adreno_wait_ring(ring, cnt + 1);
-	OUT_RING(ring, CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) |
-		((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23));
+	OUT_RING(ring, PKT7(opcode, cnt));
 }
 
 struct msm_gpu *a2xx_gpu_init(struct drm_device *dev);