diff mbox

[14/15] drm/radeon: record what is next valid wptr for each ring v3

Message ID 1342109574-8107-15-git-send-email-deathsimple@vodafone.de (mailing list archive)
State New, archived
Headers show

Commit Message

Christian König July 12, 2012, 4:12 p.m. UTC
Before emitting any indirect buffer, emit the offset of the next
valid ring content, if any. This allows code that wants to resume
the ring to resume it right after the IB that caused the GPU lockup.

v2: use scratch registers instead of storing it into memory
v3: skip over the surface sync for ni and si as well

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/evergreen.c   |    8 +++++++-
 drivers/gpu/drm/radeon/ni.c          |   11 ++++++++++-
 drivers/gpu/drm/radeon/r600.c        |   18 ++++++++++++++++--
 drivers/gpu/drm/radeon/radeon.h      |    1 +
 drivers/gpu/drm/radeon/radeon_ring.c |    4 ++++
 drivers/gpu/drm/radeon/rv770.c       |    4 +++-
 drivers/gpu/drm/radeon/si.c          |   22 +++++++++++++++++++---
 7 files changed, 60 insertions(+), 8 deletions(-)

Comments

Alex Deucher July 12, 2012, 4:36 p.m. UTC | #1
On Thu, Jul 12, 2012 at 12:12 PM, Christian König
<deathsimple@vodafone.de> wrote:
> Before emitting any indirect buffer, emit the offset of the next
> valid ring content if any. This allow code that want to resume
> ring to resume ring right after ib that caused GPU lockup.
>
> v2: use scratch registers instead of storing it into memory
> v3: skip over the surface sync for ni and si as well
>
> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
> Signed-off-by: Christian König <deathsimple@vodafone.de>
> ---
>  drivers/gpu/drm/radeon/evergreen.c   |    8 +++++++-
>  drivers/gpu/drm/radeon/ni.c          |   11 ++++++++++-
>  drivers/gpu/drm/radeon/r600.c        |   18 ++++++++++++++++--
>  drivers/gpu/drm/radeon/radeon.h      |    1 +
>  drivers/gpu/drm/radeon/radeon_ring.c |    4 ++++
>  drivers/gpu/drm/radeon/rv770.c       |    4 +++-
>  drivers/gpu/drm/radeon/si.c          |   22 +++++++++++++++++++---
>  7 files changed, 60 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
> index f39b900..40de347 100644
> --- a/drivers/gpu/drm/radeon/evergreen.c
> +++ b/drivers/gpu/drm/radeon/evergreen.c
> @@ -1368,7 +1368,13 @@ void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
>         /* set to DX10/11 mode */
>         radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
>         radeon_ring_write(ring, 1);
> -       /* FIXME: implement */
> +
> +       if (ring->rptr_save_reg) {
> +               uint32_t next_rptr = ring->wptr + 2 + 4;
> +               radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
> +               radeon_ring_write(ring, next_rptr);
> +       }

On r600 and newer please use SET_CONFIG_REG rather than Packet0.

Alex

> +
>         radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
>         radeon_ring_write(ring,
>  #ifdef __BIG_ENDIAN
> diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
> index f2afefb..5b7ce2c 100644
> --- a/drivers/gpu/drm/radeon/ni.c
> +++ b/drivers/gpu/drm/radeon/ni.c
> @@ -855,6 +855,13 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
>         /* set to DX10/11 mode */
>         radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
>         radeon_ring_write(ring, 1);
> +
> +       if (ring->rptr_save_reg) {
> +               uint32_t next_rptr = ring->wptr + 2 + 4 + 8;
> +               radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
> +               radeon_ring_write(ring, next_rptr);
> +       }
> +
>         radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
>         radeon_ring_write(ring,
>  #ifdef __BIG_ENDIAN
> @@ -981,8 +988,10 @@ static int cayman_cp_start(struct radeon_device *rdev)
>
>  static void cayman_cp_fini(struct radeon_device *rdev)
>  {
> +       struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
>         cayman_cp_enable(rdev, false);
> -       radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
> +       radeon_ring_fini(rdev, ring);
> +       radeon_scratch_free(rdev, ring->rptr_save_reg);
>  }
>
>  int cayman_cp_resume(struct radeon_device *rdev)
> diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
> index c808fa9..74fca15 100644
> --- a/drivers/gpu/drm/radeon/r600.c
> +++ b/drivers/gpu/drm/radeon/r600.c
> @@ -2155,18 +2155,27 @@ int r600_cp_resume(struct radeon_device *rdev)
>  void r600_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size)
>  {
>         u32 rb_bufsz;
> +       int r;
>
>         /* Align ring size */
>         rb_bufsz = drm_order(ring_size / 8);
>         ring_size = (1 << (rb_bufsz + 1)) * 4;
>         ring->ring_size = ring_size;
>         ring->align_mask = 16 - 1;
> +
> +       r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
> +       if (r) {
> +               DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
> +               ring->rptr_save_reg = 0;
> +       }
>  }
>
>  void r600_cp_fini(struct radeon_device *rdev)
>  {
> +       struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
>         r600_cp_stop(rdev);
> -       radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
> +       radeon_ring_fini(rdev, ring);
> +       radeon_scratch_free(rdev, ring->rptr_save_reg);
>  }
>
>
> @@ -2568,7 +2577,12 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
>  {
>         struct radeon_ring *ring = &rdev->ring[ib->ring];
>
> -       /* FIXME: implement */
> +       if (ring->rptr_save_reg) {
> +               uint32_t next_rptr = ring->wptr + 2 + 4;
> +               radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
> +               radeon_ring_write(ring, next_rptr);
> +       }
> +
>         radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
>         radeon_ring_write(ring,
>  #ifdef __BIG_ENDIAN
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 872270c..64d39ad 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -622,6 +622,7 @@ struct radeon_ring {
>         unsigned                rptr;
>         unsigned                rptr_offs;
>         unsigned                rptr_reg;
> +       unsigned                rptr_save_reg;
>         unsigned                wptr;
>         unsigned                wptr_old;
>         unsigned                wptr_reg;
> diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
> index 0873834..ce8eb9d 100644
> --- a/drivers/gpu/drm/radeon/radeon_ring.c
> +++ b/drivers/gpu/drm/radeon/radeon_ring.c
> @@ -451,6 +451,10 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data)
>         count = (ring->ring_size / 4) - ring->ring_free_dw;
>         seq_printf(m, "wptr(0x%04x): 0x%08x\n", ring->wptr_reg, RREG32(ring->wptr_reg));
>         seq_printf(m, "rptr(0x%04x): 0x%08x\n", ring->rptr_reg, RREG32(ring->rptr_reg));
> +       if (ring->rptr_save_reg) {
> +               seq_printf(m, "rptr next(0x%04x): 0x%08x\n", ring->rptr_save_reg,
> +                          RREG32(ring->rptr_save_reg));
> +       }
>         seq_printf(m, "driver's copy of the wptr: 0x%08x\n", ring->wptr);
>         seq_printf(m, "driver's copy of the rptr: 0x%08x\n", ring->rptr);
>         seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
> diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
> index b4b1256..eb4704e 100644
> --- a/drivers/gpu/drm/radeon/rv770.c
> +++ b/drivers/gpu/drm/radeon/rv770.c
> @@ -358,8 +358,10 @@ static int rv770_cp_load_microcode(struct radeon_device *rdev)
>
>  void r700_cp_fini(struct radeon_device *rdev)
>  {
> +       struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
>         r700_cp_stop(rdev);
> -       radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
> +       radeon_ring_fini(rdev, ring);
> +       radeon_scratch_free(rdev, ring->rptr_save_reg);
>  }
>
>  /*
> diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
> index f61b550..50efafb 100644
> --- a/drivers/gpu/drm/radeon/si.c
> +++ b/drivers/gpu/drm/radeon/si.c
> @@ -1765,6 +1765,12 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
>         struct radeon_ring *ring = &rdev->ring[ib->ring];
>         u32 header;
>
> +       if (ring->rptr_save_reg) {
> +               uint32_t next_rptr = ring->wptr + 2 + 4 + 8;
> +               radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
> +               radeon_ring_write(ring, next_rptr);
> +       }
> +
>         if (ib->is_const_ib)
>                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
>         else
> @@ -1917,10 +1923,20 @@ static int si_cp_start(struct radeon_device *rdev)
>
>  static void si_cp_fini(struct radeon_device *rdev)
>  {
> +       struct radeon_ring *ring;
>         si_cp_enable(rdev, false);
> -       radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
> -       radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
> -       radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
> +
> +       ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
> +       radeon_ring_fini(rdev, ring);
> +       radeon_scratch_free(rdev, ring->rptr_save_reg);
> +
> +       ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
> +       radeon_ring_fini(rdev, ring);
> +       radeon_scratch_free(rdev, ring->rptr_save_reg);
> +
> +       ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
> +       radeon_ring_fini(rdev, ring);
> +       radeon_scratch_free(rdev, ring->rptr_save_reg);
>  }
>
>  static int si_cp_resume(struct radeon_device *rdev)
> --
> 1.7.9.5
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
Christian König July 13, 2012, 9:09 a.m. UTC | #2
On 12.07.2012 18:36, Alex Deucher wrote:
> On Thu, Jul 12, 2012 at 12:12 PM, Christian König
> <deathsimple@vodafone.de> wrote:
>> Before emitting any indirect buffer, emit the offset of the next
>> valid ring content if any. This allow code that want to resume
>> ring to resume ring right after ib that caused GPU lockup.
>>
>> v2: use scratch registers instead of storing it into memory
>> v3: skip over the surface sync for ni and si as well
>>
>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>> Signed-off-by: Christian König <deathsimple@vodafone.de>
>> ---
>>   drivers/gpu/drm/radeon/evergreen.c   |    8 +++++++-
>>   drivers/gpu/drm/radeon/ni.c          |   11 ++++++++++-
>>   drivers/gpu/drm/radeon/r600.c        |   18 ++++++++++++++++--
>>   drivers/gpu/drm/radeon/radeon.h      |    1 +
>>   drivers/gpu/drm/radeon/radeon_ring.c |    4 ++++
>>   drivers/gpu/drm/radeon/rv770.c       |    4 +++-
>>   drivers/gpu/drm/radeon/si.c          |   22 +++++++++++++++++++---
>>   7 files changed, 60 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
>> index f39b900..40de347 100644
>> --- a/drivers/gpu/drm/radeon/evergreen.c
>> +++ b/drivers/gpu/drm/radeon/evergreen.c
>> @@ -1368,7 +1368,13 @@ void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
>>          /* set to DX10/11 mode */
>>          radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
>>          radeon_ring_write(ring, 1);
>> -       /* FIXME: implement */
>> +
>> +       if (ring->rptr_save_reg) {
>> +               uint32_t next_rptr = ring->wptr + 2 + 4;
>> +               radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
>> +               radeon_ring_write(ring, next_rptr);
>> +       }
> On r600 and newer please use SET_CONFIG_REG rather than Packet0.
Why? Please note that it's on purpose that this doesn't interfere with
the top/bottom of pipe handling and the draw commands, i.e. the register
write isn't associated with drawing but instead just marks the beginning
of parsing the IB.

Christian.
>
> Alex
>
>> +
>>          radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
>>          radeon_ring_write(ring,
>>   #ifdef __BIG_ENDIAN
>> diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
>> index f2afefb..5b7ce2c 100644
>> --- a/drivers/gpu/drm/radeon/ni.c
>> +++ b/drivers/gpu/drm/radeon/ni.c
>> @@ -855,6 +855,13 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
>>          /* set to DX10/11 mode */
>>          radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
>>          radeon_ring_write(ring, 1);
>> +
>> +       if (ring->rptr_save_reg) {
>> +               uint32_t next_rptr = ring->wptr + 2 + 4 + 8;
>> +               radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
>> +               radeon_ring_write(ring, next_rptr);
>> +       }
>> +
>>          radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
>>          radeon_ring_write(ring,
>>   #ifdef __BIG_ENDIAN
>> @@ -981,8 +988,10 @@ static int cayman_cp_start(struct radeon_device *rdev)
>>
>>   static void cayman_cp_fini(struct radeon_device *rdev)
>>   {
>> +       struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
>>          cayman_cp_enable(rdev, false);
>> -       radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
>> +       radeon_ring_fini(rdev, ring);
>> +       radeon_scratch_free(rdev, ring->rptr_save_reg);
>>   }
>>
>>   int cayman_cp_resume(struct radeon_device *rdev)
>> diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
>> index c808fa9..74fca15 100644
>> --- a/drivers/gpu/drm/radeon/r600.c
>> +++ b/drivers/gpu/drm/radeon/r600.c
>> @@ -2155,18 +2155,27 @@ int r600_cp_resume(struct radeon_device *rdev)
>>   void r600_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size)
>>   {
>>          u32 rb_bufsz;
>> +       int r;
>>
>>          /* Align ring size */
>>          rb_bufsz = drm_order(ring_size / 8);
>>          ring_size = (1 << (rb_bufsz + 1)) * 4;
>>          ring->ring_size = ring_size;
>>          ring->align_mask = 16 - 1;
>> +
>> +       r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
>> +       if (r) {
>> +               DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
>> +               ring->rptr_save_reg = 0;
>> +       }
>>   }
>>
>>   void r600_cp_fini(struct radeon_device *rdev)
>>   {
>> +       struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
>>          r600_cp_stop(rdev);
>> -       radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
>> +       radeon_ring_fini(rdev, ring);
>> +       radeon_scratch_free(rdev, ring->rptr_save_reg);
>>   }
>>
>>
>> @@ -2568,7 +2577,12 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
>>   {
>>          struct radeon_ring *ring = &rdev->ring[ib->ring];
>>
>> -       /* FIXME: implement */
>> +       if (ring->rptr_save_reg) {
>> +               uint32_t next_rptr = ring->wptr + 2 + 4;
>> +               radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
>> +               radeon_ring_write(ring, next_rptr);
>> +       }
>> +
>>          radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
>>          radeon_ring_write(ring,
>>   #ifdef __BIG_ENDIAN
>> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
>> index 872270c..64d39ad 100644
>> --- a/drivers/gpu/drm/radeon/radeon.h
>> +++ b/drivers/gpu/drm/radeon/radeon.h
>> @@ -622,6 +622,7 @@ struct radeon_ring {
>>          unsigned                rptr;
>>          unsigned                rptr_offs;
>>          unsigned                rptr_reg;
>> +       unsigned                rptr_save_reg;
>>          unsigned                wptr;
>>          unsigned                wptr_old;
>>          unsigned                wptr_reg;
>> diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
>> index 0873834..ce8eb9d 100644
>> --- a/drivers/gpu/drm/radeon/radeon_ring.c
>> +++ b/drivers/gpu/drm/radeon/radeon_ring.c
>> @@ -451,6 +451,10 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data)
>>          count = (ring->ring_size / 4) - ring->ring_free_dw;
>>          seq_printf(m, "wptr(0x%04x): 0x%08x\n", ring->wptr_reg, RREG32(ring->wptr_reg));
>>          seq_printf(m, "rptr(0x%04x): 0x%08x\n", ring->rptr_reg, RREG32(ring->rptr_reg));
>> +       if (ring->rptr_save_reg) {
>> +               seq_printf(m, "rptr next(0x%04x): 0x%08x\n", ring->rptr_save_reg,
>> +                          RREG32(ring->rptr_save_reg));
>> +       }
>>          seq_printf(m, "driver's copy of the wptr: 0x%08x\n", ring->wptr);
>>          seq_printf(m, "driver's copy of the rptr: 0x%08x\n", ring->rptr);
>>          seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
>> diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
>> index b4b1256..eb4704e 100644
>> --- a/drivers/gpu/drm/radeon/rv770.c
>> +++ b/drivers/gpu/drm/radeon/rv770.c
>> @@ -358,8 +358,10 @@ static int rv770_cp_load_microcode(struct radeon_device *rdev)
>>
>>   void r700_cp_fini(struct radeon_device *rdev)
>>   {
>> +       struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
>>          r700_cp_stop(rdev);
>> -       radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
>> +       radeon_ring_fini(rdev, ring);
>> +       radeon_scratch_free(rdev, ring->rptr_save_reg);
>>   }
>>
>>   /*
>> diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
>> index f61b550..50efafb 100644
>> --- a/drivers/gpu/drm/radeon/si.c
>> +++ b/drivers/gpu/drm/radeon/si.c
>> @@ -1765,6 +1765,12 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
>>          struct radeon_ring *ring = &rdev->ring[ib->ring];
>>          u32 header;
>>
>> +       if (ring->rptr_save_reg) {
>> +               uint32_t next_rptr = ring->wptr + 2 + 4 + 8;
>> +               radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
>> +               radeon_ring_write(ring, next_rptr);
>> +       }
>> +
>>          if (ib->is_const_ib)
>>                  header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
>>          else
>> @@ -1917,10 +1923,20 @@ static int si_cp_start(struct radeon_device *rdev)
>>
>>   static void si_cp_fini(struct radeon_device *rdev)
>>   {
>> +       struct radeon_ring *ring;
>>          si_cp_enable(rdev, false);
>> -       radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
>> -       radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
>> -       radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
>> +
>> +       ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
>> +       radeon_ring_fini(rdev, ring);
>> +       radeon_scratch_free(rdev, ring->rptr_save_reg);
>> +
>> +       ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
>> +       radeon_ring_fini(rdev, ring);
>> +       radeon_scratch_free(rdev, ring->rptr_save_reg);
>> +
>> +       ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
>> +       radeon_ring_fini(rdev, ring);
>> +       radeon_scratch_free(rdev, ring->rptr_save_reg);
>>   }
>>
>>   static int si_cp_resume(struct radeon_device *rdev)
>> --
>> 1.7.9.5
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/dri-devel
Alex Deucher July 13, 2012, 12:27 p.m. UTC | #3
On Fri, Jul 13, 2012 at 5:09 AM, Christian König
<deathsimple@vodafone.de> wrote:
> On 12.07.2012 18:36, Alex Deucher wrote:
>>
>> On Thu, Jul 12, 2012 at 12:12 PM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>>
>>> Before emitting any indirect buffer, emit the offset of the next
>>> valid ring content if any. This allow code that want to resume
>>> ring to resume ring right after ib that caused GPU lockup.
>>>
>>> v2: use scratch registers instead of storing it into memory
>>> v3: skip over the surface sync for ni and si as well
>>>
>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>> Signed-off-by: Christian König <deathsimple@vodafone.de>
>>> ---
>>>   drivers/gpu/drm/radeon/evergreen.c   |    8 +++++++-
>>>   drivers/gpu/drm/radeon/ni.c          |   11 ++++++++++-
>>>   drivers/gpu/drm/radeon/r600.c        |   18 ++++++++++++++++--
>>>   drivers/gpu/drm/radeon/radeon.h      |    1 +
>>>   drivers/gpu/drm/radeon/radeon_ring.c |    4 ++++
>>>   drivers/gpu/drm/radeon/rv770.c       |    4 +++-
>>>   drivers/gpu/drm/radeon/si.c          |   22 +++++++++++++++++++---
>>>   7 files changed, 60 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/radeon/evergreen.c
>>> b/drivers/gpu/drm/radeon/evergreen.c
>>> index f39b900..40de347 100644
>>> --- a/drivers/gpu/drm/radeon/evergreen.c
>>> +++ b/drivers/gpu/drm/radeon/evergreen.c
>>> @@ -1368,7 +1368,13 @@ void evergreen_ring_ib_execute(struct
>>> radeon_device *rdev, struct radeon_ib *ib)
>>>          /* set to DX10/11 mode */
>>>          radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
>>>          radeon_ring_write(ring, 1);
>>> -       /* FIXME: implement */
>>> +
>>> +       if (ring->rptr_save_reg) {
>>> +               uint32_t next_rptr = ring->wptr + 2 + 4;
>>> +               radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
>>> +               radeon_ring_write(ring, next_rptr);
>>> +       }
>>
>> On r600 and newer please use SET_CONFIG_REG rather than Packet0.
>
> Why? Please note that it's on purpose that this doesn't interfere with the
> top/bottom of pipe handling and the draw commands, e.g. the register write
> isn't associated with drawing but instead just marks the beginning of
> parsing the IB.

Packet0s have been semi-deprecated since r600.  They still work,
but the CP guys recommend using the appropriate packet3 whenever
possible.

Alex
Christian König July 13, 2012, 1:46 p.m. UTC | #4
On 13.07.2012 14:27, Alex Deucher wrote:
> On Fri, Jul 13, 2012 at 5:09 AM, Christian König
> <deathsimple@vodafone.de> wrote:
>> On 12.07.2012 18:36, Alex Deucher wrote:
>>> On Thu, Jul 12, 2012 at 12:12 PM, Christian König
>>> <deathsimple@vodafone.de> wrote:
>>>> Before emitting any indirect buffer, emit the offset of the next
>>>> valid ring content if any. This allow code that want to resume
>>>> ring to resume ring right after ib that caused GPU lockup.
>>>>
>>>> v2: use scratch registers instead of storing it into memory
>>>> v3: skip over the surface sync for ni and si as well
>>>>
>>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>>> Signed-off-by: Christian König <deathsimple@vodafone.de>
>>>> ---
>>>>    drivers/gpu/drm/radeon/evergreen.c   |    8 +++++++-
>>>>    drivers/gpu/drm/radeon/ni.c          |   11 ++++++++++-
>>>>    drivers/gpu/drm/radeon/r600.c        |   18 ++++++++++++++++--
>>>>    drivers/gpu/drm/radeon/radeon.h      |    1 +
>>>>    drivers/gpu/drm/radeon/radeon_ring.c |    4 ++++
>>>>    drivers/gpu/drm/radeon/rv770.c       |    4 +++-
>>>>    drivers/gpu/drm/radeon/si.c          |   22 +++++++++++++++++++---
>>>>    7 files changed, 60 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/radeon/evergreen.c
>>>> b/drivers/gpu/drm/radeon/evergreen.c
>>>> index f39b900..40de347 100644
>>>> --- a/drivers/gpu/drm/radeon/evergreen.c
>>>> +++ b/drivers/gpu/drm/radeon/evergreen.c
>>>> @@ -1368,7 +1368,13 @@ void evergreen_ring_ib_execute(struct
>>>> radeon_device *rdev, struct radeon_ib *ib)
>>>>           /* set to DX10/11 mode */
>>>>           radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
>>>>           radeon_ring_write(ring, 1);
>>>> -       /* FIXME: implement */
>>>> +
>>>> +       if (ring->rptr_save_reg) {
>>>> +               uint32_t next_rptr = ring->wptr + 2 + 4;
>>>> +               radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
>>>> +               radeon_ring_write(ring, next_rptr);
>>>> +       }
>>> On r600 and newer please use SET_CONFIG_REG rather than Packet0.
>> Why? Please note that it's on purpose that this doesn't interfere with the
>> top/bottom of pipe handling and the draw commands, e.g. the register write
>> isn't associated with drawing but instead just marks the beginning of
>> parsing the IB.
> Packet0's are have been semi-deprecated since r600.  They still work,
> but the CP guys recommend using the appropriate packet3 whenever
> possible.
Ok, that makes sense.

Any further comments on the patchset, or can I send that to Dave for 
merging now?

Cheers,
Christian.
Alex Deucher July 13, 2012, 1:57 p.m. UTC | #5
On Fri, Jul 13, 2012 at 9:46 AM, Christian König
<deathsimple@vodafone.de> wrote:
> On 13.07.2012 14:27, Alex Deucher wrote:
>>
>> On Fri, Jul 13, 2012 at 5:09 AM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>>
>>> On 12.07.2012 18:36, Alex Deucher wrote:
>>>>
>>>> On Thu, Jul 12, 2012 at 12:12 PM, Christian König
>>>> <deathsimple@vodafone.de> wrote:
>>>>>
>>>>> Before emitting any indirect buffer, emit the offset of the next
>>>>> valid ring content if any. This allow code that want to resume
>>>>> ring to resume ring right after ib that caused GPU lockup.
>>>>>
>>>>> v2: use scratch registers instead of storing it into memory
>>>>> v3: skip over the surface sync for ni and si as well
>>>>>
>>>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>>>> Signed-off-by: Christian König <deathsimple@vodafone.de>
>>>>> ---
>>>>>    drivers/gpu/drm/radeon/evergreen.c   |    8 +++++++-
>>>>>    drivers/gpu/drm/radeon/ni.c          |   11 ++++++++++-
>>>>>    drivers/gpu/drm/radeon/r600.c        |   18 ++++++++++++++++--
>>>>>    drivers/gpu/drm/radeon/radeon.h      |    1 +
>>>>>    drivers/gpu/drm/radeon/radeon_ring.c |    4 ++++
>>>>>    drivers/gpu/drm/radeon/rv770.c       |    4 +++-
>>>>>    drivers/gpu/drm/radeon/si.c          |   22 +++++++++++++++++++---
>>>>>    7 files changed, 60 insertions(+), 8 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/radeon/evergreen.c
>>>>> b/drivers/gpu/drm/radeon/evergreen.c
>>>>> index f39b900..40de347 100644
>>>>> --- a/drivers/gpu/drm/radeon/evergreen.c
>>>>> +++ b/drivers/gpu/drm/radeon/evergreen.c
>>>>> @@ -1368,7 +1368,13 @@ void evergreen_ring_ib_execute(struct
>>>>> radeon_device *rdev, struct radeon_ib *ib)
>>>>>           /* set to DX10/11 mode */
>>>>>           radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
>>>>>           radeon_ring_write(ring, 1);
>>>>> -       /* FIXME: implement */
>>>>> +
>>>>> +       if (ring->rptr_save_reg) {
>>>>> +               uint32_t next_rptr = ring->wptr + 2 + 4;
>>>>> +               radeon_ring_write(ring, PACKET0(ring->rptr_save_reg,
>>>>> 0));
>>>>> +               radeon_ring_write(ring, next_rptr);
>>>>> +       }
>>>>
>>>> On r600 and newer please use SET_CONFIG_REG rather than Packet0.
>>>
>>> Why? Please note that it's on purpose that this doesn't interfere with
>>> the
>>> top/bottom of pipe handling and the draw commands, e.g. the register
>>> write
>>> isn't associated with drawing but instead just marks the beginning of
>>> parsing the IB.
>>
>> Packet0's are have been semi-deprecated since r600.  They still work,
>> but the CP guys recommend using the appropriate packet3 whenever
>> possible.
>
> Ok, that makes sense.
>
> Any further comments on the patchset, or can I send that to Dave for merging
> now?

Other than that, it looks good to me.  For the series:

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Alex Deucher July 16, 2012, 11:13 p.m. UTC | #6
On Fri, Jul 13, 2012 at 9:57 AM, Alex Deucher <alexdeucher@gmail.com> wrote:
> On Fri, Jul 13, 2012 at 9:46 AM, Christian König
> <deathsimple@vodafone.de> wrote:
>> On 13.07.2012 14:27, Alex Deucher wrote:
>>>
>>> On Fri, Jul 13, 2012 at 5:09 AM, Christian König
>>> <deathsimple@vodafone.de> wrote:
>>>>
>>>> On 12.07.2012 18:36, Alex Deucher wrote:
>>>>>
>>>>> On Thu, Jul 12, 2012 at 12:12 PM, Christian König
>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>
>>>>>> Before emitting any indirect buffer, emit the offset of the next
>>>>>> valid ring content if any. This allow code that want to resume
>>>>>> ring to resume ring right after ib that caused GPU lockup.
>>>>>>
>>>>>> v2: use scratch registers instead of storing it into memory
>>>>>> v3: skip over the surface sync for ni and si as well
>>>>>>
>>>>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>>>>> Signed-off-by: Christian König <deathsimple@vodafone.de>
>>>>>> ---
>>>>>>    drivers/gpu/drm/radeon/evergreen.c   |    8 +++++++-
>>>>>>    drivers/gpu/drm/radeon/ni.c          |   11 ++++++++++-
>>>>>>    drivers/gpu/drm/radeon/r600.c        |   18 ++++++++++++++++--
>>>>>>    drivers/gpu/drm/radeon/radeon.h      |    1 +
>>>>>>    drivers/gpu/drm/radeon/radeon_ring.c |    4 ++++
>>>>>>    drivers/gpu/drm/radeon/rv770.c       |    4 +++-
>>>>>>    drivers/gpu/drm/radeon/si.c          |   22 +++++++++++++++++++---
>>>>>>    7 files changed, 60 insertions(+), 8 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/radeon/evergreen.c
>>>>>> b/drivers/gpu/drm/radeon/evergreen.c
>>>>>> index f39b900..40de347 100644
>>>>>> --- a/drivers/gpu/drm/radeon/evergreen.c
>>>>>> +++ b/drivers/gpu/drm/radeon/evergreen.c
>>>>>> @@ -1368,7 +1368,13 @@ void evergreen_ring_ib_execute(struct
>>>>>> radeon_device *rdev, struct radeon_ib *ib)
>>>>>>           /* set to DX10/11 mode */
>>>>>>           radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
>>>>>>           radeon_ring_write(ring, 1);
>>>>>> -       /* FIXME: implement */
>>>>>> +
>>>>>> +       if (ring->rptr_save_reg) {
>>>>>> +               uint32_t next_rptr = ring->wptr + 2 + 4;
>>>>>> +               radeon_ring_write(ring, PACKET0(ring->rptr_save_reg,
>>>>>> 0));
>>>>>> +               radeon_ring_write(ring, next_rptr);
>>>>>> +       }
>>>>>
>>>>> On r600 and newer please use SET_CONFIG_REG rather than Packet0.
>>>>
>>>> Why? Please note that it's on purpose that this doesn't interfere with
>>>> the
>>>> top/bottom of pipe handling and the draw commands, e.g. the register
>>>> write
>>>> isn't associated with drawing but instead just marks the beginning of
>>>> parsing the IB.
>>>
>>> Packet0's are have been semi-deprecated since r600.  They still work,
>>> but the CP guys recommend using the appropriate packet3 whenever
>>> possible.
>>
>> Ok, that makes sense.
>>
>> Any further comments on the patchset, or can I send that to Dave for merging
>> now?
>
> Other than that, it looks good to me.  For the series:
>
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

Thinking about this more, we should probably support memory
locations as well, in case there are rings that can't write to
registers. Also, since most things now use memory (fences, etc.), I'm
not sure we'll always have scratch regs to use.

Alex
Christian König July 17, 2012, 8:49 a.m. UTC | #7
On 17.07.2012 01:13, Alex Deucher wrote:
> On Fri, Jul 13, 2012 at 9:57 AM, Alex Deucher <alexdeucher@gmail.com> wrote:
>> On Fri, Jul 13, 2012 at 9:46 AM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>> On 13.07.2012 14:27, Alex Deucher wrote:
>>>> On Fri, Jul 13, 2012 at 5:09 AM, Christian König
>>>> <deathsimple@vodafone.de> wrote:
>>>>> On 12.07.2012 18:36, Alex Deucher wrote:
>>>>>> On Thu, Jul 12, 2012 at 12:12 PM, Christian König
>>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>> Before emitting any indirect buffer, emit the offset of the next
>>>>>>> valid ring content if any. This allow code that want to resume
>>>>>>> ring to resume ring right after ib that caused GPU lockup.
>>>>>>>
>>>>>>> v2: use scratch registers instead of storing it into memory
>>>>>>> v3: skip over the surface sync for ni and si as well
>>>>>>>
>>>>>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>>>>>> Signed-off-by: Christian König <deathsimple@vodafone.de>
>>>>>>> ---
>>>>>>>     drivers/gpu/drm/radeon/evergreen.c   |    8 +++++++-
>>>>>>>     drivers/gpu/drm/radeon/ni.c          |   11 ++++++++++-
>>>>>>>     drivers/gpu/drm/radeon/r600.c        |   18 ++++++++++++++++--
>>>>>>>     drivers/gpu/drm/radeon/radeon.h      |    1 +
>>>>>>>     drivers/gpu/drm/radeon/radeon_ring.c |    4 ++++
>>>>>>>     drivers/gpu/drm/radeon/rv770.c       |    4 +++-
>>>>>>>     drivers/gpu/drm/radeon/si.c          |   22 +++++++++++++++++++---
>>>>>>>     7 files changed, 60 insertions(+), 8 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/radeon/evergreen.c
>>>>>>> b/drivers/gpu/drm/radeon/evergreen.c
>>>>>>> index f39b900..40de347 100644
>>>>>>> --- a/drivers/gpu/drm/radeon/evergreen.c
>>>>>>> +++ b/drivers/gpu/drm/radeon/evergreen.c
>>>>>>> @@ -1368,7 +1368,13 @@ void evergreen_ring_ib_execute(struct
>>>>>>> radeon_device *rdev, struct radeon_ib *ib)
>>>>>>>            /* set to DX10/11 mode */
>>>>>>>            radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
>>>>>>>            radeon_ring_write(ring, 1);
>>>>>>> -       /* FIXME: implement */
>>>>>>> +
>>>>>>> +       if (ring->rptr_save_reg) {
>>>>>>> +               uint32_t next_rptr = ring->wptr + 2 + 4;
>>>>>>> +               radeon_ring_write(ring, PACKET0(ring->rptr_save_reg,
>>>>>>> 0));
>>>>>>> +               radeon_ring_write(ring, next_rptr);
>>>>>>> +       }
>>>>>> On r600 and newer please use SET_CONFIG_REG rather than Packet0.
>>>>> Why? Please note that it's on purpose that this doesn't interfere with
>>>>> the
>>>>> top/bottom of pipe handling and the draw commands, e.g. the register
>>>>> write
>>>>> isn't associated with drawing but instead just marks the beginning of
>>>>> parsing the IB.
>>>> Packet0's are have been semi-deprecated since r600.  They still work,
>>>> but the CP guys recommend using the appropriate packet3 whenever
>>>> possible.
>>> Ok, that makes sense.
>>>
>>> Any further comments on the patchset, or can I send that to Dave for merging
>>> now?
>> Other than that, it looks good to me.  For the series:
>>
>> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
> Thinking about this more, we should probably support a memory
> locations as well in case there are rings that can't write to
> registers and since most things now use memory (fences, etc.), I'm not
> sure we'll always have scratch regs to use.
The number of scratch registers could get a bit tight if we really get
so many rings with the next hw generation, but I think that this should
do it for now.

We can always extend it in the future to also support a memory location,
but then we also need to make sure that writing to that memory location
really works as expected. Just remember the trouble we had with AGP and
scratch writebacks.

Christian.


>
> Alex
>
Alex Deucher July 17, 2012, 12:51 p.m. UTC | #8
On Tue, Jul 17, 2012 at 4:49 AM, Christian König
<deathsimple@vodafone.de> wrote:
> On 17.07.2012 01:13, Alex Deucher wrote:
>>
>> On Fri, Jul 13, 2012 at 9:57 AM, Alex Deucher <alexdeucher@gmail.com>
>> wrote:
>>>
>>> On Fri, Jul 13, 2012 at 9:46 AM, Christian König
>>> <deathsimple@vodafone.de> wrote:
>>>>
>>>> On 13.07.2012 14:27, Alex Deucher wrote:
>>>>>
>>>>> On Fri, Jul 13, 2012 at 5:09 AM, Christian König
>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>
>>>>>> On 12.07.2012 18:36, Alex Deucher wrote:
>>>>>>>
>>>>>>> On Thu, Jul 12, 2012 at 12:12 PM, Christian König
>>>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>>>
>>>>>>>> Before emitting any indirect buffer, emit the offset of the next
>>>>>>>> valid ring content if any. This allow code that want to resume
>>>>>>>> ring to resume ring right after ib that caused GPU lockup.
>>>>>>>>
>>>>>>>> v2: use scratch registers instead of storing it into memory
>>>>>>>> v3: skip over the surface sync for ni and si as well
>>>>>>>>
>>>>>>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>>>>>>> Signed-off-by: Christian König <deathsimple@vodafone.de>
>>>>>>>> ---
>>>>>>>>     drivers/gpu/drm/radeon/evergreen.c   |    8 +++++++-
>>>>>>>>     drivers/gpu/drm/radeon/ni.c          |   11 ++++++++++-
>>>>>>>>     drivers/gpu/drm/radeon/r600.c        |   18 ++++++++++++++++--
>>>>>>>>     drivers/gpu/drm/radeon/radeon.h      |    1 +
>>>>>>>>     drivers/gpu/drm/radeon/radeon_ring.c |    4 ++++
>>>>>>>>     drivers/gpu/drm/radeon/rv770.c       |    4 +++-
>>>>>>>>     drivers/gpu/drm/radeon/si.c          |   22
>>>>>>>> +++++++++++++++++++---
>>>>>>>>     7 files changed, 60 insertions(+), 8 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/radeon/evergreen.c
>>>>>>>> b/drivers/gpu/drm/radeon/evergreen.c
>>>>>>>> index f39b900..40de347 100644
>>>>>>>> --- a/drivers/gpu/drm/radeon/evergreen.c
>>>>>>>> +++ b/drivers/gpu/drm/radeon/evergreen.c
>>>>>>>> @@ -1368,7 +1368,13 @@ void evergreen_ring_ib_execute(struct
>>>>>>>> radeon_device *rdev, struct radeon_ib *ib)
>>>>>>>>            /* set to DX10/11 mode */
>>>>>>>>            radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL,
>>>>>>>> 0));
>>>>>>>>            radeon_ring_write(ring, 1);
>>>>>>>> -       /* FIXME: implement */
>>>>>>>> +
>>>>>>>> +       if (ring->rptr_save_reg) {
>>>>>>>> +               uint32_t next_rptr = ring->wptr + 2 + 4;
>>>>>>>> +               radeon_ring_write(ring, PACKET0(ring->rptr_save_reg,
>>>>>>>> 0));
>>>>>>>> +               radeon_ring_write(ring, next_rptr);
>>>>>>>> +       }
>>>>>>>
>>>>>>> On r600 and newer please use SET_CONFIG_REG rather than Packet0.
>>>>>>
>>>>>> Why? Please note that it's on purpose that this doesn't interfere with
>>>>>> the
>>>>>> top/bottom of pipe handling and the draw commands, e.g. the register
>>>>>> write
>>>>>> isn't associated with drawing but instead just marks the beginning of
>>>>>> parsing the IB.
>>>>>
>>>>> Packet0's are have been semi-deprecated since r600.  They still work,
>>>>> but the CP guys recommend using the appropriate packet3 whenever
>>>>> possible.
>>>>
>>>> Ok, that makes sense.
>>>>
>>>> Any further comments on the patchset, or can I send that to Dave for
>>>> merging
>>>> now?
>>>
>>> Other than that, it looks good to me.  For the series:
>>>
>>> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
>>
>> Thinking about this more, we should probably support a memory
>> locations as well in case there are rings that can't write to
>> registers and since most things now use memory (fences, etc.), I'm not
>> sure we'll always have scratch regs to use.
>
> The number of scratch registers could get a bit tight if we really get so
> much rings with the next hw generation, but I thing that this should do it
> for now.
>
> We can always extend it in the future to also support a memory location, but
> then we also make sure that writing to that memory location really works as
> expected. Just remember the trouble we had with AGP and scratch writebacks.
>

Ok, I'll put a new patch on top when we need it.

Alex

> Christian.
>
>
>>
>> Alex
>>
>
>
Jerome Glisse July 17, 2012, 2:17 p.m. UTC | #9
On Tue, Jul 17, 2012 at 8:51 AM, Alex Deucher <alexdeucher@gmail.com> wrote:
> On Tue, Jul 17, 2012 at 4:49 AM, Christian König
> <deathsimple@vodafone.de> wrote:
>> On 17.07.2012 01:13, Alex Deucher wrote:
>>>
>>> On Fri, Jul 13, 2012 at 9:57 AM, Alex Deucher <alexdeucher@gmail.com>
>>> wrote:
>>>>
>>>> On Fri, Jul 13, 2012 at 9:46 AM, Christian König
>>>> <deathsimple@vodafone.de> wrote:
>>>>>
>>>>> On 13.07.2012 14:27, Alex Deucher wrote:
>>>>>>
>>>>>> On Fri, Jul 13, 2012 at 5:09 AM, Christian König
>>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>>
>>>>>>> On 12.07.2012 18:36, Alex Deucher wrote:
>>>>>>>>
>>>>>>>> On Thu, Jul 12, 2012 at 12:12 PM, Christian König
>>>>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>>>>
>>>>>>>>> Before emitting any indirect buffer, emit the offset of the next
>>>>>>>>> valid ring content if any. This allow code that want to resume
>>>>>>>>> ring to resume ring right after ib that caused GPU lockup.
>>>>>>>>>
>>>>>>>>> v2: use scratch registers instead of storing it into memory
>>>>>>>>> v3: skip over the surface sync for ni and si as well
>>>>>>>>>
>>>>>>>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>>>>>>>> Signed-off-by: Christian König <deathsimple@vodafone.de>
>>>>>>>>> ---
>>>>>>>>>     drivers/gpu/drm/radeon/evergreen.c   |    8 +++++++-
>>>>>>>>>     drivers/gpu/drm/radeon/ni.c          |   11 ++++++++++-
>>>>>>>>>     drivers/gpu/drm/radeon/r600.c        |   18 ++++++++++++++++--
>>>>>>>>>     drivers/gpu/drm/radeon/radeon.h      |    1 +
>>>>>>>>>     drivers/gpu/drm/radeon/radeon_ring.c |    4 ++++
>>>>>>>>>     drivers/gpu/drm/radeon/rv770.c       |    4 +++-
>>>>>>>>>     drivers/gpu/drm/radeon/si.c          |   22
>>>>>>>>> +++++++++++++++++++---
>>>>>>>>>     7 files changed, 60 insertions(+), 8 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/gpu/drm/radeon/evergreen.c
>>>>>>>>> b/drivers/gpu/drm/radeon/evergreen.c
>>>>>>>>> index f39b900..40de347 100644
>>>>>>>>> --- a/drivers/gpu/drm/radeon/evergreen.c
>>>>>>>>> +++ b/drivers/gpu/drm/radeon/evergreen.c
>>>>>>>>> @@ -1368,7 +1368,13 @@ void evergreen_ring_ib_execute(struct
>>>>>>>>> radeon_device *rdev, struct radeon_ib *ib)
>>>>>>>>>            /* set to DX10/11 mode */
>>>>>>>>>            radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL,
>>>>>>>>> 0));
>>>>>>>>>            radeon_ring_write(ring, 1);
>>>>>>>>> -       /* FIXME: implement */
>>>>>>>>> +
>>>>>>>>> +       if (ring->rptr_save_reg) {
>>>>>>>>> +               uint32_t next_rptr = ring->wptr + 2 + 4;
>>>>>>>>> +               radeon_ring_write(ring, PACKET0(ring->rptr_save_reg,
>>>>>>>>> 0));
>>>>>>>>> +               radeon_ring_write(ring, next_rptr);
>>>>>>>>> +       }
>>>>>>>>
>>>>>>>> On r600 and newer please use SET_CONFIG_REG rather than Packet0.
>>>>>>>
>>>>>>> Why? Please note that it's on purpose that this doesn't interfere with
>>>>>>> the
>>>>>>> top/bottom of pipe handling and the draw commands, e.g. the register
>>>>>>> write
>>>>>>> isn't associated with drawing but instead just marks the beginning of
>>>>>>> parsing the IB.
>>>>>>
>>>>>> Packet0's are have been semi-deprecated since r600.  They still work,
>>>>>> but the CP guys recommend using the appropriate packet3 whenever
>>>>>> possible.
>>>>>
>>>>> Ok, that makes sense.
>>>>>
>>>>> Any further comments on the patchset, or can I send that to Dave for
>>>>> merging
>>>>> now?
>>>>
>>>> Other than that, it looks good to me.  For the series:
>>>>
>>>> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
>>>
>>> Thinking about this more, we should probably support a memory
>>> locations as well in case there are rings that can't write to
>>> registers and since most things now use memory (fences, etc.), I'm not
>>> sure we'll always have scratch regs to use.
>>
>> The number of scratch registers could get a bit tight if we really get so
>> much rings with the next hw generation, but I thing that this should do it
>> for now.
>>
>> We can always extend it in the future to also support a memory location, but
>> then we also make sure that writing to that memory location really works as
>> expected. Just remember the trouble we had with AGP and scratch writebacks.
>>
>
> Ok, I'll put a new patch on top when we need it.
>
> Alex

My first version used a memory write, and I think we should forget about
AGP; this is never going to happen again (if I were in the mob I would
say that we made them an offer they could not refuse ;))

Cheers,
Jerome
Christian König July 17, 2012, 2:37 p.m. UTC | #10
On 17.07.2012 16:17, Jerome Glisse wrote:
> On Tue, Jul 17, 2012 at 8:51 AM, Alex Deucher <alexdeucher@gmail.com> wrote:
>> On Tue, Jul 17, 2012 at 4:49 AM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>> On 17.07.2012 01:13, Alex Deucher wrote:
>>>> On Fri, Jul 13, 2012 at 9:57 AM, Alex Deucher <alexdeucher@gmail.com>
>>>> wrote:
>>>>> On Fri, Jul 13, 2012 at 9:46 AM, Christian König
>>>>> <deathsimple@vodafone.de> wrote:
>>>>>> On 13.07.2012 14:27, Alex Deucher wrote:
>>>>>>> On Fri, Jul 13, 2012 at 5:09 AM, Christian König
>>>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>>> On 12.07.2012 18:36, Alex Deucher wrote:
>>>>>>>>> On Thu, Jul 12, 2012 at 12:12 PM, Christian König
>>>>>>>>> <deathsimple@vodafone.de> wrote:
>>>>>>>>>> Before emitting any indirect buffer, emit the offset of the next
>>>>>>>>>> valid ring content if any. This allow code that want to resume
>>>>>>>>>> ring to resume ring right after ib that caused GPU lockup.
>>>>>>>>>>
>>>>>>>>>> v2: use scratch registers instead of storing it into memory
>>>>>>>>>> v3: skip over the surface sync for ni and si as well
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>>>>>>>>> Signed-off-by: Christian König <deathsimple@vodafone.de>
>>>>>>>>>> ---
>>>>>>>>>>      drivers/gpu/drm/radeon/evergreen.c   |    8 +++++++-
>>>>>>>>>>      drivers/gpu/drm/radeon/ni.c          |   11 ++++++++++-
>>>>>>>>>>      drivers/gpu/drm/radeon/r600.c        |   18 ++++++++++++++++--
>>>>>>>>>>      drivers/gpu/drm/radeon/radeon.h      |    1 +
>>>>>>>>>>      drivers/gpu/drm/radeon/radeon_ring.c |    4 ++++
>>>>>>>>>>      drivers/gpu/drm/radeon/rv770.c       |    4 +++-
>>>>>>>>>>      drivers/gpu/drm/radeon/si.c          |   22
>>>>>>>>>> +++++++++++++++++++---
>>>>>>>>>>      7 files changed, 60 insertions(+), 8 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/drivers/gpu/drm/radeon/evergreen.c
>>>>>>>>>> b/drivers/gpu/drm/radeon/evergreen.c
>>>>>>>>>> index f39b900..40de347 100644
>>>>>>>>>> --- a/drivers/gpu/drm/radeon/evergreen.c
>>>>>>>>>> +++ b/drivers/gpu/drm/radeon/evergreen.c
>>>>>>>>>> @@ -1368,7 +1368,13 @@ void evergreen_ring_ib_execute(struct
>>>>>>>>>> radeon_device *rdev, struct radeon_ib *ib)
>>>>>>>>>>             /* set to DX10/11 mode */
>>>>>>>>>>             radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL,
>>>>>>>>>> 0));
>>>>>>>>>>             radeon_ring_write(ring, 1);
>>>>>>>>>> -       /* FIXME: implement */
>>>>>>>>>> +
>>>>>>>>>> +       if (ring->rptr_save_reg) {
>>>>>>>>>> +               uint32_t next_rptr = ring->wptr + 2 + 4;
>>>>>>>>>> +               radeon_ring_write(ring, PACKET0(ring->rptr_save_reg,
>>>>>>>>>> 0));
>>>>>>>>>> +               radeon_ring_write(ring, next_rptr);
>>>>>>>>>> +       }
>>>>>>>>> On r600 and newer please use SET_CONFIG_REG rather than Packet0.
>>>>>>>> Why? Please note that it's on purpose that this doesn't interfere with
>>>>>>>> the
>>>>>>>> top/bottom of pipe handling and the draw commands, e.g. the register
>>>>>>>> write
>>>>>>>> isn't associated with drawing but instead just marks the beginning of
>>>>>>>> parsing the IB.
>>>>>>> Packet0's are have been semi-deprecated since r600.  They still work,
>>>>>>> but the CP guys recommend using the appropriate packet3 whenever
>>>>>>> possible.
>>>>>> Ok, that makes sense.
>>>>>>
>>>>>> Any further comments on the patchset, or can I send that to Dave for
>>>>>> merging
>>>>>> now?
>>>>> Other than that, it looks good to me.  For the series:
>>>>>
>>>>> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
>>>> Thinking about this more, we should probably support a memory
>>>> locations as well in case there are rings that can't write to
>>>> registers and since most things now use memory (fences, etc.), I'm not
>>>> sure we'll always have scratch regs to use.
>>> The number of scratch registers could get a bit tight if we really get so
>>> much rings with the next hw generation, but I thing that this should do it
>>> for now.
>>>
>>> We can always extend it in the future to also support a memory location, but
>>> then we also make sure that writing to that memory location really works as
>>> expected. Just remember the trouble we had with AGP and scratch writebacks.
>>>
>> Ok, I'll put a new patch on top when we need it.
>>
>> Alex
> My first version used memory write and i think we should forget about
> AGP this will never gonna happen again (if i were in the mob i would
> say that we made them an offer they could not refuse ;))
LOL, yeah that's somewhat true. Well it just looked simpler to me to use 
a register instead of memory.

Feel free to use a memory write for the CP if the need really arises, but
keep in mind that we still have rings which can't do so.

Christian.

> Cheers,
> Jerome
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index f39b900..40de347 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1368,7 +1368,13 @@  void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 	/* set to DX10/11 mode */
 	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
 	radeon_ring_write(ring, 1);
-	/* FIXME: implement */
+
+	if (ring->rptr_save_reg) {
+		uint32_t next_rptr = ring->wptr + 2 + 4;
+		radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
+		radeon_ring_write(ring, next_rptr);
+	}
+
 	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	radeon_ring_write(ring,
 #ifdef __BIG_ENDIAN
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index f2afefb..5b7ce2c 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -855,6 +855,13 @@  void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 	/* set to DX10/11 mode */
 	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
 	radeon_ring_write(ring, 1);
+
+	if (ring->rptr_save_reg) {
+		uint32_t next_rptr = ring->wptr + 2 + 4 + 8;
+		radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
+		radeon_ring_write(ring, next_rptr);
+	}
+
 	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	radeon_ring_write(ring,
 #ifdef __BIG_ENDIAN
@@ -981,8 +988,10 @@  static int cayman_cp_start(struct radeon_device *rdev)
 
 static void cayman_cp_fini(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	cayman_cp_enable(rdev, false);
-	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
+	radeon_ring_fini(rdev, ring);
+	radeon_scratch_free(rdev, ring->rptr_save_reg);
 }
 
 int cayman_cp_resume(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index c808fa9..74fca15 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2155,18 +2155,27 @@  int r600_cp_resume(struct radeon_device *rdev)
 void r600_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size)
 {
 	u32 rb_bufsz;
+	int r;
 
 	/* Align ring size */
 	rb_bufsz = drm_order(ring_size / 8);
 	ring_size = (1 << (rb_bufsz + 1)) * 4;
 	ring->ring_size = ring_size;
 	ring->align_mask = 16 - 1;
+
+	r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
+	if (r) {
+		DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
+		ring->rptr_save_reg = 0;
+	}
 }
 
 void r600_cp_fini(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r600_cp_stop(rdev);
-	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
+	radeon_ring_fini(rdev, ring);
+	radeon_scratch_free(rdev, ring->rptr_save_reg);
 }
 
 
@@ -2568,7 +2577,12 @@  void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
 
-	/* FIXME: implement */
+	if (ring->rptr_save_reg) {
+		uint32_t next_rptr = ring->wptr + 2 + 4;
+		radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
+		radeon_ring_write(ring, next_rptr);
+	}
+
 	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	radeon_ring_write(ring,
 #ifdef __BIG_ENDIAN
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 872270c..64d39ad 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -622,6 +622,7 @@  struct radeon_ring {
 	unsigned		rptr;
 	unsigned		rptr_offs;
 	unsigned		rptr_reg;
+	unsigned		rptr_save_reg;
 	unsigned		wptr;
 	unsigned		wptr_old;
 	unsigned		wptr_reg;
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 0873834..ce8eb9d 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -451,6 +451,10 @@  static int radeon_debugfs_ring_info(struct seq_file *m, void *data)
 	count = (ring->ring_size / 4) - ring->ring_free_dw;
 	seq_printf(m, "wptr(0x%04x): 0x%08x\n", ring->wptr_reg, RREG32(ring->wptr_reg));
 	seq_printf(m, "rptr(0x%04x): 0x%08x\n", ring->rptr_reg, RREG32(ring->rptr_reg));
+	if (ring->rptr_save_reg) {
+		seq_printf(m, "rptr next(0x%04x): 0x%08x\n", ring->rptr_save_reg,
+			   RREG32(ring->rptr_save_reg));
+	}
 	seq_printf(m, "driver's copy of the wptr: 0x%08x\n", ring->wptr);
 	seq_printf(m, "driver's copy of the rptr: 0x%08x\n", ring->rptr);
 	seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index b4b1256..eb4704e 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -358,8 +358,10 @@  static int rv770_cp_load_microcode(struct radeon_device *rdev)
 
 void r700_cp_fini(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r700_cp_stop(rdev);
-	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
+	radeon_ring_fini(rdev, ring);
+	radeon_scratch_free(rdev, ring->rptr_save_reg);
 }
 
 /*
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index f61b550..50efafb 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -1765,6 +1765,12 @@  void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
 	u32 header;
 
+	if (ring->rptr_save_reg) {
+		uint32_t next_rptr = ring->wptr + 2 + 4 + 8;
+		radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
+		radeon_ring_write(ring, next_rptr);
+	}
+
 	if (ib->is_const_ib)
 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
 	else
@@ -1917,10 +1923,20 @@  static int si_cp_start(struct radeon_device *rdev)
 
 static void si_cp_fini(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring;
 	si_cp_enable(rdev, false);
-	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
-	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
+
+	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	radeon_ring_fini(rdev, ring);
+	radeon_scratch_free(rdev, ring->rptr_save_reg);
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
+	radeon_ring_fini(rdev, ring);
+	radeon_scratch_free(rdev, ring->rptr_save_reg);
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
+	radeon_ring_fini(rdev, ring);
+	radeon_scratch_free(rdev, ring->rptr_save_reg);
 }
 
 static int si_cp_resume(struct radeon_device *rdev)