diff mbox

drm/radeon: set VM base addr using the PFP v2

Message ID 1406733492-9996-1-git-send-email-deathsimple@vodafone.de (mailing list archive)
State New, archived
Headers show

Commit Message

Christian König July 30, 2014, 3:18 p.m. UTC
From: Christian König <christian.koenig@amd.com>

Seems to make VM flushes more stable on SI and CIK.

v2: only use the PFP on the GFX ring on CIK

Signed-off-by: Christian König <christian.koenig@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/cik.c | 5 +++--
 drivers/gpu/drm/radeon/si.c  | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

Comments

Alex Deucher July 30, 2014, 3:54 p.m. UTC | #1
On Wed, Jul 30, 2014 at 11:18 AM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Seems to make VM flushes more stable on SI and CIK.
>
> v2: only use the PFP on the GFX ring on CIK
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Cc: stable@vger.kernel.org

Applied to my 3.17 tree.  We should use PFP whenever possible for all packets.

Alex

> ---
>  drivers/gpu/drm/radeon/cik.c | 5 +++--
>  drivers/gpu/drm/radeon/si.c  | 2 +-
>  2 files changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
> index cc1f02f..db53616 100644
> --- a/drivers/gpu/drm/radeon/cik.c
> +++ b/drivers/gpu/drm/radeon/cik.c
> @@ -5641,12 +5641,13 @@ static void cik_vm_decode_fault(struct radeon_device *rdev,
>  void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
>  {
>         struct radeon_ring *ring = &rdev->ring[ridx];
> +       int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
>
>         if (vm == NULL)
>                 return;
>
>         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
> -       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
> +       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
>                                  WRITE_DATA_DST_SEL(0)));
>         if (vm->id < 8) {
>                 radeon_ring_write(ring,
> @@ -5696,7 +5697,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
>         radeon_ring_write(ring, 1 << vm->id);
>
>         /* compute doesn't have PFP */
> -       if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
> +       if (usepfp) {
>                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
>                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
>                 radeon_ring_write(ring, 0x0);
> diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
> index 9e854fd..f87d82a 100644
> --- a/drivers/gpu/drm/radeon/si.c
> +++ b/drivers/gpu/drm/radeon/si.c
> @@ -4815,7 +4815,7 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
>
>         /* write new base address */
>         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
> -       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
> +       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
>                                  WRITE_DATA_DST_SEL(0)));
>
>         if (vm->id < 8) {
> --
> 1.9.1
>
Jerome Glisse July 30, 2014, 5:24 p.m. UTC | #2
On Wed, Jul 30, 2014 at 11:54 AM, Alex Deucher <alexdeucher@gmail.com>
wrote:

> On Wed, Jul 30, 2014 at 11:18 AM, Christian König
> <deathsimple@vodafone.de> wrote:
> > From: Christian König <christian.koenig@amd.com>
> >
> > Seems to make VM flushes more stable on SI and CIK.
> >
> > v2: only use the PFP on the GFX ring on CIK
> >
> > Signed-off-by: Christian König <christian.koenig@amd.com>
> > Cc: stable@vger.kernel.org
>
> Applied to my 3.17 tree.  We should use PFP whenever possible for all
> packets.
>

Can all this be explained with more words ? What are the choice ? What are
the difference ? What we gain ?


>
> Alex
>
> > ---
> >  drivers/gpu/drm/radeon/cik.c | 5 +++--
> >  drivers/gpu/drm/radeon/si.c  | 2 +-
> >  2 files changed, 4 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
> > index cc1f02f..db53616 100644
> > --- a/drivers/gpu/drm/radeon/cik.c
> > +++ b/drivers/gpu/drm/radeon/cik.c
> > @@ -5641,12 +5641,13 @@ static void cik_vm_decode_fault(struct
> radeon_device *rdev,
> >  void cik_vm_flush(struct radeon_device *rdev, int ridx, struct
> radeon_vm *vm)
> >  {
> >         struct radeon_ring *ring = &rdev->ring[ridx];
> > +       int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
> >
> >         if (vm == NULL)
> >                 return;
> >
> >         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
> > -       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
> > +       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
> >                                  WRITE_DATA_DST_SEL(0)));
> >         if (vm->id < 8) {
> >                 radeon_ring_write(ring,
> > @@ -5696,7 +5697,7 @@ void cik_vm_flush(struct radeon_device *rdev, int
> ridx, struct radeon_vm *vm)
> >         radeon_ring_write(ring, 1 << vm->id);
> >
> >         /* compute doesn't have PFP */
> > -       if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
> > +       if (usepfp) {
> >                 /* sync PFP to ME, otherwise we might get invalid PFP
> reads */
> >                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
> >                 radeon_ring_write(ring, 0x0);
> > diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
> > index 9e854fd..f87d82a 100644
> > --- a/drivers/gpu/drm/radeon/si.c
> > +++ b/drivers/gpu/drm/radeon/si.c
> > @@ -4815,7 +4815,7 @@ void si_vm_flush(struct radeon_device *rdev, int
> ridx, struct radeon_vm *vm)
> >
> >         /* write new base address */
> >         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
> > -       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
> > +       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
> >                                  WRITE_DATA_DST_SEL(0)));
> >
> >         if (vm->id < 8) {
> > --
> > 1.9.1
> >
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
>
Christian König July 30, 2014, 6:15 p.m. UTC | #3
Am 30.07.2014 um 19:24 schrieb Jerome Glisse:
> On Wed, Jul 30, 2014 at 11:54 AM, Alex Deucher <alexdeucher@gmail.com 
> <mailto:alexdeucher@gmail.com>> wrote:
>
>     On Wed, Jul 30, 2014 at 11:18 AM, Christian König
>     <deathsimple@vodafone.de <mailto:deathsimple@vodafone.de>> wrote:
>     > From: Christian König <christian.koenig@amd.com
>     <mailto:christian.koenig@amd.com>>
>     >
>     > Seems to make VM flushes more stable on SI and CIK.
>     >
>     > v2: only use the PFP on the GFX ring on CIK
>     >
>     > Signed-off-by: Christian König <christian.koenig@amd.com
>     <mailto:christian.koenig@amd.com>>
>     > Cc: stable@vger.kernel.org <mailto:stable@vger.kernel.org>
>
>     Applied to my 3.17 tree.  We should use PFP whenever possible for
>     all packets.
>
>
> Can all this be explained with more words ? What are the choice ? What 
> are the difference ? What we gain ?

The PFP is responsible of fetching the data the CP executes from memory, 
classify them and if necessary forward them to the ME. The ME is the 
part where normally all the juicy stuff happens, e.g. register writes, 
most type3 packet execution etc...

What we did before was setting the VM base address with the ME instead 
of the PFP, but since the PFP needs to fetch the IB data we inserted an 
"Wait for ME" packet into the command stream to let the ME catch up to 
the PFP before proceeding.

The problem is that this is sometimes not enough, e.g. on CIK it could 
happen that the IB read goes out while the the page table base address 
wasn't updated yet with the obviously bad consequences.

Compute rings don't have a PFP on CIK so this won't work there.

Regards,
Christian.

>
>     Alex
>
>     > ---
>     >  drivers/gpu/drm/radeon/cik.c | 5 +++--
>     >  drivers/gpu/drm/radeon/si.c  | 2 +-
>     >  2 files changed, 4 insertions(+), 3 deletions(-)
>     >
>     > diff --git a/drivers/gpu/drm/radeon/cik.c
>     b/drivers/gpu/drm/radeon/cik.c
>     > index cc1f02f..db53616 100644
>     > --- a/drivers/gpu/drm/radeon/cik.c
>     > +++ b/drivers/gpu/drm/radeon/cik.c
>     > @@ -5641,12 +5641,13 @@ static void cik_vm_decode_fault(struct
>     radeon_device *rdev,
>     >  void cik_vm_flush(struct radeon_device *rdev, int ridx, struct
>     radeon_vm *vm)
>     >  {
>     >         struct radeon_ring *ring = &rdev->ring[ridx];
>     > +       int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
>     >
>     >         if (vm == NULL)
>     >                 return;
>     >
>     >         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>     > -       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>     > +       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
>     >  WRITE_DATA_DST_SEL(0)));
>     >         if (vm->id < 8) {
>     >                 radeon_ring_write(ring,
>     > @@ -5696,7 +5697,7 @@ void cik_vm_flush(struct radeon_device
>     *rdev, int ridx, struct radeon_vm *vm)
>     >         radeon_ring_write(ring, 1 << vm->id);
>     >
>     >         /* compute doesn't have PFP */
>     > -       if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
>     > +       if (usepfp) {
>     >                 /* sync PFP to ME, otherwise we might get
>     invalid PFP reads */
>     >                 radeon_ring_write(ring,
>     PACKET3(PACKET3_PFP_SYNC_ME, 0));
>     >                 radeon_ring_write(ring, 0x0);
>     > diff --git a/drivers/gpu/drm/radeon/si.c
>     b/drivers/gpu/drm/radeon/si.c
>     > index 9e854fd..f87d82a 100644
>     > --- a/drivers/gpu/drm/radeon/si.c
>     > +++ b/drivers/gpu/drm/radeon/si.c
>     > @@ -4815,7 +4815,7 @@ void si_vm_flush(struct radeon_device
>     *rdev, int ridx, struct radeon_vm *vm)
>     >
>     >         /* write new base address */
>     >         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>     > -       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>     > +       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
>     >  WRITE_DATA_DST_SEL(0)));
>     >
>     >         if (vm->id < 8) {
>     > --
>     > 1.9.1
>     >
>     _______________________________________________
>     dri-devel mailing list
>     dri-devel@lists.freedesktop.org
>     <mailto:dri-devel@lists.freedesktop.org>
>     http://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>
Alex Deucher July 30, 2014, 7:10 p.m. UTC | #4
On Wed, Jul 30, 2014 at 2:15 PM, Christian König
<deathsimple@vodafone.de> wrote:
> Am 30.07.2014 um 19:24 schrieb Jerome Glisse:
>
> On Wed, Jul 30, 2014 at 11:54 AM, Alex Deucher <alexdeucher@gmail.com>
> wrote:
>>
>> On Wed, Jul 30, 2014 at 11:18 AM, Christian König
>> <deathsimple@vodafone.de> wrote:
>> > From: Christian König <christian.koenig@amd.com>
>> >
>> > Seems to make VM flushes more stable on SI and CIK.
>> >
>> > v2: only use the PFP on the GFX ring on CIK
>> >
>> > Signed-off-by: Christian König <christian.koenig@amd.com>
>> > Cc: stable@vger.kernel.org
>>
>> Applied to my 3.17 tree.  We should use PFP whenever possible for all
>> packets.
>
>
> Can all this be explained with more words ? What are the choice ? What are
> the difference ? What we gain ?
>
>
> The PFP is responsible of fetching the data the CP executes from memory,
> classify them and if necessary forward them to the ME. The ME is the part
> where normally all the juicy stuff happens, e.g. register writes, most type3
> packet execution etc...
>
> What we did before was setting the VM base address with the ME instead of
> the PFP, but since the PFP needs to fetch the IB data we inserted an "Wait
> for ME" packet into the command stream to let the ME catch up to the PFP
> before proceeding.
>
> The problem is that this is sometimes not enough, e.g. on CIK it could
> happen that the IB read goes out while the the page table base address
> wasn't updated yet with the obviously bad consequences.
>
> Compute rings don't have a PFP on CIK so this won't work there.
>

In the more general case, the PFP will fetch the packets and either
process them directly (if the packet is handled by the PFP or the
engine bit is set to PFP) or forward it to the ME.  Since the ME does
most of the packet processing, it tends to be busier than the PFP, so
it makes sense to have the PFP do as much additional work as possible.
Depending on the packets or hw conditions, the PFP may have to defer
the processing of a packet with the engine bit set to PFP to the ME
anyway.  E.g., if the packet depends on some synchronization logic
results in the ME.

Alex

> Regards,
> Christian.
>
>
>
>>
>>
>> Alex
>>
>> > ---
>> >  drivers/gpu/drm/radeon/cik.c | 5 +++--
>> >  drivers/gpu/drm/radeon/si.c  | 2 +-
>> >  2 files changed, 4 insertions(+), 3 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
>> > index cc1f02f..db53616 100644
>> > --- a/drivers/gpu/drm/radeon/cik.c
>> > +++ b/drivers/gpu/drm/radeon/cik.c
>> > @@ -5641,12 +5641,13 @@ static void cik_vm_decode_fault(struct
>> > radeon_device *rdev,
>> >  void cik_vm_flush(struct radeon_device *rdev, int ridx, struct
>> > radeon_vm *vm)
>> >  {
>> >         struct radeon_ring *ring = &rdev->ring[ridx];
>> > +       int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
>> >
>> >         if (vm == NULL)
>> >                 return;
>> >
>> >         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>> > -       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>> > +       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
>> >                                  WRITE_DATA_DST_SEL(0)));
>> >         if (vm->id < 8) {
>> >                 radeon_ring_write(ring,
>> > @@ -5696,7 +5697,7 @@ void cik_vm_flush(struct radeon_device *rdev, int
>> > ridx, struct radeon_vm *vm)
>> >         radeon_ring_write(ring, 1 << vm->id);
>> >
>> >         /* compute doesn't have PFP */
>> > -       if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
>> > +       if (usepfp) {
>> >                 /* sync PFP to ME, otherwise we might get invalid PFP
>> > reads */
>> >                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME,
>> > 0));
>> >                 radeon_ring_write(ring, 0x0);
>> > diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
>> > index 9e854fd..f87d82a 100644
>> > --- a/drivers/gpu/drm/radeon/si.c
>> > +++ b/drivers/gpu/drm/radeon/si.c
>> > @@ -4815,7 +4815,7 @@ void si_vm_flush(struct radeon_device *rdev, int
>> > ridx, struct radeon_vm *vm)
>> >
>> >         /* write new base address */
>> >         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>> > -       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
>> > +       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
>> >                                  WRITE_DATA_DST_SEL(0)));
>> >
>> >         if (vm->id < 8) {
>> > --
>> > 1.9.1
>> >
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index cc1f02f..db53616 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -5641,12 +5641,13 @@  static void cik_vm_decode_fault(struct radeon_device *rdev,
 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
 {
 	struct radeon_ring *ring = &rdev->ring[ridx];
+	int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
 
 	if (vm == NULL)
 		return;
 
 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
 				 WRITE_DATA_DST_SEL(0)));
 	if (vm->id < 8) {
 		radeon_ring_write(ring,
@@ -5696,7 +5697,7 @@  void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
 	radeon_ring_write(ring, 1 << vm->id);
 
 	/* compute doesn't have PFP */
-	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
+	if (usepfp) {
 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 		radeon_ring_write(ring, 0x0);
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 9e854fd..f87d82a 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -4815,7 +4815,7 @@  void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
 
 	/* write new base address */
 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
 				 WRITE_DATA_DST_SEL(0)));
 
 	if (vm->id < 8) {