Message ID | 1406733492-9996-1-git-send-email-deathsimple@vodafone.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Jul 30, 2014 at 11:18 AM, Christian König <deathsimple@vodafone.de> wrote: > From: Christian König <christian.koenig@amd.com> > > Seems to make VM flushes more stable on SI and CIK. > > v2: only use the PFP on the GFX ring on CIK > > Signed-off-by: Christian König <christian.koenig@amd.com> > Cc: stable@vger.kernel.org Applied to my 3.17 tree. We should use PFP whenever possible for all packets. Alex > --- > drivers/gpu/drm/radeon/cik.c | 5 +++-- > drivers/gpu/drm/radeon/si.c | 2 +- > 2 files changed, 4 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c > index cc1f02f..db53616 100644 > --- a/drivers/gpu/drm/radeon/cik.c > +++ b/drivers/gpu/drm/radeon/cik.c > @@ -5641,12 +5641,13 @@ static void cik_vm_decode_fault(struct radeon_device *rdev, > void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) > { > struct radeon_ring *ring = &rdev->ring[ridx]; > + int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX); > > if (vm == NULL) > return; > > radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | > + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | > WRITE_DATA_DST_SEL(0))); > if (vm->id < 8) { > radeon_ring_write(ring, > @@ -5696,7 +5697,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) > radeon_ring_write(ring, 1 << vm->id); > > /* compute doesn't have PFP */ > - if (ridx == RADEON_RING_TYPE_GFX_INDEX) { > + if (usepfp) { > /* sync PFP to ME, otherwise we might get invalid PFP reads */ > radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); > radeon_ring_write(ring, 0x0); > diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c > index 9e854fd..f87d82a 100644 > --- a/drivers/gpu/drm/radeon/si.c > +++ b/drivers/gpu/drm/radeon/si.c > @@ -4815,7 +4815,7 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) > > /* write new base address */ > radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | > + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | > WRITE_DATA_DST_SEL(0))); > > if (vm->id < 8) { > -- > 1.9.1 >
On Wed, Jul 30, 2014 at 11:54 AM, Alex Deucher <alexdeucher@gmail.com> wrote: > On Wed, Jul 30, 2014 at 11:18 AM, Christian König > <deathsimple@vodafone.de> wrote: > > From: Christian König <christian.koenig@amd.com> > > > > Seems to make VM flushes more stable on SI and CIK. > > > > v2: only use the PFP on the GFX ring on CIK > > > > Signed-off-by: Christian König <christian.koenig@amd.com> > > Cc: stable@vger.kernel.org > > Applied to my 3.17 tree. We should use PFP whenever possible for all > packets. > Can all this be explained with more words ? What are the choice ? What are the difference ? What we gain ? > > Alex > > > --- > > drivers/gpu/drm/radeon/cik.c | 5 +++-- > > drivers/gpu/drm/radeon/si.c | 2 +- > > 2 files changed, 4 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c > > index cc1f02f..db53616 100644 > > --- a/drivers/gpu/drm/radeon/cik.c > > +++ b/drivers/gpu/drm/radeon/cik.c > > @@ -5641,12 +5641,13 @@ static void cik_vm_decode_fault(struct > radeon_device *rdev, > > void cik_vm_flush(struct radeon_device *rdev, int ridx, struct > radeon_vm *vm) > > { > > struct radeon_ring *ring = &rdev->ring[ridx]; > > + int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX); > > > > if (vm == NULL) > > return; > > > > radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > > - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | > > + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | > > WRITE_DATA_DST_SEL(0))); > > if (vm->id < 8) { > > radeon_ring_write(ring, > > @@ -5696,7 +5697,7 @@ void cik_vm_flush(struct radeon_device *rdev, int > ridx, struct radeon_vm *vm) > > radeon_ring_write(ring, 1 << vm->id); > > > > /* compute doesn't have PFP */ > > - if (ridx == RADEON_RING_TYPE_GFX_INDEX) { > > + if (usepfp) { > > /* sync PFP to ME, otherwise we might get invalid PFP > reads */ > > radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); > > radeon_ring_write(ring, 0x0); > > diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c > > index 9e854fd..f87d82a 100644 > > --- a/drivers/gpu/drm/radeon/si.c > > +++ b/drivers/gpu/drm/radeon/si.c > > @@ -4815,7 +4815,7 @@ void si_vm_flush(struct radeon_device *rdev, int > ridx, struct radeon_vm *vm) > > > > /* write new base address */ > > radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > > - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | > > + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | > > WRITE_DATA_DST_SEL(0))); > > > > if (vm->id < 8) { > > -- > > 1.9.1 > > > _______________________________________________ > dri-devel mailing list > dri-devel@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/dri-devel >
Am 30.07.2014 um 19:24 schrieb Jerome Glisse: > On Wed, Jul 30, 2014 at 11:54 AM, Alex Deucher <alexdeucher@gmail.com > <mailto:alexdeucher@gmail.com>> wrote: > > On Wed, Jul 30, 2014 at 11:18 AM, Christian König > <deathsimple@vodafone.de <mailto:deathsimple@vodafone.de>> wrote: > > From: Christian König <christian.koenig@amd.com > <mailto:christian.koenig@amd.com>> > > > > Seems to make VM flushes more stable on SI and CIK. > > > > v2: only use the PFP on the GFX ring on CIK > > > > Signed-off-by: Christian König <christian.koenig@amd.com > <mailto:christian.koenig@amd.com>> > > Cc: stable@vger.kernel.org <mailto:stable@vger.kernel.org> > > Applied to my 3.17 tree. We should use PFP whenever possible for > all packets. > > > Can all this be explained with more words ? What are the choice ? What > are the difference ? What we gain ? The PFP is responsible of fetching the data the CP executes from memory, classify them and if necessary forward them to the ME. The ME is the part where normally all the juicy stuff happens, e.g. register writes, most type3 packet execution etc... What we did before was setting the VM base address with the ME instead of the PFP, but since the PFP needs to fetch the IB data we inserted an "Wait for ME" packet into the command stream to let the ME catch up to the PFP before proceeding. The problem is that this is sometimes not enough, e.g. on CIK it could happen that the IB read goes out while the the page table base address wasn't updated yet with the obviously bad consequences. Compute rings don't have a PFP on CIK so this won't work there. Regards, Christian. > > Alex > > > --- > > drivers/gpu/drm/radeon/cik.c | 5 +++-- > > drivers/gpu/drm/radeon/si.c | 2 +- > > 2 files changed, 4 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/gpu/drm/radeon/cik.c > b/drivers/gpu/drm/radeon/cik.c > > index cc1f02f..db53616 100644 > > --- a/drivers/gpu/drm/radeon/cik.c > > +++ b/drivers/gpu/drm/radeon/cik.c > > @@ -5641,12 +5641,13 @@ static void cik_vm_decode_fault(struct > radeon_device *rdev, > > void cik_vm_flush(struct radeon_device *rdev, int ridx, struct > radeon_vm *vm) > > { > > struct radeon_ring *ring = &rdev->ring[ridx]; > > + int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX); > > > > if (vm == NULL) > > return; > > > > radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > > - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | > > + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | > > WRITE_DATA_DST_SEL(0))); > > if (vm->id < 8) { > > radeon_ring_write(ring, > > @@ -5696,7 +5697,7 @@ void cik_vm_flush(struct radeon_device > *rdev, int ridx, struct radeon_vm *vm) > > radeon_ring_write(ring, 1 << vm->id); > > > > /* compute doesn't have PFP */ > > - if (ridx == RADEON_RING_TYPE_GFX_INDEX) { > > + if (usepfp) { > > /* sync PFP to ME, otherwise we might get > invalid PFP reads */ > > radeon_ring_write(ring, > PACKET3(PACKET3_PFP_SYNC_ME, 0)); > > radeon_ring_write(ring, 0x0); > > diff --git a/drivers/gpu/drm/radeon/si.c > b/drivers/gpu/drm/radeon/si.c > > index 9e854fd..f87d82a 100644 > > --- a/drivers/gpu/drm/radeon/si.c > > +++ b/drivers/gpu/drm/radeon/si.c > > @@ -4815,7 +4815,7 @@ void si_vm_flush(struct radeon_device > *rdev, int ridx, struct radeon_vm *vm) > > > > /* write new base address */ > > radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > > - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | > > + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | > > WRITE_DATA_DST_SEL(0))); > > > > if (vm->id < 8) { > > -- > > 1.9.1 > > > _______________________________________________ > dri-devel mailing list > dri-devel@lists.freedesktop.org > <mailto:dri-devel@lists.freedesktop.org> > http://lists.freedesktop.org/mailman/listinfo/dri-devel > >
On Wed, Jul 30, 2014 at 2:15 PM, Christian König <deathsimple@vodafone.de> wrote: > Am 30.07.2014 um 19:24 schrieb Jerome Glisse: > > On Wed, Jul 30, 2014 at 11:54 AM, Alex Deucher <alexdeucher@gmail.com> > wrote: >> >> On Wed, Jul 30, 2014 at 11:18 AM, Christian König >> <deathsimple@vodafone.de> wrote: >> > From: Christian König <christian.koenig@amd.com> >> > >> > Seems to make VM flushes more stable on SI and CIK. >> > >> > v2: only use the PFP on the GFX ring on CIK >> > >> > Signed-off-by: Christian König <christian.koenig@amd.com> >> > Cc: stable@vger.kernel.org >> >> Applied to my 3.17 tree. We should use PFP whenever possible for all >> packets. > > > Can all this be explained with more words ? What are the choice ? What are > the difference ? What we gain ? > > > The PFP is responsible of fetching the data the CP executes from memory, > classify them and if necessary forward them to the ME. The ME is the part > where normally all the juicy stuff happens, e.g. register writes, most type3 > packet execution etc... > > What we did before was setting the VM base address with the ME instead of > the PFP, but since the PFP needs to fetch the IB data we inserted an "Wait > for ME" packet into the command stream to let the ME catch up to the PFP > before proceeding. > > The problem is that this is sometimes not enough, e.g. on CIK it could > happen that the IB read goes out while the the page table base address > wasn't updated yet with the obviously bad consequences. > > Compute rings don't have a PFP on CIK so this won't work there. > In the more general case, the PFP will fetch the packets and either process them directly (if the packet is handled by the PFP or the engine bit is set to PFP) or forward it to the ME. Since the ME does most of the packet processing, it tends to be busier than the PFP, so it makes sense to have the PFP do as much additional work as possible. Depending on the packets or hw conditions, the PFP may have to defer the processing of a packet with the engine bit set to PFP to the ME anyway. E.g., if the packet depends on some synchronization logic results in the ME. Alex > Regards, > Christian. > > > >> >> >> Alex >> >> > --- >> > drivers/gpu/drm/radeon/cik.c | 5 +++-- >> > drivers/gpu/drm/radeon/si.c | 2 +- >> > 2 files changed, 4 insertions(+), 3 deletions(-) >> > >> > diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c >> > index cc1f02f..db53616 100644 >> > --- a/drivers/gpu/drm/radeon/cik.c >> > +++ b/drivers/gpu/drm/radeon/cik.c >> > @@ -5641,12 +5641,13 @@ static void cik_vm_decode_fault(struct >> > radeon_device *rdev, >> > void cik_vm_flush(struct radeon_device *rdev, int ridx, struct >> > radeon_vm *vm) >> > { >> > struct radeon_ring *ring = &rdev->ring[ridx]; >> > + int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX); >> > >> > if (vm == NULL) >> > return; >> > >> > radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); >> > - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | >> > + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | >> > WRITE_DATA_DST_SEL(0))); >> > if (vm->id < 8) { >> > radeon_ring_write(ring, >> > @@ -5696,7 +5697,7 @@ void cik_vm_flush(struct radeon_device *rdev, int >> > ridx, struct radeon_vm *vm) >> > radeon_ring_write(ring, 1 << vm->id); >> > >> > /* compute doesn't have PFP */ >> > - if (ridx == RADEON_RING_TYPE_GFX_INDEX) { >> > + if (usepfp) { >> > /* sync PFP to ME, otherwise we might get invalid PFP >> > reads */ >> > radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, >> > 0)); >> > radeon_ring_write(ring, 0x0); >> > diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c >> > index 9e854fd..f87d82a 100644 >> > --- a/drivers/gpu/drm/radeon/si.c >> > +++ b/drivers/gpu/drm/radeon/si.c >> > @@ -4815,7 +4815,7 @@ void si_vm_flush(struct radeon_device *rdev, int >> > ridx, struct radeon_vm *vm) >> > >> > /* write new base address */ >> > radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); >> > - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | >> > + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | >> > WRITE_DATA_DST_SEL(0))); >> > >> > if (vm->id < 8) { >> > -- >> > 1.9.1 >> > >> _______________________________________________ >> dri-devel mailing list >> dri-devel@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/dri-devel > > >
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index cc1f02f..db53616 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5641,12 +5641,13 @@ static void cik_vm_decode_fault(struct radeon_device *rdev, void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) { struct radeon_ring *ring = &rdev->ring[ridx]; + int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX); if (vm == NULL) return; radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0))); if (vm->id < 8) { radeon_ring_write(ring, @@ -5696,7 +5697,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, 1 << vm->id); /* compute doesn't have PFP */ - if (ridx == RADEON_RING_TYPE_GFX_INDEX) { + if (usepfp) { /* sync PFP to ME, otherwise we might get invalid PFP reads */ radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); radeon_ring_write(ring, 0x0); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 9e854fd..f87d82a 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -4815,7 +4815,7 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) /* write new base address */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(0))); if (vm->id < 8) {