diff mbox series

[08/11] drm/i915/gt: Only wait for GPU activity before unbinding a GGTT fence

Message ID 20200331213108.11340-8-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/11] drm/i915/gem: Ignore readonly failures when updating relocs | expand

Commit Message

Chris Wilson March 31, 2020, 9:31 p.m. UTC
Only GPU activity via the GGTT fence is asynchronous, we know that we
control the CPU access directly, so we only need to wait for the GPU to
stop using the fence before we relinquish it.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 12 ++++++++----
 drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h |  3 +++
 drivers/gpu/drm/i915/i915_vma.c              |  4 ++++
 3 files changed, 15 insertions(+), 4 deletions(-)

Comments

Matthew Auld April 1, 2020, 6:56 p.m. UTC | #1
On Tue, 31 Mar 2020 at 22:31, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Only GPU activity via the GGTT fence is asynchronous, we know that we
> control the CPU access directly, so we only need to wait for the GPU to
> stop using the fence before we relinquish it.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 12 ++++++++----
>  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h |  3 +++
>  drivers/gpu/drm/i915/i915_vma.c              |  4 ++++
>  3 files changed, 15 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> index 225970f4a4ef..74f8201486b2 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> @@ -239,15 +239,18 @@ static int fence_update(struct i915_fence_reg *fence,
>                 if (!i915_vma_is_map_and_fenceable(vma))
>                         return -EINVAL;
>
> -               ret = i915_vma_sync(vma);
> -               if (ret)
> -                       return ret;
> +               if (INTEL_GEN(fence_to_i915(fence)) < 4) {
> +                       /* implicit 'unfenced' GPU blits */
> +                       ret = i915_vma_sync(vma);

What was the strangeness with gen < 4 again?

> +                       if (ret)
> +                               return ret;
> +               }
>         }
>
>         old = xchg(&fence->vma, NULL);
>         if (old) {
>                 /* XXX Ideally we would move the waiting to outside the mutex */
> -               ret = i915_vma_sync(old);
> +               ret = i915_active_wait(&fence->active);
>                 if (ret) {
>                         fence->vma = old;
>                         return ret;
> @@ -869,6 +872,7 @@ void intel_ggtt_init_fences(struct i915_ggtt *ggtt)
>         for (i = 0; i < num_fences; i++) {
>                 struct i915_fence_reg *fence = &ggtt->fence_regs[i];
>
> +               i915_active_init(&fence->active, NULL, NULL);

Some active_fini?

>                 fence->ggtt = ggtt;
>                 fence->id = i;
>                 list_add_tail(&fence->link, &ggtt->fence_list);
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h
> index 9850f6a85d2a..08c6bb667581 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h
> @@ -28,6 +28,8 @@
>  #include <linux/list.h>
>  #include <linux/types.h>
>
> +#include "i915_active.h"
> +
>  struct drm_i915_gem_object;
>  struct i915_ggtt;
>  struct i915_vma;
> @@ -41,6 +43,7 @@ struct i915_fence_reg {
>         struct i915_ggtt *ggtt;
>         struct i915_vma *vma;
>         atomic_t pin_count;
> +       struct i915_active active;
>         int id;
>         /**
>          * Whether the tiling parameters for the currently
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 18069df2a9e5..616ca5a7c875 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -1232,6 +1232,10 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>                 dma_resv_add_shared_fence(vma->resv, &rq->fence);
>                 obj->write_domain = 0;
>         }
> +
> +       if (flags & EXEC_OBJECT_NEEDS_FENCE && vma->fence)
> +               i915_active_add_request(&vma->fence->active, rq);
> +
>         obj->read_domains |= I915_GEM_GPU_DOMAINS;
>         obj->mm.dirty = true;
>
> --
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson April 1, 2020, 7:02 p.m. UTC | #2
Quoting Matthew Auld (2020-04-01 19:56:23)
> On Tue, 31 Mar 2020 at 22:31, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >
> > Only GPU activity via the GGTT fence is asynchronous, we know that we
> > control the CPU access directly, so we only need to wait for the GPU to
> > stop using the fence before we relinquish it.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 12 ++++++++----
> >  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h |  3 +++
> >  drivers/gpu/drm/i915/i915_vma.c              |  4 ++++
> >  3 files changed, 15 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > index 225970f4a4ef..74f8201486b2 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > @@ -239,15 +239,18 @@ static int fence_update(struct i915_fence_reg *fence,
> >                 if (!i915_vma_is_map_and_fenceable(vma))
> >                         return -EINVAL;
> >
> > -               ret = i915_vma_sync(vma);
> > -               if (ret)
> > -                       return ret;
> > +               if (INTEL_GEN(fence_to_i915(fence)) < 4) {
> > +                       /* implicit 'unfenced' GPU blits */
> > +                       ret = i915_vma_sync(vma);
> 
> What was the strangeness with gen < 4 again?

From gen4, all gpu ops have implicit fences and never reference the
global fence registers.

if (gpu_uses_fence_registers())

worksforme

> > +                       if (ret)
> > +                               return ret;
> > +               }
> >         }
> >
> >         old = xchg(&fence->vma, NULL);
> >         if (old) {
> >                 /* XXX Ideally we would move the waiting to outside the mutex */
> > -               ret = i915_vma_sync(old);
> > +               ret = i915_active_wait(&fence->active);
> >                 if (ret) {
> >                         fence->vma = old;
> >                         return ret;
> > @@ -869,6 +872,7 @@ void intel_ggtt_init_fences(struct i915_ggtt *ggtt)
> >         for (i = 0; i < num_fences; i++) {
> >                 struct i915_fence_reg *fence = &ggtt->fence_regs[i];
> >
> > +               i915_active_init(&fence->active, NULL, NULL);
> 
> Some active_fini?

For debug peace of mind, I think we added fini_fences so should be easy
to type up.
-Chris
Matthew Auld April 1, 2020, 7:25 p.m. UTC | #3
On Wed, 1 Apr 2020 at 20:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Quoting Matthew Auld (2020-04-01 19:56:23)
> > On Tue, 31 Mar 2020 at 22:31, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > >
> > > Only GPU activity via the GGTT fence is asynchronous, we know that we
> > > control the CPU access directly, so we only need to wait for the GPU to
> > > stop using the fence before we relinquish it.
> > >
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > ---
> > >  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 12 ++++++++----
> > >  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h |  3 +++
> > >  drivers/gpu/drm/i915/i915_vma.c              |  4 ++++
> > >  3 files changed, 15 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > > index 225970f4a4ef..74f8201486b2 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > > @@ -239,15 +239,18 @@ static int fence_update(struct i915_fence_reg *fence,
> > >                 if (!i915_vma_is_map_and_fenceable(vma))
> > >                         return -EINVAL;
> > >
> > > -               ret = i915_vma_sync(vma);
> > > -               if (ret)
> > > -                       return ret;
> > > +               if (INTEL_GEN(fence_to_i915(fence)) < 4) {
> > > +                       /* implicit 'unfenced' GPU blits */
> > > +                       ret = i915_vma_sync(vma);
> >
> > What was the strangeness with gen < 4 again?
>
> From gen4, all gpu ops have implicit fences and never reference the
> global fence registers.
>
> if (gpu_uses_fence_registers())
>
> worksforme
>
> > > +                       if (ret)
> > > +                               return ret;
> > > +               }
> > >         }
> > >
> > >         old = xchg(&fence->vma, NULL);
> > >         if (old) {
> > >                 /* XXX Ideally we would move the waiting to outside the mutex */
> > > -               ret = i915_vma_sync(old);
> > > +               ret = i915_active_wait(&fence->active);
> > >                 if (ret) {
> > >                         fence->vma = old;
> > >                         return ret;
> > > @@ -869,6 +872,7 @@ void intel_ggtt_init_fences(struct i915_ggtt *ggtt)
> > >         for (i = 0; i < num_fences; i++) {
> > >                 struct i915_fence_reg *fence = &ggtt->fence_regs[i];
> > >
> > > +               i915_active_init(&fence->active, NULL, NULL);
> >
> > Some active_fini?
>
> For debug peace of mind, I think we added fini_fences so should be easy
> to type up.

Ok,
Reviewed-by: Matthew Auld <matthew.auld@intel.com>

> -Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
index 225970f4a4ef..74f8201486b2 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -239,15 +239,18 @@  static int fence_update(struct i915_fence_reg *fence,
 		if (!i915_vma_is_map_and_fenceable(vma))
 			return -EINVAL;
 
-		ret = i915_vma_sync(vma);
-		if (ret)
-			return ret;
+		if (INTEL_GEN(fence_to_i915(fence)) < 4) {
+			/* implicit 'unfenced' GPU blits */
+			ret = i915_vma_sync(vma);
+			if (ret)
+				return ret;
+		}
 	}
 
 	old = xchg(&fence->vma, NULL);
 	if (old) {
 		/* XXX Ideally we would move the waiting to outside the mutex */
-		ret = i915_vma_sync(old);
+		ret = i915_active_wait(&fence->active);
 		if (ret) {
 			fence->vma = old;
 			return ret;
@@ -869,6 +872,7 @@  void intel_ggtt_init_fences(struct i915_ggtt *ggtt)
 	for (i = 0; i < num_fences; i++) {
 		struct i915_fence_reg *fence = &ggtt->fence_regs[i];
 
+		i915_active_init(&fence->active, NULL, NULL);
 		fence->ggtt = ggtt;
 		fence->id = i;
 		list_add_tail(&fence->link, &ggtt->fence_list);
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h
index 9850f6a85d2a..08c6bb667581 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h
@@ -28,6 +28,8 @@ 
 #include <linux/list.h>
 #include <linux/types.h>
 
+#include "i915_active.h"
+
 struct drm_i915_gem_object;
 struct i915_ggtt;
 struct i915_vma;
@@ -41,6 +43,7 @@  struct i915_fence_reg {
 	struct i915_ggtt *ggtt;
 	struct i915_vma *vma;
 	atomic_t pin_count;
+	struct i915_active active;
 	int id;
 	/**
 	 * Whether the tiling parameters for the currently
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 18069df2a9e5..616ca5a7c875 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1232,6 +1232,10 @@  int i915_vma_move_to_active(struct i915_vma *vma,
 		dma_resv_add_shared_fence(vma->resv, &rq->fence);
 		obj->write_domain = 0;
 	}
+
+	if (flags & EXEC_OBJECT_NEEDS_FENCE && vma->fence)
+		i915_active_add_request(&vma->fence->active, rq);
+
 	obj->read_domains |= I915_GEM_GPU_DOMAINS;
 	obj->mm.dirty = true;