diff mbox series

[1/6] drm/i915/gem: Amalgamate clflushes on suspend

Message ID 20210119144912.12653-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [1/6] drm/i915/gem: Amalgamate clflushes on suspend | expand

Commit Message

Chris Wilson Jan. 19, 2021, 2:49 p.m. UTC
When flushing objects larger than the CPU cache it is preferable to use
a single wbinvd() rather than overlapping clflush(). At runtime, we
avoid wbinvd() due to its system-wide latencies, but during
singlethreaded suspend, no one will observe the imposed latency and we
can opt for the faster wbinvd to clear all objects in a single hit.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c | 40 +++++++++-----------------
 1 file changed, 13 insertions(+), 27 deletions(-)

Comments

Matthew Auld Jan. 19, 2021, 3:30 p.m. UTC | #1
On Tue, 19 Jan 2021 at 14:49, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> When flushing objects larger than the CPU cache it is preferable to use
> a single wbinvd() rather than overlapping clflush(). At runtime, we
> avoid wbinvd() due to its system-wide latencies, but during
> singlethreaded suspend, no one will observe the imposed latency and we
> can opt for the faster wbinvd to clear all objects in a single hit.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_pm.c | 40 +++++++++-----------------
>  1 file changed, 13 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> index 40d3e40500fa..38c1298cb14b 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> @@ -11,6 +11,12 @@
>
>  #include "i915_drv.h"
>
> +#if defined(CONFIG_X86)
> +#include <asm/smp.h>
> +#else
> +#define wbinvd_on_all_cpus()
> +#endif
> +
>  void i915_gem_suspend(struct drm_i915_private *i915)
>  {
>         GEM_TRACE("%s\n", dev_name(i915->drm.dev));
> @@ -32,13 +38,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
>         i915_gem_drain_freed_objects(i915);
>  }
>
> -static struct drm_i915_gem_object *first_mm_object(struct list_head *list)
> -{
> -       return list_first_entry_or_null(list,
> -                                       struct drm_i915_gem_object,
> -                                       mm.link);
> -}
> -
>  void i915_gem_suspend_late(struct drm_i915_private *i915)
>  {
>         struct drm_i915_gem_object *obj;
> @@ -48,6 +47,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
>                 NULL
>         }, **phase;
>         unsigned long flags;
> +       bool flush = false;
>
>         /*
>          * Neither the BIOS, ourselves or any other kernel
> @@ -73,29 +73,15 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
>
>         spin_lock_irqsave(&i915->mm.obj_lock, flags);
>         for (phase = phases; *phase; phase++) {
> -               LIST_HEAD(keep);
> -
> -               while ((obj = first_mm_object(*phase))) {
> -                       list_move_tail(&obj->mm.link, &keep);
> -
> -                       /* Beware the background _i915_gem_free_objects */
> -                       if (!kref_get_unless_zero(&obj->base.refcount))
> -                               continue;
> -
> -                       spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
> -
> -                       i915_gem_object_lock(obj, NULL);
> -                       drm_WARN_ON(&i915->drm,
> -                           i915_gem_object_set_to_gtt_domain(obj, false));
> -                       i915_gem_object_unlock(obj);
> -                       i915_gem_object_put(obj);
> -
> -                       spin_lock_irqsave(&i915->mm.obj_lock, flags);
> +               list_for_each_entry(obj, *phase, mm.link) {
> +                       if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
> +                               flush |= (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0;
> +                       __start_cpu_write(obj); /* presume auto-hibernate */
>                 }
> -
> -               list_splice_tail(&keep, *phase);
>         }
>         spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
> +       if (flush)
> +               wbinvd_on_all_cpus();

Hmmm, this builds on !CONFIG_X86?

>  }
>
>  void i915_gem_resume(struct drm_i915_private *i915)
> --
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Jan. 19, 2021, 3:37 p.m. UTC | #2
Quoting Matthew Auld (2021-01-19 15:30:41)
> On Tue, 19 Jan 2021 at 14:49, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >
> > When flushing objects larger than the CPU cache it is preferable to use
> > a single wbinvd() rather than overlapping clflush(). At runtime, we
> > avoid wbinvd() due to its system-wide latencies, but during
> > singlethreaded suspend, no one will observe the imposed latency and we
> > can opt for the faster wbinvd to clear all objects in a single hit.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_pm.c | 40 +++++++++-----------------
> >  1 file changed, 13 insertions(+), 27 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> > index 40d3e40500fa..38c1298cb14b 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> > @@ -11,6 +11,12 @@
> >
> >  #include "i915_drv.h"
> >
> > +#if defined(CONFIG_X86)
> > +#include <asm/smp.h>
> > +#else
> > +#define wbinvd_on_all_cpus()
> > +#endif
> > +
> >  void i915_gem_suspend(struct drm_i915_private *i915)
> >  {
> >         GEM_TRACE("%s\n", dev_name(i915->drm.dev));
> > @@ -32,13 +38,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
> >         i915_gem_drain_freed_objects(i915);
> >  }
> >
> > -static struct drm_i915_gem_object *first_mm_object(struct list_head *list)
> > -{
> > -       return list_first_entry_or_null(list,
> > -                                       struct drm_i915_gem_object,
> > -                                       mm.link);
> > -}
> > -
> >  void i915_gem_suspend_late(struct drm_i915_private *i915)
> >  {
> >         struct drm_i915_gem_object *obj;
> > @@ -48,6 +47,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
> >                 NULL
> >         }, **phase;
> >         unsigned long flags;
> > +       bool flush = false;
> >
> >         /*
> >          * Neither the BIOS, ourselves or any other kernel
> > @@ -73,29 +73,15 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
> >
> >         spin_lock_irqsave(&i915->mm.obj_lock, flags);
> >         for (phase = phases; *phase; phase++) {
> > -               LIST_HEAD(keep);
> > -
> > -               while ((obj = first_mm_object(*phase))) {
> > -                       list_move_tail(&obj->mm.link, &keep);
> > -
> > -                       /* Beware the background _i915_gem_free_objects */
> > -                       if (!kref_get_unless_zero(&obj->base.refcount))
> > -                               continue;
> > -
> > -                       spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
> > -
> > -                       i915_gem_object_lock(obj, NULL);
> > -                       drm_WARN_ON(&i915->drm,
> > -                           i915_gem_object_set_to_gtt_domain(obj, false));
> > -                       i915_gem_object_unlock(obj);
> > -                       i915_gem_object_put(obj);
> > -
> > -                       spin_lock_irqsave(&i915->mm.obj_lock, flags);
> > +               list_for_each_entry(obj, *phase, mm.link) {
> > +                       if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
> > +                               flush |= (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0;
> > +                       __start_cpu_write(obj); /* presume auto-hibernate */
> >                 }
> > -
> > -               list_splice_tail(&keep, *phase);
> >         }
> >         spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
> > +       if (flush)
> > +               wbinvd_on_all_cpus();
> 
> Hmmm, this builds on !CONFIG_X86?

It builds; but does it do anything? The answer is no, but finding the
answer to that is a bridge I can cross later -- it's probably something
like flush_dcache_range(0, HUGEVAL) / __flush_dcache_all(). I expect it
to be a solved problem, just not sure what the solution is.

Maybe dev_warn() instead of a quiet macro.
-Chris
Matthew Auld Jan. 19, 2021, 5:26 p.m. UTC | #3
On Tue, 19 Jan 2021 at 14:49, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> When flushing objects larger than the CPU cache it is preferable to use
> a single wbinvd() rather than overlapping clflush(). At runtime, we
> avoid wbinvd() due to its system-wide latencies, but during
> singlethreaded suspend, no one will observe the imposed latency and we
> can opt for the faster wbinvd to clear all objects in a single hit.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 40d3e40500fa..38c1298cb14b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -11,6 +11,12 @@ 
 
 #include "i915_drv.h"
 
+#if defined(CONFIG_X86)
+#include <asm/smp.h>
+#else
+#define wbinvd_on_all_cpus()
+#endif
+
 void i915_gem_suspend(struct drm_i915_private *i915)
 {
 	GEM_TRACE("%s\n", dev_name(i915->drm.dev));
@@ -32,13 +38,6 @@  void i915_gem_suspend(struct drm_i915_private *i915)
 	i915_gem_drain_freed_objects(i915);
 }
 
-static struct drm_i915_gem_object *first_mm_object(struct list_head *list)
-{
-	return list_first_entry_or_null(list,
-					struct drm_i915_gem_object,
-					mm.link);
-}
-
 void i915_gem_suspend_late(struct drm_i915_private *i915)
 {
 	struct drm_i915_gem_object *obj;
@@ -48,6 +47,7 @@  void i915_gem_suspend_late(struct drm_i915_private *i915)
 		NULL
 	}, **phase;
 	unsigned long flags;
+	bool flush = false;
 
 	/*
 	 * Neither the BIOS, ourselves or any other kernel
@@ -73,29 +73,15 @@  void i915_gem_suspend_late(struct drm_i915_private *i915)
 
 	spin_lock_irqsave(&i915->mm.obj_lock, flags);
 	for (phase = phases; *phase; phase++) {
-		LIST_HEAD(keep);
-
-		while ((obj = first_mm_object(*phase))) {
-			list_move_tail(&obj->mm.link, &keep);
-
-			/* Beware the background _i915_gem_free_objects */
-			if (!kref_get_unless_zero(&obj->base.refcount))
-				continue;
-
-			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
-
-			i915_gem_object_lock(obj, NULL);
-			drm_WARN_ON(&i915->drm,
-			    i915_gem_object_set_to_gtt_domain(obj, false));
-			i915_gem_object_unlock(obj);
-			i915_gem_object_put(obj);
-
-			spin_lock_irqsave(&i915->mm.obj_lock, flags);
+		list_for_each_entry(obj, *phase, mm.link) {
+			if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+				flush |= (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0;
+			__start_cpu_write(obj); /* presume auto-hibernate */
 		}
-
-		list_splice_tail(&keep, *phase);
 	}
 	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+	if (flush)
+		wbinvd_on_all_cpus();
 }
 
 void i915_gem_resume(struct drm_i915_private *i915)