Message ID | 20210119144912.12653-1-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/6] drm/i915/gem: Amalgamate clflushes on suspend | expand |
On Tue, 19 Jan 2021 at 14:49, Chris Wilson <chris@chris-wilson.co.uk> wrote: > > When flushing objects larger than the CPU cache it is preferrable to use > a single wbinvd() rather than overlapping clflush(). At runtime, we > avoid wbinvd() due to its system-wide latencies, but during > singlethreaded suspend, no one will observe the imposed latency and we > can opt for the faster wbinvd to clear all objects in a single hit. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/gem/i915_gem_pm.c | 40 +++++++++----------------- > 1 file changed, 13 insertions(+), 27 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c > index 40d3e40500fa..38c1298cb14b 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c > @@ -11,6 +11,12 @@ > > #include "i915_drv.h" > > +#if defined(CONFIG_X86) > +#include <asm/smp.h> > +#else > +#define wbinvd_on_all_cpus() > +#endif > + > void i915_gem_suspend(struct drm_i915_private *i915) > { > GEM_TRACE("%s\n", dev_name(i915->drm.dev)); > @@ -32,13 +38,6 @@ void i915_gem_suspend(struct drm_i915_private *i915) > i915_gem_drain_freed_objects(i915); > } > > -static struct drm_i915_gem_object *first_mm_object(struct list_head *list) > -{ > - return list_first_entry_or_null(list, > - struct drm_i915_gem_object, > - mm.link); > -} > - > void i915_gem_suspend_late(struct drm_i915_private *i915) > { > struct drm_i915_gem_object *obj; > @@ -48,6 +47,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) > NULL > }, **phase; > unsigned long flags; > + bool flush = false; > > /* > * Neither the BIOS, ourselves or any other kernel > @@ -73,29 +73,15 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) > > spin_lock_irqsave(&i915->mm.obj_lock, flags); > for (phase = phases; *phase; phase++) { > - LIST_HEAD(keep); > - > - while ((obj = first_mm_object(*phase))) { > - list_move_tail(&obj->mm.link, &keep); 
> - > - /* Beware the background _i915_gem_free_objects */ > - if (!kref_get_unless_zero(&obj->base.refcount)) > - continue; > - > - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); > - > - i915_gem_object_lock(obj, NULL); > - drm_WARN_ON(&i915->drm, > - i915_gem_object_set_to_gtt_domain(obj, false)); > - i915_gem_object_unlock(obj); > - i915_gem_object_put(obj); > - > - spin_lock_irqsave(&i915->mm.obj_lock, flags); > + list_for_each_entry(obj, *phase, mm.link) { > + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) > + flush |= (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0; > + __start_cpu_write(obj); /* presume auto-hibernate */ > } > - > - list_splice_tail(&keep, *phase); > } > spin_unlock_irqrestore(&i915->mm.obj_lock, flags); > + if (flush) > + wbinvd_on_all_cpus(); Hmmm, this builds on !CONFIG_X86? > } > > void i915_gem_resume(struct drm_i915_private *i915) > -- > 2.20.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Quoting Matthew Auld (2021-01-19 15:30:41) > On Tue, 19 Jan 2021 at 14:49, Chris Wilson <chris@chris-wilson.co.uk> wrote: > > > > When flushing objects larger than the CPU cache it is preferrable to use > > a single wbinvd() rather than overlapping clflush(). At runtime, we > > avoid wbinvd() due to its system-wide latencies, but during > > singlethreaded suspend, no one will observe the imposed latency and we > > can opt for the faster wbinvd to clear all objects in a single hit. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > drivers/gpu/drm/i915/gem/i915_gem_pm.c | 40 +++++++++----------------- > > 1 file changed, 13 insertions(+), 27 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c > > index 40d3e40500fa..38c1298cb14b 100644 > > --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c > > @@ -11,6 +11,12 @@ > > > > #include "i915_drv.h" > > > > +#if defined(CONFIG_X86) > > +#include <asm/smp.h> > > +#else > > +#define wbinvd_on_all_cpus() > > +#endif > > + > > void i915_gem_suspend(struct drm_i915_private *i915) > > { > > GEM_TRACE("%s\n", dev_name(i915->drm.dev)); > > @@ -32,13 +38,6 @@ void i915_gem_suspend(struct drm_i915_private *i915) > > i915_gem_drain_freed_objects(i915); > > } > > > > -static struct drm_i915_gem_object *first_mm_object(struct list_head *list) > > -{ > > - return list_first_entry_or_null(list, > > - struct drm_i915_gem_object, > > - mm.link); > > -} > > - > > void i915_gem_suspend_late(struct drm_i915_private *i915) > > { > > struct drm_i915_gem_object *obj; > > @@ -48,6 +47,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) > > NULL > > }, **phase; > > unsigned long flags; > > + bool flush = false; > > > > /* > > * Neither the BIOS, ourselves or any other kernel > > @@ -73,29 +73,15 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) > > > > spin_lock_irqsave(&i915->mm.obj_lock, flags); > 
> for (phase = phases; *phase; phase++) { > > - LIST_HEAD(keep); > > - > > - while ((obj = first_mm_object(*phase))) { > > - list_move_tail(&obj->mm.link, &keep); > > - > > - /* Beware the background _i915_gem_free_objects */ > > - if (!kref_get_unless_zero(&obj->base.refcount)) > > - continue; > > - > > - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); > > - > > - i915_gem_object_lock(obj, NULL); > > - drm_WARN_ON(&i915->drm, > > - i915_gem_object_set_to_gtt_domain(obj, false)); > > - i915_gem_object_unlock(obj); > > - i915_gem_object_put(obj); > > - > > - spin_lock_irqsave(&i915->mm.obj_lock, flags); > > + list_for_each_entry(obj, *phase, mm.link) { > > + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) > > + flush |= (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0; > > + __start_cpu_write(obj); /* presume auto-hibernate */ > > } > > - > > - list_splice_tail(&keep, *phase); > > } > > spin_unlock_irqrestore(&i915->mm.obj_lock, flags); > > + if (flush) > > + wbinvd_on_all_cpus(); > > Hmmm, this builds on !CONFIG_X86? It builds; but does it do anything? The answer is no, but finding the answer to that is a bridge I can cross later -- it's probably something like flush_dcache_range(0, HUGEVAL) / __flush_dcache_all() I expect it to be a solved probably, just not sure what the solution is. Maybe dev_warn() instead of a quiet macro. -Chris
On Tue, 19 Jan 2021 at 14:49, Chris Wilson <chris@chris-wilson.co.uk> wrote: > > When flushing objects larger than the CPU cache it is preferable to use > a single wbinvd() rather than overlapping clflush(). At runtime, we > avoid wbinvd() due to its system-wide latencies, but during > single-threaded suspend, no one will observe the imposed latency and we > can opt for the faster wbinvd to clear all objects in a single hit. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Matthew Auld <matthew.auld@intel.com>
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 40d3e40500fa..38c1298cb14b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -11,6 +11,12 @@ #include "i915_drv.h" +#if defined(CONFIG_X86) +#include <asm/smp.h> +#else +#define wbinvd_on_all_cpus() +#endif + void i915_gem_suspend(struct drm_i915_private *i915) { GEM_TRACE("%s\n", dev_name(i915->drm.dev)); @@ -32,13 +38,6 @@ void i915_gem_suspend(struct drm_i915_private *i915) i915_gem_drain_freed_objects(i915); } -static struct drm_i915_gem_object *first_mm_object(struct list_head *list) -{ - return list_first_entry_or_null(list, - struct drm_i915_gem_object, - mm.link); -} - void i915_gem_suspend_late(struct drm_i915_private *i915) { struct drm_i915_gem_object *obj; @@ -48,6 +47,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) NULL }, **phase; unsigned long flags; + bool flush = false; /* * Neither the BIOS, ourselves or any other kernel @@ -73,29 +73,15 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) spin_lock_irqsave(&i915->mm.obj_lock, flags); for (phase = phases; *phase; phase++) { - LIST_HEAD(keep); - - while ((obj = first_mm_object(*phase))) { - list_move_tail(&obj->mm.link, &keep); - - /* Beware the background _i915_gem_free_objects */ - if (!kref_get_unless_zero(&obj->base.refcount)) - continue; - - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - - i915_gem_object_lock(obj, NULL); - drm_WARN_ON(&i915->drm, - i915_gem_object_set_to_gtt_domain(obj, false)); - i915_gem_object_unlock(obj); - i915_gem_object_put(obj); - - spin_lock_irqsave(&i915->mm.obj_lock, flags); + list_for_each_entry(obj, *phase, mm.link) { + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + flush |= (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0; + __start_cpu_write(obj); /* presume auto-hibernate */ } - - list_splice_tail(&keep, *phase); } spin_unlock_irqrestore(&i915->mm.obj_lock, flags); + if 
(flush) + wbinvd_on_all_cpus(); } void i915_gem_resume(struct drm_i915_private *i915)
When flushing objects larger than the CPU cache it is preferable to use a single wbinvd() rather than overlapping clflush(). At runtime, we avoid wbinvd() due to its system-wide latencies, but during single-threaded suspend, no one will observe the imposed latency and we can opt for the faster wbinvd to clear all objects in a single hit. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 40 +++++++++----------------- 1 file changed, 13 insertions(+), 27 deletions(-)