drm/i915: Flush all user surfaces prior to first use
diff mbox series

Message ID 20190718090334.28297-1-chris@chris-wilson.co.uk
State New
Headers show
Series
  • drm/i915: Flush all user surfaces prior to first use
Related show

Commit Message

Chris Wilson July 18, 2019, 9:03 a.m. UTC
Since userspace has the ability to bypass the CPU cache from within its
unprivileged command stream, we have to flush the CPU cache to memory
in order to overwrite the previous contents on creation.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 26 ++++++-----------------
 1 file changed, 7 insertions(+), 19 deletions(-)

Comments

Chris Wilson July 18, 2019, 9:14 a.m. UTC | #1
Quoting Chris Wilson (2019-07-18 10:03:34)
> Since userspace has the ability to bypass the CPU cache from within its
> unprivileged command stream, we have to flush the CPU cache to memory
> in order to overwrite the previous contents on creation.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: stable@vger.kernel.org
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 26 ++++++-----------------
>  1 file changed, 7 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> index d2a1158868e7..f752b326d399 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> @@ -459,7 +459,6 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
>  {
>         struct drm_i915_gem_object *obj;
>         struct address_space *mapping;
> -       unsigned int cache_level;
>         gfp_t mask;
>         int ret;
>  
> @@ -498,24 +497,13 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
>         obj->write_domain = I915_GEM_DOMAIN_CPU;
>         obj->read_domains = I915_GEM_DOMAIN_CPU;
>  
> -       if (HAS_LLC(i915))
> -               /* On some devices, we can have the GPU use the LLC (the CPU
> -                * cache) for about a 10% performance improvement
> -                * compared to uncached.  Graphics requests other than
> -                * display scanout are coherent with the CPU in
> -                * accessing this cache.  This means in this mode we
> -                * don't need to clflush on the CPU side, and on the
> -                * GPU side we only need to flush internal caches to
> -                * get data visible to the CPU.
> -                *
> -                * However, we maintain the display planes as UC, and so
> -                * need to rebind when first used as such.
> -                */
> -               cache_level = I915_CACHE_LLC;
> -       else
> -               cache_level = I915_CACHE_NONE;
> -
> -       i915_gem_object_set_cache_coherency(obj, cache_level);
> +       /*
> +        * Note that userspace has control over cache-bypass
> +        * via its command stream, so even on LLC architectures
> +        * we have to flush out the CPU cache to memory to
> +        * clear previous contents.
> +        */
> +       i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);

An alternative would be to do a GPU clear, but that requires some
confidence that the first access will be from the GPU (or else we pay the
extra latency). Do I hear a request for placement flags in the extended
create_ioctl?
-Chris

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index d2a1158868e7..f752b326d399 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -459,7 +459,6 @@  i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
 {
 	struct drm_i915_gem_object *obj;
 	struct address_space *mapping;
-	unsigned int cache_level;
 	gfp_t mask;
 	int ret;
 
@@ -498,24 +497,13 @@  i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
 	obj->write_domain = I915_GEM_DOMAIN_CPU;
 	obj->read_domains = I915_GEM_DOMAIN_CPU;
 
-	if (HAS_LLC(i915))
-		/* On some devices, we can have the GPU use the LLC (the CPU
-		 * cache) for about a 10% performance improvement
-		 * compared to uncached.  Graphics requests other than
-		 * display scanout are coherent with the CPU in
-		 * accessing this cache.  This means in this mode we
-		 * don't need to clflush on the CPU side, and on the
-		 * GPU side we only need to flush internal caches to
-		 * get data visible to the CPU.
-		 *
-		 * However, we maintain the display planes as UC, and so
-		 * need to rebind when first used as such.
-		 */
-		cache_level = I915_CACHE_LLC;
-	else
-		cache_level = I915_CACHE_NONE;
-
-	i915_gem_object_set_cache_coherency(obj, cache_level);
+	/*
+	 * Note that userspace has control over cache-bypass
+	 * via its command stream, so even on LLC architectures
+	 * we have to flush out the CPU cache to memory to
+	 * clear previous contents.
+	 */
+	i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
 
 	trace_i915_gem_object_create(obj);