[3/3] drm/i915: Only move to the CPU write domain if keeping the GTT pages

Message ID 1440673461-13716-4-git-send-email-ankitprasad.r.sharma@intel.com (mailing list archive)
State New, archived

Commit Message

ankitprasad.r.sharma@intel.com Aug. 27, 2015, 11:04 a.m. UTC
From: Chris Wilson <chris@chris-wilson.co.uk>

We have for a long time been ultra-paranoid about the situation whereby
we hand back pages to the system that have been written to by the GPU
and potentially simultaneously by the user through a CPU mmapping. We
can relax this restriction when we know that the cache domain tracking
is true and there can be no stale cacheline invalidations. This is
true if the object has never been CPU mmapped, as all internal accesses
(i.e. kmap/iomap) are carefully flushed. For a CPU mmapping, one would
expect that the invalid cache lines are resolved on PTE/TLB shootdown
during munmap(), so the only situation we need to be paranoid about is
when such a CPU mmapping exists at the time of put_pages. Given that we
need to treat put_pages carefully as we may return live data to the
system that we want to use again in the future (i.e. I915_MADV_WILLNEED
pages), we can simply treat a live CPU mmapping as a special case of
WILLNEED (which it is!). Any I915_MADV_DONTNEED pages and their
mmappings are shot down immediately following put_pages.

v2: Add a new flag to track whether a cached CPU mapping was ever
created for the object. This is needed as we have verified that the CPU
cachelines are not invalidated upon munmap(). So to ensure correctness,
the object still needs to be moved to the CPU write domain in
put_pages(), even if there are no live CPU mappings for
I915_MADV_DONTNEED pages.

v3: Formatted comments, removed redundant braces (Chris)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  5 ++++
 drivers/gpu/drm/i915/i915_gem.c | 62 +++++++++++++++++++++++++++++++----------
 2 files changed, 52 insertions(+), 15 deletions(-)
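The decision this patch introduces can also be read in isolation from the
diff below. The following is a minimal, userspace-only sketch of the new
policy; the enum, struct and helper names are simplified stand-ins invented
for illustration, not the real i915 definitions.

/* Minimal userspace model of the new put_pages() policy. The types and
 * names here are hypothetical stand-ins, not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

enum madv { MADV_WILLNEED, MADV_DONTNEED };

struct gem_object_model {
	enum madv madv;
	/* Set once the object has ever had a cached CPU mapping */
	bool has_stale_cachelines;
};

/* Mirrors the mmap ioctl change: a cached (non-GTT) CPU mmap marks the
 * object as potentially holding stale cachelines. */
static void model_cpu_mmap_cached(struct gem_object_model *obj)
{
	obj->has_stale_cachelines = true;
}

/* Mirrors the put_pages change: flush to the CPU domain only when the
 * data must stay coherent (WILLNEED) or a cached CPU mmap ever existed. */
static bool model_needs_cpu_flush(const struct gem_object_model *obj)
{
	return obj->madv == MADV_WILLNEED || obj->has_stale_cachelines;
}

int main(void)
{
	struct gem_object_model a = { MADV_DONTNEED, false };
	struct gem_object_model b = { MADV_DONTNEED, false };

	model_cpu_mmap_cached(&b);

	printf("never mmapped, DONTNEED: flush=%d\n", model_needs_cpu_flush(&a));
	printf("ever mmapped,  DONTNEED: flush=%d\n", model_needs_cpu_flush(&b));
	return 0;
}

Built with any C compiler, this model should print flush=0 for a DONTNEED
object that was never CPU mmapped and flush=1 once a cached CPU mapping has
ever existed, which is the condition the patch adds to
i915_gem_object_put_pages_gtt().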

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8db905a..bb85401 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2096,6 +2096,11 @@  struct drm_i915_gem_object {
 
 	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
 
+	/*
+	 * Whether the object was ever mapped with a cached CPU mapping
+	 */
+	unsigned int has_stale_cachelines:1;
+
 	unsigned int pin_display;
 
 	struct sg_table *pages;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fa4cfb0..43e2a9f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1749,7 +1749,17 @@  i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 		else
 			addr = -ENOMEM;
 		up_write(&mm->mmap_sem);
+	} else {
+		mutex_lock(&dev->struct_mutex);
+		/*
+		 * The cached mapping could lead to stale cachelines, so the
+		 * object pages need to be invalidated when they are released
+		 * back to the kernel.
+		 */
+		to_intel_bo(obj)->has_stale_cachelines = 1;
+		mutex_unlock(&dev->struct_mutex);
 	}
+
 	drm_gem_object_unreference_unlocked(obj);
 	if (IS_ERR((void *)addr))
 		return addr;
@@ -2166,24 +2176,46 @@  i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 
 	BUG_ON(obj->madv == __I915_MADV_PURGED);
 
-	ret = i915_gem_object_set_to_cpu_domain(obj, true);
-	if (ret) {
-		/* In the event of a disaster, abandon all caches and
-		 * hope for the best.
-		 */
-		WARN_ON(ret != -EIO);
-		i915_gem_clflush_object(obj, true);
-		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
-	}
-
-	i915_gem_gtt_finish_object(obj);
-
-	if (i915_gem_object_needs_bit17_swizzle(obj))
-		i915_gem_object_save_bit_17_swizzle(obj);
+	/* If we need to access the data in the future, we need to
+	 * be sure that the contents of the object are coherent with
+	 * the CPU prior to releasing the pages back to the system.
+	 * Once we unpin them, the mm is free to move them to different
+	 * zones or even swap them out to disk - all without our
+	 * intervention. (Though we could track such operations with our
+	 * own gemfs, if we ever write one.) As such, if we want to keep
+	 * the data, set it to the CPU domain now just in case someone
+	 * else touches it.
+	 *
+	 * For a long time we have been paranoid about handing back
+	 * pages to the system with stale cacheline invalidations. For
+	 * all internal use (kmap/iomap) we know the domain tracking is
+	 * accurate, as we always flush the cachelines when we stop
+	 * using the pages. However, the userspace API is lax and the
+	 * user can CPU mmap the object and invalidate cachelines
+	 * without our accurate tracking. So we maintain a flag for
+	 * each object that has ever been CPU mmapped, based on which
+	 * we invalidate the cachelines upon put_pages().
+	 */
+	if (obj->madv == I915_MADV_WILLNEED || obj->has_stale_cachelines) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, true);
+		if (ret) {
+			/* In the event of a disaster, abandon all caches and
+			 * hope for the best.
+			 */
+			WARN_ON(ret != -EIO);
+			i915_gem_clflush_object(obj, true);
+			obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+			obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+		}
 
-	if (obj->madv == I915_MADV_DONTNEED)
+		if (obj->madv == I915_MADV_WILLNEED)
+			if (i915_gem_object_needs_bit17_swizzle(obj))
+				i915_gem_object_save_bit_17_swizzle(obj);
+	} else
 		obj->dirty = 0;
 
+	i915_gem_gtt_finish_object(obj);
+
 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
 		struct page *page = sg_page_iter_page(&sg_iter);