@@ -2096,6 +2096,11 @@ struct drm_i915_gem_object {
unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
+ /*
+ * Whether the object was ever mapped through a cacheable CPU
+ * mapping, and so may still hold stale cachelines
+ */
+ unsigned int has_stale_cachelines:1;
+
unsigned int pin_display;
struct sg_table *pages;
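For reference, the cacheable mapping that sets this flag is the one handed out by DRM_IOCTL_I915_GEM_MMAP when userspace does not request a write-combined mapping (I915_MMAP_WC). A minimal userspace sketch of that path, assuming a valid render-node fd and GEM handle, with error handling trimmed:

        #include <stdint.h>
        #include <string.h>
        #include <sys/ioctl.h>
        #include <drm/i915_drm.h>

        /* Hypothetical helper: cached CPU mmap of a GEM object. After
         * this patch, the kernel marks such objects with
         * has_stale_cachelines on this ioctl path.
         */
        static void *map_gem_cached(int fd, uint32_t handle, uint64_t size)
        {
                struct drm_i915_gem_mmap arg;

                memset(&arg, 0, sizeof(arg));
                arg.handle = handle;
                arg.size = size;
                arg.flags = 0;  /* 0 = cacheable; I915_MMAP_WC would not set the flag */
                if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg))
                        return NULL;
                /* Writes through this pointer may linger in CPU caches. */
                return (void *)(uintptr_t)arg.addr_ptr;
        }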
@@ -1749,7 +1749,17 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
else
addr = -ENOMEM;
up_write(&mm->mmap_sem);
+ } else {
+ mutex_lock(&dev->struct_mutex);
+ /*
+ * A cacheable CPU mapping can leave stale cachelines behind,
+ * so the object's pages will need a cacheline invalidation
+ * when they are released back to the kernel.
+ */
+ to_intel_bo(obj)->has_stale_cachelines = 1;
+ mutex_unlock(&dev->struct_mutex);
}
+
drm_gem_object_unreference_unlocked(obj);
if (IS_ERR((void *)addr))
return addr;
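A note on the locking above: has_stale_cachelines is a C bitfield, and stores to bitfields that share a word with their neighbours (frontbuffer_bits et al.) are not atomic read-modify-writes, hence the struct_mutex around the store. A hypothetical alternative, not what this patch does, would keep the flag as an atomic bit on a separate flags word so the ioctl need not take the mutex:

        #include <linux/bitops.h>

        /* Illustrative only: neither the bit index nor the structure
         * below exist in i915; they sketch the bitfield-vs-atomic-bit
         * trade-off.
         */
        #define BO_HAS_STALE_CACHELINES 0

        struct bo_flags_demo {
                unsigned long flags;    /* would live in drm_i915_gem_object */
        };

        static inline void bo_mark_stale_cachelines(struct bo_flags_demo *bo)
        {
                /* set_bit() is an atomic read-modify-write, so no lock
                 * is needed for this store.
                 */
                set_bit(BO_HAS_STALE_CACHELINES, &bo->flags);
        }

The cost is a full word for the flag group instead of a single bit packed alongside the existing bitfields.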
@@ -2166,24 +2176,46 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
BUG_ON(obj->madv == __I915_MADV_PURGED);
- ret = i915_gem_object_set_to_cpu_domain(obj, true);
- if (ret) {
- /* In the event of a disaster, abandon all caches and
- * hope for the best.
- */
- WARN_ON(ret != -EIO);
- i915_gem_clflush_object(obj, true);
- obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- }
-
- i915_gem_gtt_finish_object(obj);
-
- if (i915_gem_object_needs_bit17_swizzle(obj))
- i915_gem_object_save_bit_17_swizzle(obj);
+ /* If we need to access the data in the future, we need to
+ * be sure that the contents of the object are coherent with
+ * the CPU prior to releasing the pages back to the system.
+ * Once we unpin them, the mm is free to move them to different
+ * zones or even swap them out to disk - all without our
+ * intervention. (Though we could track such operations with our
+ * own gemfs, if we ever write one.) As such, if we want to keep
+ * the data, set it to the CPU domain now just in case someone
+ * else touches it.
+ *
+ * For a long time we have been paranoid about handing pages
+ * back to the system with stale cachelines. For all internal
+ * use (kmap/iomap), we know that the domain tracking is
+ * accurate. However, the userspace API is lax and the user can
+ * CPU mmap the object, dirtying cachelines behind our tracking.
+ * To be sure we always flush those cachelines when we stop
+ * using the pages, we maintain a flag on each object that has
+ * been CPU mmapped, and use it to invalidate the cachelines
+ * here in put_pages().
+ */
+ if (obj->madv == I915_MADV_WILLNEED || obj->has_stale_cachelines) {
+ ret = i915_gem_object_set_to_cpu_domain(obj, true);
+ if (ret) {
+ /* In the event of a disaster, abandon all caches and
+ * hope for the best.
+ */
+ WARN_ON(ret != -EIO);
+ i915_gem_clflush_object(obj, true);
+ obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+ obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+ }
+ if (obj->madv == I915_MADV_WILLNEED &&
+     i915_gem_object_needs_bit17_swizzle(obj))
+ i915_gem_object_save_bit_17_swizzle(obj);
+ }
+
if (obj->madv == I915_MADV_DONTNEED)
obj->dirty = 0;
+
+ i915_gem_gtt_finish_object(obj);
+
for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
struct page *page = sg_page_iter_page(&sg_iter);
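Taken together, the new release policy reduces to a small predicate over the object state; a condensed sketch for clarity (the helper is hypothetical, the fields are the patch's own):

        /* Flush/invalidate on release whenever the data must stay
         * valid (WILLNEED) or a cached CPU mmap may have left stale
         * cachelines; independently, only DONTNEED pages have their
         * dirty bit cleared before being handed back.
         */
        static bool bo_needs_cpu_flush_on_release(const struct drm_i915_gem_object *obj)
        {
                return obj->madv == I915_MADV_WILLNEED ||
                       obj->has_stale_cachelines;
        }

Only the WILLNEED case additionally saves the bit-17 swizzle state, since DONTNEED contents are discardable anyway.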