@@ -2099,6 +2099,15 @@ struct drm_i915_gem_object {
unsigned int cache_level:3;
unsigned int cache_dirty:1;
+ /*
+ * Tracks if the CPU cache has been completely clflushed.
+ * !cache_clean does not imply cache_dirty (there is some data in the
+ * CPU cachelines, but has not been dirtied), but cache_clean
+ * does imply !cache_dirty (no data in cachelines, so not dirty also).
+ * Actually cache_dirty tracks whether we have been omitting clflushes.
+ */
+ unsigned int cache_clean:1;
+
unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
unsigned int pin_display;
@@ -3552,6 +3552,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
trace_i915_gem_object_clflush(obj);
drm_clflush_sg(obj->pages);
obj->cache_dirty = false;
+ obj->cache_clean = true;
return true;
}
@@ -3982,7 +3983,18 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
/* Flush the CPU cache if it's still invalid. */
if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
- i915_gem_clflush_object(obj, false);
+ /* If an object is moved out of the CPU domain following a
+ * CPU write and before a GPU or GTT write, we will clflush
+ * it out of the CPU cache, and mark the cache as clean.
+ * As the object has not been accessed on the CPU since
+ * (i.e. the CPU cache is still clean and it is out of the CPU
+ * domain), we know that no CPU cache line contains stale data
+ * and so we can skip invalidating the CPU cache in preparing
+ * to read from the object.
+ */
+ if (!obj->cache_clean)
+ i915_gem_clflush_object(obj, false);
+ obj->cache_clean = false;
obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
}