@@ -1422,7 +1422,9 @@ void i915_gem_init_global_gtt(struct drm_device *dev,
/* i915_gem_evict.c */
int __must_check i915_gem_evict_something(struct drm_device *dev, int min_size,
- unsigned alignment, bool mappable);
+ unsigned alignment,
+ unsigned cache_level,
+ bool mappable);
int i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only);
/* i915_gem_stolen.c */
@@ -2700,6 +2700,36 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
return 0;
}
+/*
+ * Check whether @gtt_space can carry @cache_level given the colors of the
+ * nodes on either side of it in the drm_mm node list.
+ *
+ * Returns true if the device has an LLC, if @gtt_space is NULL, or if its
+ * node_list is empty. Otherwise returns false when either
+ *  - the previous node is allocated, has no hole following it, and has a
+ *    color different from @cache_level, or
+ *  - the next node is allocated, @gtt_space has no hole following it, and
+ *    the next node's color differs from @cache_level;
+ * and true in all remaining cases.
+ */
+static bool i915_gem_valid_gtt_space(struct drm_device *dev,
+				     struct drm_mm_node *gtt_space,
+				     unsigned long cache_level)
+{
+	struct drm_mm_node *other;
+
+	/* On non-LLC machines we have to be careful when putting differing
+	 * types of snoopable memory together to avoid the prefetcher
+	 * crossing memory domains and dying.
+	 */
+	if (HAS_LLC(dev))
+		return true;
+
+	/* Nothing to validate for an unbound object. */
+	if (gtt_space == NULL)
+		return true;
+
+	if (list_empty(&gtt_space->node_list))
+		return true;
+
+	/* Neighbour before us: conflicts only if it runs right up to us. */
+	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
+	if (other->allocated && !other->hole_follows && other->color != cache_level)
+		return false;
+
+	/* Neighbour after us: conflicts only if we run right up to it. */
+	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
+	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
+		return false;
+
+	return true;
+}
+
/**
* Finds free space in the GTT aperture and binds the object there.
*/
@@ -2755,35 +2785,46 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
if (map_and_fenceable)
free_space =
drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
- size, alignment, 0,
+ size, alignment, obj->cache_level,
0, dev_priv->mm.gtt_mappable_end,
- 0);
+ false);
else
free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
- size, alignment, 0, 0);
+ size, alignment, obj->cache_level,
+ false);
if (free_space != NULL) {
if (map_and_fenceable)
obj->gtt_space =
drm_mm_get_block_range_generic(free_space,
- size, alignment, 0,
+ size, alignment, obj->cache_level,
0, dev_priv->mm.gtt_mappable_end,
- 0);
+ false);
else
obj->gtt_space =
- drm_mm_get_block(free_space, size, alignment);
+ drm_mm_get_block_generic(free_space,
+ size, alignment, obj->cache_level,
+ false);
}
if (obj->gtt_space == NULL) {
/* If the gtt is empty and we're still having trouble
* fitting our object in, we're out of memory.
*/
ret = i915_gem_evict_something(dev, size, alignment,
+ obj->cache_level,
map_and_fenceable);
if (ret)
return ret;
goto search_free;
}
+ if (WARN_ON(!i915_gem_valid_gtt_space(dev,
+ obj->gtt_space,
+ obj->cache_level))) {
+ drm_mm_put_block(obj->gtt_space);
+ obj->gtt_space = NULL;
+ return -EINVAL;
+ }
ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
if (ret) {
@@ -3004,6 +3045,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
return -EBUSY;
}
+ if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) {
+ ret = i915_gem_object_unbind(obj);
+ if (ret)
+ return ret;
+ }
+
if (obj->gtt_space) {
ret = i915_gem_object_finish_gpu(obj);
if (ret)
@@ -3015,7 +3062,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
* registers with snooped memory, so relinquish any fences
* currently pointing to our region in the aperture.
*/
- if (INTEL_INFO(obj->base.dev)->gen < 6) {
+ if (INTEL_INFO(dev)->gen < 6) {
ret = i915_gem_object_put_fence(obj);
if (ret)
return ret;
@@ -44,7 +44,8 @@ mark_free(struct drm_i915_gem_object *obj, struct list_head *unwind)
int
i915_gem_evict_something(struct drm_device *dev, int min_size,
- unsigned alignment, bool mappable)
+ unsigned alignment, unsigned cache_level,
+ bool mappable)
{
drm_i915_private_t *dev_priv = dev->dev_private;
struct list_head eviction_list, unwind_list;
@@ -79,11 +80,11 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
INIT_LIST_HEAD(&unwind_list);
if (mappable)
drm_mm_init_scan_with_range(&dev_priv->mm.gtt_space,
- min_size, alignment, 0,
+ min_size, alignment, cache_level,
0, dev_priv->mm.gtt_mappable_end);
else
drm_mm_init_scan(&dev_priv->mm.gtt_space,
- min_size, alignment, 0);
+ min_size, alignment, cache_level);
/* First see if there is a large enough contiguous idle region... */
list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list) {
@@ -422,6 +422,23 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
undo_idling(dev_priv, interruptible);
}
+/*
+ * Color-adjust hook for the GTT drm_mm (installed by
+ * i915_gem_init_global_gtt() when the device has no LLC).
+ *
+ * Shrinks the candidate range [*start, *end) by 4096 bytes on each side
+ * whose neighbour has a color different from @color:
+ *  - *start is advanced when @node itself has a different color;
+ *  - *end is pulled back when @node's node_list is non-empty and the node
+ *    that follows it is allocated with a different color.
+ * The 4096 is presumably one GTT page used as a guard -- confirm.
+ */
+static void i915_gtt_color_adjust(struct drm_mm_node *node,
+				  unsigned long color,
+				  unsigned long *start,
+				  unsigned long *end)
+{
+	struct drm_mm_node *following;
+
+	if (node->color != color)
+		*start += 4096;
+
+	/* No successor to compare against. */
+	if (list_empty(&node->node_list))
+		return;
+
+	following = list_entry(node->node_list.next,
+			       struct drm_mm_node,
+			       node_list);
+	if (following->allocated && following->color != color)
+		*end -= 4096;
+}
+
void i915_gem_init_global_gtt(struct drm_device *dev,
unsigned long start,
unsigned long mappable_end,
@@ -431,6 +448,8 @@ void i915_gem_init_global_gtt(struct drm_device *dev,
/* Substract the guard page ... */
drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE);
+ if (!HAS_LLC(dev))
+ dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;
dev_priv->mm.gtt_start = start;
dev_priv->mm.gtt_mappable_end = mappable_end;
Several functions of the GPU have the restriction that differing memory
domains cannot be placed next to each other (as the GPU may prefetch
beyond the end of one domain and hang as it crosses into the other
domain).

We use the facility of the drm_mm to mark ranges with a particular color
that corresponds to the cache attributes of those pages in order to
prevent allocating adjacent blocks of differing memory types.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Conflicts:
	drivers/gpu/drm/i915/i915_drv.h
	drivers/gpu/drm/i915/i915_gem.c
	drivers/gpu/drm/i915/i915_gem_evict.c
---
 drivers/gpu/drm/i915/i915_drv.h       |  4 ++-
 drivers/gpu/drm/i915/i915_gem.c       | 61 +++++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_gem_evict.c |  7 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.c   | 19 ++++++++++
 4 files changed, 80 insertions(+), 11 deletions(-)