--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2791,9 +2791,8 @@ int i915_vma_unbind(struct i915_vma *vma)
i915_gem_gtt_finish_object(obj);
list_del(&vma->mm_list);
- /* Avoid an unnecessary call to unbind on rebind. */
if (i915_is_ggtt(vma->vm))
- obj->map_and_fenceable = true;
+ obj->map_and_fenceable = false;
drm_mm_remove_node(&vma->node);
i915_gem_vma_destroy(vma);
@@ -4114,8 +4113,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
obj->fence_reg = I915_FENCE_REG_NONE;
obj->madv = I915_MADV_WILLNEED;
- /* Avoid an unnecessary call to unbind on the first bind. */
- obj->map_and_fenceable = true;
i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
}
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
struct eb_vmas {
struct list_head vmas;
@@ -529,14 +530,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
}
static int
-need_reloc_mappable(struct i915_vma *vma)
-{
- struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
- i915_is_ggtt(vma->vm);
-}
-
-static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct intel_ring_buffer *ring,
bool *need_reloc)
@@ -544,19 +537,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
- bool need_fence;
unsigned flags;
int ret;
flags = 0;
-
- need_fence =
- has_fenced_gpu_access &&
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode != I915_TILING_NONE;
- if (need_fence || need_reloc_mappable(vma))
+ if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
flags |= PIN_MAPPABLE;
-
if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
flags |= PIN_GLOBAL;
@@ -592,6 +578,27 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
return 0;
}
+static bool
+need_reloc_mappable(struct i915_vma *vma)
+{
+ struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+
+ if (entry->relocation_count == 0)
+ return false;
+
+ if (!i915_is_ggtt(vma->vm))
+ return false;
+
+ /* See also use_cpu_reloc() */
+ if (HAS_LLC(vma->obj->base.dev))
+ return false;
+
+ if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+ return false;
+
+ return true;
+}
+
static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
struct list_head *vmas,
@@ -624,9 +631,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
obj->tiling_mode != I915_TILING_NONE;
need_mappable = need_fence || need_reloc_mappable(vma);
- if (need_mappable)
+ if (need_mappable) {
+ entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
list_move(&vma->exec_list, &ordered_vmas);
- else
+ } else
list_move_tail(&vma->exec_list, &ordered_vmas);
obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
@@ -654,25 +662,14 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
/* Unbind any ill-fitting objects or pin. */
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- bool need_fence, need_mappable;
obj = vma->obj;
if (!drm_mm_node_allocated(&vma->node))
continue;
- need_fence =
- has_fenced_gpu_access &&
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode != I915_TILING_NONE;
- need_mappable = need_fence || need_reloc_mappable(vma);
-
- WARN_ON((need_mappable || need_fence) &&
- !i915_is_ggtt(vma->vm));
-
- if ((entry->alignment &&
- vma->node.start & (entry->alignment - 1)) ||
- (need_mappable && !obj->map_and_fenceable))
+ if ((entry->alignment && vma->node.start & (entry->alignment - 1)) ||
+ (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable))
ret = i915_vma_unbind(vma);
else
ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -1185,33 +1182,28 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
- if (flags & I915_DISPATCH_SECURE &&
- !batch_obj->has_global_gtt_mapping) {
- /* When we have multiple VMs, we'll need to make sure that we
- * allocate space first */
- struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
- BUG_ON(!vma);
- vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
- }
+ if (flags & I915_DISPATCH_SECURE) {
+ ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+ if (ret)
+ goto err;
- if (flags & I915_DISPATCH_SECURE)
exec_start += i915_gem_obj_ggtt_offset(batch_obj);
- else
+ } else
exec_start += i915_gem_obj_offset(batch_obj, vm);
ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
if (ret)
- goto err;
+ goto err_batch;
ret = i915_switch_context(ring, file, ctx);
if (ret)
- goto err;
+ goto err_batch;
if (ring == &dev_priv->ring[RCS] &&
mode != dev_priv->relative_constants_mode) {
ret = intel_ring_begin(ring, 4);
if (ret)
- goto err;
+ goto err_batch;
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
@@ -1225,30 +1217,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
ret = i915_reset_gen7_sol_offsets(dev, ring);
if (ret)
- goto err;
+ goto err_batch;
}
-
exec_len = args->batch_len;
if (cliprects) {
for (i = 0; i < args->num_cliprects; i++) {
ret = i915_emit_box(dev, &cliprects[i],
args->DR1, args->DR4);
if (ret)
- goto err;
+ goto err_batch;
ret = ring->dispatch_execbuffer(ring,
exec_start, exec_len,
flags);
if (ret)
- goto err;
+ goto err_batch;
}
} else {
ret = ring->dispatch_execbuffer(ring,
exec_start, exec_len,
flags);
if (ret)
- goto err;
+ goto err_batch;
}
trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
@@ -1256,6 +1247,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
+err_batch:
+ if (flags & I915_DISPATCH_SECURE)
+ i915_gem_object_ggtt_unpin(batch_obj);
err:
/* the request owns the ref now */
i915_gem_context_unreference(ctx);
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -359,10 +359,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
*/
obj->map_and_fenceable =
- !i915_gem_obj_ggtt_bound(obj) ||
- (i915_gem_obj_ggtt_offset(obj) +
- obj->base.size <= dev_priv->gtt.mappable_end &&
- i915_gem_object_fence_ok(obj, args->tiling_mode));
+ i915_gem_obj_ggtt_bound(obj) &&
+ i915_gem_obj_ggtt_offset(obj) + obj->base.size <= dev_priv->gtt.mappable_end &&
+ i915_gem_object_fence_ok(obj, args->tiling_mode);
/* Rebind if we need a change of alignment */
if (!obj->map_and_fenceable) {

With ppgtt, it is no longer correct to mark an object as
map_and_fenceable if we simply unbind it from the global gtt. This has
consequences during execbuffer where we simply use
obj->map_and_fenceable in use_cpu_reloc() to decide which access method
to use for writing into the object. Now for a ppgtt object,
map_and_fenceable will be true when it is not bound into the ggtt but
only in a ppgtt, leading to an invalid access on a non-llc
architecture.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c            |  5 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 92 ++++++++++++++----------------
 drivers/gpu/drm/i915/i915_gem_tiling.c     |  7 +--
 3 files changed, 47 insertions(+), 57 deletions(-)
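
For reviewers unfamiliar with the relocation path, the sketch below is a
rough reconstruction of the use_cpu_reloc() decision the commit message
refers to, pieced together from the checks that the new
need_reloc_mappable() mirrors ("See also use_cpu_reloc()"); it is
illustrative only, not copied from the tree this patch applies to.

/*
 * Illustrative sketch only: approximates the use_cpu_reloc() check
 * described in the commit message, reconstructed from the conditions
 * mirrored in need_reloc_mappable() above.
 */
static inline bool use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return HAS_LLC(obj->base.dev) ||
	       obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
	       !obj->map_and_fenceable; /* previously stale for ppgtt-only objects */
}

Before this patch, i915_vma_unbind() reset map_and_fenceable to true on
a GGTT unbind even when the object remained bound only in a ppgtt, and
freshly initialised objects defaulted to true. On a non-LLC machine with
a non-CPU write domain, use_cpu_reloc() would then return false and the
relocation would be written through a GTT mapping the object no longer
had. Clearing the flag on GGTT unbind (and dropping the init-time
default) makes the flag mean what its name says: currently bound within
the mappable GGTT aperture and fenceable.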