@@ -3122,9 +3122,6 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
}
int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-int __must_check i915_vma_move_to_active(struct i915_vma *vma,
- struct i915_request *rq,
- unsigned int flags);
int i915_gem_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
struct drm_mode_create_dumb *args);
@@ -1858,67 +1858,6 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
return true;
}
-static void export_fence(struct i915_vma *vma,
- struct i915_request *rq,
- unsigned int flags)
-{
- struct reservation_object *resv = vma->resv;
-
- /*
- * Ignore errors from failing to allocate the new fence, we can't
- * handle an error right now. Worst case should be missed
- * synchronisation leading to rendering corruption.
- */
- reservation_object_lock(resv, NULL);
- if (flags & EXEC_OBJECT_WRITE)
- reservation_object_add_excl_fence(resv, &rq->fence);
- else if (reservation_object_reserve_shared(resv) == 0)
- reservation_object_add_shared_fence(resv, &rq->fence);
- reservation_object_unlock(resv);
-}
-
-int i915_vma_move_to_active(struct i915_vma *vma,
- struct i915_request *rq,
- unsigned int flags)
-{
- struct drm_i915_gem_object *obj = vma->obj;
- const unsigned int idx = rq->engine->id;
-
- lockdep_assert_held(&rq->i915->drm.struct_mutex);
- GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-
- /*
- * Add a reference if we're newly entering the active list.
- * The order in which we add operations to the retirement queue is
- * vital here: mark_active adds to the start of the callback list,
- * such that subsequent callbacks are called first. Therefore we
- * add the active reference first and queue for it to be dropped
- * *last*.
- */
- if (!i915_vma_is_active(vma))
- obj->active_count++;
- i915_vma_set_active(vma, idx);
- i915_gem_active_set(&vma->last_read[idx], rq);
- list_move_tail(&vma->vm_link, &vma->vm->active_list);
-
- obj->write_domain = 0;
- if (flags & EXEC_OBJECT_WRITE) {
- obj->write_domain = I915_GEM_DOMAIN_RENDER;
-
- if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
- i915_gem_active_set(&obj->frontbuffer_write, rq);
-
- obj->read_domains = 0;
- }
- obj->read_domains |= I915_GEM_GPU_DOMAINS;
-
- if (flags & EXEC_OBJECT_NEEDS_FENCE)
- i915_gem_active_set(&vma->last_fence, rq);
-
- export_fence(vma, rq, flags);
- return 0;
-}
-
static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
{
u32 *cs;
@@ -1996,7 +1996,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
struct drm_i915_private *i915 = ppgtt->base.vm.i915;
struct i915_ggtt *ggtt = &i915->ggtt;
struct i915_vma *vma;
- int i;
GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
GEM_BUG_ON(size > ggtt->vm.total);
@@ -2005,8 +2004,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
if (!vma)
return ERR_PTR(-ENOMEM);
- for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
- init_request_active(&vma->last_read[i], NULL);
+ INIT_RADIX_TREE(&vma->active_rt, GFP_KERNEL);
init_request_active(&vma->last_fence, NULL);
vma->vm = &ggtt->vm;
@@ -335,21 +335,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
struct drm_i915_error_buffer *err,
int count)
{
- int i;
-
err_printf(m, "%s [%d]:\n", name, count);
while (count--) {
- err_printf(m, " %08x_%08x %8u %02x %02x [ ",
+ err_printf(m, " %08x_%08x %8u %02x %02x %02x",
upper_32_bits(err->gtt_offset),
lower_32_bits(err->gtt_offset),
err->size,
err->read_domains,
- err->write_domain);
- for (i = 0; i < I915_NUM_ENGINES; i++)
- err_printf(m, "%02x ", err->rseqno[i]);
-
- err_printf(m, "] %02x", err->wseqno);
+ err->write_domain,
+ err->wseqno);
err_puts(m, tiling_flag(err->tiling));
err_puts(m, dirty_flag(err->dirty));
err_puts(m, purgeable_flag(err->purgeable));
@@ -1021,13 +1016,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
- int i;
err->size = obj->base.size;
err->name = obj->base.name;
- for (i = 0; i < I915_NUM_ENGINES; i++)
- err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
err->engine = __active_get_engine_id(&obj->frontbuffer_write);
@@ -177,7 +177,7 @@ struct i915_gpu_state {
struct drm_i915_error_buffer {
u32 size;
u32 name;
- u32 rseqno[I915_NUM_ENGINES], wseqno;
+ u32 wseqno;
u64 gtt_offset;
u32 read_domains;
u32 write_domain;
@@ -380,6 +380,7 @@ static inline void
init_request_active(struct i915_gem_active *active,
i915_gem_retire_fn retire)
{
+ RCU_INIT_POINTER(active->request, NULL);
INIT_LIST_HEAD(&active->link);
active->retire = retire ?: i915_gem_retire_noop;
}
@@ -30,18 +30,18 @@
#include <drm/drm_gem.h>
+struct i915_vma_active {
+ struct i915_gem_active base;
+ struct i915_vma *vma;
+};
+
static void
-i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
+__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
{
- const unsigned int idx = rq->engine->id;
- struct i915_vma *vma =
- container_of(active, struct i915_vma, last_read[idx]);
struct drm_i915_gem_object *obj = vma->obj;
- GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
-
- i915_vma_clear_active(vma, idx);
- if (i915_vma_is_active(vma))
+ GEM_BUG_ON(!i915_vma_is_active(vma));
+ if (--vma->active_count)
return;
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
@@ -75,6 +75,19 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
}
}
+static void
+i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+ struct i915_vma_active *active =
+ container_of(base, typeof(*active), base);
+ struct i915_vma *vma = active->vma;
+
+ GEM_BUG_ON(base != radix_tree_lookup(&vma->active_rt,
+ rq->fence.context));
+
+ __i915_vma_retire(vma, rq);
+}
+
static struct i915_vma *
vma_create(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
@@ -82,7 +95,6 @@ vma_create(struct drm_i915_gem_object *obj,
{
struct i915_vma *vma;
struct rb_node *rb, **p;
- int i;
/* The aliasing_ppgtt should never be used directly! */
GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
@@ -91,8 +103,8 @@ vma_create(struct drm_i915_gem_object *obj,
if (vma == NULL)
return ERR_PTR(-ENOMEM);
- for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
- init_request_active(&vma->last_read[i], i915_vma_retire);
+ INIT_RADIX_TREE(&vma->active_rt, GFP_KERNEL);
+
init_request_active(&vma->last_fence, NULL);
vma->vm = vm;
vma->ops = &vm->vma_ops;
@@ -745,13 +757,12 @@ void i915_vma_reopen(struct i915_vma *vma)
static void __i915_vma_destroy(struct i915_vma *vma)
{
struct drm_i915_private *i915 = vma->vm->i915;
- int i;
+ struct radix_tree_iter iter;
+ void __rcu **slot;
GEM_BUG_ON(vma->node.allocated);
GEM_BUG_ON(vma->fence);
- for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
- GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i]));
GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
list_del(&vma->obj_link);
@@ -762,6 +773,17 @@ static void __i915_vma_destroy(struct i915_vma *vma)
if (!i915_vma_is_ggtt(vma))
i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, &vma->active_rt, &iter, 0) {
+ struct i915_vma_active *active = rcu_dereference_raw(*slot);
+
+ GEM_BUG_ON(i915_gem_active_isset(&active->base));
+ kfree(active);
+
+ radix_tree_delete(&vma->active_rt, iter.index);
+ }
+ rcu_read_unlock();
+
kmem_cache_free(i915->vmas, vma);
}
@@ -826,9 +848,111 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
list_del(&vma->obj->userfault_link);
}
+static void export_fence(struct i915_vma *vma,
+ struct i915_request *rq,
+ unsigned int flags)
+{
+ struct reservation_object *resv = vma->resv;
+
+ /*
+ * Ignore errors from failing to allocate the new fence, we can't
+ * handle an error right now. Worst case should be missed
+ * synchronisation leading to rendering corruption.
+ */
+ reservation_object_lock(resv, NULL);
+ if (flags & EXEC_OBJECT_WRITE)
+ reservation_object_add_excl_fence(resv, &rq->fence);
+ else if (reservation_object_reserve_shared(resv) == 0)
+ reservation_object_add_shared_fence(resv, &rq->fence);
+ reservation_object_unlock(resv);
+}
+
+static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
+{
+ struct i915_vma_active *active;
+ int err;
+
+ /*
+ * XXX Note that the radix_tree uses unsigned longs for it indices,
+ * a problem for us on i386 with 32bit longs. However, the likelihood
+ * of 2 timelines being used on the same VMA aliasing is minimal,
+ * and further reduced by that both timelines must be active
+ * simultaneously to confuse us.
+ */
+ active = radix_tree_lookup(&vma->active_rt, idx);
+ if (likely(active)) {
+ GEM_BUG_ON(i915_gem_active_isset(&active->base) &&
+ idx != i915_gem_active_peek(&active->base,
+ &vma->vm->i915->drm.struct_mutex)->fence.context);
+ return &active->base;
+ }
+
+ active = kmalloc(sizeof(*active), GFP_KERNEL);
+ if (unlikely(!active))
+ return ERR_PTR(-ENOMEM);
+
+ init_request_active(&active->base, i915_vma_retire);
+ active->vma = vma;
+
+ err = radix_tree_insert(&vma->active_rt, idx, active);
+ if (unlikely(err)) {
+ kfree(active);
+ return ERR_PTR(err);
+ }
+
+ return &active->base;
+}
+
+int i915_vma_move_to_active(struct i915_vma *vma,
+ struct i915_request *rq,
+ unsigned int flags)
+{
+ struct drm_i915_gem_object *obj = vma->obj;
+ struct i915_gem_active *active;
+
+ lockdep_assert_held(&rq->i915->drm.struct_mutex);
+ GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+
+ active = lookup_active(vma, rq->fence.context);
+ if (IS_ERR(active))
+ return PTR_ERR(active);
+
+ /*
+ * Add a reference if we're newly entering the active list.
+ * The order in which we add operations to the retirement queue is
+ * vital here: mark_active adds to the start of the callback list,
+ * such that subsequent callbacks are called first. Therefore we
+ * add the active reference first and queue for it to be dropped
+ * *last*.
+ */
+ if (!i915_gem_active_isset(active) && !vma->active_count++) {
+ list_move_tail(&vma->vm_link, &vma->vm->active_list);
+ obj->active_count++;
+ }
+ i915_gem_active_set(active, rq);
+ GEM_BUG_ON(!i915_vma_is_active(vma));
+ GEM_BUG_ON(!obj->active_count);
+
+ obj->write_domain = 0;
+ if (flags & EXEC_OBJECT_WRITE) {
+ obj->write_domain = I915_GEM_DOMAIN_RENDER;
+
+ if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
+ i915_gem_active_set(&obj->frontbuffer_write, rq);
+
+ obj->read_domains = 0;
+ }
+ obj->read_domains |= I915_GEM_GPU_DOMAINS;
+
+ if (flags & EXEC_OBJECT_NEEDS_FENCE)
+ i915_gem_active_set(&vma->last_fence, rq);
+
+ export_fence(vma, rq, flags);
+ return 0;
+}
+
int i915_vma_unbind(struct i915_vma *vma)
{
- unsigned long active;
int ret;
lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
@@ -838,9 +962,9 @@ int i915_vma_unbind(struct i915_vma *vma)
* have side-effects such as unpinning or even unbinding this vma.
*/
might_sleep();
- active = i915_vma_get_active(vma);
- if (active) {
- int idx;
+ if (i915_vma_is_active(vma)) {
+ struct radix_tree_iter iter;
+ void __rcu **slot;
/*
* When a closed VMA is retired, it is unbound - eek.
@@ -857,18 +981,24 @@ int i915_vma_unbind(struct i915_vma *vma)
*/
__i915_vma_pin(vma);
- for_each_active(active, idx) {
- ret = i915_gem_active_retire(&vma->last_read[idx],
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, &vma->active_rt, &iter, 0) {
+ struct i915_vma_active *active =
+ rcu_dereference_raw(*slot);
+ rcu_read_unlock();
+
+ ret = i915_gem_active_retire(&active->base,
&vma->vm->i915->drm.struct_mutex);
if (ret)
- break;
- }
+ goto unpin;
- if (!ret) {
- ret = i915_gem_active_retire(&vma->last_fence,
- &vma->vm->i915->drm.struct_mutex);
+ rcu_read_lock();
}
+ rcu_read_unlock();
+ ret = i915_gem_active_retire(&vma->last_fence,
+ &vma->vm->i915->drm.struct_mutex);
+unpin:
__i915_vma_unpin(vma);
if (ret)
return ret;
@@ -26,6 +26,7 @@
#define __I915_VMA_H__
#include <linux/io-mapping.h>
+#include <linux/radix-tree.h>
#include <drm/drm_mm.h>
@@ -94,8 +95,8 @@ struct i915_vma {
#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT)
#define I915_VMA_GGTT_WRITE BIT(12)
- unsigned int active;
- struct i915_gem_active last_read[I915_NUM_ENGINES];
+ unsigned int active_count;
+ struct radix_tree_root active_rt;
struct i915_gem_active last_fence;
/**
@@ -138,6 +139,15 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
void i915_vma_unpin_and_release(struct i915_vma **p_vma);
+static inline bool i915_vma_is_active(struct i915_vma *vma)
+{
+ return vma->active_count;
+}
+
+int __must_check i915_vma_move_to_active(struct i915_vma *vma,
+ struct i915_request *rq,
+ unsigned int flags);
+
static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
{
return vma->flags & I915_VMA_GGTT;
@@ -187,34 +197,6 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
}
-static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
-{
- return vma->active;
-}
-
-static inline bool i915_vma_is_active(const struct i915_vma *vma)
-{
- return i915_vma_get_active(vma);
-}
-
-static inline void i915_vma_set_active(struct i915_vma *vma,
- unsigned int engine)
-{
- vma->active |= BIT(engine);
-}
-
-static inline void i915_vma_clear_active(struct i915_vma *vma,
- unsigned int engine)
-{
- vma->active &= ~BIT(engine);
-}
-
-static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
- unsigned int engine)
-{
- return vma->active & BIT(engine);
-}
-
static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
{
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
In the next patch, we will want to be able to use more flexible request timelines that can hop between engines. From the vma pov, we can then not rely on the binding of this request to an engine and so can not ensure that different requests are ordered through a per-engine timeline, and so we must track activity of all timelines. (We track activity on the vma itself to prevent unbinding from HW before the HW has finished accessing it.) For now, let's just ignore the potential issue with trying to use 64b indices with radixtrees on 32b machines, it's unlikely to be a problem in practice... Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_drv.h | 3 - drivers/gpu/drm/i915/i915_gem_execbuffer.c | 61 ------- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 +- drivers/gpu/drm/i915/i915_gpu_error.c | 14 +- drivers/gpu/drm/i915/i915_gpu_error.h | 2 +- drivers/gpu/drm/i915/i915_request.h | 1 + drivers/gpu/drm/i915/i915_vma.c | 180 ++++++++++++++++++--- drivers/gpu/drm/i915/i915_vma.h | 42 ++--- 8 files changed, 173 insertions(+), 134 deletions(-)