[5/6] drm/i915: Track vma activity per fence.context, not per engine

Message ID 20180629225419.5832-5-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson June 29, 2018, 10:54 p.m. UTC
In the next patch, we will want to be able to use more flexible request
timelines that can hop between engines. From the vma pov, we can then no
longer rely on the binding of this request to an engine, and so cannot
ensure that different requests are ordered through a per-engine
timeline; instead we must track the activity of all timelines. (We track
activity on the vma itself to prevent unbinding from the HW before the
HW has finished accessing it.)

v2: Switch to an rbtree for 32b safety (since using a u64 as a radixtree
index is fraught with aliasing of unsigned longs).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c   |   3 -
 drivers/gpu/drm/i915/i915_gpu_error.c |  14 +---
 drivers/gpu/drm/i915/i915_gpu_error.h |   2 +-
 drivers/gpu/drm/i915/i915_request.h   |   1 +
 drivers/gpu/drm/i915/i915_vma.c       | 112 +++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_vma.h       |  46 +++--------
 6 files changed, 98 insertions(+), 80 deletions(-)
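
To illustrate the 32b aliasing the v2 note refers to: the kernel radix
tree is indexed by unsigned long, which is only 32 bits wide on a 32b
kernel, so two distinct u64 fence contexts can truncate to the same
index. A minimal userspace sketch (illustration only, not part of the
patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Two distinct 64-bit timeline ids (fence.context values). */
	uint64_t ctx_a = 0x100000001ULL;
	uint64_t ctx_b = 0x200000001ULL;

	/* A radix tree index is an unsigned long; on a 32b kernel that
	 * is 32 bits wide, so the upper halves are silently discarded. */
	uint32_t idx_a = (uint32_t)ctx_a;
	uint32_t idx_b = (uint32_t)ctx_b;

	printf("idx_a=%x idx_b=%x alias=%s\n",
	       (unsigned)idx_a, (unsigned)idx_b,
	       idx_a == idx_b ? "yes" : "no");
	return 0;
}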

Comments

Tvrtko Ursulin July 3, 2018, 5:28 p.m. UTC | #1
On 29/06/2018 23:54, Chris Wilson wrote:
> In the next patch, we will want to be able to use more flexible request
> timelines that can hop between engines. From the vma pov, we can then no
> longer rely on the binding of this request to an engine, and so cannot
> ensure that different requests are ordered through a per-engine
> timeline; instead we must track the activity of all timelines. (We track
> activity on the vma itself to prevent unbinding from the HW before the
> HW has finished accessing it.)
> 
> v2: Switch to an rbtree for 32b safety (since using a u64 as a radixtree
> index is fraught with aliasing of unsigned longs).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_gtt.c   |   3 -
>   drivers/gpu/drm/i915/i915_gpu_error.c |  14 +---
>   drivers/gpu/drm/i915/i915_gpu_error.h |   2 +-
>   drivers/gpu/drm/i915/i915_request.h   |   1 +
>   drivers/gpu/drm/i915/i915_vma.c       | 112 +++++++++++++++++++-------
>   drivers/gpu/drm/i915/i915_vma.h       |  46 +++--------
>   6 files changed, 98 insertions(+), 80 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index c6aa761ca085..50fcf74248f2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1996,7 +1996,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
>   	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
>   	struct i915_ggtt *ggtt = &i915->ggtt;
>   	struct i915_vma *vma;
> -	int i;
>   
>   	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
>   	GEM_BUG_ON(size > ggtt->vm.total);
> @@ -2005,8 +2004,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
>   	if (!vma)
>   		return ERR_PTR(-ENOMEM);
>   
> -	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> -		init_request_active(&vma->last_read[i], NULL);

At first I thought you needed to initialize the rb tree, but then
figured out that access to it is guarded by the active_count.

>   	init_request_active(&vma->last_fence, NULL);
>   
>   	vma->vm = &ggtt->vm;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index df524c9cad40..8c81cf3aa182 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -335,21 +335,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
>   				struct drm_i915_error_buffer *err,
>   				int count)
>   {
> -	int i;
> -
>   	err_printf(m, "%s [%d]:\n", name, count);
>   
>   	while (count--) {
> -		err_printf(m, "    %08x_%08x %8u %02x %02x [ ",
> +		err_printf(m, "    %08x_%08x %8u %02x %02x %02x",
>   			   upper_32_bits(err->gtt_offset),
>   			   lower_32_bits(err->gtt_offset),
>   			   err->size,
>   			   err->read_domains,
> -			   err->write_domain);
> -		for (i = 0; i < I915_NUM_ENGINES; i++)
> -			err_printf(m, "%02x ", err->rseqno[i]);
> -
> -		err_printf(m, "] %02x", err->wseqno);
> +			   err->write_domain,
> +			   err->wseqno);
>   		err_puts(m, tiling_flag(err->tiling));
>   		err_puts(m, dirty_flag(err->dirty));
>   		err_puts(m, purgeable_flag(err->purgeable));
> @@ -1021,13 +1016,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>   		       struct i915_vma *vma)
>   {
>   	struct drm_i915_gem_object *obj = vma->obj;
> -	int i;
>   
>   	err->size = obj->base.size;
>   	err->name = obj->base.name;
>   
> -	for (i = 0; i < I915_NUM_ENGINES; i++)
> -		err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);

We could still flatten the tree and capture the list of
fence.context:seqno here, to be included in the error state. Or do you
think it is not useful?

>   	err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
>   	err->engine = __active_get_engine_id(&obj->frontbuffer_write);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index 58910f1dc67c..f893a4e8b783 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -177,7 +177,7 @@ struct i915_gpu_state {
>   	struct drm_i915_error_buffer {
>   		u32 size;
>   		u32 name;
> -		u32 rseqno[I915_NUM_ENGINES], wseqno;
> +		u32 wseqno;
>   		u64 gtt_offset;
>   		u32 read_domains;
>   		u32 write_domain;
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index a355a081485f..e1c9365dfefb 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -380,6 +380,7 @@ static inline void
>   init_request_active(struct i915_gem_active *active,
>   		    i915_gem_retire_fn retire)
>   {
> +	RCU_INIT_POINTER(active->request, NULL);
>   	INIT_LIST_HEAD(&active->link);
>   	active->retire = retire ?: i915_gem_retire_noop;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 7635c27e7e8b..2faad2a1d00e 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -63,18 +63,20 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
>   
>   #endif
>   
> +struct i915_vma_active {
> +	struct i915_gem_active base;
> +	struct i915_vma *vma;
> +	struct rb_node node;
> +	u64 timeline;

If my quick calculations are correct this is (8 + 16 + 8) + 8 + 20 + 8 =
68 bytes - just unluckily over the 64-byte slab, so at some point I think
it will warrant a dedicated slab to avoid wastage.

> +};
> +
>   static void
> -i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
> +__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
>   {
> -	const unsigned int idx = rq->engine->id;
> -	struct i915_vma *vma =
> -		container_of(active, struct i915_vma, last_read[idx]);
>   	struct drm_i915_gem_object *obj = vma->obj;
>   
> -	GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
> -
> -	i915_vma_clear_active(vma, idx);
> -	if (i915_vma_is_active(vma))
> +	GEM_BUG_ON(!i915_vma_is_active(vma));
> +	if (--vma->active_count)
>   		return;
>   
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> @@ -108,6 +110,15 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
>   	}
>   }
>   
> +static void
> +i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> +	struct i915_vma_active *active =
> +		container_of(base, typeof(*active), base);
> +
> +	__i915_vma_retire(active->vma, rq);
> +}
> +
>   static struct i915_vma *
>   vma_create(struct drm_i915_gem_object *obj,
>   	   struct i915_address_space *vm,
> @@ -115,7 +126,6 @@ vma_create(struct drm_i915_gem_object *obj,
>   {
>   	struct i915_vma *vma;
>   	struct rb_node *rb, **p;
> -	int i;
>   
>   	/* The aliasing_ppgtt should never be used directly! */
>   	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
> @@ -124,8 +134,8 @@ vma_create(struct drm_i915_gem_object *obj,
>   	if (vma == NULL)
>   		return ERR_PTR(-ENOMEM);
>   
> -	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> -		init_request_active(&vma->last_read[i], i915_vma_retire);
> +	vma->active = RB_ROOT;
> +
>   	init_request_active(&vma->last_fence, NULL);
>   	vma->vm = vm;
>   	vma->ops = &vm->vma_ops;
> @@ -778,13 +788,11 @@ void i915_vma_reopen(struct i915_vma *vma)
>   static void __i915_vma_destroy(struct i915_vma *vma)
>   {
>   	struct drm_i915_private *i915 = vma->vm->i915;
> -	int i;
> +	struct i915_vma_active *iter, *n;
>   
>   	GEM_BUG_ON(vma->node.allocated);
>   	GEM_BUG_ON(vma->fence);
>   
> -	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> -		GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i]));
>   	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
>   
>   	list_del(&vma->obj_link);
> @@ -795,6 +803,11 @@ static void __i915_vma_destroy(struct i915_vma *vma)
>   	if (!i915_vma_is_ggtt(vma))
>   		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
>   
> +	rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
> +		GEM_BUG_ON(i915_gem_active_isset(&iter->base));
> +		kfree(iter);
> +	}
> +
>   	kmem_cache_free(i915->vmas, vma);
>   }
>   
> @@ -878,16 +891,54 @@ static void export_fence(struct i915_vma *vma,
>   	reservation_object_unlock(resv);
>   }
>   
> +static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)

lookup_or_create? Or get_active?

> +{
> +	struct i915_vma_active *active;
> +	struct rb_node **p, *parent;
> +
> +	parent = NULL;
> +	p = &vma->active.rb_node;
> +	while (*p) {
> +		parent = *p;
> +
> +		active = rb_entry(parent, struct i915_vma_active, node);
> +		if (active->timeline == idx)
> +			return &active->base;
> +
> +		if (active->timeline < idx)
> +			p = &parent->rb_right;
> +		else
> +			p = &parent->rb_left;
> +	}
> +
> +	active = kmalloc(sizeof(*active), GFP_KERNEL);
> +	if (unlikely(!active))
> +		return ERR_PTR(-ENOMEM);
> +
> +	init_request_active(&active->base, i915_vma_retire);
> +	active->vma = vma;
> +	active->timeline = idx;
> +
> +	rb_link_node(&active->node, parent, p);
> +	rb_insert_color(&active->node, &vma->active);
> +
> +	return &active->base;
> +}
> +
>   int i915_vma_move_to_active(struct i915_vma *vma,
>   			    struct i915_request *rq,
>   			    unsigned int flags)
>   {
>   	struct drm_i915_gem_object *obj = vma->obj;
> -	const unsigned int idx = rq->engine->id;
> +	struct i915_gem_active *active;
>   
>   	lockdep_assert_held(&rq->i915->drm.struct_mutex);
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>   
> +	active = lookup_active(vma, rq->fence.context);
> +	if (IS_ERR(active))
> +		return PTR_ERR(active);
> +
>   	/*
>   	 * Add a reference if we're newly entering the active list.
>   	 * The order in which we add operations to the retirement queue is
> @@ -896,11 +947,13 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>   	 * add the active reference first and queue for it to be dropped
>   	 * *last*.
>   	 */
> -	if (!i915_vma_is_active(vma))
> +	if (!i915_gem_active_isset(active) && !vma->active_count++) {

Wouldn't the vma->active_count check (which is i915_vma_is_active) be
enough? Can it be zero with active _set_?

> +		list_move_tail(&vma->vm_link, &vma->vm->active_list);
>   		obj->active_count++;
> -	i915_vma_set_active(vma, idx);
> -	i915_gem_active_set(&vma->last_read[idx], rq);
> -	list_move_tail(&vma->vm_link, &vma->vm->active_list);
> +	}
> +	i915_gem_active_set(active, rq);
> +	GEM_BUG_ON(!i915_vma_is_active(vma));
> +	GEM_BUG_ON(!obj->active_count);
>   
>   	obj->write_domain = 0;
>   	if (flags & EXEC_OBJECT_WRITE) {
> @@ -922,7 +975,6 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>   
>   int i915_vma_unbind(struct i915_vma *vma)
>   {
> -	unsigned long active;
>   	int ret;
>   
>   	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> @@ -932,9 +984,8 @@ int i915_vma_unbind(struct i915_vma *vma)
>   	 * have side-effects such as unpinning or even unbinding this vma.
>   	 */
>   	might_sleep();
> -	active = i915_vma_get_active(vma);
> -	if (active) {
> -		int idx;
> +	if (i915_vma_is_active(vma)) {

Guarding the tree with count might become problematic when removing
struct mutex. So perhaps it would be better to rely on the fact that the
tree can always be iterated, and when it is empty it is automatically a
no-op.
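
i.e. something like this sketch (illustrative only, borrowing the loop
from the hunk below), relying on an empty-tree iteration being free:

/* Iterating an empty vma->active tree falls straight through, so no
 * separate i915_vma_is_active() guard would be needed for correctness. */
struct i915_vma_active *active, *n;

rbtree_postorder_for_each_entry_safe(active, n, &vma->active, node) {
	ret = i915_gem_active_retire(&active->base,
				     &vma->vm->i915->drm.struct_mutex);
	if (ret)
		goto unpin;
}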

> +		struct i915_vma_active *active, *n;
>   
>   		/*
>   		 * When a closed VMA is retired, it is unbound - eek.
> @@ -951,18 +1002,17 @@ int i915_vma_unbind(struct i915_vma *vma)
>   		 */
>   		__i915_vma_pin(vma);
>   
> -		for_each_active(active, idx) {
> -			ret = i915_gem_active_retire(&vma->last_read[idx],
> +		rbtree_postorder_for_each_entry_safe(active, n,
> +						     &vma->active, node) {
> +			ret = i915_gem_active_retire(&active->base,
>   						     &vma->vm->i915->drm.struct_mutex);
>   			if (ret)
> -				break;
> -		}
> -
> -		if (!ret) {
> -			ret = i915_gem_active_retire(&vma->last_fence,
> -						     &vma->vm->i915->drm.struct_mutex);
> +				goto unpin;
>   		}
>   
> +		ret = i915_gem_active_retire(&vma->last_fence,
> +					     &vma->vm->i915->drm.struct_mutex);
> +unpin:
>   		__i915_vma_unpin(vma);
>   		if (ret)
>   			return ret;
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index a218b689e418..c297b0a0dc47 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -26,6 +26,7 @@
>   #define __I915_VMA_H__
>   
>   #include <linux/io-mapping.h>
> +#include <linux/rbtree.h>
>   
>   #include <drm/drm_mm.h>
>   
> @@ -94,8 +95,8 @@ struct i915_vma {
>   #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
>   #define I915_VMA_GGTT_WRITE	BIT(12)
>   
> -	unsigned int active;
> -	struct i915_gem_active last_read[I915_NUM_ENGINES];
> +	unsigned int active_count;
> +	struct rb_root active;
>   	struct i915_gem_active last_fence;
>   
>   	/**
> @@ -138,6 +139,15 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
>   
>   void i915_vma_unpin_and_release(struct i915_vma **p_vma);
>   
> +static inline bool i915_vma_is_active(struct i915_vma *vma)
> +{
> +	return vma->active_count;
> +}
> +
> +int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> +					 struct i915_request *rq,
> +					 unsigned int flags);
> +
>   static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
>   {
>   	return vma->flags & I915_VMA_GGTT;
> @@ -187,38 +197,6 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
>   	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
>   }
>   
> -static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
> -{
> -	return vma->active;
> -}
> -
> -static inline bool i915_vma_is_active(const struct i915_vma *vma)
> -{
> -	return i915_vma_get_active(vma);
> -}
> -
> -static inline void i915_vma_set_active(struct i915_vma *vma,
> -				       unsigned int engine)
> -{
> -	vma->active |= BIT(engine);
> -}
> -
> -static inline void i915_vma_clear_active(struct i915_vma *vma,
> -					 unsigned int engine)
> -{
> -	vma->active &= ~BIT(engine);
> -}
> -
> -static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
> -					      unsigned int engine)
> -{
> -	return vma->active & BIT(engine);
> -}
> -
> -int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> -					 struct i915_request *rq,
> -					 unsigned int flags);
> -
>   static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
>   {
>   	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> 

Looks fine in general.

Regards,

Tvrtko
Chris Wilson July 3, 2018, 8:29 p.m. UTC | #2
Quoting Tvrtko Ursulin (2018-07-03 18:28:31)
> 
> On 29/06/2018 23:54, Chris Wilson wrote:
> > In the next patch, we will want to be able to use more flexible request
> > timelines that can hop between engines. From the vma pov, we can then no
> > longer rely on the binding of this request to an engine, and so cannot
> > ensure that different requests are ordered through a per-engine
> > timeline; instead we must track the activity of all timelines. (We track
> > activity on the vma itself to prevent unbinding from the HW before the
> > HW has finished accessing it.)
> > 
> > v2: Switch to an rbtree for 32b safety (since using a u64 as a radixtree
> > index is fraught with aliasing of unsigned longs).
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_gtt.c   |   3 -
> >   drivers/gpu/drm/i915/i915_gpu_error.c |  14 +---
> >   drivers/gpu/drm/i915/i915_gpu_error.h |   2 +-
> >   drivers/gpu/drm/i915/i915_request.h   |   1 +
> >   drivers/gpu/drm/i915/i915_vma.c       | 112 +++++++++++++++++++-------
> >   drivers/gpu/drm/i915/i915_vma.h       |  46 +++--------
> >   6 files changed, 98 insertions(+), 80 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index c6aa761ca085..50fcf74248f2 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -1996,7 +1996,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
> >       struct drm_i915_private *i915 = ppgtt->base.vm.i915;
> >       struct i915_ggtt *ggtt = &i915->ggtt;
> >       struct i915_vma *vma;
> > -     int i;
> >   
> >       GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
> >       GEM_BUG_ON(size > ggtt->vm.total);
> > @@ -2005,8 +2004,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
> >       if (!vma)
> >               return ERR_PTR(-ENOMEM);
> >   
> > -     for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> > -             init_request_active(&vma->last_read[i], NULL);
> 
> At first I thought you needed to initialize the rb tree, but then
> figured out that access to it is guarded by the active_count.

Or just NULL initialised. Oversight / rebase error; it should have been
vma->active = RB_ROOT. Looks like I skipped the radixtree init from
earlier versions.
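
i.e. a sketch of the missing hunk in pd_vma_create(), mirroring
vma_create() (assuming it is applied as a fixup):

	vma->active = RB_ROOT;	/* safe even before this: the first
				 * lookup is gated on active_count */
	init_request_active(&vma->last_fence, NULL);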

> >       init_request_active(&vma->last_fence, NULL);
> >   
> >       vma->vm = &ggtt->vm;
> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> > index df524c9cad40..8c81cf3aa182 100644
> > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> > @@ -335,21 +335,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
> >                               struct drm_i915_error_buffer *err,
> >                               int count)
> >   {
> > -     int i;
> > -
> >       err_printf(m, "%s [%d]:\n", name, count);
> >   
> >       while (count--) {
> > -             err_printf(m, "    %08x_%08x %8u %02x %02x [ ",
> > +             err_printf(m, "    %08x_%08x %8u %02x %02x %02x",
> >                          upper_32_bits(err->gtt_offset),
> >                          lower_32_bits(err->gtt_offset),
> >                          err->size,
> >                          err->read_domains,
> > -                        err->write_domain);
> > -             for (i = 0; i < I915_NUM_ENGINES; i++)
> > -                     err_printf(m, "%02x ", err->rseqno[i]);
> > -
> > -             err_printf(m, "] %02x", err->wseqno);
> > +                        err->write_domain,
> > +                        err->wseqno);
> >               err_puts(m, tiling_flag(err->tiling));
> >               err_puts(m, dirty_flag(err->dirty));
> >               err_puts(m, purgeable_flag(err->purgeable));
> > @@ -1021,13 +1016,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
> >                      struct i915_vma *vma)
> >   {
> >       struct drm_i915_gem_object *obj = vma->obj;
> > -     int i;
> >   
> >       err->size = obj->base.size;
> >       err->name = obj->base.name;
> >   
> > -     for (i = 0; i < I915_NUM_ENGINES; i++)
> > -             err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
> 
> We could still flatten the tree and capture the list of
> fence.context:seqno here, to be included in the error state. Or do you
> think it is not useful?

I haven't looked at it for a long time; it's just noise to me at the
moment. Yes, we could flatten the tree, but to be honest I just took the
opportunity to kill it. (Who wants to add a variable-length array to our
unstructured output, along with trying to say which timelines the vma is
active on?)

What I use the vma for in the error state is working out which buffers
are active, and whether the batch buffer tallies. (For severe errors
where userspace lied, it's anybody's guess.) The active seqno isn't
useful in this regard, since it almost always tells us of a later
request in the queue at the time of the hang. What we don't have now (or
after this patch) is the precise list of vma/buffers userspace sent
along with the batch. But it's never been critical enough to worry
about, since the list of active vma is close enough.
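
For the record, flattening the tree at capture time could look like this
sketch (function name hypothetical, not part of this patch, and ignoring
that struct i915_vma_active is local to i915_vma.c):

static void capture_active_timelines(struct drm_i915_error_state_buf *m,
				     struct i915_vma *vma)
{
	struct i915_vma_active *it, *n;

	/* Emit one fence.context:seqno pair per timeline the vma is
	 * still tracked on. */
	rbtree_postorder_for_each_entry_safe(it, n, &vma->active, node)
		err_printf(m, " %llx:%x",
			   (unsigned long long)it->timeline,
			   __active_get_seqno(&it->base));
}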

> >       err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
> >       err->engine = __active_get_engine_id(&obj->frontbuffer_write);
> >   
> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> > index 58910f1dc67c..f893a4e8b783 100644
> > --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> > @@ -177,7 +177,7 @@ struct i915_gpu_state {
> >       struct drm_i915_error_buffer {
> >               u32 size;
> >               u32 name;
> > -             u32 rseqno[I915_NUM_ENGINES], wseqno;
> > +             u32 wseqno;
> >               u64 gtt_offset;
> >               u32 read_domains;
> >               u32 write_domain;
> > diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> > index a355a081485f..e1c9365dfefb 100644
> > --- a/drivers/gpu/drm/i915/i915_request.h
> > +++ b/drivers/gpu/drm/i915/i915_request.h
> > @@ -380,6 +380,7 @@ static inline void
> >   init_request_active(struct i915_gem_active *active,
> >                   i915_gem_retire_fn retire)
> >   {
> > +     RCU_INIT_POINTER(active->request, NULL);
> >       INIT_LIST_HEAD(&active->link);
> >       active->retire = retire ?: i915_gem_retire_noop;
> >   }
> > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> > index 7635c27e7e8b..2faad2a1d00e 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.c
> > +++ b/drivers/gpu/drm/i915/i915_vma.c
> > @@ -63,18 +63,20 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
> >   
> >   #endif
> >   
> > +struct i915_vma_active {
> > +     struct i915_gem_active base;
> > +     struct i915_vma *vma;
> > +     struct rb_node node;
> > +     u64 timeline;
> 
> If my quick calculations are correct this is (8 + 16 + 8) + 8 + 20 + 8 =
> 68 bytes - just unluckily over the 64-byte slab, so at some point I think
> it will warrant a dedicated slab to avoid wastage.

Hmm, isn't it 7 pointers + a u64?

 sizeof(i915_vma_active)=72
 offsetofend(i915_vma_active.base)=32
 offsetofend(i915_vma_active.vma)=40
 offsetofend(i915_vma_active.node)=64
 offsetofend(i915_vma_active.timeline)=72

Bah i915_gem_active is bigger than I remember.

> >   static void
> > -i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
> > +__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
> >   {
> > -     const unsigned int idx = rq->engine->id;
> > -     struct i915_vma *vma =
> > -             container_of(active, struct i915_vma, last_read[idx]);
> >       struct drm_i915_gem_object *obj = vma->obj;
> >   
> > -     GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
> > -
> > -     i915_vma_clear_active(vma, idx);
> > -     if (i915_vma_is_active(vma))
> > +     GEM_BUG_ON(!i915_vma_is_active(vma));
> > +     if (--vma->active_count)
> >               return;
> >   
> >       GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> > @@ -108,6 +110,15 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
> >       }
> >   }
> >   
> > +static void
> > +i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
> > +{
> > +     struct i915_vma_active *active =
> > +             container_of(base, typeof(*active), base);
> > +
> > +     __i915_vma_retire(active->vma, rq);
> > +}
> > +
> >   static struct i915_vma *
> >   vma_create(struct drm_i915_gem_object *obj,
> >          struct i915_address_space *vm,
> > @@ -115,7 +126,6 @@ vma_create(struct drm_i915_gem_object *obj,
> >   {
> >       struct i915_vma *vma;
> >       struct rb_node *rb, **p;
> > -     int i;
> >   
> >       /* The aliasing_ppgtt should never be used directly! */
> >       GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
> > @@ -124,8 +134,8 @@ vma_create(struct drm_i915_gem_object *obj,
> >       if (vma == NULL)
> >               return ERR_PTR(-ENOMEM);
> >   
> > -     for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> > -             init_request_active(&vma->last_read[i], i915_vma_retire);
> > +     vma->active = RB_ROOT;
> > +
> >       init_request_active(&vma->last_fence, NULL);
> >       vma->vm = vm;
> >       vma->ops = &vm->vma_ops;
> > @@ -778,13 +788,11 @@ void i915_vma_reopen(struct i915_vma *vma)
> >   static void __i915_vma_destroy(struct i915_vma *vma)
> >   {
> >       struct drm_i915_private *i915 = vma->vm->i915;
> > -     int i;
> > +     struct i915_vma_active *iter, *n;
> >   
> >       GEM_BUG_ON(vma->node.allocated);
> >       GEM_BUG_ON(vma->fence);
> >   
> > -     for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> > -             GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i]));
> >       GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
> >   
> >       list_del(&vma->obj_link);
> > @@ -795,6 +803,11 @@ static void __i915_vma_destroy(struct i915_vma *vma)
> >       if (!i915_vma_is_ggtt(vma))
> >               i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
> >   
> > +     rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
> > +             GEM_BUG_ON(i915_gem_active_isset(&iter->base));
> > +             kfree(iter);
> > +     }
> > +
> >       kmem_cache_free(i915->vmas, vma);
> >   }
> >   
> > @@ -878,16 +891,54 @@ static void export_fence(struct i915_vma *vma,
> >       reservation_object_unlock(resv);
> >   }
> >   
> > +static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
> 
> lookup_or_create? Or get_active?

If we're having this argument, active_instance. It's what we settled on
last time for i915_gem_object_lookup_vma_or_create.

> >       /*
> >        * Add a reference if we're newly entering the active list.
> >        * The order in which we add operations to the retirement queue is
> > @@ -896,11 +947,13 @@ int i915_vma_move_to_active(struct i915_vma *vma,
> >        * add the active reference first and queue for it to be dropped
> >        * *last*.
> >        */
> > -     if (!i915_vma_is_active(vma))
> > +     if (!i915_gem_active_isset(active) && !vma->active_count++) {
> 
> Wouldn't the vma->active_count check (which is i915_vma_is_active) be
> enough? Can it be zero with active _set_?

No, in the next patch we can have active_count with vma->active unset.

> > @@ -932,9 +984,8 @@ int i915_vma_unbind(struct i915_vma *vma)
> >        * have side-effects such as unpinning or even unbinding this vma.
> >        */
> >       might_sleep();
> > -     active = i915_vma_get_active(vma);
> > -     if (active) {
> > -             int idx;
> > +     if (i915_vma_is_active(vma)) {
> 
> Guarding the tree with count might become problematic when removing
> struct mutex. So perhaps it would be better to rely on the fact that the
> tree can always be iterated, and when it is empty it is automatically a
> no-op.

Iterating the tree still has the same serialisation requirement, so it
doesn't become easier just because we remove vma->active_count. The
vma->active + vma->active_count is guarded by vm->mutex (details to be
thoroughly tested).
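
As a sketch of that future rule (hypothetical: i915_address_space has no
such mutex in this series yet):

static void assert_vma_active_locked(struct i915_vma *vma)
{
	/* vma->active and vma->active_count would be guarded by a
	 * per-address-space lock once struct_mutex is retired. */
	lockdep_assert_held(&vma->vm->mutex);
}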

The global request lock is an unsolved problem. Close to it being the
last problem (other than trying to recover the single client perf loss).

I'm not sure if i915_gem_active can survive in its current form.
-Chris
Tvrtko Ursulin July 4, 2018, 9:43 a.m. UTC | #3
On 03/07/2018 21:29, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-07-03 18:28:31)
>>
>> On 29/06/2018 23:54, Chris Wilson wrote:
>>> In the next patch, we will want to be able to use more flexible request
>>> timelines that can hop between engines. From the vma pov, we can then no
>>> longer rely on the binding of this request to an engine, and so cannot
>>> ensure that different requests are ordered through a per-engine
>>> timeline; instead we must track the activity of all timelines. (We track
>>> activity on the vma itself to prevent unbinding from the HW before the
>>> HW has finished accessing it.)
>>>
>>> v2: Switch to an rbtree for 32b safety (since using a u64 as a radixtree
>>> index is fraught with aliasing of unsigned longs).

[snip]

>>> +struct i915_vma_active {
>>> +     struct i915_gem_active base;
>>> +     struct i915_vma *vma;
>>> +     struct rb_node node;
>>> +     u64 timeline;
>>
>> If my quick calculations are correct this is (8 + 16 + 8) + 8 + 20 + 8 =
>> 68 bytes - just unluckily over the 64-byte slab, so at some point I think
>> it will warrant a dedicated slab to avoid wastage.
> 
> Hmm, isn't it 7 pointers + a u64?
> 
>   sizeof(i915_vma_active)=72
>   offsetofend(i915_vma_active.base)=32
>   offsetofend(i915_vma_active.vma)=40
>   offsetofend(i915_vma_active.node)=64
>   offsetofend(i915_vma_active.timeline)=72
> 
> Bah i915_gem_active is bigger than I remember.

So it goes into the 96-byte bucket and the waste is 24 bytes per entry.
I thought there was only a 128-byte bucket, and 128 minus my incorrect
68 was much more. So OK, we can leave it for later optimisation.
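
The later optimisation could be as simple as this sketch (cache name and
init hook hypothetical):

static struct kmem_cache *slab_vma_active;

/* One-time setup, e.g. alongside the i915->vmas cache: exact 72-byte
 * objects instead of the 96-byte kmalloc bucket. */
static int i915_vma_active_slab_init(void)
{
	slab_vma_active = KMEM_CACHE(i915_vma_active, SLAB_HWCACHE_ALIGN);
	return slab_vma_active ? 0 : -ENOMEM;
}

with lookup_active() then switching from kmalloc() to
kmem_cache_alloc(slab_vma_active, GFP_KERNEL).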

[snip]

>>>        /*
>>>         * Add a reference if we're newly entering the active list.
>>>         * The order in which we add operations to the retirement queue is
>>> @@ -896,11 +947,13 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>>>         * add the active reference first and queue for it to be dropped
>>>         * *last*.
>>>         */
>>> -     if (!i915_vma_is_active(vma))
>>> +     if (!i915_gem_active_isset(active) && !vma->active_count++) {
>>
>> Wouldn't the vma->active_count check (which is i915_vma_is_active) be
>> enough? Can it be zero with active _set_?
> 
> No, in the next patch we can have active_count with vma->active unset.

Definitely. Then please make it only what is needed for this patch, and
change it in the next.

Regards,

Tvrtko
Chris Wilson July 4, 2018, 9:53 a.m. UTC | #4
Quoting Tvrtko Ursulin (2018-07-04 10:43:07)
> 
> On 03/07/2018 21:29, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-07-03 18:28:31)
> >>
> >> On 29/06/2018 23:54, Chris Wilson wrote:
> >>>        /*
> >>>         * Add a reference if we're newly entering the active list.
> >>>         * The order in which we add operations to the retirement queue is
> >>> @@ -896,11 +947,13 @@ int i915_vma_move_to_active(struct i915_vma *vma,
> >>>         * add the active reference first and queue for it to be dropped
> >>>         * *last*.
> >>>         */
> >>> -     if (!i915_vma_is_active(vma))
> >>> +     if (!i915_gem_active_isset(active) && !vma->active_count++) {
> >>
> >> Wouldn't the vma->active_count check (which is i915_vma_is_active) be
> >> enough? Can it be zero with active _set_?
> > 
> > No, in the next patch we can have active_count with vma->active unset.
> 
> Definitely. Then please make it only what is needed for this patch, and
> change it in the next.

It is. active_count == the number of retirement callbacks we will
receive, and so it must not be incremented if we have already accounted
for this one. It expresses the same old rule, now that we have given up
the bitmask and with it the ability to know the state of each
i915_gem_active_isset() simultaneously.
-Chris
Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c6aa761ca085..50fcf74248f2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1996,7 +1996,6 @@  static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct i915_vma *vma;
-	int i;
 
 	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(size > ggtt->vm.total);
@@ -2005,8 +2004,6 @@  static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	if (!vma)
 		return ERR_PTR(-ENOMEM);
 
-	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
-		init_request_active(&vma->last_read[i], NULL);
 	init_request_active(&vma->last_fence, NULL);
 
 	vma->vm = &ggtt->vm;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index df524c9cad40..8c81cf3aa182 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -335,21 +335,16 @@  static void print_error_buffers(struct drm_i915_error_state_buf *m,
 				struct drm_i915_error_buffer *err,
 				int count)
 {
-	int i;
-
 	err_printf(m, "%s [%d]:\n", name, count);
 
 	while (count--) {
-		err_printf(m, "    %08x_%08x %8u %02x %02x [ ",
+		err_printf(m, "    %08x_%08x %8u %02x %02x %02x",
 			   upper_32_bits(err->gtt_offset),
 			   lower_32_bits(err->gtt_offset),
 			   err->size,
 			   err->read_domains,
-			   err->write_domain);
-		for (i = 0; i < I915_NUM_ENGINES; i++)
-			err_printf(m, "%02x ", err->rseqno[i]);
-
-		err_printf(m, "] %02x", err->wseqno);
+			   err->write_domain,
+			   err->wseqno);
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
@@ -1021,13 +1016,10 @@  static void capture_bo(struct drm_i915_error_buffer *err,
 		       struct i915_vma *vma)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
-	int i;
 
 	err->size = obj->base.size;
 	err->name = obj->base.name;
 
-	for (i = 0; i < I915_NUM_ENGINES; i++)
-		err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
 	err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
 	err->engine = __active_get_engine_id(&obj->frontbuffer_write);
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 58910f1dc67c..f893a4e8b783 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -177,7 +177,7 @@  struct i915_gpu_state {
 	struct drm_i915_error_buffer {
 		u32 size;
 		u32 name;
-		u32 rseqno[I915_NUM_ENGINES], wseqno;
+		u32 wseqno;
 		u64 gtt_offset;
 		u32 read_domains;
 		u32 write_domain;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index a355a081485f..e1c9365dfefb 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -380,6 +380,7 @@  static inline void
 init_request_active(struct i915_gem_active *active,
 		    i915_gem_retire_fn retire)
 {
+	RCU_INIT_POINTER(active->request, NULL);
 	INIT_LIST_HEAD(&active->link);
 	active->retire = retire ?: i915_gem_retire_noop;
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 7635c27e7e8b..2faad2a1d00e 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -63,18 +63,20 @@  static void vma_print_allocator(struct i915_vma *vma, const char *reason)
 
 #endif
 
+struct i915_vma_active {
+	struct i915_gem_active base;
+	struct i915_vma *vma;
+	struct rb_node node;
+	u64 timeline;
+};
+
 static void
-i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
+__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
 {
-	const unsigned int idx = rq->engine->id;
-	struct i915_vma *vma =
-		container_of(active, struct i915_vma, last_read[idx]);
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
-
-	i915_vma_clear_active(vma, idx);
-	if (i915_vma_is_active(vma))
+	GEM_BUG_ON(!i915_vma_is_active(vma));
+	if (--vma->active_count)
 		return;
 
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
@@ -108,6 +110,15 @@  i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
 	}
 }
 
+static void
+i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+	struct i915_vma_active *active =
+		container_of(base, typeof(*active), base);
+
+	__i915_vma_retire(active->vma, rq);
+}
+
 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
 	   struct i915_address_space *vm,
@@ -115,7 +126,6 @@  vma_create(struct drm_i915_gem_object *obj,
 {
 	struct i915_vma *vma;
 	struct rb_node *rb, **p;
-	int i;
 
 	/* The aliasing_ppgtt should never be used directly! */
 	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
@@ -124,8 +134,8 @@  vma_create(struct drm_i915_gem_object *obj,
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
-		init_request_active(&vma->last_read[i], i915_vma_retire);
+	vma->active = RB_ROOT;
+
 	init_request_active(&vma->last_fence, NULL);
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
@@ -778,13 +788,11 @@  void i915_vma_reopen(struct i915_vma *vma)
 static void __i915_vma_destroy(struct i915_vma *vma)
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
-	int i;
+	struct i915_vma_active *iter, *n;
 
 	GEM_BUG_ON(vma->node.allocated);
 	GEM_BUG_ON(vma->fence);
 
-	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
-		GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i]));
 	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
 
 	list_del(&vma->obj_link);
@@ -795,6 +803,11 @@  static void __i915_vma_destroy(struct i915_vma *vma)
 	if (!i915_vma_is_ggtt(vma))
 		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
 
+	rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
+		GEM_BUG_ON(i915_gem_active_isset(&iter->base));
+		kfree(iter);
+	}
+
 	kmem_cache_free(i915->vmas, vma);
 }
 
@@ -878,16 +891,54 @@  static void export_fence(struct i915_vma *vma,
 	reservation_object_unlock(resv);
 }
 
+static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
+{
+	struct i915_vma_active *active;
+	struct rb_node **p, *parent;
+
+	parent = NULL;
+	p = &vma->active.rb_node;
+	while (*p) {
+		parent = *p;
+
+		active = rb_entry(parent, struct i915_vma_active, node);
+		if (active->timeline == idx)
+			return &active->base;
+
+		if (active->timeline < idx)
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+
+	active = kmalloc(sizeof(*active), GFP_KERNEL);
+	if (unlikely(!active))
+		return ERR_PTR(-ENOMEM);
+
+	init_request_active(&active->base, i915_vma_retire);
+	active->vma = vma;
+	active->timeline = idx;
+
+	rb_link_node(&active->node, parent, p);
+	rb_insert_color(&active->node, &vma->active);
+
+	return &active->base;
+}
+
 int i915_vma_move_to_active(struct i915_vma *vma,
 			    struct i915_request *rq,
 			    unsigned int flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
-	const unsigned int idx = rq->engine->id;
+	struct i915_gem_active *active;
 
 	lockdep_assert_held(&rq->i915->drm.struct_mutex);
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
+	active = lookup_active(vma, rq->fence.context);
+	if (IS_ERR(active))
+		return PTR_ERR(active);
+
 	/*
 	 * Add a reference if we're newly entering the active list.
 	 * The order in which we add operations to the retirement queue is
@@ -896,11 +947,13 @@  int i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (!i915_vma_is_active(vma))
+	if (!i915_gem_active_isset(active) && !vma->active_count++) {
+		list_move_tail(&vma->vm_link, &vma->vm->active_list);
 		obj->active_count++;
-	i915_vma_set_active(vma, idx);
-	i915_gem_active_set(&vma->last_read[idx], rq);
-	list_move_tail(&vma->vm_link, &vma->vm->active_list);
+	}
+	i915_gem_active_set(active, rq);
+	GEM_BUG_ON(!i915_vma_is_active(vma));
+	GEM_BUG_ON(!obj->active_count);
 
 	obj->write_domain = 0;
 	if (flags & EXEC_OBJECT_WRITE) {
@@ -922,7 +975,6 @@  int i915_vma_move_to_active(struct i915_vma *vma,
 
 int i915_vma_unbind(struct i915_vma *vma)
 {
-	unsigned long active;
 	int ret;
 
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
@@ -932,9 +984,8 @@  int i915_vma_unbind(struct i915_vma *vma)
 	 * have side-effects such as unpinning or even unbinding this vma.
 	 */
 	might_sleep();
-	active = i915_vma_get_active(vma);
-	if (active) {
-		int idx;
+	if (i915_vma_is_active(vma)) {
+		struct i915_vma_active *active, *n;
 
 		/*
 		 * When a closed VMA is retired, it is unbound - eek.
@@ -951,18 +1002,17 @@  int i915_vma_unbind(struct i915_vma *vma)
 		 */
 		__i915_vma_pin(vma);
 
-		for_each_active(active, idx) {
-			ret = i915_gem_active_retire(&vma->last_read[idx],
+		rbtree_postorder_for_each_entry_safe(active, n,
+						     &vma->active, node) {
+			ret = i915_gem_active_retire(&active->base,
 						     &vma->vm->i915->drm.struct_mutex);
 			if (ret)
-				break;
-		}
-
-		if (!ret) {
-			ret = i915_gem_active_retire(&vma->last_fence,
-						     &vma->vm->i915->drm.struct_mutex);
+				goto unpin;
 		}
 
+		ret = i915_gem_active_retire(&vma->last_fence,
+					     &vma->vm->i915->drm.struct_mutex);
+unpin:
 		__i915_vma_unpin(vma);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index a218b689e418..c297b0a0dc47 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -26,6 +26,7 @@ 
 #define __I915_VMA_H__
 
 #include <linux/io-mapping.h>
+#include <linux/rbtree.h>
 
 #include <drm/drm_mm.h>
 
@@ -94,8 +95,8 @@  struct i915_vma {
 #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
 #define I915_VMA_GGTT_WRITE	BIT(12)
 
-	unsigned int active;
-	struct i915_gem_active last_read[I915_NUM_ENGINES];
+	unsigned int active_count;
+	struct rb_root active;
 	struct i915_gem_active last_fence;
 
 	/**
@@ -138,6 +139,15 @@  i915_vma_instance(struct drm_i915_gem_object *obj,
 
 void i915_vma_unpin_and_release(struct i915_vma **p_vma);
 
+static inline bool i915_vma_is_active(struct i915_vma *vma)
+{
+	return vma->active_count;
+}
+
+int __must_check i915_vma_move_to_active(struct i915_vma *vma,
+					 struct i915_request *rq,
+					 unsigned int flags);
+
 static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
 {
 	return vma->flags & I915_VMA_GGTT;
@@ -187,38 +197,6 @@  static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
 	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
 }
 
-static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
-{
-	return vma->active;
-}
-
-static inline bool i915_vma_is_active(const struct i915_vma *vma)
-{
-	return i915_vma_get_active(vma);
-}
-
-static inline void i915_vma_set_active(struct i915_vma *vma,
-				       unsigned int engine)
-{
-	vma->active |= BIT(engine);
-}
-
-static inline void i915_vma_clear_active(struct i915_vma *vma,
-					 unsigned int engine)
-{
-	vma->active &= ~BIT(engine);
-}
-
-static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
-					      unsigned int engine)
-{
-	return vma->active & BIT(engine);
-}
-
-int __must_check i915_vma_move_to_active(struct i915_vma *vma,
-					 struct i915_request *rq,
-					 unsigned int flags);
-
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));