diff mbox

[10/42] drm/i915: Defer active reference until required

Message ID 20161007094635.28319-11-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Oct. 7, 2016, 9:46 a.m. UTC
We only need the active reference to keep the object alive after the
handle has been deleted (so as to prevent a synchronous gem_close). Why
then pay the price of a kref on every execbuf when we can insert that
final active ref just in time for the handle deletion?

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h              | 28 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem.c              | 22 +++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem_context.c      |  2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  2 --
 drivers/gpu/drm/i915/i915_gem_gtt.c          |  7 ++++++-
 drivers/gpu/drm/i915/i915_gem_render_state.c |  3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.c      | 15 ++++++++++++---
 8 files changed, 71 insertions(+), 10 deletions(-)

Comments

Tvrtko Ursulin Oct. 7, 2016, 4:35 p.m. UTC | #1
On 07/10/2016 10:46, Chris Wilson wrote:
> We only need the active reference to keep the object alive after the
> handle has been deleted (so as to prevent a synchronous gem_close). Why
> then pay the price of a kref on every execbuf when we can insert that
> final active ref just in time for the handle deletion?

I really dislike this. Where there was elegance with obj/vma_put, it is
now replaced with the out-of-place-looking
__i915_gem_object_release_unless_active. I don't see why higher-level
layers would have to concern themselves with calling something with such
a low-level-sounding name.

How much does this influence performance and in what cases? If 
significant, could we try to come up with something similar but more 
elegant?

Regards,

Tvrtko

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h              | 28 ++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem.c              | 22 +++++++++++++++++++++-
>   drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  2 +-
>   drivers/gpu/drm/i915/i915_gem_context.c      |  2 +-
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  2 --
>   drivers/gpu/drm/i915/i915_gem_gtt.c          |  7 ++++++-
>   drivers/gpu/drm/i915/i915_gem_render_state.c |  3 ++-
>   drivers/gpu/drm/i915/intel_ringbuffer.c      | 15 ++++++++++++---
>   8 files changed, 71 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ee25e265416f..fee5cc92e2f2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2232,6 +2232,12 @@ struct drm_i915_gem_object {
>   	((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
>   
>   	/**
> +	 * Have we taken a reference for the object for incomplete GPU
> +	 * activity?
> +	 */
> +#define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES)
> +
> +	/**
>   	 * This is set if the object has been written to since last bound
>   	 * to the GTT
>   	 */
> @@ -2399,6 +2405,28 @@ i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
>   	return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
>   }
>   
> +static inline bool
> +i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj)
> +{
> +	return test_bit(I915_BO_ACTIVE_REF, &obj->flags);
> +}
> +
> +static inline void
> +i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj)
> +{
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +	__set_bit(I915_BO_ACTIVE_REF, &obj->flags);
> +}
> +
> +static inline void
> +i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
> +{
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +	__clear_bit(I915_BO_ACTIVE_REF, &obj->flags);
> +}
> +
> +void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
> +
>   static inline unsigned int
>   i915_gem_object_get_tiling(struct drm_i915_gem_object *obj)
>   {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7fa5cb764739..b560263bf446 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2618,7 +2618,10 @@ i915_gem_object_retire__read(struct i915_gem_active *active,
>   		list_move_tail(&obj->global_list,
>   			       &request->i915->mm.bound_list);
>   
> -	i915_gem_object_put(obj);
> +	if (i915_gem_object_has_active_reference(obj)) {
> +		i915_gem_object_clear_active_reference(obj);
> +		i915_gem_object_put(obj);
> +	}
>   }
>   
>   static bool i915_context_is_banned(const struct i915_gem_context *ctx)
> @@ -2889,6 +2892,12 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
>   	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
>   		if (vma->vm->file == fpriv)
>   			i915_vma_close(vma);
> +
> +	if (i915_gem_object_is_active(obj) &&
> +	    !i915_gem_object_has_active_reference(obj)) {
> +		i915_gem_object_set_active_reference(obj);
> +		i915_gem_object_get(obj);
> +	}
>   	mutex_unlock(&obj->base.dev->struct_mutex);
>   }
>   
> @@ -4365,6 +4374,17 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>   	intel_runtime_pm_put(dev_priv);
>   }
>   
> +void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
> +{
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +
> +	GEM_BUG_ON(i915_gem_object_has_active_reference(obj));
> +	if (i915_gem_object_is_active(obj))
> +		i915_gem_object_set_active_reference(obj);
> +	else
> +		i915_gem_object_put(obj);
> +}
> +
>   int i915_gem_suspend(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = to_i915(dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> index ed989596d9a3..cb25cad3318c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> @@ -73,7 +73,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
>   		list_for_each_entry_safe(obj, next,
>   					 &pool->cache_list[n],
>   					 batch_pool_link)
> -			i915_gem_object_put(obj);
> +			__i915_gem_object_release_unless_active(obj);
>   
>   		INIT_LIST_HEAD(&pool->cache_list[n]);
>   	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index df10f4e95736..1d2ab73a8f43 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -155,7 +155,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
>   		if (ce->ring)
>   			intel_ring_free(ce->ring);
>   
> -		i915_vma_put(ce->state);
> +		__i915_gem_object_release_unless_active(ce->state->obj);
>   	}
>   
>   	put_pid(ctx->pid);
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 72c7c1855e70..0deecd4e3b6c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1299,8 +1299,6 @@ void i915_vma_move_to_active(struct i915_vma *vma,
>   	 * add the active reference first and queue for it to be dropped
>   	 * *last*.
>   	 */
> -	if (!i915_gem_object_is_active(obj))
> -		i915_gem_object_get(obj);
>   	i915_gem_object_set_active(obj, idx);
>   	i915_gem_active_set(&obj->last_read[idx], req);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 2d846aa39ca5..1c95da8424cb 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -3712,11 +3712,16 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
>   void i915_vma_unpin_and_release(struct i915_vma **p_vma)
>   {
>   	struct i915_vma *vma;
> +	struct drm_i915_gem_object *obj;
>   
>   	vma = fetch_and_zero(p_vma);
>   	if (!vma)
>   		return;
>   
> +	obj = vma->obj;
> +
>   	i915_vma_unpin(vma);
> -	i915_vma_put(vma);
> +	i915_vma_close(vma);
> +
> +	__i915_gem_object_release_unless_active(obj);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index 95b7e9afd5f8..09cf4874c45f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -224,7 +224,8 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
>   	i915_vma_move_to_active(so.vma, req, 0);
>   err_unpin:
>   	i915_vma_unpin(so.vma);
> +	i915_vma_close(so.vma);
>   err_obj:
> -	i915_gem_object_put(obj);
> +	__i915_gem_object_release_unless_active(obj);
>   	return ret;
>   }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index b60c6f09fbfd..f3dfb7ca625d 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1763,14 +1763,19 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine)
>   static void cleanup_status_page(struct intel_engine_cs *engine)
>   {
>   	struct i915_vma *vma;
> +	struct drm_i915_gem_object *obj;
>   
>   	vma = fetch_and_zero(&engine->status_page.vma);
>   	if (!vma)
>   		return;
>   
> +	obj = vma->obj;
> +
>   	i915_vma_unpin(vma);
> -	i915_gem_object_unpin_map(vma->obj);
> -	i915_vma_put(vma);
> +	i915_vma_close(vma);
> +
> +	i915_gem_object_unpin_map(obj);
> +	__i915_gem_object_release_unless_active(obj);
>   }
>   
>   static int init_status_page(struct intel_engine_cs *engine)
> @@ -1968,7 +1973,11 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
>   void
>   intel_ring_free(struct intel_ring *ring)
>   {
> -	i915_vma_put(ring->vma);
> +	struct drm_i915_gem_object *obj = ring->vma->obj;
> +
> +	i915_vma_close(ring->vma);
> +	__i915_gem_object_release_unless_active(obj);
> +
>   	kfree(ring);
>   }
>
Chris Wilson Oct. 7, 2016, 4:58 p.m. UTC | #2
On Fri, Oct 07, 2016 at 05:35:38PM +0100, Tvrtko Ursulin wrote:
> 
> On 07/10/2016 10:46, Chris Wilson wrote:
> >We only need the active reference to keep the object alive after the
> >handle has been deleted (so as to prevent a synchronous gem_close). Why
> >then pay the price of a kref on every execbuf when we can insert that
> >final active ref just in time for the handle deletion?
> 
> I really dislike this.  Where there was elegance with obj/vma_put,
> it is now replaced with out of place looking
> __i915_gem_object_release_unless_active. I don't see why would
> higher level layers have to concern themselves with calling
> something with such a low-level sounding name.
> 
> How much does this influence performance and in what cases? If
> significant, could we try to come up with something similar but more
> elegant?

Back in the day, this was one of the most frequent atomic operations we
did. And whilst perf overemphasizes the stalls from locked instructions,
the sheer number of them we do is significant (since we do one at the
start and end of every execbuf for every object in typical conditions).
Whilst it is less significant in the face of obj->resv undoing all of the
gains, it is still a deep paper cut. (At the GL level, consider about 100
objects per batch, several thousand times a second x 2; these ops are
low-hanging fruit.)

What's needed is a function to take the place of the close_object for
internally allocated objects. It is also worth noting that they are either
already part of a cache, or are suitable for caching....
-Chris
> 
> Regards,
> 
> Tvrtko
> 
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >---
> >  drivers/gpu/drm/i915/i915_drv.h              | 28 ++++++++++++++++++++++++++++
> >  drivers/gpu/drm/i915/i915_gem.c              | 22 +++++++++++++++++++++-
> >  drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  2 +-
> >  drivers/gpu/drm/i915/i915_gem_context.c      |  2 +-
> >  drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  2 --
> >  drivers/gpu/drm/i915/i915_gem_gtt.c          |  7 ++++++-
> >  drivers/gpu/drm/i915/i915_gem_render_state.c |  3 ++-
> >  drivers/gpu/drm/i915/intel_ringbuffer.c      | 15 ++++++++++++---
> >  8 files changed, 71 insertions(+), 10 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index ee25e265416f..fee5cc92e2f2 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -2232,6 +2232,12 @@ struct drm_i915_gem_object {
> >  	((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
> >  	/**
> >+	 * Have we taken a reference for the object for incomplete GPU
> >+	 * activity?
> >+	 */
> >+#define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES)
> >+
> >+	/**
> >  	 * This is set if the object has been written to since last bound
> >  	 * to the GTT
> >  	 */
> >@@ -2399,6 +2405,28 @@ i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
> >  	return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
> >  }
> >+static inline bool
> >+i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj)
> >+{
> >+	return test_bit(I915_BO_ACTIVE_REF, &obj->flags);
> >+}
> >+
> >+static inline void
> >+i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj)
> >+{
> >+	lockdep_assert_held(&obj->base.dev->struct_mutex);
> >+	__set_bit(I915_BO_ACTIVE_REF, &obj->flags);
> >+}
> >+
> >+static inline void
> >+i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
> >+{
> >+	lockdep_assert_held(&obj->base.dev->struct_mutex);
> >+	__clear_bit(I915_BO_ACTIVE_REF, &obj->flags);
> >+}
> >+
> >+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
> >+
> >  static inline unsigned int
> >  i915_gem_object_get_tiling(struct drm_i915_gem_object *obj)
> >  {
> >diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >index 7fa5cb764739..b560263bf446 100644
> >--- a/drivers/gpu/drm/i915/i915_gem.c
> >+++ b/drivers/gpu/drm/i915/i915_gem.c
> >@@ -2618,7 +2618,10 @@ i915_gem_object_retire__read(struct i915_gem_active *active,
> >  		list_move_tail(&obj->global_list,
> >  			       &request->i915->mm.bound_list);
> >-	i915_gem_object_put(obj);
> >+	if (i915_gem_object_has_active_reference(obj)) {
> >+		i915_gem_object_clear_active_reference(obj);
> >+		i915_gem_object_put(obj);
> >+	}
> >  }
> >  static bool i915_context_is_banned(const struct i915_gem_context *ctx)
> >@@ -2889,6 +2892,12 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
> >  	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
> >  		if (vma->vm->file == fpriv)
> >  			i915_vma_close(vma);
> >+
> >+	if (i915_gem_object_is_active(obj) &&
> >+	    !i915_gem_object_has_active_reference(obj)) {
> >+		i915_gem_object_set_active_reference(obj);
> >+		i915_gem_object_get(obj);
> >+	}
> >  	mutex_unlock(&obj->base.dev->struct_mutex);
> >  }
> >@@ -4365,6 +4374,17 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
> >  	intel_runtime_pm_put(dev_priv);
> >  }
> >+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
> >+{
> >+	lockdep_assert_held(&obj->base.dev->struct_mutex);
> >+
> >+	GEM_BUG_ON(i915_gem_object_has_active_reference(obj));
> >+	if (i915_gem_object_is_active(obj))
> >+		i915_gem_object_set_active_reference(obj);
> >+	else
> >+		i915_gem_object_put(obj);
> >+}
> >+
> >  int i915_gem_suspend(struct drm_device *dev)
> >  {
> >  	struct drm_i915_private *dev_priv = to_i915(dev);
> >diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> >index ed989596d9a3..cb25cad3318c 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> >@@ -73,7 +73,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
> >  		list_for_each_entry_safe(obj, next,
> >  					 &pool->cache_list[n],
> >  					 batch_pool_link)
> >-			i915_gem_object_put(obj);
> >+			__i915_gem_object_release_unless_active(obj);
> >  		INIT_LIST_HEAD(&pool->cache_list[n]);
> >  	}
> >diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> >index df10f4e95736..1d2ab73a8f43 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_context.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_context.c
> >@@ -155,7 +155,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
> >  		if (ce->ring)
> >  			intel_ring_free(ce->ring);
> >-		i915_vma_put(ce->state);
> >+		__i915_gem_object_release_unless_active(ce->state->obj);
> >  	}
> >  	put_pid(ctx->pid);
> >diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> >index 72c7c1855e70..0deecd4e3b6c 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> >@@ -1299,8 +1299,6 @@ void i915_vma_move_to_active(struct i915_vma *vma,
> >  	 * add the active reference first and queue for it to be dropped
> >  	 * *last*.
> >  	 */
> >-	if (!i915_gem_object_is_active(obj))
> >-		i915_gem_object_get(obj);
> >  	i915_gem_object_set_active(obj, idx);
> >  	i915_gem_active_set(&obj->last_read[idx], req);
> >diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> >index 2d846aa39ca5..1c95da8424cb 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> >@@ -3712,11 +3712,16 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
> >  void i915_vma_unpin_and_release(struct i915_vma **p_vma)
> >  {
> >  	struct i915_vma *vma;
> >+	struct drm_i915_gem_object *obj;
> >  	vma = fetch_and_zero(p_vma);
> >  	if (!vma)
> >  		return;
> >+	obj = vma->obj;
> >+
> >  	i915_vma_unpin(vma);
> >-	i915_vma_put(vma);
> >+	i915_vma_close(vma);
> >+
> >+	__i915_gem_object_release_unless_active(obj);
> >  }
> >diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> >index 95b7e9afd5f8..09cf4874c45f 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> >@@ -224,7 +224,8 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
> >  	i915_vma_move_to_active(so.vma, req, 0);
> >  err_unpin:
> >  	i915_vma_unpin(so.vma);
> >+	i915_vma_close(so.vma);
> >  err_obj:
> >-	i915_gem_object_put(obj);
> >+	__i915_gem_object_release_unless_active(obj);
> >  	return ret;
> >  }
> >diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> >index b60c6f09fbfd..f3dfb7ca625d 100644
> >--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> >+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> >@@ -1763,14 +1763,19 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine)
> >  static void cleanup_status_page(struct intel_engine_cs *engine)
> >  {
> >  	struct i915_vma *vma;
> >+	struct drm_i915_gem_object *obj;
> >  	vma = fetch_and_zero(&engine->status_page.vma);
> >  	if (!vma)
> >  		return;
> >+	obj = vma->obj;
> >+
> >  	i915_vma_unpin(vma);
> >-	i915_gem_object_unpin_map(vma->obj);
> >-	i915_vma_put(vma);
> >+	i915_vma_close(vma);
> >+
> >+	i915_gem_object_unpin_map(obj);
> >+	__i915_gem_object_release_unless_active(obj);
> >  }
> >  static int init_status_page(struct intel_engine_cs *engine)
> >@@ -1968,7 +1973,11 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
> >  void
> >  intel_ring_free(struct intel_ring *ring)
> >  {
> >-	i915_vma_put(ring->vma);
> >+	struct drm_i915_gem_object *obj = ring->vma->obj;
> >+
> >+	i915_vma_close(ring->vma);
> >+	__i915_gem_object_release_unless_active(obj);
> >+
> >  	kfree(ring);
> >  }
>
Tvrtko Ursulin Oct. 8, 2016, 8:18 a.m. UTC | #3
On 07/10/2016 17:58, Chris Wilson wrote:
> On Fri, Oct 07, 2016 at 05:35:38PM +0100, Tvrtko Ursulin wrote:
>> On 07/10/2016 10:46, Chris Wilson wrote:
>>> We only need the active reference to keep the object alive after the
>>> handle has been deleted (so as to prevent a synchronous gem_close). Why
>>> then pay the price of a kref on every execbuf when we can insert that
>>> final active ref just in time for the handle deletion?
>> I really dislike this.  Where there was elegance with obj/vma_put,
>> it is now replaced with out of place looking
>> __i915_gem_object_release_unless_active. I don't see why would
>> higher level layers have to concern themselves with calling
>> something with such a low-level sounding name.
>>
>> How much does this influence performance and in what cases? If
>> significant, could we try to come up with something similar but more
>> elegant?
> Back in the day, this was one of the most frequent atomic operations we
> did. And whilst perf overemphasizes the stalls from locked instructions,
> the sheer numbers of them we do are significant (since we do one at the
> start and end of every execbuf for every object in typical conditions).
> Whilst it is less significant in the face of obj->resv undoing all of the
> gains, it is still a deep paper cut. (At the GL level, consider about 100
> objects per batch, several thousand times a second x 2, these ops are low
> hanging fruit.)

I understand there are a lot of them (the same is true for many other
operations we do), but how does that translate into benchmark results?

> What's needed is a function to take the place of the close_object for
> internally allocated objects. It is also worth noting that they are either
> already part of a cache, or are suitable for caching....

OK, but those are not x100 per batch.

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ee25e265416f..fee5cc92e2f2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2232,6 +2232,12 @@  struct drm_i915_gem_object {
 	((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
 
 	/**
+	 * Have we taken a reference for the object for incomplete GPU
+	 * activity?
+	 */
+#define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES)
+
+	/**
 	 * This is set if the object has been written to since last bound
 	 * to the GTT
 	 */
@@ -2399,6 +2405,28 @@  i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
 	return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
 }
 
+static inline bool
+i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj)
+{
+	return test_bit(I915_BO_ACTIVE_REF, &obj->flags);
+}
+
+static inline void
+i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	__set_bit(I915_BO_ACTIVE_REF, &obj->flags);
+}
+
+static inline void
+i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	__clear_bit(I915_BO_ACTIVE_REF, &obj->flags);
+}
+
+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
+
 static inline unsigned int
 i915_gem_object_get_tiling(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7fa5cb764739..b560263bf446 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2618,7 +2618,10 @@  i915_gem_object_retire__read(struct i915_gem_active *active,
 		list_move_tail(&obj->global_list,
 			       &request->i915->mm.bound_list);
 
-	i915_gem_object_put(obj);
+	if (i915_gem_object_has_active_reference(obj)) {
+		i915_gem_object_clear_active_reference(obj);
+		i915_gem_object_put(obj);
+	}
 }
 
 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
@@ -2889,6 +2892,12 @@  void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
 		if (vma->vm->file == fpriv)
 			i915_vma_close(vma);
+
+	if (i915_gem_object_is_active(obj) &&
+	    !i915_gem_object_has_active_reference(obj)) {
+		i915_gem_object_set_active_reference(obj);
+		i915_gem_object_get(obj);
+	}
 	mutex_unlock(&obj->base.dev->struct_mutex);
 }
 
@@ -4365,6 +4374,17 @@  void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	intel_runtime_pm_put(dev_priv);
 }
 
+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	GEM_BUG_ON(i915_gem_object_has_active_reference(obj));
+	if (i915_gem_object_is_active(obj))
+		i915_gem_object_set_active_reference(obj);
+	else
+		i915_gem_object_put(obj);
+}
+
 int i915_gem_suspend(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index ed989596d9a3..cb25cad3318c 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -73,7 +73,7 @@  void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
 		list_for_each_entry_safe(obj, next,
 					 &pool->cache_list[n],
 					 batch_pool_link)
-			i915_gem_object_put(obj);
+			__i915_gem_object_release_unless_active(obj);
 
 		INIT_LIST_HEAD(&pool->cache_list[n]);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index df10f4e95736..1d2ab73a8f43 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -155,7 +155,7 @@  void i915_gem_context_free(struct kref *ctx_ref)
 		if (ce->ring)
 			intel_ring_free(ce->ring);
 
-		i915_vma_put(ce->state);
+		__i915_gem_object_release_unless_active(ce->state->obj);
 	}
 
 	put_pid(ctx->pid);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 72c7c1855e70..0deecd4e3b6c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1299,8 +1299,6 @@  void i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (!i915_gem_object_is_active(obj))
-		i915_gem_object_get(obj);
 	i915_gem_object_set_active(obj, idx);
 	i915_gem_active_set(&obj->last_read[idx], req);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2d846aa39ca5..1c95da8424cb 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3712,11 +3712,16 @@  void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 void i915_vma_unpin_and_release(struct i915_vma **p_vma)
 {
 	struct i915_vma *vma;
+	struct drm_i915_gem_object *obj;
 
 	vma = fetch_and_zero(p_vma);
 	if (!vma)
 		return;
 
+	obj = vma->obj;
+
 	i915_vma_unpin(vma);
-	i915_vma_put(vma);
+	i915_vma_close(vma);
+
+	__i915_gem_object_release_unless_active(obj);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 95b7e9afd5f8..09cf4874c45f 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -224,7 +224,8 @@  int i915_gem_render_state_init(struct drm_i915_gem_request *req)
 	i915_vma_move_to_active(so.vma, req, 0);
 err_unpin:
 	i915_vma_unpin(so.vma);
+	i915_vma_close(so.vma);
 err_obj:
-	i915_gem_object_put(obj);
+	__i915_gem_object_release_unless_active(obj);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b60c6f09fbfd..f3dfb7ca625d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1763,14 +1763,19 @@  static void cleanup_phys_status_page(struct intel_engine_cs *engine)
 static void cleanup_status_page(struct intel_engine_cs *engine)
 {
 	struct i915_vma *vma;
+	struct drm_i915_gem_object *obj;
 
 	vma = fetch_and_zero(&engine->status_page.vma);
 	if (!vma)
 		return;
 
+	obj = vma->obj;
+
 	i915_vma_unpin(vma);
-	i915_gem_object_unpin_map(vma->obj);
-	i915_vma_put(vma);
+	i915_vma_close(vma);
+
+	i915_gem_object_unpin_map(obj);
+	__i915_gem_object_release_unless_active(obj);
 }
 
 static int init_status_page(struct intel_engine_cs *engine)
@@ -1968,7 +1973,11 @@  intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 void
 intel_ring_free(struct intel_ring *ring)
 {
-	i915_vma_put(ring->vma);
+	struct drm_i915_gem_object *obj = ring->vma->obj;
+
+	i915_vma_close(ring->vma);
+	__i915_gem_object_release_unless_active(obj);
+
 	kfree(ring);
 }