diff mbox series

[32/38] drm/i915: Introduce intel_context.pin_mutex for pin management

Message ID 20190301140404.26690-32-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/38] drm/i915/execlists: Suppress redundant preemption | expand

Commit Message

Chris Wilson March 1, 2019, 2:03 p.m. UTC
Introduce a mutex to start locking the HW contexts independently of
struct_mutex, with a view to reducing the coarse struct_mutex. The
intel_context.pin_mutex is used to guard the transition to and from being
pinned on the gpu, and so is required before starting to build any
request, and released when the execution is completed.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c               |  2 +-
 drivers/gpu/drm/i915/i915_gem_context.c       | 76 +++++++------------
 drivers/gpu/drm/i915/intel_context.c          | 56 ++++++++++++--
 drivers/gpu/drm/i915/intel_context.h          | 38 ++++++----
 drivers/gpu/drm/i915/intel_context_types.h    |  8 +-
 drivers/gpu/drm/i915/intel_lrc.c              |  4 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c       |  4 +-
 .../gpu/drm/i915/selftests/i915_gem_context.c |  2 +-
 drivers/gpu/drm/i915/selftests/mock_engine.c  |  2 +-
 9 files changed, 110 insertions(+), 82 deletions(-)

Comments

Tvrtko Ursulin March 6, 2019, 9:45 a.m. UTC | #1
On 01/03/2019 14:03, Chris Wilson wrote:
> Introduce a mutex to start locking the HW contexts independently of
> struct_mutex, with a view to reducing the coarse struct_mutex. The
> intel_context.pin_mutex is used to guard the transition to and from being
> pinned on the gpu, and so is required before starting to build any
> request, and released when the execution is completed.

execution is completed or construction/emission is completed?

> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem.c               |  2 +-
>   drivers/gpu/drm/i915/i915_gem_context.c       | 76 +++++++------------
>   drivers/gpu/drm/i915/intel_context.c          | 56 ++++++++++++--
>   drivers/gpu/drm/i915/intel_context.h          | 38 ++++++----
>   drivers/gpu/drm/i915/intel_context_types.h    |  8 +-
>   drivers/gpu/drm/i915/intel_lrc.c              |  4 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.c       |  4 +-
>   .../gpu/drm/i915/selftests/i915_gem_context.c |  2 +-
>   drivers/gpu/drm/i915/selftests/mock_engine.c  |  2 +-
>   9 files changed, 110 insertions(+), 82 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index c6a25c3276ee..4babea524fc8 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4706,7 +4706,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
>   		if (!state)
>   			continue;
>   
> -		GEM_BUG_ON(ce->pin_count);
> +		GEM_BUG_ON(intel_context_is_pinned(ce));
>   
>   		/*
>   		 * As we will hold a reference to the logical state, it will
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 60b17f6a727d..321b26e302e5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -1096,23 +1096,27 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
>   }
>   
>   static int
> -gen8_modify_rpcs_gpu(struct intel_context *ce,
> -		     struct intel_engine_cs *engine,
> -		     struct intel_sseu sseu)
> +gen8_modify_rpcs_gpu(struct intel_context *ce, struct intel_sseu sseu)
>   {
> -	struct drm_i915_private *i915 = engine->i915;
> +	struct drm_i915_private *i915 = ce->engine->i915;
>   	struct i915_request *rq, *prev;
>   	intel_wakeref_t wakeref;
>   	int ret;
>   
> -	GEM_BUG_ON(!ce->pin_count);
> +	lockdep_assert_held(&ce->pin_mutex);
>   
> -	lockdep_assert_held(&i915->drm.struct_mutex);
> +	/*
> +	 * If context is not idle we have to submit an ordered request to modify
> +	 * its context image via the kernel context. Pristine and idle contexts
> +	 * will be configured on pinning.
> +	 */
> +	if (!intel_context_is_pinned(ce))
> +		return 0;
>   
>   	/* Submitting requests etc needs the hw awake. */
>   	wakeref = intel_runtime_pm_get(i915);
>   
> -	rq = i915_request_alloc(engine, i915->kernel_context);
> +	rq = i915_request_alloc(ce->engine, i915->kernel_context);
>   	if (IS_ERR(rq)) {
>   		ret = PTR_ERR(rq);
>   		goto out_put;
> @@ -1156,53 +1160,30 @@ gen8_modify_rpcs_gpu(struct intel_context *ce,
>   }
>   
>   static int
> -__i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
> -				    struct intel_engine_cs *engine,
> -				    struct intel_sseu sseu)
> +i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
> +				  struct intel_engine_cs *engine,
> +				  struct intel_sseu sseu)
>   {
>   	struct intel_context *ce;
>   	int ret = 0;
>   
> -	ce = intel_context_instance(ctx, engine);
> -	if (IS_ERR(ce))
> -		return PTR_ERR(ce);
> -
>   	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
>   	GEM_BUG_ON(engine->id != RCS);
>   
> +	ce = intel_context_pin_lock(ctx, engine);
> +	if (IS_ERR(ce))
> +		return PTR_ERR(ce);
> +
>   	/* Nothing to do if unmodified. */
>   	if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
> -		return 0;
> -
> -	/*
> -	 * If context is not idle we have to submit an ordered request to modify
> -	 * its context image via the kernel context. Pristine and idle contexts
> -	 * will be configured on pinning.
> -	 */
> -	if (ce->pin_count)
> -		ret = gen8_modify_rpcs_gpu(ce, engine, sseu);
> +		goto unlock;
>   
> +	ret = gen8_modify_rpcs_gpu(ce, sseu);

The _gpu suffix is not true any more so I'd drop it.

>   	if (!ret)
>   		ce->sseu = sseu;
>   
> -	return ret;
> -}
> -
> -static int
> -i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
> -				  struct intel_engine_cs *engine,
> -				  struct intel_sseu sseu)
> -{
> -	int ret;
> -
> -	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
> -	if (ret)
> -		return ret;
> -
> -	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
> -
> -	mutex_unlock(&ctx->i915->drm.struct_mutex);
> -
> +unlock:
> +	intel_context_pin_unlock(ce);
>   	return ret;
>   }
>   
> @@ -1620,11 +1601,12 @@ static int clone_sseu(struct i915_gem_context *dst,
>   		if (!memcmp(&sseu, &default_sseu, sizeof(sseu)))
>   			continue;
>   
> -		ce = intel_context_instance(dst, engine);
> +		ce = intel_context_pin_lock(dst, engine);
>   		if (IS_ERR(ce))
>   			return PTR_ERR(ce);
>   
>   		ce->sseu = sseu;
> +		intel_context_pin_unlock(ce);
>   	}
>   
>   	return 0;
> @@ -1790,7 +1772,6 @@ static int get_sseu(struct i915_gem_context *ctx,
>   	struct intel_engine_cs *engine;
>   	struct intel_context *ce;
>   	unsigned long lookup;
> -	int ret;
>   
>   	if (args->size == 0)
>   		goto out;
> @@ -1817,21 +1798,16 @@ static int get_sseu(struct i915_gem_context *ctx,
>   	if (!engine)
>   		return -EINVAL;
>   
> -	ce = intel_context_instance(ctx, engine);
> +	ce = intel_context_pin_lock(ctx, engine); /* serialise with set_sseu */
>   	if (IS_ERR(ce))
>   		return PTR_ERR(ce);
>   
> -	/* Only use for mutex here is to serialize get_param and set_param. */
> -	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
> -	if (ret)
> -		return ret;
> -
>   	user_sseu.slice_mask = ce->sseu.slice_mask;
>   	user_sseu.subslice_mask = ce->sseu.subslice_mask;
>   	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
>   	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
>   
> -	mutex_unlock(&ctx->i915->drm.struct_mutex);
> +	intel_context_pin_unlock(ce);
>   
>   	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
>   			 sizeof(user_sseu)))
> diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
> index 7de02be0b552..7c4171114e5b 100644
> --- a/drivers/gpu/drm/i915/intel_context.c
> +++ b/drivers/gpu/drm/i915/intel_context.c
> @@ -86,7 +86,7 @@ void __intel_context_remove(struct intel_context *ce)
>   	spin_unlock(&ctx->hw_contexts_lock);
>   }
>   
> -struct intel_context *
> +static struct intel_context *
>   intel_context_instance(struct i915_gem_context *ctx,
>   		       struct intel_engine_cs *engine)
>   {
> @@ -110,6 +110,21 @@ intel_context_instance(struct i915_gem_context *ctx,
>   	return pos;
>   }
>   
> +struct intel_context *
> +intel_context_pin_lock(struct i915_gem_context *ctx,
> +		       struct intel_engine_cs *engine)
> +	__acquires(ce->pin_mutex)
> +{
> +	struct intel_context *ce;
> +
> +	ce = intel_context_instance(ctx, engine);
> +	if (IS_ERR(ce))
> +		return ce;
> +
> +	mutex_lock(&ce->pin_mutex);
> +	return ce;
> +}
> +
>   struct intel_context *
>   intel_context_pin(struct i915_gem_context *ctx,
>   		  struct intel_engine_cs *engine)
> @@ -117,16 +132,20 @@ intel_context_pin(struct i915_gem_context *ctx,
>   	struct intel_context *ce;
>   	int err;
>   
> -	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
> -
>   	ce = intel_context_instance(ctx, engine);
>   	if (IS_ERR(ce))
>   		return ce;
>   
> -	if (unlikely(!ce->pin_count++)) {
> +	if (likely(atomic_inc_not_zero(&ce->pin_count)))
> +		return ce;
> +
> +	if (mutex_lock_interruptible(&ce->pin_mutex))
> +		return ERR_PTR(-EINTR);
> +
> +	if (likely(!atomic_read(&ce->pin_count))) {
>   		err = ce->ops->pin(ce);
>   		if (err)
> -			goto err_unpin;
> +			goto err;
>   
>   		mutex_lock(&ctx->mutex);
>   		list_add(&ce->active_link, &ctx->active_engines);
> @@ -134,16 +153,35 @@ intel_context_pin(struct i915_gem_context *ctx,
>   
>   		i915_gem_context_get(ctx);
>   		GEM_BUG_ON(ce->gem_context != ctx);
> +
> +		smp_mb__before_atomic();

Why is this needed?

>   	}
> -	GEM_BUG_ON(!ce->pin_count); /* no overflow! */
>   
> +	atomic_inc(&ce->pin_count);
> +	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
> +
> +	mutex_unlock(&ce->pin_mutex);
>   	return ce;
>   
> -err_unpin:
> -	ce->pin_count = 0;
> +err:
> +	mutex_unlock(&ce->pin_mutex);
>   	return ERR_PTR(err);
>   }
>   
> +void intel_context_unpin(struct intel_context *ce)
> +{
> +	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
> +		return;
> +
> +	/* We may be called from inside intel_context_pin() to evict another */
> +	mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
> +
> +	if (likely(atomic_dec_and_test(&ce->pin_count)))
> +		ce->ops->unpin(ce);
> +
> +	mutex_unlock(&ce->pin_mutex);
> +}
> +
>   static void intel_context_retire(struct i915_active_request *active,
>   				 struct i915_request *rq)
>   {
> @@ -165,6 +203,8 @@ intel_context_init(struct intel_context *ce,
>   	INIT_LIST_HEAD(&ce->signal_link);
>   	INIT_LIST_HEAD(&ce->signals);
>   
> +	mutex_init(&ce->pin_mutex);
> +
>   	/* Use the whole device by default */
>   	ce->sseu = intel_device_default_sseu(ctx->i915);
>   
> diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h
> index aa34311a472e..3657f3a34f8e 100644
> --- a/drivers/gpu/drm/i915/intel_context.h
> +++ b/drivers/gpu/drm/i915/intel_context.h
> @@ -7,6 +7,8 @@
>   #ifndef __INTEL_CONTEXT_H__
>   #define __INTEL_CONTEXT_H__
>   
> +#include <linux/lockdep.h>
> +
>   #include "intel_context_types.h"
>   #include "intel_engine_types.h"
>   
> @@ -26,18 +28,30 @@ intel_context_lookup(struct i915_gem_context *ctx,
>   		     struct intel_engine_cs *engine);
>   
>   /**
> - * intel_context_instance - Lookup or allocate the HW context for (ctx, engine)
> + * intel_context_pin_lock - Stabilises the 'pinned' status of the HW context
>    * @ctx - the parent GEM context
>    * @engine - the target HW engine
>    *
> - * Returns the existing HW context for this pair of (GEM context, engine), or
> - * allocates and initialises a fresh context. Once allocated, the HW context
> - * remains resident until the GEM context is destroyed.
> + * Acquire a lock on the pinned status of the HW context, such that the context
> + * can neither be bound to the GPU nor unbound whilst the lock is held, i.e.
> + * intel_context_is_pinned() remains stable.
>    */
>   struct intel_context *
> -intel_context_instance(struct i915_gem_context *ctx,
> +intel_context_pin_lock(struct i915_gem_context *ctx,
>   		       struct intel_engine_cs *engine);
>   
> +static inline bool
> +intel_context_is_pinned(struct intel_context *ce)
> +{
> +	return atomic_read(&ce->pin_count);
> +}
> +
> +static inline void intel_context_pin_unlock(struct intel_context *ce)
> +__releases(ce->pin_mutex)
> +{
> +	mutex_unlock(&ce->pin_mutex);
> +}
> +
>   struct intel_context *
>   __intel_context_insert(struct i915_gem_context *ctx,
>   		       struct intel_engine_cs *engine,
> @@ -50,18 +64,10 @@ intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
>   
>   static inline void __intel_context_pin(struct intel_context *ce)
>   {
> -	GEM_BUG_ON(!ce->pin_count);
> -	ce->pin_count++;
> +	GEM_BUG_ON(!intel_context_is_pinned(ce));
> +	atomic_inc(&ce->pin_count);

This can be called without the mutex held but only if not zero, right?

BUG_ON(!atomic_inc_not_zero(&ce->pin_count)) ?

>   }
>   
> -static inline void intel_context_unpin(struct intel_context *ce)
> -{
> -	GEM_BUG_ON(!ce->pin_count);
> -	if (--ce->pin_count)
> -		return;
> -
> -	GEM_BUG_ON(!ce->ops);
> -	ce->ops->unpin(ce);
> -}
> +void intel_context_unpin(struct intel_context *ce);
>   
>   #endif /* __INTEL_CONTEXT_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
> index 804fa93de853..6dc9b4b9067b 100644
> --- a/drivers/gpu/drm/i915/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/intel_context_types.h
> @@ -8,6 +8,7 @@
>   #define __INTEL_CONTEXT_TYPES__
>   
>   #include <linux/list.h>
> +#include <linux/mutex.h>
>   #include <linux/rbtree.h>
>   #include <linux/types.h>
>   
> @@ -38,14 +39,19 @@ struct intel_context {
>   	struct i915_gem_context *gem_context;
>   	struct intel_engine_cs *engine;
>   	struct intel_engine_cs *active;
> +
>   	struct list_head active_link;
>   	struct list_head signal_link;
>   	struct list_head signals;
> +
>   	struct i915_vma *state;
>   	struct intel_ring *ring;
> +
>   	u32 *lrc_reg_state;
>   	u64 lrc_desc;
> -	int pin_count;
> +
> +	atomic_t pin_count;
> +	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */

Not pinning but transition from pinned to unpinned state AFAIU.

>   
>   	/**
>   	 * active_tracker: Active tracker for the external rq activity
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 473cfea4c503..e69b52c4d3ce 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1238,7 +1238,7 @@ static void __execlists_context_fini(struct intel_context *ce)
>   
>   static void execlists_context_destroy(struct intel_context *ce)
>   {
> -	GEM_BUG_ON(ce->pin_count);
> +	GEM_BUG_ON(intel_context_is_pinned(ce));
>   
>   	if (ce->state)
>   		__execlists_context_fini(ce);
> @@ -1476,7 +1476,7 @@ static int execlists_request_alloc(struct i915_request *request)
>   {
>   	int ret;
>   
> -	GEM_BUG_ON(!request->hw_context->pin_count);
> +	GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
>   
>   	/*
>   	 * Flush enough space to reduce the likelihood of waiting after
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 4f137d0a5316..a2bdf188f3f6 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1356,7 +1356,7 @@ static void __ring_context_fini(struct intel_context *ce)
>   
>   static void ring_context_destroy(struct intel_context *ce)
>   {
> -	GEM_BUG_ON(ce->pin_count);
> +	GEM_BUG_ON(intel_context_is_pinned(ce));
>   
>   	if (ce->state)
>   		__ring_context_fini(ce);
> @@ -1917,7 +1917,7 @@ static int ring_request_alloc(struct i915_request *request)
>   {
>   	int ret;
>   
> -	GEM_BUG_ON(!request->hw_context->pin_count);
> +	GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
>   	GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
>   
>   	/*
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 15f016ca8e0d..c6c14d1b2ce8 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -1029,7 +1029,7 @@ __sseu_test(struct drm_i915_private *i915,
>   	if (ret)
>   		goto out_context;
>   
> -	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
> +	ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
>   	if (ret)
>   		goto out_spin;
>   
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
> index 6434e968ecb0..c032c0cf20e6 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
> @@ -131,7 +131,7 @@ static void mock_context_unpin(struct intel_context *ce)
>   
>   static void mock_context_destroy(struct intel_context *ce)
>   {
> -	GEM_BUG_ON(ce->pin_count);
> +	GEM_BUG_ON(intel_context_is_pinned(ce));
>   
>   	if (ce->ring)
>   		mock_ring_free(ce->ring);
> 

Regards,

Tvrtko
Chris Wilson March 6, 2019, 10:15 a.m. UTC | #2
Quoting Tvrtko Ursulin (2019-03-06 09:45:27)
> 
> On 01/03/2019 14:03, Chris Wilson wrote:
> > Introduce a mutex to start locking the HW contexts independently of
> > struct_mutex, with a view to reducing the coarse struct_mutex. The
> > intel_context.pin_mutex is used to guard the transition to and from being
> > pinned on the gpu, and so is required before starting to build any
> > request, and released when the execution is completed.
> 
> execution is completed or construction/emission is completed?

We keep the context pinned until the end of emission, but we only need
to hold the lock while pinning the request. Yup, that last sentence
changed direction half-way through and is confused.

> >   static int
> > -__i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
> > -                                 struct intel_engine_cs *engine,
> > -                                 struct intel_sseu sseu)
> > +i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
> > +                               struct intel_engine_cs *engine,
> > +                               struct intel_sseu sseu)
> >   {
> >       struct intel_context *ce;
> >       int ret = 0;
> >   
> > -     ce = intel_context_instance(ctx, engine);
> > -     if (IS_ERR(ce))
> > -             return PTR_ERR(ce);
> > -
> >       GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
> >       GEM_BUG_ON(engine->id != RCS);
> >   
> > +     ce = intel_context_pin_lock(ctx, engine);
> > +     if (IS_ERR(ce))
> > +             return PTR_ERR(ce);
> > +
> >       /* Nothing to do if unmodified. */
> >       if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
> > -             return 0;
> > -
> > -     /*
> > -      * If context is not idle we have to submit an ordered request to modify
> > -      * its context image via the kernel context. Pristine and idle contexts
> > -      * will be configured on pinning.
> > -      */
> > -     if (ce->pin_count)
> > -             ret = gen8_modify_rpcs_gpu(ce, engine, sseu);
> > +             goto unlock;
> >   
> > +     ret = gen8_modify_rpcs_gpu(ce, sseu);
> 
> The _gpu suffix is not true any more so I'd drop it.

Ok.

> > @@ -134,16 +153,35 @@ intel_context_pin(struct i915_gem_context *ctx,
> >   
> >               i915_gem_context_get(ctx);
> >               GEM_BUG_ON(ce->gem_context != ctx);
> > +
> > +             smp_mb__before_atomic();
> 
> Why is this needed?

nop on intel, but because we are using the pin_count == 0 as the barrier
for others, before that is incremented we need to be sure that writes
into ce (for the pinned state) are visible.

> > @@ -50,18 +64,10 @@ intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
> >   
> >   static inline void __intel_context_pin(struct intel_context *ce)
> >   {
> > -     GEM_BUG_ON(!ce->pin_count);
> > -     ce->pin_count++;
> > +     GEM_BUG_ON(!intel_context_is_pinned(ce));
> > +     atomic_inc(&ce->pin_count);
> 
> This can be called without the mutex held but only if not zero, right?
> 
> BUG_ON(!atomic_inc_not_zero(&ce->pin_count)) ?

That's quite heavy (it's a locked cmpxchg), whereas a plain locked inc
doesn't involve a roundtrip.

In terms of this being a function that should only be used at controlled
points, and for that I think the GEM_BUG_ON(!intel_context_is_pinned())
is the right documentation (i.e. you should only be using this if you
know you have already pinned the ce yourself).

> >       u32 *lrc_reg_state;
> >       u64 lrc_desc;
> > -     int pin_count;
> > +
> > +     atomic_t pin_count;
> > +     struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
> 
> Not pinning but transition from pinned to unpinned state AFAIU.

unpinned to pinned! A lot may happen the first time we pin, and every
user needs to be serialised across that.

pinned to unpinned is less tricky, but still requires serialisation with a
potential concurrent pinner.

The only cheap (as cheap as any cmpxchg operation may be) path is for
already pinned ce, which fortunately is our steady state.
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c6a25c3276ee..4babea524fc8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4706,7 +4706,7 @@  static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 		if (!state)
 			continue;
 
-		GEM_BUG_ON(ce->pin_count);
+		GEM_BUG_ON(intel_context_is_pinned(ce));
 
 		/*
 		 * As we will hold a reference to the logical state, it will
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 60b17f6a727d..321b26e302e5 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1096,23 +1096,27 @@  static int gen8_emit_rpcs_config(struct i915_request *rq,
 }
 
 static int
-gen8_modify_rpcs_gpu(struct intel_context *ce,
-		     struct intel_engine_cs *engine,
-		     struct intel_sseu sseu)
+gen8_modify_rpcs_gpu(struct intel_context *ce, struct intel_sseu sseu)
 {
-	struct drm_i915_private *i915 = engine->i915;
+	struct drm_i915_private *i915 = ce->engine->i915;
 	struct i915_request *rq, *prev;
 	intel_wakeref_t wakeref;
 	int ret;
 
-	GEM_BUG_ON(!ce->pin_count);
+	lockdep_assert_held(&ce->pin_mutex);
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
+	/*
+	 * If context is not idle we have to submit an ordered request to modify
+	 * its context image via the kernel context. Pristine and idle contexts
+	 * will be configured on pinning.
+	 */
+	if (!intel_context_is_pinned(ce))
+		return 0;
 
 	/* Submitting requests etc needs the hw awake. */
 	wakeref = intel_runtime_pm_get(i915);
 
-	rq = i915_request_alloc(engine, i915->kernel_context);
+	rq = i915_request_alloc(ce->engine, i915->kernel_context);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
 		goto out_put;
@@ -1156,53 +1160,30 @@  gen8_modify_rpcs_gpu(struct intel_context *ce,
 }
 
 static int
-__i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
-				    struct intel_engine_cs *engine,
-				    struct intel_sseu sseu)
+i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
+				  struct intel_engine_cs *engine,
+				  struct intel_sseu sseu)
 {
 	struct intel_context *ce;
 	int ret = 0;
 
-	ce = intel_context_instance(ctx, engine);
-	if (IS_ERR(ce))
-		return PTR_ERR(ce);
-
 	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
 	GEM_BUG_ON(engine->id != RCS);
 
+	ce = intel_context_pin_lock(ctx, engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
 	/* Nothing to do if unmodified. */
 	if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
-		return 0;
-
-	/*
-	 * If context is not idle we have to submit an ordered request to modify
-	 * its context image via the kernel context. Pristine and idle contexts
-	 * will be configured on pinning.
-	 */
-	if (ce->pin_count)
-		ret = gen8_modify_rpcs_gpu(ce, engine, sseu);
+		goto unlock;
 
+	ret = gen8_modify_rpcs_gpu(ce, sseu);
 	if (!ret)
 		ce->sseu = sseu;
 
-	return ret;
-}
-
-static int
-i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
-				  struct intel_engine_cs *engine,
-				  struct intel_sseu sseu)
-{
-	int ret;
-
-	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
-	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
-
-	mutex_unlock(&ctx->i915->drm.struct_mutex);
-
+unlock:
+	intel_context_pin_unlock(ce);
 	return ret;
 }
 
@@ -1620,11 +1601,12 @@  static int clone_sseu(struct i915_gem_context *dst,
 		if (!memcmp(&sseu, &default_sseu, sizeof(sseu)))
 			continue;
 
-		ce = intel_context_instance(dst, engine);
+		ce = intel_context_pin_lock(dst, engine);
 		if (IS_ERR(ce))
 			return PTR_ERR(ce);
 
 		ce->sseu = sseu;
+		intel_context_pin_unlock(ce);
 	}
 
 	return 0;
@@ -1790,7 +1772,6 @@  static int get_sseu(struct i915_gem_context *ctx,
 	struct intel_engine_cs *engine;
 	struct intel_context *ce;
 	unsigned long lookup;
-	int ret;
 
 	if (args->size == 0)
 		goto out;
@@ -1817,21 +1798,16 @@  static int get_sseu(struct i915_gem_context *ctx,
 	if (!engine)
 		return -EINVAL;
 
-	ce = intel_context_instance(ctx, engine);
+	ce = intel_context_pin_lock(ctx, engine); /* serialise with set_sseu */
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
-	/* Only use for mutex here is to serialize get_param and set_param. */
-	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	user_sseu.slice_mask = ce->sseu.slice_mask;
 	user_sseu.subslice_mask = ce->sseu.subslice_mask;
 	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
 	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
 
-	mutex_unlock(&ctx->i915->drm.struct_mutex);
+	intel_context_pin_unlock(ce);
 
 	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
 			 sizeof(user_sseu)))
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
index 7de02be0b552..7c4171114e5b 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -86,7 +86,7 @@  void __intel_context_remove(struct intel_context *ce)
 	spin_unlock(&ctx->hw_contexts_lock);
 }
 
-struct intel_context *
+static struct intel_context *
 intel_context_instance(struct i915_gem_context *ctx,
 		       struct intel_engine_cs *engine)
 {
@@ -110,6 +110,21 @@  intel_context_instance(struct i915_gem_context *ctx,
 	return pos;
 }
 
+struct intel_context *
+intel_context_pin_lock(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine)
+	__acquires(ce->pin_mutex)
+{
+	struct intel_context *ce;
+
+	ce = intel_context_instance(ctx, engine);
+	if (IS_ERR(ce))
+		return ce;
+
+	mutex_lock(&ce->pin_mutex);
+	return ce;
+}
+
 struct intel_context *
 intel_context_pin(struct i915_gem_context *ctx,
 		  struct intel_engine_cs *engine)
@@ -117,16 +132,20 @@  intel_context_pin(struct i915_gem_context *ctx,
 	struct intel_context *ce;
 	int err;
 
-	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
-
 	ce = intel_context_instance(ctx, engine);
 	if (IS_ERR(ce))
 		return ce;
 
-	if (unlikely(!ce->pin_count++)) {
+	if (likely(atomic_inc_not_zero(&ce->pin_count)))
+		return ce;
+
+	if (mutex_lock_interruptible(&ce->pin_mutex))
+		return ERR_PTR(-EINTR);
+
+	if (likely(!atomic_read(&ce->pin_count))) {
 		err = ce->ops->pin(ce);
 		if (err)
-			goto err_unpin;
+			goto err;
 
 		mutex_lock(&ctx->mutex);
 		list_add(&ce->active_link, &ctx->active_engines);
@@ -134,16 +153,35 @@  intel_context_pin(struct i915_gem_context *ctx,
 
 		i915_gem_context_get(ctx);
 		GEM_BUG_ON(ce->gem_context != ctx);
+
+		smp_mb__before_atomic();
 	}
-	GEM_BUG_ON(!ce->pin_count); /* no overflow! */
 
+	atomic_inc(&ce->pin_count);
+	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
+
+	mutex_unlock(&ce->pin_mutex);
 	return ce;
 
-err_unpin:
-	ce->pin_count = 0;
+err:
+	mutex_unlock(&ce->pin_mutex);
 	return ERR_PTR(err);
 }
 
+void intel_context_unpin(struct intel_context *ce)
+{
+	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
+		return;
+
+	/* We may be called from inside intel_context_pin() to evict another */
+	mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
+
+	if (likely(atomic_dec_and_test(&ce->pin_count)))
+		ce->ops->unpin(ce);
+
+	mutex_unlock(&ce->pin_mutex);
+}
+
 static void intel_context_retire(struct i915_active_request *active,
 				 struct i915_request *rq)
 {
@@ -165,6 +203,8 @@  intel_context_init(struct intel_context *ce,
 	INIT_LIST_HEAD(&ce->signal_link);
 	INIT_LIST_HEAD(&ce->signals);
 
+	mutex_init(&ce->pin_mutex);
+
 	/* Use the whole device by default */
 	ce->sseu = intel_device_default_sseu(ctx->i915);
 
diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h
index aa34311a472e..3657f3a34f8e 100644
--- a/drivers/gpu/drm/i915/intel_context.h
+++ b/drivers/gpu/drm/i915/intel_context.h
@@ -7,6 +7,8 @@ 
 #ifndef __INTEL_CONTEXT_H__
 #define __INTEL_CONTEXT_H__
 
+#include <linux/lockdep.h>
+
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
 
@@ -26,18 +28,30 @@  intel_context_lookup(struct i915_gem_context *ctx,
 		     struct intel_engine_cs *engine);
 
 /**
- * intel_context_instance - Lookup or allocate the HW context for (ctx, engine)
+ * intel_context_pin_lock - Stabilises the 'pinned' status of the HW context
  * @ctx - the parent GEM context
  * @engine - the target HW engine
  *
- * Returns the existing HW context for this pair of (GEM context, engine), or
- * allocates and initialises a fresh context. Once allocated, the HW context
- * remains resident until the GEM context is destroyed.
+ * Acquire a lock on the pinned status of the HW context, such that the context
+ * can neither be bound to the GPU nor unbound whilst the lock is held, i.e.
+ * intel_context_is_pinned() remains stable.
  */
 struct intel_context *
-intel_context_instance(struct i915_gem_context *ctx,
+intel_context_pin_lock(struct i915_gem_context *ctx,
 		       struct intel_engine_cs *engine);
 
+static inline bool
+intel_context_is_pinned(struct intel_context *ce)
+{
+	return atomic_read(&ce->pin_count);
+}
+
+static inline void intel_context_pin_unlock(struct intel_context *ce)
+__releases(ce->pin_mutex)
+{
+	mutex_unlock(&ce->pin_mutex);
+}
+
 struct intel_context *
 __intel_context_insert(struct i915_gem_context *ctx,
 		       struct intel_engine_cs *engine,
@@ -50,18 +64,10 @@  intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
 
 static inline void __intel_context_pin(struct intel_context *ce)
 {
-	GEM_BUG_ON(!ce->pin_count);
-	ce->pin_count++;
+	GEM_BUG_ON(!intel_context_is_pinned(ce));
+	atomic_inc(&ce->pin_count);
 }
 
-static inline void intel_context_unpin(struct intel_context *ce)
-{
-	GEM_BUG_ON(!ce->pin_count);
-	if (--ce->pin_count)
-		return;
-
-	GEM_BUG_ON(!ce->ops);
-	ce->ops->unpin(ce);
-}
+void intel_context_unpin(struct intel_context *ce);
 
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 804fa93de853..6dc9b4b9067b 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -8,6 +8,7 @@ 
 #define __INTEL_CONTEXT_TYPES__
 
 #include <linux/list.h>
+#include <linux/mutex.h>
 #include <linux/rbtree.h>
 #include <linux/types.h>
 
@@ -38,14 +39,19 @@  struct intel_context {
 	struct i915_gem_context *gem_context;
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *active;
+
 	struct list_head active_link;
 	struct list_head signal_link;
 	struct list_head signals;
+
 	struct i915_vma *state;
 	struct intel_ring *ring;
+
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
-	int pin_count;
+
+	atomic_t pin_count;
+	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
 
 	/**
 	 * active_tracker: Active tracker for the external rq activity
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 473cfea4c503..e69b52c4d3ce 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1238,7 +1238,7 @@  static void __execlists_context_fini(struct intel_context *ce)
 
 static void execlists_context_destroy(struct intel_context *ce)
 {
-	GEM_BUG_ON(ce->pin_count);
+	GEM_BUG_ON(intel_context_is_pinned(ce));
 
 	if (ce->state)
 		__execlists_context_fini(ce);
@@ -1476,7 +1476,7 @@  static int execlists_request_alloc(struct i915_request *request)
 {
 	int ret;
 
-	GEM_BUG_ON(!request->hw_context->pin_count);
+	GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
 
 	/*
 	 * Flush enough space to reduce the likelihood of waiting after
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 4f137d0a5316..a2bdf188f3f6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1356,7 +1356,7 @@  static void __ring_context_fini(struct intel_context *ce)
 
 static void ring_context_destroy(struct intel_context *ce)
 {
-	GEM_BUG_ON(ce->pin_count);
+	GEM_BUG_ON(intel_context_is_pinned(ce));
 
 	if (ce->state)
 		__ring_context_fini(ce);
@@ -1917,7 +1917,7 @@  static int ring_request_alloc(struct i915_request *request)
 {
 	int ret;
 
-	GEM_BUG_ON(!request->hw_context->pin_count);
+	GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
 	GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
 
 	/*
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 15f016ca8e0d..c6c14d1b2ce8 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1029,7 +1029,7 @@  __sseu_test(struct drm_i915_private *i915,
 	if (ret)
 		goto out_context;
 
-	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
 	if (ret)
 		goto out_spin;
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 6434e968ecb0..c032c0cf20e6 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -131,7 +131,7 @@  static void mock_context_unpin(struct intel_context *ce)
 
 static void mock_context_destroy(struct intel_context *ce)
 {
-	GEM_BUG_ON(ce->pin_count);
+	GEM_BUG_ON(intel_context_is_pinned(ce));
 
 	if (ce->ring)
 		mock_ring_free(ce->ring);