
[07/19] drm/i915: Mark up the calling context for intel_wakeref_put()

Message ID 20191118230254.2615942-8-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [01/19] drm/i915/selftests: Force bonded submission to overlap

Commit Message

Chris Wilson Nov. 18, 2019, 11:02 p.m. UTC
Previously, we assumed we could use mutex_trylock() within an atomic
context, falling back to a worker if contended. However, such trickery
is illegal inside interrupt context, and so we need to always use a
worker under such circumstances. As we are normally in process context,
we can typically use a plain mutex, and only defer to a worker when we
know we are called from an interrupt path.

Fixes: 51fbd8de87dc ("drm/i915/pmu: Atomically acquire the gt_pm wakeref")
References: a0855d24fc22d ("locking/mutex: Complain upon mutex API misuse in IRQ contexts")
References: https://bugs.freedesktop.org/show_bug.cgi?id=111626
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_pm.c    |  3 +-
 drivers/gpu/drm/i915/gt/intel_engine_pm.h    | 10 +++++
 drivers/gpu/drm/i915/gt/intel_gt_pm.c        |  1 -
 drivers/gpu/drm/i915/gt/intel_gt_pm.h        |  5 +++
 drivers/gpu/drm/i915/gt/intel_lrc.c          |  2 +-
 drivers/gpu/drm/i915/gt/intel_reset.c        |  2 +-
 drivers/gpu/drm/i915/gt/selftest_engine_pm.c |  7 ++--
 drivers/gpu/drm/i915/i915_active.c           |  5 ++-
 drivers/gpu/drm/i915/i915_pmu.c              |  6 +--
 drivers/gpu/drm/i915/intel_wakeref.c         |  8 ++--
 drivers/gpu/drm/i915/intel_wakeref.h         | 44 ++++++++++++++++----
 11 files changed, 68 insertions(+), 25 deletions(-)
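
A minimal sketch of the calling convention this change introduces (the
wrapper functions and include path below are illustrative, not part of the
patch): process-context callers keep using intel_gt_pm_put(), which may
take wf->mutex and sleep on the final release, while atomic/interrupt
context callers switch to intel_gt_pm_put_async(), which defers the final
release to a worker.

#include <linux/interrupt.h>

#include "gt/intel_gt_pm.h"

static void example_process_context_release(struct intel_gt *gt)
{
	/* Process context: the final put may take wf->mutex and sleep. */
	intel_gt_pm_put(gt);
}

static irqreturn_t example_irq_handler(int irq, void *arg)
{
	struct intel_gt *gt = arg;

	/*
	 * Interrupt context: mutex_trylock() is not allowed here, so the
	 * final put is punted to schedule_work() via INTEL_WAKEREF_PUT_ASYNC.
	 */
	intel_gt_pm_put_async(gt);

	return IRQ_HANDLED;
}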

Comments

Tvrtko Ursulin Nov. 19, 2019, 3:57 p.m. UTC | #1
On 18/11/2019 23:02, Chris Wilson wrote:
> Previously, we assumed we could use mutex_trylock() within an atomic
> context, falling back to a worker if contended. However, such trickery
> is illegal inside interrupt context, and so we need to always use a
> worker under such circumstances. As we are normally in process context,
> we can typically use a plain mutex, and only defer to a worker when we
> know we are called from an interrupt path.
> 
> Fixes: 51fbd8de87dc ("drm/i915/pmu: Atomically acquire the gt_pm wakeref")
> References: a0855d24fc22d ("locking/mutex: Complain upon mutex API misuse in IRQ contexts")
> References: https://bugs.freedesktop.org/show_bug.cgi?id=111626
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_pm.c    |  3 +-
>   drivers/gpu/drm/i915/gt/intel_engine_pm.h    | 10 +++++
>   drivers/gpu/drm/i915/gt/intel_gt_pm.c        |  1 -
>   drivers/gpu/drm/i915/gt/intel_gt_pm.h        |  5 +++
>   drivers/gpu/drm/i915/gt/intel_lrc.c          |  2 +-
>   drivers/gpu/drm/i915/gt/intel_reset.c        |  2 +-
>   drivers/gpu/drm/i915/gt/selftest_engine_pm.c |  7 ++--
>   drivers/gpu/drm/i915/i915_active.c           |  5 ++-
>   drivers/gpu/drm/i915/i915_pmu.c              |  6 +--
>   drivers/gpu/drm/i915/intel_wakeref.c         |  8 ++--
>   drivers/gpu/drm/i915/intel_wakeref.h         | 44 ++++++++++++++++----
>   11 files changed, 68 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index 722d3eec226e..4878d16176d5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -180,7 +180,8 @@ static int __engine_park(struct intel_wakeref *wf)
>   
>   	engine->execlists.no_priolist = false;
>   
> -	intel_gt_pm_put(engine->gt);
> +	/* While we call i915_vma_parked, we have to break the lock cycle */
> +	intel_gt_pm_put_async(engine->gt);
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> index 739c50fefcef..467475fce9c6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> @@ -31,6 +31,16 @@ static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
>   	intel_wakeref_put(&engine->wakeref);
>   }
>   
> +static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine)
> +{
> +	intel_wakeref_put_async(&engine->wakeref);
> +}
> +
> +static inline void intel_engine_pm_unlock_wait(struct intel_engine_cs *engine)
> +{
> +	intel_wakeref_unlock_wait(&engine->wakeref);
> +}
> +
>   void intel_engine_init__pm(struct intel_engine_cs *engine);
>   
>   #endif /* INTEL_ENGINE_PM_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index e61f752a3cd5..7a9044ac4b75 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -105,7 +105,6 @@ static int __gt_park(struct intel_wakeref *wf)
>   static const struct intel_wakeref_ops wf_ops = {
>   	.get = __gt_unpark,
>   	.put = __gt_park,
> -	.flags = INTEL_WAKEREF_PUT_ASYNC,
>   };
>   
>   void intel_gt_pm_init_early(struct intel_gt *gt)
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> index b3e17399be9b..990efc27a4e4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> @@ -32,6 +32,11 @@ static inline void intel_gt_pm_put(struct intel_gt *gt)
>   	intel_wakeref_put(&gt->wakeref);
>   }
>   
> +static inline void intel_gt_pm_put_async(struct intel_gt *gt)
> +{
> +	intel_wakeref_put_async(&gt->wakeref);
> +}
> +
>   static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
>   {
>   	return intel_wakeref_wait_for_idle(&gt->wakeref);
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index f7c8fec436a9..fec46afb9264 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1173,7 +1173,7 @@ __execlists_schedule_out(struct i915_request *rq,
>   
>   	intel_engine_context_out(engine);
>   	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
> -	intel_gt_pm_put(engine->gt);
> +	intel_gt_pm_put_async(engine->gt);

Wait a minute.. wasn't the wakeref hierarchy supposed to be engine -> gt 
so this place should be on the engine level?

>   
>   	/*
>   	 * If this is part of a virtual engine, its next request may
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index b7007cd78c6f..0388f9375366 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -1125,7 +1125,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
>   out:
>   	intel_engine_cancel_stop_cs(engine);
>   	reset_finish_engine(engine);
> -	intel_engine_pm_put(engine);
> +	intel_engine_pm_put_async(engine);
>   	return ret;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
> index 20b9c83f43ad..851a6c4f65c6 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
> @@ -51,11 +51,12 @@ static int live_engine_pm(void *arg)
>   				pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
>   				       engine->name, p->name);
>   			else
> -				intel_engine_pm_put(engine);
> -			intel_engine_pm_put(engine);
> +				intel_engine_pm_put_async(engine);
> +			intel_engine_pm_put_async(engine);
>   			p->critical_section_end();
>   
> -			/* engine wakeref is sync (instant) */
> +			intel_engine_pm_unlock_wait(engine);

From the context, would it be clearer to name it
intel_engine_pm_wait_put? sync_put? flush_put? To more clearly represent
that it pairs with put_async.

> +
>   			if (intel_engine_pm_is_awake(engine)) {
>   				pr_err("%s is still awake after flushing pm\n",
>   				       engine->name);
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index 5448f37c8102..dca15ace88f6 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -672,12 +672,13 @@ void i915_active_acquire_barrier(struct i915_active *ref)
>   	 * populated by i915_request_add_active_barriers() to point to the
>   	 * request that will eventually release them.
>   	 */
> -	spin_lock_irqsave_nested(&ref->tree_lock, flags, SINGLE_DEPTH_NESTING);
>   	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
>   		struct active_node *node = barrier_from_ll(pos);
>   		struct intel_engine_cs *engine = barrier_to_engine(node);
>   		struct rb_node **p, *parent;
>   
> +		spin_lock_irqsave_nested(&ref->tree_lock, flags,
> +					 SINGLE_DEPTH_NESTING);
>   		parent = NULL;
>   		p = &ref->tree.rb_node;
>   		while (*p) {
> @@ -693,12 +694,12 @@ void i915_active_acquire_barrier(struct i915_active *ref)
>   		}
>   		rb_link_node(&node->node, parent, p);
>   		rb_insert_color(&node->node, &ref->tree);
> +		spin_unlock_irqrestore(&ref->tree_lock, flags);
>   
>   		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
>   		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
>   		intel_engine_pm_put(engine);
>   	}
> -	spin_unlock_irqrestore(&ref->tree_lock, flags);

Pros/cons of leaving the locks where they were and using put_async, 
versus this layout?

>   }
>   
>   void i915_request_add_active_barriers(struct i915_request *rq)
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 9b02be0ad4e6..95e824a78d4d 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -190,7 +190,7 @@ static u64 get_rc6(struct intel_gt *gt)
>   	val = 0;
>   	if (intel_gt_pm_get_if_awake(gt)) {
>   		val = __get_rc6(gt);
> -		intel_gt_pm_put(gt);
> +		intel_gt_pm_put_async(gt);
>   	}
>   
>   	spin_lock_irqsave(&pmu->lock, flags);
> @@ -360,7 +360,7 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
>   skip:
>   		if (unlikely(mmio_lock))
>   			spin_unlock_irqrestore(mmio_lock, flags);
> -		intel_engine_pm_put(engine);
> +		intel_engine_pm_put_async(engine);
>   	}
>   }
>   
> @@ -398,7 +398,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
>   			if (stat)
>   				val = intel_get_cagf(rps, stat);
>   
> -			intel_gt_pm_put(gt);
> +			intel_gt_pm_put_async(gt);
>   		}
>   
>   		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
> diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
> index 868cc78048d0..9b29176cc1ca 100644
> --- a/drivers/gpu/drm/i915/intel_wakeref.c
> +++ b/drivers/gpu/drm/i915/intel_wakeref.c
> @@ -54,7 +54,8 @@ int __intel_wakeref_get_first(struct intel_wakeref *wf)
>   
>   static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
>   {
> -	if (!atomic_dec_and_test(&wf->count))
> +	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
> +	if (unlikely(!atomic_dec_and_test(&wf->count)))
>   		goto unlock;
>   
>   	/* ops->put() must reschedule its own release on error/deferral */
> @@ -67,13 +68,12 @@ static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
>   	mutex_unlock(&wf->mutex);
>   }
>   
> -void __intel_wakeref_put_last(struct intel_wakeref *wf)
> +void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags)
>   {
>   	INTEL_WAKEREF_BUG_ON(work_pending(&wf->work));
>   
>   	/* Assume we are not in process context and so cannot sleep. */
> -	if (wf->ops->flags & INTEL_WAKEREF_PUT_ASYNC ||
> -	    !mutex_trylock(&wf->mutex)) {
> +	if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) {
>   		schedule_work(&wf->work);
>   		return;
>   	}
> diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
> index 5f0c972a80fb..688b9b536a69 100644
> --- a/drivers/gpu/drm/i915/intel_wakeref.h
> +++ b/drivers/gpu/drm/i915/intel_wakeref.h
> @@ -9,6 +9,7 @@
>   
>   #include <linux/atomic.h>
>   #include <linux/bits.h>
> +#include <linux/lockdep.h>
>   #include <linux/mutex.h>
>   #include <linux/refcount.h>
>   #include <linux/stackdepot.h>
> @@ -29,9 +30,6 @@ typedef depot_stack_handle_t intel_wakeref_t;
>   struct intel_wakeref_ops {
>   	int (*get)(struct intel_wakeref *wf);
>   	int (*put)(struct intel_wakeref *wf);
> -
> -	unsigned long flags;
> -#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
>   };
>   
>   struct intel_wakeref {
> @@ -57,7 +55,7 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
>   } while (0)
>   
>   int __intel_wakeref_get_first(struct intel_wakeref *wf);
> -void __intel_wakeref_put_last(struct intel_wakeref *wf);
> +void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags);
>   
>   /**
>    * intel_wakeref_get: Acquire the wakeref
> @@ -100,10 +98,9 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
>   }
>   
>   /**
> - * intel_wakeref_put: Release the wakeref
> - * @i915: the drm_i915_private device
> + * intel_wakeref_put_flags: Release the wakeref
>    * @wf: the wakeref
> - * @fn: callback for releasing the wakeref, called only on final release.
> + * @flags: control flags
>    *
>    * Release our hold on the wakeref. When there are no more users,
>    * the runtime pm wakeref will be released after the @fn callback is called
> @@ -116,11 +113,25 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
>    * code otherwise.
>    */
>   static inline void
> -intel_wakeref_put(struct intel_wakeref *wf)
> +__intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags)
> +#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
>   {
>   	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
>   	if (unlikely(!atomic_add_unless(&wf->count, -1, 1)))
> -		__intel_wakeref_put_last(wf);
> +		__intel_wakeref_put_last(wf, flags);
> +}
> +
> +static inline void
> +intel_wakeref_put(struct intel_wakeref *wf)
> +{
> +	might_sleep();
> +	__intel_wakeref_put(wf, 0);
> +}
> +
> +static inline void
> +intel_wakeref_put_async(struct intel_wakeref *wf)
> +{
> +	__intel_wakeref_put(wf, INTEL_WAKEREF_PUT_ASYNC);
>   }
>   
>   /**
> @@ -151,6 +162,20 @@ intel_wakeref_unlock(struct intel_wakeref *wf)
>   	mutex_unlock(&wf->mutex);
>   }
>   
> +/**
> + * intel_wakeref_unlock_wait: Wait until the active callback is complete
> + * @wf: the wakeref
> + *
> + * Waits until the active callback (under @wf->mutex) is complete.
> + */
> +static inline void
> +intel_wakeref_unlock_wait(struct intel_wakeref *wf)
> +{
> +	mutex_lock(&wf->mutex);
> +	mutex_unlock(&wf->mutex);
> +	flush_work(&wf->work);
> +}
> +
>   /**
>    * intel_wakeref_is_active: Query whether the wakeref is currently held
>    * @wf: the wakeref
> @@ -170,6 +195,7 @@ intel_wakeref_is_active(const struct intel_wakeref *wf)
>   static inline void
>   __intel_wakeref_defer_park(struct intel_wakeref *wf)
>   {
> +	lockdep_assert_held(&wf->mutex);
>   	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count));
>   	atomic_set_release(&wf->count, 1);
>   }
> 

Regards,

Tvrtko
Chris Wilson Nov. 19, 2019, 4:12 p.m. UTC | #2
Quoting Tvrtko Ursulin (2019-11-19 15:57:24)
> 
> On 18/11/2019 23:02, Chris Wilson wrote:
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index f7c8fec436a9..fec46afb9264 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -1173,7 +1173,7 @@ __execlists_schedule_out(struct i915_request *rq,
> >   
> >       intel_engine_context_out(engine);
> >       execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
> > -     intel_gt_pm_put(engine->gt);
> > +     intel_gt_pm_put_async(engine->gt);
> 
> Wait a minute.. wasn't the wakeref hierarchy supposed to be engine -> gt 
> so this place should be on the engine level?

Ssh. I lied.

We skip the engine-pm here for the CS interrupts so that we are not kept
waiting to call engine_park().

The excuse is that this wakeref is for the GT interrupt, not the engine :)

> > diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> > index b7007cd78c6f..0388f9375366 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> > @@ -1125,7 +1125,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
> >   out:
> >       intel_engine_cancel_stop_cs(engine);
> >       reset_finish_engine(engine);
> > -     intel_engine_pm_put(engine);
> > +     intel_engine_pm_put_async(engine);
> >       return ret;
> >   }
> >   
> > diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
> > index 20b9c83f43ad..851a6c4f65c6 100644
> > --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
> > +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
> > @@ -51,11 +51,12 @@ static int live_engine_pm(void *arg)
> >                               pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
> >                                      engine->name, p->name);
> >                       else
> > -                             intel_engine_pm_put(engine);
> > -                     intel_engine_pm_put(engine);
> > +                             intel_engine_pm_put_async(engine);
> > +                     intel_engine_pm_put_async(engine);
> >                       p->critical_section_end();
> >   
> > -                     /* engine wakeref is sync (instant) */
> > +                     intel_engine_pm_unlock_wait(engine);
> 
> From the context, would it be clearer to name it
> intel_engine_pm_wait_put? sync_put? flush_put? To more clearly represent
> that it pairs with put_async.

Possibly, I am in mourning for spin_unlock_wait() and will keep on
protesting its demise!

intel_engine_pm_flush() is perhaps the clearest description, in line
with, say, flush_work().
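
A hypothetical shape for that rename (not part of this patch; it would
simply wrap the existing helper and wait out any deferred put worker):

static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
{
	/* Wait for any deferred ____intel_wakeref_put_last() worker to finish. */
	intel_wakeref_unlock_wait(&engine->wakeref);
}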

> > diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> > index 5448f37c8102..dca15ace88f6 100644
> > --- a/drivers/gpu/drm/i915/i915_active.c
> > +++ b/drivers/gpu/drm/i915/i915_active.c
> > @@ -672,12 +672,13 @@ void i915_active_acquire_barrier(struct i915_active *ref)
> >        * populated by i915_request_add_active_barriers() to point to the
> >        * request that will eventually release them.
> >        */
> > -     spin_lock_irqsave_nested(&ref->tree_lock, flags, SINGLE_DEPTH_NESTING);
> >       llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
> >               struct active_node *node = barrier_from_ll(pos);
> >               struct intel_engine_cs *engine = barrier_to_engine(node);
> >               struct rb_node **p, *parent;
> >   
> > +             spin_lock_irqsave_nested(&ref->tree_lock, flags,
> > +                                      SINGLE_DEPTH_NESTING);
> >               parent = NULL;
> >               p = &ref->tree.rb_node;
> >               while (*p) {
> > @@ -693,12 +694,12 @@ void i915_active_acquire_barrier(struct i915_active *ref)
> >               }
> >               rb_link_node(&node->node, parent, p);
> >               rb_insert_color(&node->node, &ref->tree);
> > +             spin_unlock_irqrestore(&ref->tree_lock, flags);
> >   
> >               GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
> >               llist_add(barrier_to_ll(node), &engine->barrier_tasks);
> >               intel_engine_pm_put(engine);
> >       }
> > -     spin_unlock_irqrestore(&ref->tree_lock, flags);
> 
> Pros/cons of leaving the locks where they were and using put_async, 
> versus this layout?

Usually there is just a single engine in the list (only for virtual
engines will there be more), so we save the worker invocation at
typically no cost, and thus get into engine_park() earlier while the GPU
is still warm.

That and I'm still smarting from RT demanding all spin_lock_irq to be
reviewed and tightened.
-Chris

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 722d3eec226e..4878d16176d5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -180,7 +180,8 @@  static int __engine_park(struct intel_wakeref *wf)
 
 	engine->execlists.no_priolist = false;
 
-	intel_gt_pm_put(engine->gt);
+	/* While we call i915_vma_parked, we have to break the lock cycle */
+	intel_gt_pm_put_async(engine->gt);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 739c50fefcef..467475fce9c6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -31,6 +31,16 @@  static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
 	intel_wakeref_put(&engine->wakeref);
 }
 
+static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine)
+{
+	intel_wakeref_put_async(&engine->wakeref);
+}
+
+static inline void intel_engine_pm_unlock_wait(struct intel_engine_cs *engine)
+{
+	intel_wakeref_unlock_wait(&engine->wakeref);
+}
+
 void intel_engine_init__pm(struct intel_engine_cs *engine);
 
 #endif /* INTEL_ENGINE_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index e61f752a3cd5..7a9044ac4b75 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -105,7 +105,6 @@  static int __gt_park(struct intel_wakeref *wf)
 static const struct intel_wakeref_ops wf_ops = {
 	.get = __gt_unpark,
 	.put = __gt_park,
-	.flags = INTEL_WAKEREF_PUT_ASYNC,
 };
 
 void intel_gt_pm_init_early(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index b3e17399be9b..990efc27a4e4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -32,6 +32,11 @@  static inline void intel_gt_pm_put(struct intel_gt *gt)
 	intel_wakeref_put(&gt->wakeref);
 }
 
+static inline void intel_gt_pm_put_async(struct intel_gt *gt)
+{
+	intel_wakeref_put_async(&gt->wakeref);
+}
+
 static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
 {
 	return intel_wakeref_wait_for_idle(&gt->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index f7c8fec436a9..fec46afb9264 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1173,7 +1173,7 @@  __execlists_schedule_out(struct i915_request *rq,
 
 	intel_engine_context_out(engine);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
-	intel_gt_pm_put(engine->gt);
+	intel_gt_pm_put_async(engine->gt);
 
 	/*
 	 * If this is part of a virtual engine, its next request may
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index b7007cd78c6f..0388f9375366 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1125,7 +1125,7 @@  int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
 out:
 	intel_engine_cancel_stop_cs(engine);
 	reset_finish_engine(engine);
-	intel_engine_pm_put(engine);
+	intel_engine_pm_put_async(engine);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
index 20b9c83f43ad..851a6c4f65c6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
@@ -51,11 +51,12 @@  static int live_engine_pm(void *arg)
 				pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
 				       engine->name, p->name);
 			else
-				intel_engine_pm_put(engine);
-			intel_engine_pm_put(engine);
+				intel_engine_pm_put_async(engine);
+			intel_engine_pm_put_async(engine);
 			p->critical_section_end();
 
-			/* engine wakeref is sync (instant) */
+			intel_engine_pm_unlock_wait(engine);
+
 			if (intel_engine_pm_is_awake(engine)) {
 				pr_err("%s is still awake after flushing pm\n",
 				       engine->name);
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 5448f37c8102..dca15ace88f6 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -672,12 +672,13 @@  void i915_active_acquire_barrier(struct i915_active *ref)
 	 * populated by i915_request_add_active_barriers() to point to the
 	 * request that will eventually release them.
 	 */
-	spin_lock_irqsave_nested(&ref->tree_lock, flags, SINGLE_DEPTH_NESTING);
 	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
 		struct active_node *node = barrier_from_ll(pos);
 		struct intel_engine_cs *engine = barrier_to_engine(node);
 		struct rb_node **p, *parent;
 
+		spin_lock_irqsave_nested(&ref->tree_lock, flags,
+					 SINGLE_DEPTH_NESTING);
 		parent = NULL;
 		p = &ref->tree.rb_node;
 		while (*p) {
@@ -693,12 +694,12 @@  void i915_active_acquire_barrier(struct i915_active *ref)
 		}
 		rb_link_node(&node->node, parent, p);
 		rb_insert_color(&node->node, &ref->tree);
+		spin_unlock_irqrestore(&ref->tree_lock, flags);
 
 		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
 		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
 		intel_engine_pm_put(engine);
 	}
-	spin_unlock_irqrestore(&ref->tree_lock, flags);
 }
 
 void i915_request_add_active_barriers(struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 9b02be0ad4e6..95e824a78d4d 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -190,7 +190,7 @@  static u64 get_rc6(struct intel_gt *gt)
 	val = 0;
 	if (intel_gt_pm_get_if_awake(gt)) {
 		val = __get_rc6(gt);
-		intel_gt_pm_put(gt);
+		intel_gt_pm_put_async(gt);
 	}
 
 	spin_lock_irqsave(&pmu->lock, flags);
@@ -360,7 +360,7 @@  engines_sample(struct intel_gt *gt, unsigned int period_ns)
 skip:
 		if (unlikely(mmio_lock))
 			spin_unlock_irqrestore(mmio_lock, flags);
-		intel_engine_pm_put(engine);
+		intel_engine_pm_put_async(engine);
 	}
 }
 
@@ -398,7 +398,7 @@  frequency_sample(struct intel_gt *gt, unsigned int period_ns)
 			if (stat)
 				val = intel_get_cagf(rps, stat);
 
-			intel_gt_pm_put(gt);
+			intel_gt_pm_put_async(gt);
 		}
 
 		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
index 868cc78048d0..9b29176cc1ca 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -54,7 +54,8 @@  int __intel_wakeref_get_first(struct intel_wakeref *wf)
 
 static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
 {
-	if (!atomic_dec_and_test(&wf->count))
+	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
+	if (unlikely(!atomic_dec_and_test(&wf->count)))
 		goto unlock;
 
 	/* ops->put() must reschedule its own release on error/deferral */
@@ -67,13 +68,12 @@  static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
 	mutex_unlock(&wf->mutex);
 }
 
-void __intel_wakeref_put_last(struct intel_wakeref *wf)
+void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags)
 {
 	INTEL_WAKEREF_BUG_ON(work_pending(&wf->work));
 
 	/* Assume we are not in process context and so cannot sleep. */
-	if (wf->ops->flags & INTEL_WAKEREF_PUT_ASYNC ||
-	    !mutex_trylock(&wf->mutex)) {
+	if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) {
 		schedule_work(&wf->work);
 		return;
 	}
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index 5f0c972a80fb..688b9b536a69 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -9,6 +9,7 @@ 
 
 #include <linux/atomic.h>
 #include <linux/bits.h>
+#include <linux/lockdep.h>
 #include <linux/mutex.h>
 #include <linux/refcount.h>
 #include <linux/stackdepot.h>
@@ -29,9 +30,6 @@  typedef depot_stack_handle_t intel_wakeref_t;
 struct intel_wakeref_ops {
 	int (*get)(struct intel_wakeref *wf);
 	int (*put)(struct intel_wakeref *wf);
-
-	unsigned long flags;
-#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
 };
 
 struct intel_wakeref {
@@ -57,7 +55,7 @@  void __intel_wakeref_init(struct intel_wakeref *wf,
 } while (0)
 
 int __intel_wakeref_get_first(struct intel_wakeref *wf);
-void __intel_wakeref_put_last(struct intel_wakeref *wf);
+void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags);
 
 /**
  * intel_wakeref_get: Acquire the wakeref
@@ -100,10 +98,9 @@  intel_wakeref_get_if_active(struct intel_wakeref *wf)
 }
 
 /**
- * intel_wakeref_put: Release the wakeref
- * @i915: the drm_i915_private device
+ * intel_wakeref_put_flags: Release the wakeref
  * @wf: the wakeref
- * @fn: callback for releasing the wakeref, called only on final release.
+ * @flags: control flags
  *
  * Release our hold on the wakeref. When there are no more users,
  * the runtime pm wakeref will be released after the @fn callback is called
@@ -116,11 +113,25 @@  intel_wakeref_get_if_active(struct intel_wakeref *wf)
  * code otherwise.
  */
 static inline void
-intel_wakeref_put(struct intel_wakeref *wf)
+__intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags)
+#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
 {
 	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
 	if (unlikely(!atomic_add_unless(&wf->count, -1, 1)))
-		__intel_wakeref_put_last(wf);
+		__intel_wakeref_put_last(wf, flags);
+}
+
+static inline void
+intel_wakeref_put(struct intel_wakeref *wf)
+{
+	might_sleep();
+	__intel_wakeref_put(wf, 0);
+}
+
+static inline void
+intel_wakeref_put_async(struct intel_wakeref *wf)
+{
+	__intel_wakeref_put(wf, INTEL_WAKEREF_PUT_ASYNC);
 }
 
 /**
@@ -151,6 +162,20 @@  intel_wakeref_unlock(struct intel_wakeref *wf)
 	mutex_unlock(&wf->mutex);
 }
 
+/**
+ * intel_wakeref_unlock_wait: Wait until the active callback is complete
+ * @wf: the wakeref
+ *
+ * Waits until the active callback (under @wf->mutex) is complete.
+ */
+static inline void
+intel_wakeref_unlock_wait(struct intel_wakeref *wf)
+{
+	mutex_lock(&wf->mutex);
+	mutex_unlock(&wf->mutex);
+	flush_work(&wf->work);
+}
+
 /**
  * intel_wakeref_is_active: Query whether the wakeref is currently held
  * @wf: the wakeref
@@ -170,6 +195,7 @@  intel_wakeref_is_active(const struct intel_wakeref *wf)
 static inline void
 __intel_wakeref_defer_park(struct intel_wakeref *wf)
 {
+	lockdep_assert_held(&wf->mutex);
 	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count));
 	atomic_set_release(&wf->count, 1);
 }