diff mbox series

[08/25] drm/i915/pmu: Always sample an active ringbuffer

Message ID 20190219122215.8941-8-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/25] drm/i915: Move verify_wm_state() to heap | expand

Commit Message

Chris Wilson Feb. 19, 2019, 12:21 p.m. UTC
As we no longer have a precise indication of requests queued to an
engine, make no presumptions and just sample the ring registers to see
if the engine is busy.

v2: Report busy while the ring is idling on a semaphore/event.
v3: Give the struct a name!
v4: Always 0 outside the powerwell; trusting the powerwell is
accurate enough for our sampling pmu.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c         | 60 ++++++++++---------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 2 files changed, 24 insertions(+), 38 deletions(-)

Comments

Mika Kuoppala Feb. 22, 2019, 12:10 p.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> As we no longer have a precise indication of requests queued to an
> engine, make no presumptions and just sample the ring registers to see
> if the engine is busy.
>
> v2: Report busy while the ring is idling on a semaphore/event.
> v3: Give the struct a name!
> v4: Always 0 outside the powerwell; trusting the powerwell is
> accurate enough for our sampling pmu.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_pmu.c         | 60 ++++++++++---------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
>  2 files changed, 24 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 13d70b90dd0f..21adad72bd86 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -148,14 +148,6 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
>  	spin_unlock_irq(&i915->pmu.lock);
>  }
>  
> -static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
> -{
> -	if (!fw)
> -		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
> -
> -	return true;
> -}
> -
>  static void
>  add_sample(struct i915_pmu_sample *sample, u32 val)
>  {
> @@ -168,49 +160,43 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
>  	intel_wakeref_t wakeref;
> -	bool fw = false;
>  
>  	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
>  		return;
>  
> -	if (!dev_priv->gt.awake)
> -		return;
> -
> -	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
> +	wakeref = 0;
> +	if (READ_ONCE(dev_priv->gt.awake))

Is this gt.awake check just to be more lightweight on sampling?

> +		wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
>  	if (!wakeref)
>  		return;
>  
>  	for_each_engine(engine, dev_priv, id) {
> -		u32 current_seqno = intel_engine_get_seqno(engine);
> -		u32 last_seqno = intel_engine_last_submit(engine);
> +		struct intel_engine_pmu *pmu = &engine->pmu;
> +		bool busy;
>  		u32 val;
>  
> -		val = !i915_seqno_passed(current_seqno, last_seqno);
> -
> -		if (val)
> -			add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
> -				   period_ns);
> -
> -		if (val && (engine->pmu.enable &
> -		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
> -			fw = grab_forcewake(dev_priv, fw);
> -
> -			val = I915_READ_FW(RING_CTL(engine->mmio_base));
> -		} else {
> -			val = 0;
> -		}
> +		val = I915_READ_FW(RING_CTL(engine->mmio_base));
> +		if (val == 0) /* powerwell off => engine idle */
> +			continue;
>  
>  		if (val & RING_WAIT)
> -			add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
> -				   period_ns);
> -
> +			add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
>  		if (val & RING_WAIT_SEMAPHORE)
> -			add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
> -				   period_ns);
> -	}
> +			add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
>  
> -	if (fw)
> -		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +		/*
> +		 * MI_MODE reports IDLE if the ring is waiting, but we regard
> +		 * this as being busy instead, as the engine is busy with the
> +		 * user request.
> +		 */
> +		busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
> +		if (!busy) {
> +			val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
> +			busy = !(val & MODE_IDLE);

The comment makes sense if you do
busy = val & MODE_IDLE;

-Mika

> +		}
> +		if (busy)
> +			add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
> +	}
>  
>  	intel_runtime_pm_put(dev_priv, wakeref);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 710ffb221775..5d45ad4ecca9 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -392,7 +392,7 @@ struct intel_engine_cs {
>  		bool irq_armed;
>  	} breadcrumbs;
>  
> -	struct {
> +	struct intel_engine_pmu {
>  		/**
>  		 * @enable: Bitmask of enable sample events on this engine.
>  		 *
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Feb. 22, 2019, 12:17 p.m. UTC | #2
Quoting Mika Kuoppala (2019-02-22 12:10:33)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > As we no longer have a precise indication of requests queued to an
> > engine, make no presumptions and just sample the ring registers to see
> > if the engine is busy.
> >
> > v2: Report busy while the ring is idling on a semaphore/event.
> > v3: Give the struct a name!
> > v4: Always 0 outside the powerwell; trusting the powerwell is
> > accurate enough for our sampling pmu.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_pmu.c         | 60 ++++++++++---------------
> >  drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
> >  2 files changed, 24 insertions(+), 38 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> > index 13d70b90dd0f..21adad72bd86 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -148,14 +148,6 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
> >       spin_unlock_irq(&i915->pmu.lock);
> >  }
> >  
> > -static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
> > -{
> > -     if (!fw)
> > -             intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
> > -
> > -     return true;
> > -}
> > -
> >  static void
> >  add_sample(struct i915_pmu_sample *sample, u32 val)
> >  {
> > @@ -168,49 +160,43 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
> >       struct intel_engine_cs *engine;
> >       enum intel_engine_id id;
> >       intel_wakeref_t wakeref;
> > -     bool fw = false;
> >  
> >       if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
> >               return;
> >  
> > -     if (!dev_priv->gt.awake)
> > -             return;
> > -
> > -     wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
> > +     wakeref = 0;
> > +     if (READ_ONCE(dev_priv->gt.awake))
> 
> Is this gt.awake check just to be more lightweight on sampling?

Yes, we know that if !awake, then we are idle and are done with
sampling. It also ties in later with a patch to change how we handle rpm
suspend periods (which is why I went with writing it in this idiom).

> > +             wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
> >       if (!wakeref)
> >               return;
> >  
> >       for_each_engine(engine, dev_priv, id) {
> > -             u32 current_seqno = intel_engine_get_seqno(engine);
> > -             u32 last_seqno = intel_engine_last_submit(engine);
> > +             struct intel_engine_pmu *pmu = &engine->pmu;
> > +             bool busy;
> >               u32 val;
> >  
> > -             val = !i915_seqno_passed(current_seqno, last_seqno);
> > -
> > -             if (val)
> > -                     add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
> > -                                period_ns);
> > -
> > -             if (val && (engine->pmu.enable &
> > -                 (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
> > -                     fw = grab_forcewake(dev_priv, fw);
> > -
> > -                     val = I915_READ_FW(RING_CTL(engine->mmio_base));
> > -             } else {
> > -                     val = 0;
> > -             }
> > +             val = I915_READ_FW(RING_CTL(engine->mmio_base));
> > +             if (val == 0) /* powerwell off => engine idle */
> > +                     continue;
> >  
> >               if (val & RING_WAIT)
> > -                     add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
> > -                                period_ns);
> > -
> > +                     add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
> >               if (val & RING_WAIT_SEMAPHORE)
> > -                     add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
> > -                                period_ns);
> > -     }
> > +                     add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
> >  
> > -     if (fw)
> > -             intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> > +             /*
> > +              * MI_MODE reports IDLE if the ring is waiting, but we regard
> > +              * this as being busy instead, as the engine is busy with the
> > +              * user request.
> > +              */
> > +             busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
> > +             if (!busy) {
> > +                     val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
> > +                     busy = !(val & MODE_IDLE);
> 
> The comment makes sense if you do
> busy = val & MODE_IDLE;

/*
 * While waiting on a semaphore or event, MI_MODE reports the ring as
 * idle. However, previously using the seqno, and with execlists sampling,
 * we account for the ring waiting as the engine being busy. Therefore,
 * we record the sample as being busy if either waiting or !idle.
 */
-Chris
Mika Kuoppala Feb. 22, 2019, 12:31 p.m. UTC | #3
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-02-22 12:10:33)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > As we no longer have a precise indication of requests queued to an
>> > engine, make no presumptions and just sample the ring registers to see
>> > if the engine is busy.
>> >
>> > v2: Report busy while the ring is idling on a semaphore/event.
>> > v3: Give the struct a name!
>> > v4: Always 0 outside the powerwell; trusting the powerwell is
>> > accurate enough for our sampling pmu.
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> > ---
>> >  drivers/gpu/drm/i915/i915_pmu.c         | 60 ++++++++++---------------
>> >  drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
>> >  2 files changed, 24 insertions(+), 38 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
>> > index 13d70b90dd0f..21adad72bd86 100644
>> > --- a/drivers/gpu/drm/i915/i915_pmu.c
>> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
>> > @@ -148,14 +148,6 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
>> >       spin_unlock_irq(&i915->pmu.lock);
>> >  }
>> >  
>> > -static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
>> > -{
>> > -     if (!fw)
>> > -             intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
>> > -
>> > -     return true;
>> > -}
>> > -
>> >  static void
>> >  add_sample(struct i915_pmu_sample *sample, u32 val)
>> >  {
>> > @@ -168,49 +160,43 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
>> >       struct intel_engine_cs *engine;
>> >       enum intel_engine_id id;
>> >       intel_wakeref_t wakeref;
>> > -     bool fw = false;
>> >  
>> >       if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
>> >               return;
>> >  
>> > -     if (!dev_priv->gt.awake)
>> > -             return;
>> > -
>> > -     wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
>> > +     wakeref = 0;
>> > +     if (READ_ONCE(dev_priv->gt.awake))
>> 
>> Is this gt.awake check just to be more lightweight on sampling?
>
> Yes, we know that if !awake, then we are idle and are done with
> sampling. It also ties in later with a patch to change how we handle rpm
> suspend periods (which is why I went with writing it in this idiom).
>
>> > +             wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
>> >       if (!wakeref)
>> >               return;
>> >  
>> >       for_each_engine(engine, dev_priv, id) {
>> > -             u32 current_seqno = intel_engine_get_seqno(engine);
>> > -             u32 last_seqno = intel_engine_last_submit(engine);
>> > +             struct intel_engine_pmu *pmu = &engine->pmu;
>> > +             bool busy;
>> >               u32 val;
>> >  
>> > -             val = !i915_seqno_passed(current_seqno, last_seqno);
>> > -
>> > -             if (val)
>> > -                     add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
>> > -                                period_ns);
>> > -
>> > -             if (val && (engine->pmu.enable &
>> > -                 (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
>> > -                     fw = grab_forcewake(dev_priv, fw);
>> > -
>> > -                     val = I915_READ_FW(RING_CTL(engine->mmio_base));
>> > -             } else {
>> > -                     val = 0;
>> > -             }
>> > +             val = I915_READ_FW(RING_CTL(engine->mmio_base));
>> > +             if (val == 0) /* powerwell off => engine idle */
>> > +                     continue;
>> >  
>> >               if (val & RING_WAIT)
>> > -                     add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
>> > -                                period_ns);
>> > -
>> > +                     add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
>> >               if (val & RING_WAIT_SEMAPHORE)
>> > -                     add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
>> > -                                period_ns);
>> > -     }
>> > +                     add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
>> >  
>> > -     if (fw)
>> > -             intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>> > +             /*
>> > +              * MI_MODE reports IDLE if the ring is waiting, but we regard
>> > +              * this as being busy instead, as the engine is busy with the
>> > +              * user request.
>> > +              */
>> > +             busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
>> > +             if (!busy) {
>> > +                     val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
>> > +                     busy = !(val & MODE_IDLE);
>> 
>> The comment makes sense if you do
>> busy = val & MODE_IDLE;
>
> /*
>  * While waiting on a semaphore or event, MI_MODE reports the ring as
>  * idle. However, previously using the seqno, and with execlists sampling,
>  * we account for the ring waiting as the engine being busy. Therefore,
>  * we record the sample as being busy if either waiting or !idle.
>  */

Altho now the original makes more sense too, I like this one better!

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 13d70b90dd0f..21adad72bd86 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -148,14 +148,6 @@  void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 	spin_unlock_irq(&i915->pmu.lock);
 }
 
-static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
-{
-	if (!fw)
-		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
-
-	return true;
-}
-
 static void
 add_sample(struct i915_pmu_sample *sample, u32 val)
 {
@@ -168,49 +160,43 @@  engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	intel_wakeref_t wakeref;
-	bool fw = false;
 
 	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
 		return;
 
-	if (!dev_priv->gt.awake)
-		return;
-
-	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
+	wakeref = 0;
+	if (READ_ONCE(dev_priv->gt.awake))
+		wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
 	if (!wakeref)
 		return;
 
 	for_each_engine(engine, dev_priv, id) {
-		u32 current_seqno = intel_engine_get_seqno(engine);
-		u32 last_seqno = intel_engine_last_submit(engine);
+		struct intel_engine_pmu *pmu = &engine->pmu;
+		bool busy;
 		u32 val;
 
-		val = !i915_seqno_passed(current_seqno, last_seqno);
-
-		if (val)
-			add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
-				   period_ns);
-
-		if (val && (engine->pmu.enable &
-		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
-			fw = grab_forcewake(dev_priv, fw);
-
-			val = I915_READ_FW(RING_CTL(engine->mmio_base));
-		} else {
-			val = 0;
-		}
+		val = I915_READ_FW(RING_CTL(engine->mmio_base));
+		if (val == 0) /* powerwell off => engine idle */
+			continue;
 
 		if (val & RING_WAIT)
-			add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
-				   period_ns);
-
+			add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
 		if (val & RING_WAIT_SEMAPHORE)
-			add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
-				   period_ns);
-	}
+			add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
 
-	if (fw)
-		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+		/*
+		 * MI_MODE reports IDLE if the ring is waiting, but we regard
+		 * this as being busy instead, as the engine is busy with the
+		 * user request.
+		 */
+		busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
+		if (!busy) {
+			val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
+			busy = !(val & MODE_IDLE);
+		}
+		if (busy)
+			add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
+	}
 
 	intel_runtime_pm_put(dev_priv, wakeref);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 710ffb221775..5d45ad4ecca9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -392,7 +392,7 @@  struct intel_engine_cs {
 		bool irq_armed;
 	} breadcrumbs;
 
-	struct {
+	struct intel_engine_pmu {
 		/**
 		 * @enable: Bitmask of enable sample events on this engine.
 		 *