diff mbox series

[19/46] drm/i915/pmu: Always sample an active ringbuffer

Message ID 20190206130356.18771-20-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/46] drm/i915: Hack and slash, throttle execbuffer hogs | expand

Commit Message

Chris Wilson Feb. 6, 2019, 1:03 p.m. UTC
As we no longer have a precise indication of requests queued to an
engine, make no presumptions and just sample the ring registers to see
if the engine is busy.

v2: Report busy while the ring is idling on a semaphore/event.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_pmu.c | 55 +++++++++++++--------------------
 1 file changed, 21 insertions(+), 34 deletions(-)

Comments

Tvrtko Ursulin Feb. 11, 2019, 6:18 p.m. UTC | #1
On 06/02/2019 13:03, Chris Wilson wrote:
> As we no longer have a precise indication of requests queued to an
> engine, make no presumptions and just sample the ring registers to see
> if the engine is busy.
> 
> v2: Report busy while the ring is idling on a semaphore/event.

I was planning to take care of this detail but cool, no complaints. :)

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_pmu.c | 55 +++++++++++++--------------------
>   1 file changed, 21 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 13d70b90dd0f..157cbfa155d9 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -101,7 +101,7 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
>   	 *
>   	 * Use RCS as proxy for all engines.
>   	 */
> -	else if (intel_engine_supports_stats(i915->engine[RCS]))
> +	else if (i915->caps.scheduler & I915_SCHEDULER_CAP_PMU)

Need to nuke the comment as well.

But my problem is I still think I915_SCHEDULER_CAP_PMU is wrong name and 
level. It is neither a scheduler feature, nor the whole PMU. Maybe 
I915_SCHEDULER_CAP_ENGINE_STATS removes one contention point, but still 
I am wondering if I could refactor how the PMU tracks the need for 
having the sampling timer and so remove the need for proxying from RCS 
via that route.

>   		enable &= ~BIT(I915_SAMPLE_BUSY);
>   
>   	/*
> @@ -148,14 +148,6 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
>   	spin_unlock_irq(&i915->pmu.lock);
>   }
>   
> -static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
> -{
> -	if (!fw)
> -		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
> -
> -	return true;
> -}
> -
>   static void
>   add_sample(struct i915_pmu_sample *sample, u32 val)
>   {
> @@ -168,7 +160,6 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
>   	struct intel_engine_cs *engine;
>   	enum intel_engine_id id;
>   	intel_wakeref_t wakeref;
> -	bool fw = false;
>   
>   	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
>   		return;
> @@ -181,36 +172,32 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
>   		return;
>   
>   	for_each_engine(engine, dev_priv, id) {
> -		u32 current_seqno = intel_engine_get_seqno(engine);
> -		u32 last_seqno = intel_engine_last_submit(engine);
> +		typeof(engine->pmu) *pmu = &engine->pmu;

I would also prefer we did not start introducing the idiom of declaring 
locals outside macros with typeof.

> +		bool busy;
>   		u32 val;
>   
> -		val = !i915_seqno_passed(current_seqno, last_seqno);
> -
> -		if (val)
> -			add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
> -				   period_ns);
> -
> -		if (val && (engine->pmu.enable &
> -		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
> -			fw = grab_forcewake(dev_priv, fw);
> -
> -			val = I915_READ_FW(RING_CTL(engine->mmio_base));
> -		} else {
> -			val = 0;
> -		}
> +		val = I915_READ_FW(RING_CTL(engine->mmio_base));
> +		if (val == 0 || val == ~0u) /* outside of powerwell */
> +			continue;
Would /* Powerwell not awake. */ be clearer?

So the claim is we can rely on register being either all zeros or all 
ones when powered down? Absolutely 100%? Is this documented somewhere? 
But still need the runtime pm ref?

Regards,

Tvrtko

>   
>   		if (val & RING_WAIT)
> -			add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
> -				   period_ns);
> -
> +			add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
>   		if (val & RING_WAIT_SEMAPHORE)
> -			add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
> -				   period_ns);
> -	}
> +			add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
>   
> -	if (fw)
> -		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +		/*
> +		 * MI_MODE reports IDLE if the ring is waiting, but we regard
> +		 * this as being busy instead, as the engine is busy with the
> +		 * user request.
> +		 */
> +		busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
> +		if (!busy) {
> +			val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
> +			busy = !(val & MODE_IDLE);
> +		}
> +		if (busy)
> +			add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
> +	}
>   
>   	intel_runtime_pm_put(dev_priv, wakeref);
>   }
>
Chris Wilson Feb. 12, 2019, 1:40 p.m. UTC | #2
Quoting Tvrtko Ursulin (2019-02-11 18:18:36)
> 
> On 06/02/2019 13:03, Chris Wilson wrote:
> > As we no longer have a precise indication of requests queued to an
> > engine, make no presumptions and just sample the ring registers to see
> > if the engine is busy.
> > 
> > v2: Report busy while the ring is idling on a semaphore/event.
> 
> I was planning to take care of this detail but cool, no complaints. :)
> 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_pmu.c | 55 +++++++++++++--------------------
> >   1 file changed, 21 insertions(+), 34 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> > index 13d70b90dd0f..157cbfa155d9 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -101,7 +101,7 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
> >        *
> >        * Use RCS as proxy for all engines.
> >        */
> > -     else if (intel_engine_supports_stats(i915->engine[RCS]))
> > +     else if (i915->caps.scheduler & I915_SCHEDULER_CAP_PMU)
> 
> Need to nuke the comment as well.
> 
> But my problem is I still think I915_SCHEDULER_CAP_PMU is wrong name and 
> level. It is neither a scheduler feature, nor the whole PMU. Maybe 
> I915_SCHEDULER_CAP_ENGINE_STATS removes one contention point, but still 
> I am wondering if I could refactor how the PMU tracks the need for 
> having the sampling timer and so remove the need for proxying from RCS 
> via that route.

This chunk wasn't meant to be in this patch, sorry.

> >               enable &= ~BIT(I915_SAMPLE_BUSY);
> >   
> >       /*
> > @@ -148,14 +148,6 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
> >       spin_unlock_irq(&i915->pmu.lock);
> >   }
> >   
> > -static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
> > -{
> > -     if (!fw)
> > -             intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
> > -
> > -     return true;
> > -}
> > -
> >   static void
> >   add_sample(struct i915_pmu_sample *sample, u32 val)
> >   {
> > @@ -168,7 +160,6 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
> >       struct intel_engine_cs *engine;
> >       enum intel_engine_id id;
> >       intel_wakeref_t wakeref;
> > -     bool fw = false;
> >   
> >       if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
> >               return;
> > @@ -181,36 +172,32 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
> >               return;
> >   
> >       for_each_engine(engine, dev_priv, id) {
> > -             u32 current_seqno = intel_engine_get_seqno(engine);
> > -             u32 last_seqno = intel_engine_last_submit(engine);
> > +             typeof(engine->pmu) *pmu = &engine->pmu;
> 
> I would also prefer we did not start introducing the idiom of declaring 
> locals outside macros with typeof.

You didn't give me a convenient type name :) I don't mind it too much,
auto locals.

> 
> > +             bool busy;
> >               u32 val;
> >   
> > -             val = !i915_seqno_passed(current_seqno, last_seqno);
> > -
> > -             if (val)
> > -                     add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
> > -                                period_ns);
> > -
> > -             if (val && (engine->pmu.enable &
> > -                 (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
> > -                     fw = grab_forcewake(dev_priv, fw);
> > -
> > -                     val = I915_READ_FW(RING_CTL(engine->mmio_base));
> > -             } else {
> > -                     val = 0;
> > -             }
> > +             val = I915_READ_FW(RING_CTL(engine->mmio_base));
> > +             if (val == 0 || val == ~0u) /* outside of powerwell */
> > +                     continue;
> Would /* Powerwell not awake. */ be clearer?
> 
> So the claim is we can rely on register being either all zeros or all 
> ones when powered down? Absolutely 100%? Is this documented somewhere? 
> But still need the runtime pm ref?

We still need the runtime pm ref, or else we upset our own sanity checks, iirc,
although we may be bypassing those and not leaving an error in the
mmio-debug register; I haven't checked.

As far as I remember, it always reads 0 outside the powerwell. Some
registers return ~0u, so I hedged my bets.
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 13d70b90dd0f..157cbfa155d9 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -101,7 +101,7 @@  static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
 	 *
 	 * Use RCS as proxy for all engines.
 	 */
-	else if (intel_engine_supports_stats(i915->engine[RCS]))
+	else if (i915->caps.scheduler & I915_SCHEDULER_CAP_PMU)
 		enable &= ~BIT(I915_SAMPLE_BUSY);
 
 	/*
@@ -148,14 +148,6 @@  void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 	spin_unlock_irq(&i915->pmu.lock);
 }
 
-static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
-{
-	if (!fw)
-		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
-
-	return true;
-}
-
 static void
 add_sample(struct i915_pmu_sample *sample, u32 val)
 {
@@ -168,7 +160,6 @@  engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	intel_wakeref_t wakeref;
-	bool fw = false;
 
 	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
 		return;
@@ -181,36 +172,32 @@  engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 		return;
 
 	for_each_engine(engine, dev_priv, id) {
-		u32 current_seqno = intel_engine_get_seqno(engine);
-		u32 last_seqno = intel_engine_last_submit(engine);
+		typeof(engine->pmu) *pmu = &engine->pmu;
+		bool busy;
 		u32 val;
 
-		val = !i915_seqno_passed(current_seqno, last_seqno);
-
-		if (val)
-			add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
-				   period_ns);
-
-		if (val && (engine->pmu.enable &
-		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
-			fw = grab_forcewake(dev_priv, fw);
-
-			val = I915_READ_FW(RING_CTL(engine->mmio_base));
-		} else {
-			val = 0;
-		}
+		val = I915_READ_FW(RING_CTL(engine->mmio_base));
+		if (val == 0 || val == ~0u) /* outside of powerwell */
+			continue;
 
 		if (val & RING_WAIT)
-			add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
-				   period_ns);
-
+			add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
 		if (val & RING_WAIT_SEMAPHORE)
-			add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
-				   period_ns);
-	}
+			add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
 
-	if (fw)
-		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+		/*
+		 * MI_MODE reports IDLE if the ring is waiting, but we regard
+		 * this as being busy instead, as the engine is busy with the
+		 * user request.
+		 */
+		busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
+		if (!busy) {
+			val = I915_READ_FW(RING_MI_MODE(engine->mmio_base));
+			busy = !(val & MODE_IDLE);
+		}
+		if (busy)
+			add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
+	}
 
 	intel_runtime_pm_put(dev_priv, wakeref);
 }