
[4/4] drm/i915/pmu: Only use exclusive mmio access for gen7

Message ID 20191108085626.32429-4-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [1/4] drm/i915/icl: Refine PG_HYSTERESIS

Commit Message

Chris Wilson Nov. 8, 2019, 8:56 a.m. UTC
On gen7, we have to avoid concurrent access to the same mmio cacheline,
and so coordinate all mmio access with the uncore->lock. However, for
pmu, we want to avoid perturbing the system and disabling interrupts
unnecessarily, so restrict the w/a to gen7 where it is required to
prevent machine hangs.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

Comments

Tvrtko Ursulin Nov. 8, 2019, 10:21 a.m. UTC | #1
On 08/11/2019 08:56, Chris Wilson wrote:
> On gen7, we have to avoid concurrent access to the same mmio cacheline,
> and so coordinate all mmio access with the uncore->lock. However, for
> pmu, we want to avoid perturbing the system and disabling interrupts
> unnecessarily, so restrict the w/a to gen7 where it is required to
> prevent machine hangs.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_pmu.c | 11 +++++++++--
>   1 file changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index dbde80a376cb..9e1627782284 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -292,6 +292,11 @@ add_sample(struct i915_pmu_sample *sample, u32 val)
>   	sample->cur += val;
>   }
>   
> +static bool exclusive_mmio_access(const struct drm_i915_private *i915)
> +{
> +	return IS_GEN(i915, 7);
> +}
> +
>   static void
>   engines_sample(struct intel_gt *gt, unsigned int period_ns)
>   {
> @@ -311,7 +316,8 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
>   		if (!intel_engine_pm_get_if_awake(engine))
>   			continue;
>   
> -		spin_lock_irqsave(&engine->uncore->lock, flags);
> +		if (exclusive_mmio_access(i915))
> +			spin_lock_irqsave(&engine->uncore->lock, flags);
>   
>   		val = ENGINE_READ_FW(engine, RING_CTL);
>   		if (val == 0) /* powerwell off => engine idle */
> @@ -342,7 +348,8 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
>   			add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
>   
>   skip:
> -		spin_unlock_irqrestore(&engine->uncore->lock, flags);
> +		if (exclusive_mmio_access(i915))
> +			spin_unlock_irqrestore(&engine->uncore->lock, flags);
>   		intel_engine_pm_put(engine);
>   	}
>   }
> 

For bonus points, cache the check in a local? Or cache the lock in a 
local for even more win: spinlock_t *lock = exclusive_mmio_access(..) ? 
&.. : NULL ?

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Chris Wilson Nov. 8, 2019, 10:28 a.m. UTC | #2
Quoting Tvrtko Ursulin (2019-11-08 10:21:22)
> 
> On 08/11/2019 08:56, Chris Wilson wrote:
> > On gen7, we have to avoid concurrent access to the same mmio cacheline,
> > and so coordinate all mmio access with the uncore->lock. However, for
> > pmu, we want to avoid perturbing the system and disabling interrupts
> > unnecessarily, so restrict the w/a to gen7 where it is required to
> > prevent machine hangs.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_pmu.c | 11 +++++++++--
> >   1 file changed, 9 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> > index dbde80a376cb..9e1627782284 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -292,6 +292,11 @@ add_sample(struct i915_pmu_sample *sample, u32 val)
> >       sample->cur += val;
> >   }
> >   
> > +static bool exclusive_mmio_access(const struct drm_i915_private *i915)
> > +{
> > +     return IS_GEN(i915, 7);
> > +}
> > +
> >   static void
> >   engines_sample(struct intel_gt *gt, unsigned int period_ns)
> >   {
> > @@ -311,7 +316,8 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
> >               if (!intel_engine_pm_get_if_awake(engine))
> >                       continue;
> >   
> > -             spin_lock_irqsave(&engine->uncore->lock, flags);
> > +             if (exclusive_mmio_access(i915))
> > +                     spin_lock_irqsave(&engine->uncore->lock, flags);
> >   
> >               val = ENGINE_READ_FW(engine, RING_CTL);
> >               if (val == 0) /* powerwell off => engine idle */
> > @@ -342,7 +348,8 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
> >                       add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
> >   
> >   skip:
> > -             spin_unlock_irqrestore(&engine->uncore->lock, flags);
> > +             if (exclusive_mmio_access(i915))
> > +                     spin_unlock_irqrestore(&engine->uncore->lock, flags);
> >               intel_engine_pm_put(engine);
> >       }
> >   }
> > 
> 
> For bonus points, cache the check in a local? Or cache the lock in a 
> local for even more win: spinlock_t *lock = exclusive_mmio_access(..) ? 
> &.. : NULL ?

In the back of my mind, I was assuming that IS_GEN() gets compiled out
for single sku builds.

lock = exclusive_mmio_access() ...

if (lock)
	spin_lock_irqsave(lock, flags);
...
if (lock)
	spin_unlock_irqrestore(lock, flags);

Ok.
-Chris
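
For illustration, the per-engine body of engines_sample() with the lock
cached in a local might look roughly as below. This is only a sketch of
the suggestion above, not the follow-up that was actually sent; the
mmio_lock name and the elided register reads are assumptions.

	/*
	 * Hypothetical sketch: resolve the uncore lock once per engine,
	 * so non-gen7 platforms never take the lock or disable interrupts
	 * while sampling. The register reads in between are abbreviated
	 * and stay as in the patch above.
	 */
	spinlock_t *mmio_lock =
		exclusive_mmio_access(i915) ? &engine->uncore->lock : NULL;
	unsigned long flags = 0;
	u32 val;

	if (!intel_engine_pm_get_if_awake(engine))
		continue;

	if (mmio_lock)
		spin_lock_irqsave(mmio_lock, flags);

	val = ENGINE_READ_FW(engine, RING_CTL);
	/* ... RING_HEAD/RING_TAIL/busyness sampling as in the patch ... */

	if (mmio_lock)
		spin_unlock_irqrestore(mmio_lock, flags);
	intel_engine_pm_put(engine);

With a NULL lock pointer on newer platforms, the remaining cost per
sample is a pair of predictable branches rather than an irqsave/restore
round trip.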

Patch

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index dbde80a376cb..9e1627782284 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -292,6 +292,11 @@  add_sample(struct i915_pmu_sample *sample, u32 val)
 	sample->cur += val;
 }
 
+static bool exclusive_mmio_access(const struct drm_i915_private *i915)
+{
+	return IS_GEN(i915, 7);
+}
+
 static void
 engines_sample(struct intel_gt *gt, unsigned int period_ns)
 {
@@ -311,7 +316,8 @@  engines_sample(struct intel_gt *gt, unsigned int period_ns)
 		if (!intel_engine_pm_get_if_awake(engine))
 			continue;
 
-		spin_lock_irqsave(&engine->uncore->lock, flags);
+		if (exclusive_mmio_access(i915))
+			spin_lock_irqsave(&engine->uncore->lock, flags);
 
 		val = ENGINE_READ_FW(engine, RING_CTL);
 		if (val == 0) /* powerwell off => engine idle */
@@ -342,7 +348,8 @@  engines_sample(struct intel_gt *gt, unsigned int period_ns)
 			add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
 
 skip:
-		spin_unlock_irqrestore(&engine->uncore->lock, flags);
+		if (exclusive_mmio_access(i915))
+			spin_unlock_irqrestore(&engine->uncore->lock, flags);
 		intel_engine_pm_put(engine);
 	}
 }