diff mbox

[3/4] drm/i915/chv: Tune L3 SQC credits based on actual latencies

Message ID 1461587888-5047-3-git-send-email-imre.deak@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Imre Deak April 25, 2016, 12:38 p.m. UTC
While browsing BSpec I bumped into a note saying we need to tune the L3
SQC credit values based on actual latency measurements done after initial
enabling. I've checked that this indeed improves things on BXT. I haven't
checked it on CHV, but here is the patch in case someone wants to give it a go.

CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

Comments

Ville Syrjälä April 25, 2016, 1:16 p.m. UTC | #1
On Mon, Apr 25, 2016 at 03:38:07PM +0300, Imre Deak wrote:
> While browsing BSpec I bumped into a note saying we need to tune these
> values based on actual measurements done after initial enabling. I've
> checked that it indeed improves things on BXT. I haven't checked this on
> CHV, but here it is if someone wants to give it a go.
> 
> CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Signed-off-by: Imre Deak <imre.deak@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 41 +++++++++++++++++++++++++++++------------
>  1 file changed, 29 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index a9b7626..fcfdb7f 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -6669,11 +6669,32 @@ static void lpt_suspend_hw(struct drm_device *dev)
>  	}
>  }
>  
> +static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
> +				   int general_prio_credits,
> +				   int high_prio_credits)
> +{
> +	u32 misccpctl;
> +
> +	misccpctl = I915_READ(GEN7_MISCCPCTL);
> +	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> +
> +	I915_WRITE(GEN8_L3SQCREG1,
> +		   L3_GENERAL_PRIO_CREDITS(general_prio_credits) |
> +		   L3_HIGH_PRIO_CREDITS(high_prio_credits));
> +
> +	/*
> +	 * Wait at least 100 clocks before re-enabling clock gating.
> +	 * See the definition of L3SQCREG1 in BSpec.
> +	 */
> +	POSTING_READ(GEN8_L3SQCREG1);
> +	udelay(1);
> +	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> +}
> +
>  static void broadwell_init_clock_gating(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	enum pipe pipe;
> -	uint32_t misccpctl;
>  
>  	ilk_init_lp_watermarks(dev);
>  
> @@ -6708,17 +6729,7 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
>  	 * WaProgramL3SqcReg1Default:bdw
>  	 * WaTempDisableDOPClkGating:bdw

The w/a note should be moved into the new helper as well, then.

>  	 */
> -	misccpctl = I915_READ(GEN7_MISCCPCTL);
> -	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> -	I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(30) |
> -				   L3_HIGH_PRIO_CREDITS(2));
> -	/*
> -	 * Wait at least 100 clocks before re-enabling clock gating. See
> -	 * the definition of L3SQCREG1 in BSpec.
> -	 */
> -	POSTING_READ(GEN8_L3SQCREG1);
> -	udelay(1);
> -	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> +	gen8_set_l3sqc_credits(dev_priv, 30, 2);
>  
>  	/*
>  	 * WaGttCachingOffByDefault:bdw
> @@ -6989,6 +7000,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev)
>  		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
>  
>  	/*
> +	 * Adjust credits based on actual latencies, see BSpec LSQC Setting
> +	 * Recommendations.
> +	 */
> +	gen8_set_l3sqc_credits(dev_priv, 38, 2);

Where exactly in BSpec is this? Last I looked, CHV was supposed to be
fine with the defaults.

> +
> +	/*
>  	 * GTT cache may not work with big pages, so if those
>  	 * are ever enabled GTT cache may need to be disabled.
>  	 */
> -- 
> 2.5.0
Ville Syrjälä April 26, 2016, 4:19 p.m. UTC | #2
On Mon, Apr 25, 2016 at 04:16:38PM +0300, Ville Syrjälä wrote:
> On Mon, Apr 25, 2016 at 03:38:07PM +0300, Imre Deak wrote:
> > While browsing BSpec I bumped into a note saying we need to tune these
> > values based on actual measurements done after initial enabling. I've
> > checked that it indeed improves things on BXT. I haven't checked this on
> > CHV, but here it is if someone wants to give it a go.
> > 
> > CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > ---
> >  drivers/gpu/drm/i915/intel_pm.c | 41 +++++++++++++++++++++++++++++------------
> >  1 file changed, 29 insertions(+), 12 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> > index a9b7626..fcfdb7f 100644
> > --- a/drivers/gpu/drm/i915/intel_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > @@ -6669,11 +6669,32 @@ static void lpt_suspend_hw(struct drm_device *dev)
> >  	}
> >  }
> >  
> > +static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
> > +				   int general_prio_credits,
> > +				   int high_prio_credits)
> > +{
> > +	u32 misccpctl;
> > +
> > +	misccpctl = I915_READ(GEN7_MISCCPCTL);
> > +	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> > +
> > +	I915_WRITE(GEN8_L3SQCREG1,
> > +		   L3_GENERAL_PRIO_CREDITS(general_prio_credits) |
> > +		   L3_HIGH_PRIO_CREDITS(high_prio_credits));
> > +
> > +	/*
> > +	 * Wait at least 100 clocks before re-enabling clock gating.
> > +	 * See the definition of L3SQCREG1 in BSpec.
> > +	 */
> > +	POSTING_READ(GEN8_L3SQCREG1);
> > +	udelay(1);
> > +	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> > +}
> > +
> >  static void broadwell_init_clock_gating(struct drm_device *dev)
> >  {
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> >  	enum pipe pipe;
> > -	uint32_t misccpctl;
> >  
> >  	ilk_init_lp_watermarks(dev);
> >  
> > @@ -6708,17 +6729,7 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
> >  	 * WaProgramL3SqcReg1Default:bdw
> >  	 * WaTempDisableDOPClkGating:bdw
> 
> The w/a note should be moved as well then.
> 
> >  	 */
> > -	misccpctl = I915_READ(GEN7_MISCCPCTL);
> > -	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> > -	I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(30) |
> > -				   L3_HIGH_PRIO_CREDITS(2));
> > -	/*
> > -	 * Wait at least 100 clocks before re-enabling clock gating. See
> > -	 * the definition of L3SQCREG1 in BSpec.
> > -	 */
> > -	POSTING_READ(GEN8_L3SQCREG1);
> > -	udelay(1);
> > -	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> > +	gen8_set_l3sqc_credits(dev_priv, 30, 2);
> >  
> >  	/*
> >  	 * WaGttCachingOffByDefault:bdw
> > @@ -6989,6 +7000,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev)
> >  		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
> >  
> >  	/*
> > +	 * Adjust credits based on actual latencies, see BSpec LSQC Setting
> > +	 * Recommendations.
> > +	 */
> > +	gen8_set_l3sqc_credits(dev_priv, 38, 2);
> 
> Where exactly in Bspec is this? Last I looked CHV was supposed to be
> fine with the defaults.

OK, so the table of recommended values is in the "performance guide".
I don't actually know where the CHV numbers came from, since there's no
workaround (w/a) for this, nor could I find a related HSD with any numbers.

Anyway, using these values, Harri's membwtester texturing microbenchmark
gained >10%, which seems nice. The other numbers seemed unchanged. I also
ran xonotic [1] but didn't see any significant changes in performance.

So based on that I'm fine with adjusting this on CHV.

[1] At first I got crap results, but that turned out to be the CPU
    side making a mess of things. So it seems we're back to having
    to use taskset to pin the process to one core on CHV :(

> 
> > +
> > +	/*
> >  	 * GTT cache may not work with big pages, so if those
> >  	 * are ever enabled GTT cache may need to be disabled.
> >  	 */
> > -- 
> > 2.5.0
> 
> -- 
> Ville Syrjälä
> Intel OTC
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index a9b7626..fcfdb7f 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6669,11 +6669,32 @@  static void lpt_suspend_hw(struct drm_device *dev)
 	}
 }
 
+static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
+				   int general_prio_credits,
+				   int high_prio_credits)
+{
+	u32 misccpctl;
+
+	misccpctl = I915_READ(GEN7_MISCCPCTL);
+	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
+
+	I915_WRITE(GEN8_L3SQCREG1,
+		   L3_GENERAL_PRIO_CREDITS(general_prio_credits) |
+		   L3_HIGH_PRIO_CREDITS(high_prio_credits));
+
+	/*
+	 * Wait at least 100 clocks before re-enabling clock gating.
+	 * See the definition of L3SQCREG1 in BSpec.
+	 */
+	POSTING_READ(GEN8_L3SQCREG1);
+	udelay(1);
+	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+}
+
 static void broadwell_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum pipe pipe;
-	uint32_t misccpctl;
 
 	ilk_init_lp_watermarks(dev);
 
@@ -6708,17 +6729,7 @@  static void broadwell_init_clock_gating(struct drm_device *dev)
 	 * WaProgramL3SqcReg1Default:bdw
 	 * WaTempDisableDOPClkGating:bdw
 	 */
-	misccpctl = I915_READ(GEN7_MISCCPCTL);
-	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
-	I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(30) |
-				   L3_HIGH_PRIO_CREDITS(2));
-	/*
-	 * Wait at least 100 clocks before re-enabling clock gating. See
-	 * the definition of L3SQCREG1 in BSpec.
-	 */
-	POSTING_READ(GEN8_L3SQCREG1);
-	udelay(1);
-	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+	gen8_set_l3sqc_credits(dev_priv, 30, 2);
 
 	/*
 	 * WaGttCachingOffByDefault:bdw
@@ -6989,6 +7000,12 @@  static void cherryview_init_clock_gating(struct drm_device *dev)
 		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
 	/*
+	 * Adjust credits based on actual latencies, see BSpec LSQC Setting
+	 * Recommendations.
+	 */
+	gen8_set_l3sqc_credits(dev_priv, 38, 2);
+
+	/*
 	 * GTT cache may not work with big pages, so if those
 	 * are ever enabled GTT cache may need to be disabled.
 	 */