Message ID | 1461587888-5047-3-git-send-email-imre.deak@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Apr 25, 2016 at 03:38:07PM +0300, Imre Deak wrote: > While browsing BSpec I bumped into a note saying we need to tune these > values based on actual measurements done after initial enabling. I've > checked that it indeed improves things on BXT. I haven't checked this on > CHV, but here it is if someone wants to give it a go. > > CC: Ville Syrjälä <ville.syrjala@linux.intel.com> > Signed-off-by: Imre Deak <imre.deak@intel.com> > --- > drivers/gpu/drm/i915/intel_pm.c | 41 +++++++++++++++++++++++++++++------------ > 1 file changed, 29 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index a9b7626..fcfdb7f 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -6669,11 +6669,32 @@ static void lpt_suspend_hw(struct drm_device *dev) > } > } > > +static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, > + int general_prio_credits, > + int high_prio_credits) > +{ > + u32 misccpctl; > + > + misccpctl = I915_READ(GEN7_MISCCPCTL); > + I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); > + > + I915_WRITE(GEN8_L3SQCREG1, > + L3_GENERAL_PRIO_CREDITS(general_prio_credits) | > + L3_HIGH_PRIO_CREDITS(high_prio_credits)); > + > + /* > + * Wait at least 100 clocks before re-enabling clock gating. > + * See the definition of L3SQCREG1 in BSpec. > + */ > + POSTING_READ(GEN8_L3SQCREG1); > + udelay(1); > + I915_WRITE(GEN7_MISCCPCTL, misccpctl); > +} > + > static void broadwell_init_clock_gating(struct drm_device *dev) > { > struct drm_i915_private *dev_priv = dev->dev_private; > enum pipe pipe; > - uint32_t misccpctl; > > ilk_init_lp_watermarks(dev); > > @@ -6708,17 +6729,7 @@ static void broadwell_init_clock_gating(struct drm_device *dev) > * WaProgramL3SqcReg1Default:bdw > * WaTempDisableDOPClkGating:bdw The w/a note should be moved as well then. 
> */ > - misccpctl = I915_READ(GEN7_MISCCPCTL); > - I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); > - I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(30) | > - L3_HIGH_PRIO_CREDITS(2)); > - /* > - * Wait at least 100 clocks before re-enabling clock gating. See > - * the definition of L3SQCREG1 in BSpec. > - */ > - POSTING_READ(GEN8_L3SQCREG1); > - udelay(1); > - I915_WRITE(GEN7_MISCCPCTL, misccpctl); > + gen8_set_l3sqc_credits(dev_priv, 30, 2); > > /* > * WaGttCachingOffByDefault:bdw > @@ -6989,6 +7000,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev) > GEN8_SDEUNIT_CLOCK_GATE_DISABLE); > > /* > + * Adjust credits based on actual latencies, see BSpec LSQC Setting > + * Recommendations. > + */ > + gen8_set_l3sqc_credits(dev_priv, 38, 2); Where exactly in Bspec is this? Last I looked CHV was supposed to be fine with the defaults. > + > + /* > * GTT cache may not work with big pages, so if those > * are ever enabled GTT cache may need to be disabled. > */ > -- > 2.5.0
On Mon, Apr 25, 2016 at 04:16:38PM +0300, Ville Syrjälä wrote: > On Mon, Apr 25, 2016 at 03:38:07PM +0300, Imre Deak wrote: > > While browsing BSpec I bumped into a note saying we need to tune these > > values based on actual measurements done after initial enabling. I've > > checked that it indeed improves things on BXT. I haven't checked this on > > CHV, but here it is if someone wants to give it a go. > > > > CC: Ville Syrjälä <ville.syrjala@linux.intel.com> > > Signed-off-by: Imre Deak <imre.deak@intel.com> > > --- > > drivers/gpu/drm/i915/intel_pm.c | 41 +++++++++++++++++++++++++++++------------ > > 1 file changed, 29 insertions(+), 12 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > > index a9b7626..fcfdb7f 100644 > > --- a/drivers/gpu/drm/i915/intel_pm.c > > +++ b/drivers/gpu/drm/i915/intel_pm.c > > @@ -6669,11 +6669,32 @@ static void lpt_suspend_hw(struct drm_device *dev) > > } > > } > > > > +static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, > > + int general_prio_credits, > > + int high_prio_credits) > > +{ > > + u32 misccpctl; > > + > > + misccpctl = I915_READ(GEN7_MISCCPCTL); > > + I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); > > + > > + I915_WRITE(GEN8_L3SQCREG1, > > + L3_GENERAL_PRIO_CREDITS(general_prio_credits) | > > + L3_HIGH_PRIO_CREDITS(high_prio_credits)); > > + > > + /* > > + * Wait at least 100 clocks before re-enabling clock gating. > > + * See the definition of L3SQCREG1 in BSpec. 
> > + */ > > + POSTING_READ(GEN8_L3SQCREG1); > > + udelay(1); > > + I915_WRITE(GEN7_MISCCPCTL, misccpctl); > > +} > > + > > static void broadwell_init_clock_gating(struct drm_device *dev) > > { > > struct drm_i915_private *dev_priv = dev->dev_private; > > enum pipe pipe; > > - uint32_t misccpctl; > > > > ilk_init_lp_watermarks(dev); > > > > @@ -6708,17 +6729,7 @@ static void broadwell_init_clock_gating(struct drm_device *dev) > > * WaProgramL3SqcReg1Default:bdw > > * WaTempDisableDOPClkGating:bdw > > The w/a note should be moved as well then. > > > */ > > - misccpctl = I915_READ(GEN7_MISCCPCTL); > > - I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); > > - I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(30) | > > - L3_HIGH_PRIO_CREDITS(2)); > > - /* > > - * Wait at least 100 clocks before re-enabling clock gating. See > > - * the definition of L3SQCREG1 in BSpec. > > - */ > > - POSTING_READ(GEN8_L3SQCREG1); > > - udelay(1); > > - I915_WRITE(GEN7_MISCCPCTL, misccpctl); > > + gen8_set_l3sqc_credits(dev_priv, 30, 2); > > > > /* > > * WaGttCachingOffByDefault:bdw > > @@ -6989,6 +7000,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev) > > GEN8_SDEUNIT_CLOCK_GATE_DISABLE); > > > > /* > > + * Adjust credits based on actual latencies, see BSpec LSQC Setting > > + * Recommendations. > > + */ > > + gen8_set_l3sqc_credits(dev_priv, 38, 2); > > Where exactly in Bspec is this? Last I looked CHV was supposed to be > fine with the defaults. OK, so the table of recommended values is in the "performance guide". I don't actually know where the chv numbers came from since there's no w/a for this, nor could I find a related hsd with any numbers. Anyway, using these values Harri's membwtester texturing micro benchmark gained >10% which seems nice. The other numbers seemed unchanged. I also ran xonotic [1] but didn't see any significant changes in performance. So based on that I'm fine with adjusting this on CHV. 
[1] First I got crap results, but that turned out to be the CPU side
making a mess of things. So it seems we're back to having to use taskset
to pin the process to one core on chv :(

>
> > +
> > + /*
> > * GTT cache may not work with big pages, so if those
> > * are ever enabled GTT cache may need to be disabled.
> > */
> > --
> > 2.5.0
>
> --
> Ville Syrjälä
> Intel OTC
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a9b7626..fcfdb7f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6669,11 +6669,32 @@ static void lpt_suspend_hw(struct drm_device *dev) } } +static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, + int general_prio_credits, + int high_prio_credits) +{ + u32 misccpctl; + + misccpctl = I915_READ(GEN7_MISCCPCTL); + I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); + + I915_WRITE(GEN8_L3SQCREG1, + L3_GENERAL_PRIO_CREDITS(general_prio_credits) | + L3_HIGH_PRIO_CREDITS(high_prio_credits)); + + /* + * Wait at least 100 clocks before re-enabling clock gating. + * See the definition of L3SQCREG1 in BSpec. + */ + POSTING_READ(GEN8_L3SQCREG1); + udelay(1); + I915_WRITE(GEN7_MISCCPCTL, misccpctl); +} + static void broadwell_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; enum pipe pipe; - uint32_t misccpctl; ilk_init_lp_watermarks(dev); @@ -6708,17 +6729,7 @@ static void broadwell_init_clock_gating(struct drm_device *dev) * WaProgramL3SqcReg1Default:bdw * WaTempDisableDOPClkGating:bdw */ - misccpctl = I915_READ(GEN7_MISCCPCTL); - I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); - I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(30) | - L3_HIGH_PRIO_CREDITS(2)); - /* - * Wait at least 100 clocks before re-enabling clock gating. See - * the definition of L3SQCREG1 in BSpec. - */ - POSTING_READ(GEN8_L3SQCREG1); - udelay(1); - I915_WRITE(GEN7_MISCCPCTL, misccpctl); + gen8_set_l3sqc_credits(dev_priv, 30, 2); /* * WaGttCachingOffByDefault:bdw @@ -6989,6 +7000,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev) GEN8_SDEUNIT_CLOCK_GATE_DISABLE); /* + * Adjust credits based on actual latencies, see BSpec LSQC Setting + * Recommendations. 
+ */ + gen8_set_l3sqc_credits(dev_priv, 38, 2); + + /* * GTT cache may not work with big pages, so if those * are ever enabled GTT cache may need to be disabled. */
While browsing BSpec I bumped into a note saying we need to tune these
values based on actual measurements done after initial enabling. I've
checked that it indeed improves things on BXT. I haven't checked this on
CHV, but here it is if someone wants to give it a go.

CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)