Message ID | 1362175722-9281-16-git-send-email-jbarnes@virtuousgeek.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Sat, 2 Mar 2013, Jesse Barnes wrote: > From: Ben Widawsky <ben@bwidawsk.net> > > Uses slightly different interfaces than other platforms. > > Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org> > --- > drivers/gpu/drm/i915/intel_pm.c | 148 +++++++++++++++++++++++++++++++++++++-- > 1 file changed, 144 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index e3947cb..d16f4f40 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -2477,6 +2477,47 @@ void gen6_set_rps(struct drm_device *dev, u8 val) > trace_intel_gpu_freq_change(val * 50); > } > > +void valleyview_set_rps(struct drm_device *dev, u8 val) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + unsigned long timeout = jiffies + msecs_to_jiffies(100); > + u32 limits = gen6_rps_limits(dev_priv, &val); > + u32 pval; > + > + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); > + WARN_ON(val > dev_priv->rps.max_delay); > + WARN_ON(val < dev_priv->rps.min_delay); > + > + if (val == dev_priv->rps.cur_delay) > + return; > + > + valleyview_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); > + > + do { > + valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &pval); > + if (time_after(jiffies, timeout)) { > + DRM_DEBUG_DRIVER("timed out waiting for Punit\n"); > + break; > + } > + udelay(10); > + } while (pval & 1); > + > + valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &pval); > + if ((pval >> 8) != val) > + DRM_DEBUG_DRIVER("punit overrode freq: %d requested, but got %d\n", > + val, pval >> 8); > + > + /* Make sure we continue to get interrupts > + * until we hit the minimum or maximum frequencies. > + */ > + I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, limits); > + > + dev_priv->rps.cur_delay = val; Shouldn't we store pval >> 8 instead of val in cur_delay in order to reclaim the rps state? 
If we store val here, the requested frequency will eventually exceed max_delay if punit overrides with a lower frequency. > + > + trace_intel_gpu_freq_change(val * 50); > +} > + > + > static void gen6_disable_rps(struct drm_device *dev) > { > struct drm_i915_private *dev_priv = dev->dev_private; > @@ -2714,6 +2755,100 @@ static void gen6_update_ring_freq(struct drm_device *dev) > } > } > > +static void valleyview_enable_rps(struct drm_device *dev) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_ring_buffer *ring; > + u32 gtfifodbg, val; > + int i; > + > + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); > + > + if ((gtfifodbg = I915_READ(GTFIFODBG))) { > + DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); > + I915_WRITE(GTFIFODBG, gtfifodbg); > + } > + > + gen6_gt_force_wake_get(dev_priv); > + > + I915_WRITE(GEN6_RC_SLEEP, 0); > + > + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); > + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); > + I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 0x19); > + > + for_each_ring(ring, dev_priv, i) > + I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); > + > + I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); > + I915_WRITE(GEN6_RC6_THRESHOLD, 0xc350); > + > + I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); > + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); > + I915_WRITE(GEN6_RP_UP_EI, 66000); > + I915_WRITE(GEN6_RP_DOWN_EI, 350000); > + > + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); > + > + I915_WRITE(GEN6_RP_CONTROL, > + GEN6_RP_MEDIA_TURBO | > + GEN6_RP_MEDIA_HW_NORMAL_MODE | > + GEN6_RP_MEDIA_IS_GFX | > + GEN6_RP_ENABLE | > + GEN6_RP_UP_BUSY_AVG | > + GEN6_RP_DOWN_IDLE_CONT); > + > + /* allows RC6 residency counter to work */ > + I915_WRITE(0x138104, 0xffff00ff); > + I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE); > + > + valleyview_punit_read(dev_priv, PUNIT_FUSE_BUS1, &val); > + DRM_DEBUG_DRIVER("max GPU freq: %d\n", val); > + dev_priv->rps.max_delay = val; > + > + valleyview_punit_read(dev_priv, PUNIT_REG_GPU_LFM, 
&val); > + DRM_DEBUG_DRIVER("min GPU freq: %d\n", val); > + dev_priv->rps.min_delay = val; > + > + valleyview_punit_read(dev_priv, PUNIT_FUSE_BUS2, &val); > + DRM_DEBUG_DRIVER("max GPLL freq: %d\n", val); > + > + valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &val); > + DRM_DEBUG_DRIVER("DDR speed: "); > + if (((val >> 6) & 3) == 0) { > + dev_priv->mem_freq = 800; > + printk("800 MHz\n"); > + } else if (((val >> 6) & 3) == 1) { > + printk("1066 MHz\n"); > + dev_priv->mem_freq = 1066; > + } else if (((val >> 6) & 3) == 2) { > + printk("1333 MHz\n"); > + dev_priv->mem_freq = 1333; > + } else if (((val >> 6) & 3) == 3) > + printk("invalid\n"); > + DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & 8 ? "yes" : "no"); > + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); > + > + DRM_DEBUG_DRIVER("current GPU freq: %x\n", (val >> 8) & 0xff); > + dev_priv->rps.cur_delay = (val >> 8) & 0xff; > + > + val = 0xd500; > + DRM_DEBUG_DRIVER("setting GPU freq to %d\n", (val >> 8) & 0xff); > + > + valleyview_set_rps(dev_priv->dev, (val >> 8) & 0xff); > + > + /* requires MSI enabled */ > + I915_WRITE(GEN6_PMIER, GEN6_PM_DEFERRED_EVENTS); > + spin_lock_irq(&dev_priv->rps.lock); > + WARN_ON(dev_priv->rps.pm_iir != 0); > + I915_WRITE(GEN6_PMIMR, 0); > + spin_unlock_irq(&dev_priv->rps.lock); > + /* enable all PM interrupts */ > + I915_WRITE(GEN6_PMINTRMSK, 0); > + > + gen6_gt_force_wake_put(dev_priv); > +} > + > void ironlake_teardown_rc6(struct drm_device *dev) > { > struct drm_i915_private *dev_priv = dev->dev_private; > @@ -3440,7 +3575,7 @@ void intel_disable_gt_powersave(struct drm_device *dev) > if (IS_IRONLAKE_M(dev)) { > ironlake_disable_drps(dev); > ironlake_disable_rc6(dev); > - } else if (INTEL_INFO(dev)->gen >= 6 && !IS_VALLEYVIEW(dev)) { > + } else if (INTEL_INFO(dev)->gen >= 6) { > cancel_delayed_work_sync(&dev_priv->rps.delayed_resume_work); > mutex_lock(&dev_priv->rps.hw_lock); > gen6_disable_rps(dev); > @@ -3456,8 +3591,13 @@ static void intel_gen6_powersave_work(struct 
work_struct *work) > struct drm_device *dev = dev_priv->dev; > > mutex_lock(&dev_priv->rps.hw_lock); > - gen6_enable_rps(dev); > - gen6_update_ring_freq(dev); > + > + if (IS_VALLEYVIEW(dev)) { > + valleyview_enable_rps(dev); > + } else { > + gen6_enable_rps(dev); > + gen6_update_ring_freq(dev); > + } > mutex_unlock(&dev_priv->rps.hw_lock); > } > > @@ -3469,7 +3609,7 @@ void intel_enable_gt_powersave(struct drm_device *dev) > ironlake_enable_drps(dev); > ironlake_enable_rc6(dev); > intel_init_emon(dev); > - } else if ((IS_GEN6(dev) || IS_GEN7(dev)) && !IS_VALLEYVIEW(dev)) { > + } else if (IS_GEN6(dev) || IS_GEN7(dev)) { > /* > * PCU communication is slow and this doesn't need to be > * done at any specific time, so do this out of our fast path > -- > 1.7.9.5 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx >
On Wed, 6 Mar 2013 16:21:03 +0530 (IST) Rohit Jain <rohit@intel.com> wrote: > > > On Sat, 2 Mar 2013, Jesse Barnes wrote: > > > From: Ben Widawsky <ben@bwidawsk.net> > > > > Uses slightly different interfaces than other platforms. > > > > Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org> > > --- > > drivers/gpu/drm/i915/intel_pm.c | 148 +++++++++++++++++++++++++++++++++++++-- > > 1 file changed, 144 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > > index e3947cb..d16f4f40 100644 > > --- a/drivers/gpu/drm/i915/intel_pm.c > > +++ b/drivers/gpu/drm/i915/intel_pm.c > > @@ -2477,6 +2477,47 @@ void gen6_set_rps(struct drm_device *dev, u8 val) > > trace_intel_gpu_freq_change(val * 50); > > } > > > > +void valleyview_set_rps(struct drm_device *dev, u8 val) > > +{ > > + struct drm_i915_private *dev_priv = dev->dev_private; > > + unsigned long timeout = jiffies + msecs_to_jiffies(100); > > + u32 limits = gen6_rps_limits(dev_priv, &val); > > + u32 pval; > > + > > + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); > > + WARN_ON(val > dev_priv->rps.max_delay); > > + WARN_ON(val < dev_priv->rps.min_delay); > > + > > + if (val == dev_priv->rps.cur_delay) > > + return; > > + > > + valleyview_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); > > + > > + do { > > + valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &pval); > > + if (time_after(jiffies, timeout)) { > > + DRM_DEBUG_DRIVER("timed out waiting for Punit\n"); > > + break; > > + } > > + udelay(10); > > + } while (pval & 1); > > + > > + valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &pval); > > + if ((pval >> 8) != val) > > + DRM_DEBUG_DRIVER("punit overrode freq: %d requested, but got %d\n", > > + val, pval >> 8); > > + > > + /* Make sure we continue to get interrupts > > + * until we hit the minimum or maximum frequencies. 
> > + */ > > + I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, limits); > > + > > + dev_priv->rps.cur_delay = val; > > Shouldn't we store pval >> 8 instead of val in cur_delay in order to > reclaim the rps state? If we store val here, the requested frequency will > eventually exceed max_delay if punit overrides with a lower frequency. > Yeah, we should track the current freq here instead. But we clamp to max_delay in the caller, right? And yeah, I missed the update to i915_irq.c; I fixed that too.
> -----Original Message----- > From: intel-gfx-bounces+rohit.jain=intel.com@lists.freedesktop.org > [mailto:intel-gfx-bounces+rohit.jain=intel.com@lists.freedesktop.org] > On Behalf Of Jesse Barnes > Sent: Friday, March 08, 2013 3:58 AM > To: Rohit Jain > Cc: intel-gfx@lists.freedesktop.org > Subject: Re: [Intel-gfx] [PATCH 16/26] drm/i915: turbo & RC6 support > for VLV > > On Wed, 6 Mar 2013 16:21:03 +0530 (IST) > Rohit Jain <rohit@intel.com> wrote: > > > > > > > On Sat, 2 Mar 2013, Jesse Barnes wrote: > > > > > From: Ben Widawsky <ben@bwidawsk.net> > > > > > > Uses slightly different interfaces than other platforms. > > > > > > Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org> > > > --- > > > drivers/gpu/drm/i915/intel_pm.c | 148 > > > +++++++++++++++++++++++++++++++++++++-- > > > 1 file changed, 144 insertions(+), 4 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/intel_pm.c > > > b/drivers/gpu/drm/i915/intel_pm.c index e3947cb..d16f4f40 100644 > > > --- a/drivers/gpu/drm/i915/intel_pm.c > > > +++ b/drivers/gpu/drm/i915/intel_pm.c > > > @@ -2477,6 +2477,47 @@ void gen6_set_rps(struct drm_device *dev, u8 > val) > > > trace_intel_gpu_freq_change(val * 50); } > > > > > > +void valleyview_set_rps(struct drm_device *dev, u8 val) { > > > + struct drm_i915_private *dev_priv = dev->dev_private; > > > + unsigned long timeout = jiffies + msecs_to_jiffies(100); > > > + u32 limits = gen6_rps_limits(dev_priv, &val); > > > + u32 pval; > > > + > > > + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); > > > + WARN_ON(val > dev_priv->rps.max_delay); > > > + WARN_ON(val < dev_priv->rps.min_delay); > > > + > > > + if (val == dev_priv->rps.cur_delay) > > > + return; > > > + > > > + valleyview_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); > > > + > > > + do { > > > + valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, > &pval); > > > + if (time_after(jiffies, timeout)) { > > > + DRM_DEBUG_DRIVER("timed out waiting for Punit\n"); > > > + break; > > > + } > > 
> + udelay(10); > > > + } while (pval & 1); > > > + > > > + valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &pval); > > > + if ((pval >> 8) != val) > > > + DRM_DEBUG_DRIVER("punit overrode freq: %d requested, but > got %d\n", > > > + val, pval >> 8); > > > + > > > + /* Make sure we continue to get interrupts > > > + * until we hit the minimum or maximum frequencies. > > > + */ > > > + I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, limits); > > > + > > > + dev_priv->rps.cur_delay = val; > > > > Shouldn't we store pval >> 8 instead of val in cur_delay in order to > > reclaim the rps state? If we store val here, the requested frequency > > will eventually exceed max_delay if punit overrides with a lower > frequency. > > > > Yeah we should track the current freq here instead. But we clamp to > max_delay in the caller right? And yeah I missed the update to > i915_irq.c, I fixed that too. Cool! On my board, max_delay gets set to 255 while the punit refuses to go above 222 in practice. In this case, before we can clamp to max_delay, cur_delay overflows and gets set to min_delay instead :) Fixing it like this solves this problem neatly. Cheers, Rohit
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e3947cb..d16f4f40 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2477,6 +2477,47 @@ void gen6_set_rps(struct drm_device *dev, u8 val) trace_intel_gpu_freq_change(val * 50); } +void valleyview_set_rps(struct drm_device *dev, u8 val) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long timeout = jiffies + msecs_to_jiffies(100); + u32 limits = gen6_rps_limits(dev_priv, &val); + u32 pval; + + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(val > dev_priv->rps.max_delay); + WARN_ON(val < dev_priv->rps.min_delay); + + if (val == dev_priv->rps.cur_delay) + return; + + valleyview_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); + + do { + valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &pval); + if (time_after(jiffies, timeout)) { + DRM_DEBUG_DRIVER("timed out waiting for Punit\n"); + break; + } + udelay(10); + } while (pval & 1); + + valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &pval); + if ((pval >> 8) != val) + DRM_DEBUG_DRIVER("punit overrode freq: %d requested, but got %d\n", + val, pval >> 8); + + /* Make sure we continue to get interrupts + * until we hit the minimum or maximum frequencies. 
+ */ + I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, limits); + + dev_priv->rps.cur_delay = val; + + trace_intel_gpu_freq_change(val * 50); +} + + static void gen6_disable_rps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -2714,6 +2755,100 @@ static void gen6_update_ring_freq(struct drm_device *dev) } } +static void valleyview_enable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring; + u32 gtfifodbg, val; + int i; + + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + + if ((gtfifodbg = I915_READ(GTFIFODBG))) { + DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); + I915_WRITE(GTFIFODBG, gtfifodbg); + } + + gen6_gt_force_wake_get(dev_priv); + + I915_WRITE(GEN6_RC_SLEEP, 0); + + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); + I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 0x19); + + for_each_ring(ring, dev_priv, i) + I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); + + I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); + I915_WRITE(GEN6_RC6_THRESHOLD, 0xc350); + + I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); + I915_WRITE(GEN6_RP_UP_EI, 66000); + I915_WRITE(GEN6_RP_DOWN_EI, 350000); + + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_CONT); + + /* allows RC6 residency counter to work */ + I915_WRITE(0x138104, 0xffff00ff); + I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE); + + valleyview_punit_read(dev_priv, PUNIT_FUSE_BUS1, &val); + DRM_DEBUG_DRIVER("max GPU freq: %d\n", val); + dev_priv->rps.max_delay = val; + + valleyview_punit_read(dev_priv, PUNIT_REG_GPU_LFM, &val); + DRM_DEBUG_DRIVER("min GPU freq: %d\n", val); + dev_priv->rps.min_delay = val; + + valleyview_punit_read(dev_priv, PUNIT_FUSE_BUS2, &val); + 
DRM_DEBUG_DRIVER("max GPLL freq: %d\n", val); + + valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &val); + DRM_DEBUG_DRIVER("DDR speed: "); + if (((val >> 6) & 3) == 0) { + dev_priv->mem_freq = 800; + printk("800 MHz\n"); + } else if (((val >> 6) & 3) == 1) { + printk("1066 MHz\n"); + dev_priv->mem_freq = 1066; + } else if (((val >> 6) & 3) == 2) { + printk("1333 MHz\n"); + dev_priv->mem_freq = 1333; + } else if (((val >> 6) & 3) == 3) + printk("invalid\n"); + DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & 8 ? "yes" : "no"); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + + DRM_DEBUG_DRIVER("current GPU freq: %x\n", (val >> 8) & 0xff); + dev_priv->rps.cur_delay = (val >> 8) & 0xff; + + val = 0xd500; + DRM_DEBUG_DRIVER("setting GPU freq to %d\n", (val >> 8) & 0xff); + + valleyview_set_rps(dev_priv->dev, (val >> 8) & 0xff); + + /* requires MSI enabled */ + I915_WRITE(GEN6_PMIER, GEN6_PM_DEFERRED_EVENTS); + spin_lock_irq(&dev_priv->rps.lock); + WARN_ON(dev_priv->rps.pm_iir != 0); + I915_WRITE(GEN6_PMIMR, 0); + spin_unlock_irq(&dev_priv->rps.lock); + /* enable all PM interrupts */ + I915_WRITE(GEN6_PMINTRMSK, 0); + + gen6_gt_force_wake_put(dev_priv); +} + void ironlake_teardown_rc6(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -3440,7 +3575,7 @@ void intel_disable_gt_powersave(struct drm_device *dev) if (IS_IRONLAKE_M(dev)) { ironlake_disable_drps(dev); ironlake_disable_rc6(dev); - } else if (INTEL_INFO(dev)->gen >= 6 && !IS_VALLEYVIEW(dev)) { + } else if (INTEL_INFO(dev)->gen >= 6) { cancel_delayed_work_sync(&dev_priv->rps.delayed_resume_work); mutex_lock(&dev_priv->rps.hw_lock); gen6_disable_rps(dev); @@ -3456,8 +3591,13 @@ static void intel_gen6_powersave_work(struct work_struct *work) struct drm_device *dev = dev_priv->dev; mutex_lock(&dev_priv->rps.hw_lock); - gen6_enable_rps(dev); - gen6_update_ring_freq(dev); + + if (IS_VALLEYVIEW(dev)) { + valleyview_enable_rps(dev); + } else { + gen6_enable_rps(dev); + 
gen6_update_ring_freq(dev); + } mutex_unlock(&dev_priv->rps.hw_lock); } @@ -3469,7 +3609,7 @@ void intel_enable_gt_powersave(struct drm_device *dev) ironlake_enable_drps(dev); ironlake_enable_rc6(dev); intel_init_emon(dev); - } else if ((IS_GEN6(dev) || IS_GEN7(dev)) && !IS_VALLEYVIEW(dev)) { + } else if (IS_GEN6(dev) || IS_GEN7(dev)) { /* * PCU communication is slow and this doesn't need to be * done at any specific time, so do this out of our fast path