@@ -3365,7 +3365,6 @@ extern void i915_redisable_vga(struct drm_i915_private *dev_priv);
extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv);
extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val);
extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv);
-extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
bool enable);
@@ -1193,145 +1193,6 @@ static void notify_ring(struct intel_engine_cs *engine)
trace_intel_engine_notify(engine, wait);
}
-static void vlv_c0_read(struct drm_i915_private *dev_priv,
- struct intel_rps_ei *ei)
-{
- ei->ktime = ktime_get_raw();
- ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
- ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
-}
-
-void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
-{
- memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei));
-}
-
-static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
-{
- struct intel_rps *rps = &dev_priv->gt_pm.rps;
- const struct intel_rps_ei *prev = &rps->ei;
- struct intel_rps_ei now;
- u32 events = 0;
-
- if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
- return 0;
-
- vlv_c0_read(dev_priv, &now);
-
- if (prev->ktime) {
- u64 time, c0;
- u32 render, media;
-
- time = ktime_us_delta(now.ktime, prev->ktime);
-
- time *= dev_priv->czclk_freq;
-
- /* Workload can be split between render + media,
- * e.g. SwapBuffers being blitted in X after being rendered in
- * mesa. To account for this we need to combine both engines
- * into our activity counter.
- */
- render = now.render_c0 - prev->render_c0;
- media = now.media_c0 - prev->media_c0;
- c0 = max(render, media);
- c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
-
- if (c0 > time * rps->up_threshold)
- events = GEN6_PM_RP_UP_THRESHOLD;
- else if (c0 < time * rps->down_threshold)
- events = GEN6_PM_RP_DOWN_THRESHOLD;
- }
-
- rps->ei = now;
- return events;
-}
-
-static void gen6_pm_rps_work(struct work_struct *work)
-{
- struct drm_i915_private *dev_priv =
- container_of(work, struct drm_i915_private, gt_pm.rps.work);
- struct intel_rps *rps = &dev_priv->gt_pm.rps;
- bool client_boost = false;
- int new_delay, adj, min, max;
- u32 pm_iir = 0;
-
- spin_lock_irq(&dev_priv->irq_lock);
- if (rps->interrupts_enabled) {
- pm_iir = fetch_and_zero(&rps->pm_iir);
- client_boost = atomic_read(&rps->num_waiters);
- }
- spin_unlock_irq(&dev_priv->irq_lock);
-
- /* Make sure we didn't queue anything we're not going to process. */
- WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
- if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost)
- goto out;
-
- mutex_lock(&rps->lock);
-
- pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
-
- adj = rps->last_adj;
- new_delay = rps->cur_freq;
- min = rps->min_freq_softlimit;
- max = rps->max_freq_softlimit;
- if (client_boost)
- max = rps->max_freq;
- if (client_boost && new_delay < rps->boost_freq) {
- new_delay = rps->boost_freq;
- adj = 0;
- } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
- if (adj > 0)
- adj *= 2;
- else /* CHV needs even encode values */
- adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
-
- if (new_delay >= rps->max_freq_softlimit)
- adj = 0;
- } else if (client_boost) {
- adj = 0;
- } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
- if (rps->cur_freq > rps->efficient_freq)
- new_delay = rps->efficient_freq;
- else if (rps->cur_freq > rps->min_freq_softlimit)
- new_delay = rps->min_freq_softlimit;
- adj = 0;
- } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
- if (adj < 0)
- adj *= 2;
- else /* CHV needs even encode values */
- adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
-
- if (new_delay <= rps->min_freq_softlimit)
- adj = 0;
- } else { /* unknown event */
- adj = 0;
- }
-
- rps->last_adj = adj;
-
- /* sysfs frequency interfaces may have snuck in while servicing the
- * interrupt
- */
- new_delay += adj;
- new_delay = clamp_t(int, new_delay, min, max);
-
- if (intel_set_rps(dev_priv, new_delay)) {
- DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
- rps->last_adj = 0;
- }
-
- mutex_unlock(&rps->lock);
-
-out:
- /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
- spin_lock_irq(&dev_priv->irq_lock);
- if (rps->interrupts_enabled)
- gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events);
- spin_unlock_irq(&dev_priv->irq_lock);
-}
-
-
/**
* ivybridge_parity_work - Workqueue called when a parity error interrupt
* occurred.
@@ -4363,8 +4224,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
intel_hpd_init_work(dev_priv);
- INIT_WORK(&rps->work, gen6_pm_rps_work);
-
INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
for (i = 0; i < MAX_L3_SLICES; ++i)
dev_priv->l3_parity.remap_info[i] = NULL;
@@ -355,17 +355,16 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
{
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
struct intel_rps *rps = &dev_priv->gt_pm.rps;
- u32 val;
ssize_t ret;
+ u32 val;
ret = kstrtou32(buf, 0, &val);
if (ret)
return ret;
- intel_runtime_pm_get(dev_priv);
- mutex_lock(&rps->lock);
-
val = intel_freq_opcode(dev_priv, val);
+
+ mutex_lock(&rps->lock);
if (val < rps->min_freq ||
val > rps->max_freq ||
val < rps->min_freq_softlimit) {
@@ -378,19 +377,11 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
intel_gpu_freq(dev_priv, val));
rps->max_freq_softlimit = val;
-
- val = clamp_t(int, rps->cur_freq,
- rps->min_freq_softlimit,
- rps->max_freq_softlimit);
-
- /* We still need *_set_rps to process the new max_delay and
- * update the interrupt limits and PMINTRMSK even though
- * frequency request may be unchanged. */
- ret = intel_set_rps(dev_priv, val);
+ schedule_work(&rps->work);
unlock:
mutex_unlock(&rps->lock);
- intel_runtime_pm_put(dev_priv);
+ flush_work(&rps->work);
return ret ?: count;
}
@@ -410,17 +401,16 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
{
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
struct intel_rps *rps = &dev_priv->gt_pm.rps;
- u32 val;
ssize_t ret;
+ u32 val;
ret = kstrtou32(buf, 0, &val);
if (ret)
return ret;
- intel_runtime_pm_get(dev_priv);
- mutex_lock(&rps->lock);
-
val = intel_freq_opcode(dev_priv, val);
+
+ mutex_lock(&rps->lock);
if (val < rps->min_freq ||
val > rps->max_freq ||
val > rps->max_freq_softlimit) {
@@ -429,19 +419,11 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
}
rps->min_freq_softlimit = val;
-
- val = clamp_t(int, rps->cur_freq,
- rps->min_freq_softlimit,
- rps->max_freq_softlimit);
-
- /* We still need *_set_rps to process the new min_delay and
- * update the interrupt limits and PMINTRMSK even though
- * frequency request may be unchanged. */
- ret = intel_set_rps(dev_priv, val);
+ schedule_work(&rps->work);
unlock:
mutex_unlock(&rps->lock);
- intel_runtime_pm_put(dev_priv);
+ flush_work(&rps->work);
return ret ?: count;
}
@@ -329,13 +329,7 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
{
struct intel_rps *rps = &dev_priv->gt_pm.rps;
- /*
- * min/max delay may still have been modified so be sure to
- * write the limits value.
- */
if (val != rps->cur_freq) {
- gen6_set_rps_thresholds(dev_priv, val);
-
if (INTEL_GEN(dev_priv) >= 9)
I915_WRITE(GEN6_RPNSWREQ,
GEN9_FREQUENCY(val));
@@ -349,6 +343,8 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
GEN6_AGGRESSIVE_TURBO);
}
+ gen6_set_rps_thresholds(dev_priv, val);
+
/*
* Make sure we continue to get interrupts
* until we hit the minimum or maximum frequencies.
@@ -370,18 +366,17 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
"Odd GPU freq value\n"))
val &= ~1;
- I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
-
if (val != dev_priv->gt_pm.rps.cur_freq) {
vlv_punit_get(dev_priv);
err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
vlv_punit_put(dev_priv);
if (err)
return err;
-
- gen6_set_rps_thresholds(dev_priv, val);
}
+ gen6_set_rps_thresholds(dev_priv, val);
+ I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
+
dev_priv->gt_pm.rps.cur_freq = val;
trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
@@ -426,6 +421,151 @@ static void vlv_set_rps_idle(struct drm_i915_private *i915)
DRM_ERROR("Failed to set RPS for idle\n");
}
+static int intel_set_rps(struct drm_i915_private *i915, u8 val)
+{
+ struct intel_rps *rps = &i915->gt_pm.rps;
+ int err;
+
+ lockdep_assert_held(&rps->lock);
+ GEM_BUG_ON(val > rps->max_freq);
+ GEM_BUG_ON(val < rps->min_freq);
+
+ if (!rps->enabled) {
+ rps->cur_freq = val;
+ return 0;
+ }
+
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+ err = valleyview_set_rps(i915, val);
+ else
+ err = gen6_set_rps(i915, val);
+
+ return err;
+}
+
+static void vlv_c0_read(struct drm_i915_private *dev_priv,
+ struct intel_rps_ei *ei)
+{
+ ei->ktime = ktime_get_raw();
+ ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
+ ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
+}
+
+static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+ const struct intel_rps_ei *prev = &rps->ei;
+ struct intel_rps_ei now;
+ u32 events = 0;
+
+ if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+ return 0;
+
+ vlv_c0_read(dev_priv, &now);
+
+ if (prev->ktime) {
+ u64 time, c0;
+ u32 render, media;
+
+ time = ktime_us_delta(now.ktime, prev->ktime);
+
+ time *= dev_priv->czclk_freq;
+
+ /* Workload can be split between render + media,
+ * e.g. SwapBuffers being blitted in X after being rendered in
+ * mesa. To account for this we need to combine both engines
+ * into our activity counter.
+ */
+ render = now.render_c0 - prev->render_c0;
+ media = now.media_c0 - prev->media_c0;
+ c0 = max(render, media);
+ c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
+
+ if (c0 > time * rps->up_threshold)
+ events = GEN6_PM_RP_UP_THRESHOLD;
+ else if (c0 < time * rps->down_threshold)
+ events = GEN6_PM_RP_DOWN_THRESHOLD;
+ }
+
+ rps->ei = now;
+ return events;
+}
+
+static void intel_rps_work(struct work_struct *work)
+{
+ struct drm_i915_private *i915 =
+ container_of(work, struct drm_i915_private, gt_pm.rps.work);
+ struct intel_rps *rps = &i915->gt_pm.rps;
+ int freq, adj, min, max;
+ bool client_boost;
+ u32 pm_iir;
+
+ pm_iir = xchg(&rps->pm_iir, 0) & ~i915->pm_rps_events;
+ pm_iir |= vlv_wa_c0_ei(i915, pm_iir);
+
+ client_boost = atomic_read(&rps->num_waiters);
+
+ mutex_lock(&rps->lock);
+
+ min = rps->min_freq_softlimit;
+ max = rps->max_freq_softlimit;
+ if (client_boost && max < rps->boost_freq)
+ max = rps->boost_freq;
+
+ GEM_BUG_ON(min < rps->min_freq);
+ GEM_BUG_ON(max > rps->max_freq);
+ GEM_BUG_ON(max < min);
+
+ adj = rps->last_adj;
+ freq = rps->cur_freq;
+ if (client_boost && freq < rps->boost_freq) {
+ freq = rps->boost_freq;
+ adj = 0;
+ } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
+ if (adj > 0)
+ adj *= 2;
+ else /* CHV needs even encode values */
+ adj = IS_CHERRYVIEW(i915) ? 2 : 1;
+
+ if (freq >= max)
+ adj = 0;
+ } else if (client_boost) {
+ adj = 0;
+ } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
+ if (freq > max_t(int, rps->efficient_freq, min))
+ freq = max_t(int, rps->efficient_freq, min);
+ else if (freq > min_t(int, rps->efficient_freq, min))
+ freq = min_t(int, rps->efficient_freq, min);
+
+ adj = 0;
+ } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
+ if (adj < 0)
+ adj *= 2;
+ else /* CHV needs even encode values */
+ adj = IS_CHERRYVIEW(i915) ? -2 : -1;
+
+ if (freq <= min)
+ adj = 0;
+ } else { /* unknown/external event */
+ adj = 0;
+ }
+
+ if (intel_set_rps(i915, clamp_t(int, freq + adj, min, max))) {
+ DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
+ adj = 0;
+ }
+
+ mutex_unlock(&rps->lock);
+
+ if (pm_iir) {
+ spin_lock_irq(&i915->irq_lock);
+ if (rps->interrupts_enabled)
+ gen6_unmask_pm_irq(i915, i915->pm_rps_events);
+ spin_unlock_irq(&i915->irq_lock);
+ rps->last_adj = adj;
+ }
+}
+
void gen6_rps_busy(struct drm_i915_private *dev_priv)
{
struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -434,19 +574,17 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
if (rps->enabled) {
u8 freq;
- if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
- gen6_rps_reset_ei(dev_priv);
I915_WRITE(GEN6_PMINTRMSK,
gen6_rps_pm_mask(dev_priv, rps->cur_freq));
gen6_enable_rps_interrupts(dev_priv);
+ memset(&rps->ei, 0, sizeof(rps->ei));
/*
* Use the user's desired frequency as a guide, but for better
* performance, jump directly to RPe as our starting frequency.
*/
- freq = max(rps->cur_freq,
- rps->efficient_freq);
+ freq = max(rps->cur_freq, rps->efficient_freq);
if (intel_set_rps(dev_priv,
clamp(freq,
@@ -515,28 +653,6 @@ void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
atomic_inc(client ? &client->boosts : &rps->boosts);
}
-int intel_set_rps(struct drm_i915_private *i915, u8 val)
-{
- struct intel_rps *rps = &i915->gt_pm.rps;
- int err;
-
- lockdep_assert_held(&rps->lock);
- GEM_BUG_ON(val > rps->max_freq);
- GEM_BUG_ON(val < rps->min_freq);
-
- if (!rps->enabled) {
- rps->cur_freq = val;
- return 0;
- }
-
- if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
- err = valleyview_set_rps(i915, val);
- else
- err = gen6_set_rps(i915, val);
-
- return err;
-}
-
static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
{
I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -2124,6 +2240,7 @@ void intel_init_gt_powersave(struct drm_i915_private *i915)
struct intel_rps *rps = &i915->gt_pm.rps;
mutex_init(&rps->lock);
+ INIT_WORK(&rps->work, intel_rps_work);
/*
* RPM depends on RC6 to save restore the GT HW context, so make RC6 a
@@ -95,7 +95,6 @@ void intel_disable_gt_powersave(struct drm_i915_private *i915);
void intel_suspend_gt_powersave(struct drm_i915_private *i915);
void gen6_rps_busy(struct drm_i915_private *i915);
-void gen6_rps_reset_ei(struct drm_i915_private *i915);
void gen6_rps_idle(struct drm_i915_private *i915);
void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);