diff mbox

RFM drm/i915: Downclock when idle "idle-decay"

Message ID 1377102635-20492-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Aug. 21, 2013, 4:30 p.m. UTC
This is the missing component for the last past that provides the
wait-boost. After boosting the RPS frequency of the GPU, it would not
decay, leaving the GPU at high clocks whenever it woke from idle. If we
apply the Valleyview logic of resetting our request to the "optimal"
frequency when we detect the GPU is idle, we recovered the automatic
regulating behaviour.

This patch removes the dedicated timer for Valleyview downclocking, and
moves the logic into intel_mark_idle(), taking care that we trigger the
downclock shortly after the earliest point we notice the GPU is idle.

RFM - request for measurement!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Stéphane Marchesin <stephane.marchesin@gmail.com>
Cc: "Meng, Mengmeng" <mengmeng.meng@intel.com>
Cc: "Zhuang, Lena" <lena.zhuang@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h      |  4 +---
 drivers/gpu/drm/i915/i915_gem.c      | 20 +++++++++++-----
 drivers/gpu/drm/i915/i915_irq.c      | 11 ---------
 drivers/gpu/drm/i915/intel_display.c |  2 ++
 drivers/gpu/drm/i915/intel_drv.h     |  2 ++
 drivers/gpu/drm/i915/intel_pm.c      | 46 +++++++++++++++++-------------------
 6 files changed, 41 insertions(+), 44 deletions(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0506b93..43577f3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -832,9 +832,6 @@  struct intel_gen6_power_mgmt {
 	struct work_struct work;
 	u32 pm_iir;
 
-	/* On vlv we need to manually drop to Vmin with a delayed work. */
-	struct delayed_work vlv_work;
-
 	/* The below variables an all the rps hw state are protected by
 	 * dev->struct mutext. */
 	u8 cur_delay;
@@ -842,6 +839,7 @@  struct intel_gen6_power_mgmt {
 	u8 max_delay;
 	u8 rpe_delay;
 	u8 rp1_delay;
+	u8 rp0_delay;
 	u8 hw_max;
 
 	struct delayed_work delayed_resume_work;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8ef3559..e4c6d67 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2476,10 +2476,18 @@  i915_gem_retire_requests(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *ring;
+	bool idle = true;
 	int i;
 
-	for_each_ring(ring, dev_priv, i)
+	for_each_ring(ring, dev_priv, i) {
 		i915_gem_retire_requests_ring(ring);
+		idle &= list_empty(&ring->request_list);
+	}
+
+	if (idle)
+		queue_delayed_work(dev_priv->wq,
+				   &dev_priv->mm.retire_work,
+				   msecs_to_jiffies(100));
 }
 
 static void
@@ -2502,13 +2510,13 @@  i915_gem_retire_work_handler(struct work_struct *work)
 		return;
 	}
 
-	i915_gem_retire_requests(dev);
-
-	/* Send a periodic flush down the ring so we don't hold onto GEM
-	 * objects indefinitely.
-	 */
 	idle = true;
 	for_each_ring(ring, dev_priv, i) {
+		i915_gem_retire_requests_ring(ring);
+
+		/* Send a periodic flush down the ring so we don't hold
+		 * onto GEM objects indefinitely.
+		 */
 		if (ring->gpu_caches_dirty)
 			i915_add_request(ring, NULL);
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d4f8f65..4aa421e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -836,17 +836,6 @@  static void gen6_pm_rps_work(struct work_struct *work)
 	else
 		gen6_set_rps(dev_priv->dev, new_delay);
 
-	if (IS_VALLEYVIEW(dev_priv->dev)) {
-		/*
-		 * On VLV, when we enter RC6 we may not be at the minimum
-		 * voltage level, so arm a timer to check.  It should only
-		 * fire when there's activity or once after we've entered
-		 * RC6, and then won't be re-armed until the next RPS interrupt.
-		 */
-		mod_delayed_work(dev_priv->wq, &dev_priv->rps.vlv_work,
-				 msecs_to_jiffies(100));
-	}
-
 	mutex_unlock(&dev_priv->rps.hw_lock);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c6c19e1..d21d6c9 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7326,6 +7326,8 @@  void intel_mark_idle(struct drm_device *dev)
 
 		intel_decrease_pllclock(crtc);
 	}
+
+	gen6_rps_idle(dev->dev_private);
 }
 
 void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 8222f24..482c30b 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -785,4 +785,6 @@  extern void snb_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask);
 extern void snb_disable_pm_irq(struct drm_i915_private *dev_priv,
 			       uint32_t mask);
 
+extern void gen6_rps_idle(struct drm_i915_private *dev_priv);
+
 #endif /* __INTEL_DRV_H__ */
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 942b02b..aa98b0b 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3320,6 +3320,25 @@  void gen6_set_rps(struct drm_device *dev, u8 val)
 	trace_intel_gpu_freq_change(val * 50);
 }
 
+void gen6_rps_idle(struct drm_i915_private *dev_priv)
+{
+	/* We must be idle; drop to min voltage state.
+	 * Note: we use RPe here since it should match the
+	 * Vmin we were shooting for.  That should give us better
+	 * perf when we come back out of RC6 than if we used the
+	 * min freq available.
+	 */
+	mutex_lock(&dev_priv->rps.hw_lock);
+	if (dev_priv->info->is_valleyview) {
+		if (dev_priv->rps.cur_delay > dev_priv->rps.rpe_delay)
+			valleyview_set_rps(dev_priv->dev, dev_priv->rps.rpe_delay);
+	} else {
+		if (dev_priv->rps.cur_delay > dev_priv->rps.rp1_delay)
+			gen6_set_rps(dev_priv->dev, dev_priv->rps.rp1_delay);
+	}
+	mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
 /*
  * Wait until the previous freq change has completed,
  * or the timeout elapsed, and then update our notion
@@ -3493,8 +3512,9 @@  static void gen6_enable_rps(struct drm_device *dev)
 
 	/* In units of 50MHz */
 	dev_priv->rps.hw_max = dev_priv->rps.max_delay = rp_state_cap & 0xff;
-	dev_priv->rps.min_delay = (rp_state_cap & 0xff0000) >> 16;
-	dev_priv->rps.rp1_delay = (rp_state_cap & 0xff00) >> 8;
+	dev_priv->rps.min_delay = (rp_state_cap >> 16) & 0xff;
+	dev_priv->rps.rp1_delay = (rp_state_cap >>  8) & 0xff;
+	dev_priv->rps.rp0_delay = (rp_state_cap >>  0) & 0xff;
 	dev_priv->rps.cur_delay = 0;
 
 	/* disable the counters and set deterministic thresholds */
@@ -3702,24 +3722,6 @@  int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
 	return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
 }
 
-static void vlv_rps_timer_work(struct work_struct *work)
-{
-	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
-						    rps.vlv_work.work);
-
-	/*
-	 * Timer fired, we must be idle.  Drop to min voltage state.
-	 * Note: we use RPe here since it should match the
-	 * Vmin we were shooting for.  That should give us better
-	 * perf when we come back out of RC6 than if we used the
-	 * min freq available.
-	 */
-	mutex_lock(&dev_priv->rps.hw_lock);
-	if (dev_priv->rps.cur_delay > dev_priv->rps.rpe_delay)
-		valleyview_set_rps(dev_priv->dev, dev_priv->rps.rpe_delay);
-	mutex_unlock(&dev_priv->rps.hw_lock);
-}
-
 static void valleyview_setup_pctx(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3857,8 +3859,6 @@  static void valleyview_enable_rps(struct drm_device *dev)
 				      dev_priv->rps.rpe_delay),
 			 dev_priv->rps.rpe_delay);
 
-	INIT_DELAYED_WORK(&dev_priv->rps.vlv_work, vlv_rps_timer_work);
-
 	valleyview_set_rps(dev_priv->dev, dev_priv->rps.rpe_delay);
 
 	gen6_enable_rps_interrupts(dev);
@@ -4546,8 +4546,6 @@  void intel_disable_gt_powersave(struct drm_device *dev)
 	} else if (INTEL_INFO(dev)->gen >= 6) {
 		cancel_delayed_work_sync(&dev_priv->rps.delayed_resume_work);
 		cancel_work_sync(&dev_priv->rps.work);
-		if (IS_VALLEYVIEW(dev))
-			cancel_delayed_work_sync(&dev_priv->rps.vlv_work);
 		mutex_lock(&dev_priv->rps.hw_lock);
 		if (IS_VALLEYVIEW(dev))
 			valleyview_disable_rps(dev);