@@ -383,15 +383,25 @@ static int __intel_set_rps(struct drm_i915_private *i915, u8 val)
return 0;
}
-static int intel_set_rps(struct drm_i915_private *i915, u8 val)
+static int adjust_rps(struct drm_i915_private *i915, int freq, int adj)
{
struct intel_rps *rps = &i915->gt_pm.rps;
+ int min, max, val;
int err;
lockdep_assert_held(&rps->lock);
GEM_BUG_ON(!rps->active);
- GEM_BUG_ON(val > rps->max_freq);
- GEM_BUG_ON(val < rps->min_freq);
+
+ min = rps->min_freq_softlimit;
+ max = rps->max_freq_softlimit;
+ if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
+ max = rps->boost_freq;
+
+ GEM_BUG_ON(min < rps->min_freq);
+ GEM_BUG_ON(max > rps->max_freq);
+ GEM_BUG_ON(max < min);
+
+ val = clamp(freq + adj, min, max);
err = __intel_set_rps(i915, val);
if (err)
@@ -400,6 +410,7 @@ static int intel_set_rps(struct drm_i915_private *i915, u8 val)
if (val != rps->cur_freq) {
trace_intel_gpu_freq_change(intel_gpu_freq(i915, val));
rps->cur_freq = val;
+ rps->last_adj = val == freq + adj ? adj : 0;
}
return 0;
@@ -576,8 +587,8 @@ static void intel_rps_work(struct work_struct *work)
struct drm_i915_private *i915 =
container_of(work, struct drm_i915_private, gt_pm.rps.work);
struct intel_rps *rps = &i915->gt_pm.rps;
- int freq, adj, min, max;
bool client_boost;
+ int freq, adj;
u32 pm_iir;
pm_iir = xchg(&rps->pm_iir, 0) & ~rps->pm_events;
@@ -590,15 +601,6 @@ static void intel_rps_work(struct work_struct *work)
if (!rps->active)
goto unlock;
- min = rps->min_freq_softlimit;
- max = rps->max_freq_softlimit;
- if (client_boost && max < rps->boost_freq)
- max = rps->boost_freq;
-
- GEM_BUG_ON(min < rps->min_freq);
- GEM_BUG_ON(max > rps->max_freq);
- GEM_BUG_ON(max < min);
-
adj = rps->last_adj;
freq = rps->cur_freq;
if (client_boost && freq < rps->boost_freq) {
@@ -609,16 +611,13 @@ static void intel_rps_work(struct work_struct *work)
adj *= 2;
else /* CHV needs even encode values */
adj = IS_CHERRYVIEW(i915) ? 2 : 1;
-
- if (freq >= max)
- adj = 0;
} else if (client_boost) {
adj = 0;
} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
- if (freq > max_t(int, rps->efficient_freq, min))
- freq = max_t(int, rps->efficient_freq, min);
- else if (freq > min_t(int, rps->efficient_freq, min))
- freq = min_t(int, rps->efficient_freq, min);
+ if (freq > rps->efficient_freq)
+ freq = rps->efficient_freq;
+ else if (freq > rps->idle_freq)
+ freq = rps->idle_freq;
adj = 0;
} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
@@ -626,23 +625,17 @@ static void intel_rps_work(struct work_struct *work)
adj *= 2;
else /* CHV needs even encode values */
adj = IS_CHERRYVIEW(i915) ? -2 : -1;
-
- if (freq <= min)
- adj = 0;
} else { /* unknown/external event */
adj = 0;
}
- if (intel_set_rps(i915, clamp_t(int, freq + adj, min, max))) {
+ if (adjust_rps(i915, freq, adj))
DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
- adj = 0;
- }
if (pm_iir) {
spin_lock_irq(&i915->irq_lock);
gen6_unmask_pm_irq(i915, rps->pm_events);
spin_unlock_irq(&i915->irq_lock);
- rps->last_adj = adj;
}
unlock:
@@ -666,7 +659,6 @@ void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
void intel_gt_pm_busy(struct drm_i915_private *dev_priv)
{
struct intel_rps *rps = &dev_priv->gt_pm.rps;
- u8 freq;
if (!HAS_RPS(dev_priv))
return;
@@ -681,14 +673,7 @@ void intel_gt_pm_busy(struct drm_i915_private *dev_priv)
* Use the user's desired frequency as a guide, but for better
* performance, jump directly to RPe as our starting frequency.
*/
- freq = max(rps->cur_freq, rps->efficient_freq);
- if (intel_set_rps(dev_priv,
- clamp(freq,
- rps->min_freq_softlimit,
- rps->max_freq_softlimit)))
- DRM_DEBUG_DRIVER("Failed to set busy frequency\n");
-
- rps->last_adj = 0;
+ adjust_rps(dev_priv, max(rps->cur_freq, rps->efficient_freq), 0);
if (INTEL_GEN(dev_priv) >= 6) {
memset(&rps->ei, 0, sizeof(rps->ei));
When choosing the initial frequency in intel_gt_pm_busy() we also need to calculate the current min/max bounds. As this calculation is going to become more complex with the intersection of several different limits, refactor it to a common function. The alternative would be to feed the initial reclocking through the RPS worker, but the latency in this case is undesirable. v2: Only apply the rps->last_adj update if the frequency was unclamped. The intention is that we don't continue to accumulate the adjustment when we hit the bounds. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com> --- drivers/gpu/drm/i915/intel_gt_pm.c | 57 +++++++++++------------------- 1 file changed, 21 insertions(+), 36 deletions(-)