Message ID | 20180314093748.8541-30-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 3/14/2018 3:07 PM, Chris Wilson wrote: > When choosing the initial frequency in intel_gt_pm_busy() we also need > to calculate the current min/max bounds. As this calculation is going to > become more complex with the intersection of several different limits, > refactor it to a common function. The alternative wold be to feed the typo > initial reclocking through the RPS worker, but the latency in this case > is undesirable. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/intel_gt_pm.c | 58 +++++++++++++++----------------------- > 1 file changed, 22 insertions(+), 36 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c > index 8630c30a7e48..f8e029b4a8a7 100644 > --- a/drivers/gpu/drm/i915/intel_gt_pm.c > +++ b/drivers/gpu/drm/i915/intel_gt_pm.c > @@ -382,15 +382,25 @@ static int __intel_set_rps(struct drm_i915_private *dev_priv, u8 val) > return 0; > } > > -static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) > +static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj) > { > struct intel_rps *rps = &dev_priv->gt_pm.rps; > + int min, max, val; Can we move to u8 type in this patch itself > int err; > > lockdep_assert_held(&rps->lock); > GEM_BUG_ON(!rps->active); > - GEM_BUG_ON(val > rps->max_freq); > - GEM_BUG_ON(val < rps->min_freq); > + > + min = rps->min_freq_softlimit; > + max = rps->max_freq_softlimit; > + if (atomic_read(&rps->num_waiters) && max < rps->boost_freq) > + max = rps->boost_freq; > + > + GEM_BUG_ON(min < rps->min_freq); > + GEM_BUG_ON(max > rps->max_freq); > + GEM_BUG_ON(max < min); > + > + val = clamp(freq + adj, min, max); > > err = __intel_set_rps(dev_priv, val); > if (err) > @@ -401,6 +411,8 @@ static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) > rps->cur_freq = val; > } > > + rps->last_adj = val == freq ? adj : 0; > + I think this should be: rps->last_adj = val == freq ? 0 : adj; and this update can be done in previous if/(else) condition. > return 0; > } > > @@ -562,8 +574,8 @@ static void intel_rps_work(struct work_struct *work) > struct drm_i915_private *i915 = > container_of(work, struct drm_i915_private, gt_pm.rps.work); > struct intel_rps *rps = &i915->gt_pm.rps; > - int freq, adj, min, max; > bool client_boost; > + int freq, adj; > u32 pm_iir; > > pm_iir = xchg(&rps->pm_iir, 0) & ~rps->pm_events; > @@ -576,15 +588,6 @@ static void intel_rps_work(struct work_struct *work) > if (!rps->active) > goto unlock; > > - min = rps->min_freq_softlimit; > - max = rps->max_freq_softlimit; > - if (client_boost && max < rps->boost_freq) > - max = rps->boost_freq; > - > - GEM_BUG_ON(min < rps->min_freq); > - GEM_BUG_ON(max > rps->max_freq); > - GEM_BUG_ON(max < min); > - > adj = rps->last_adj; > freq = rps->cur_freq; > if (client_boost && freq < rps->boost_freq) { > @@ -595,16 +598,13 @@ static void intel_rps_work(struct work_struct *work) > adj *= 2; > else /* CHV needs even encode values */ > adj = IS_CHERRYVIEW(i915) ? 2 : 1; > - > - if (freq >= max) > - adj = 0; > } else if (client_boost) { > adj = 0; > } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { > - if (freq > max_t(int, rps->efficient_freq, min)) > - freq = max_t(int, rps->efficient_freq, min); > - else if (freq > min_t(int, rps->efficient_freq, min)) > - freq = min_t(int, rps->efficient_freq, min); > + if (freq > rps->efficient_freq) > + freq = rps->efficient_freq; > + else if (freq > rps->idle_freq) > + freq = rps->idle_freq; > > adj = 0; > } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { > @@ -612,23 +612,17 @@ static void intel_rps_work(struct work_struct *work) > adj *= 2; > else /* CHV needs even encode values */ > adj = IS_CHERRYVIEW(i915) ? -2 : -1; > - > - if (freq <= min) > - adj = 0; > } else { /* unknown/external event */ > adj = 0; > } > > - if (intel_set_rps(i915, clamp_t(int, freq + adj, min, max))) { > + if (adjust_rps(i915, freq, adj)) > DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); > - adj = 0; > - } > > if (pm_iir) { > spin_lock_irq(&i915->irq_lock); > gen6_unmask_pm_irq(i915, rps->pm_events); > spin_unlock_irq(&i915->irq_lock); > - rps->last_adj = adj; > } > > unlock: > @@ -652,7 +646,6 @@ void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) > void intel_gt_pm_busy(struct drm_i915_private *dev_priv) > { > struct intel_rps *rps = &dev_priv->gt_pm.rps; > - u8 freq; > > if (!HAS_RPS(dev_priv)) > return; > @@ -667,14 +660,7 @@ void intel_gt_pm_busy(struct drm_i915_private *dev_priv) > * Use the user's desired frequency as a guide, but for better > * performance, jump directly to RPe as our starting frequency. > */ > - freq = max(rps->cur_freq, rps->efficient_freq); > - if (intel_set_rps(dev_priv, > - clamp(freq, > - rps->min_freq_softlimit, > - rps->max_freq_softlimit))) > - DRM_DEBUG_DRIVER("Failed to set busy frequency\n"); > - > - rps->last_adj = 0; > + adjust_rps(dev_priv, max(rps->cur_freq, rps->efficient_freq), 0); > > if (INTEL_GEN(dev_priv) >= 6) { > memset(&rps->ei, 0, sizeof(rps->ei));
Quoting Sagar Arun Kamble (2018-03-17 15:10:08) > > > On 3/14/2018 3:07 PM, Chris Wilson wrote: > > When choosing the initial frequency in intel_gt_pm_busy() we also need > > to calculate the current min/max bounds. As this calculation is going to > > become more complex with the intersection of several different limits, > > refactor it to a common function. The alternative wold be to feed the > typo > > initial reclocking through the RPS worker, but the latency in this case > > is undesirable. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > drivers/gpu/drm/i915/intel_gt_pm.c | 58 +++++++++++++++----------------------- > > 1 file changed, 22 insertions(+), 36 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c > > index 8630c30a7e48..f8e029b4a8a7 100644 > > --- a/drivers/gpu/drm/i915/intel_gt_pm.c > > +++ b/drivers/gpu/drm/i915/intel_gt_pm.c > > @@ -382,15 +382,25 @@ static int __intel_set_rps(struct drm_i915_private *dev_priv, u8 val) > > return 0; > > } > > > > -static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) > > +static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj) > > { > > struct intel_rps *rps = &dev_priv->gt_pm.rps; > > + int min, max, val; > Can we move to u8 type in this patch itself No. Check the math, that presumes int clamping, so just use native register types until after the clamping. > > int err; > > > > lockdep_assert_held(&rps->lock); > > GEM_BUG_ON(!rps->active); > > - GEM_BUG_ON(val > rps->max_freq); > > - GEM_BUG_ON(val < rps->min_freq); > > + > > + min = rps->min_freq_softlimit; > > + max = rps->max_freq_softlimit; > > + if (atomic_read(&rps->num_waiters) && max < rps->boost_freq) > > + max = rps->boost_freq; > > + > > + GEM_BUG_ON(min < rps->min_freq); > > + GEM_BUG_ON(max > rps->max_freq); > > + GEM_BUG_ON(max < min); > > + > > + val = clamp(freq + adj, min, max); > > > > err = __intel_set_rps(dev_priv, val); > > if (err) > > @@ -401,6 +411,8 @@ static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) > > rps->cur_freq = val; > > } > > > > + rps->last_adj = val == freq ? adj : 0; > > + > I think this should be: > rps->last_adj = val == freq ? 0 : adj; If we make the adjustment, store the new adj; if we overrule the selection, then cancel the adj so that we don't keep on accumulating adj upon hitting the bounds. Hmm, should have been val == freq + adj. -Chris
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c index 8630c30a7e48..f8e029b4a8a7 100644 --- a/drivers/gpu/drm/i915/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/intel_gt_pm.c @@ -382,15 +382,25 @@ static int __intel_set_rps(struct drm_i915_private *dev_priv, u8 val) return 0; } -static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) +static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj) { struct intel_rps *rps = &dev_priv->gt_pm.rps; + int min, max, val; int err; lockdep_assert_held(&rps->lock); GEM_BUG_ON(!rps->active); - GEM_BUG_ON(val > rps->max_freq); - GEM_BUG_ON(val < rps->min_freq); + + min = rps->min_freq_softlimit; + max = rps->max_freq_softlimit; + if (atomic_read(&rps->num_waiters) && max < rps->boost_freq) + max = rps->boost_freq; + + GEM_BUG_ON(min < rps->min_freq); + GEM_BUG_ON(max > rps->max_freq); + GEM_BUG_ON(max < min); + + val = clamp(freq + adj, min, max); err = __intel_set_rps(dev_priv, val); if (err) @@ -401,6 +411,8 @@ static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) rps->cur_freq = val; } + rps->last_adj = val == freq ? adj : 0; + return 0; } @@ -562,8 +574,8 @@ static void intel_rps_work(struct work_struct *work) struct drm_i915_private *i915 = container_of(work, struct drm_i915_private, gt_pm.rps.work); struct intel_rps *rps = &i915->gt_pm.rps; - int freq, adj, min, max; bool client_boost; + int freq, adj; u32 pm_iir; pm_iir = xchg(&rps->pm_iir, 0) & ~rps->pm_events; @@ -576,15 +588,6 @@ static void intel_rps_work(struct work_struct *work) if (!rps->active) goto unlock; - min = rps->min_freq_softlimit; - max = rps->max_freq_softlimit; - if (client_boost && max < rps->boost_freq) - max = rps->boost_freq; - - GEM_BUG_ON(min < rps->min_freq); - GEM_BUG_ON(max > rps->max_freq); - GEM_BUG_ON(max < min); - adj = rps->last_adj; freq = rps->cur_freq; if (client_boost && freq < rps->boost_freq) { @@ -595,16 +598,13 @@ static void intel_rps_work(struct work_struct *work) adj *= 2; else /* CHV needs even encode values */ adj = IS_CHERRYVIEW(i915) ? 2 : 1; - - if (freq >= max) - adj = 0; } else if (client_boost) { adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { - if (freq > max_t(int, rps->efficient_freq, min)) - freq = max_t(int, rps->efficient_freq, min); - else if (freq > min_t(int, rps->efficient_freq, min)) - freq = min_t(int, rps->efficient_freq, min); + if (freq > rps->efficient_freq) + freq = rps->efficient_freq; + else if (freq > rps->idle_freq) + freq = rps->idle_freq; adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { @@ -612,23 +612,17 @@ static void intel_rps_work(struct work_struct *work) adj *= 2; else /* CHV needs even encode values */ adj = IS_CHERRYVIEW(i915) ? -2 : -1; - - if (freq <= min) - adj = 0; } else { /* unknown/external event */ adj = 0; } - if (intel_set_rps(i915, clamp_t(int, freq + adj, min, max))) { + if (adjust_rps(i915, freq, adj)) DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); - adj = 0; - } if (pm_iir) { spin_lock_irq(&i915->irq_lock); gen6_unmask_pm_irq(i915, rps->pm_events); spin_unlock_irq(&i915->irq_lock); - rps->last_adj = adj; } unlock: @@ -652,7 +646,6 @@ void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) void intel_gt_pm_busy(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; - u8 freq; if (!HAS_RPS(dev_priv)) return; @@ -667,14 +660,7 @@ void intel_gt_pm_busy(struct drm_i915_private *dev_priv) * Use the user's desired frequency as a guide, but for better * performance, jump directly to RPe as our starting frequency. */ - freq = max(rps->cur_freq, rps->efficient_freq); - if (intel_set_rps(dev_priv, - clamp(freq, - rps->min_freq_softlimit, - rps->max_freq_softlimit))) - DRM_DEBUG_DRIVER("Failed to set busy frequency\n"); - - rps->last_adj = 0; + adjust_rps(dev_priv, max(rps->cur_freq, rps->efficient_freq), 0); if (INTEL_GEN(dev_priv) >= 6) { memset(&rps->ei, 0, sizeof(rps->ei));
When choosing the initial frequency in intel_gt_pm_busy() we also need to calculate the current min/max bounds. As this calculation is going to become more complex with the intersection of several different limits, refactor it to a common function. The alternative wold be to feed the initial reclocking through the RPS worker, but the latency in this case is undesirable. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/intel_gt_pm.c | 58 +++++++++++++++----------------------- 1 file changed, 22 insertions(+), 36 deletions(-)