[CI,4/6] drm/i915/gt: Switch to manual evaluation of RPS

Message ID	20200429103535.27781-4-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=NN/q=6N=lists.freedesktop.org=intel-gfx-bounces@kernel.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org E381C21973 From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Wed, 29 Apr 2020 11:35:33 +0100 Message-Id: <20200429103535.27781-4-chris@chris-wilson.co.uk> In-Reply-To: <20200429103535.27781-1-chris@chris-wilson.co.uk> References: <20200429103535.27781-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [CI 4/6] drm/i915/gt: Switch to manual evaluation of RPS Precedence: list Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>
Series	[CI,1/6] drm/i915/gt: Always enable busy-stats for execlists \| expand [CI,1/6] drm/i915/gt: Always enable busy-stats for execlists [CI,2/6] drm/i915/gt: Move rps.enabled/active to flags [CI,3/6] drm/i915/gt: Track use of RPS interrupts in flags [CI,4/6] drm/i915/gt: Switch to manual evaluation of RPS [CI,5/6] drm/i915/gt: Apply the aggressive downclocking to parking [CI,6/6] drm/i915/gt: Restore aggressive post-boost downclocking

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index c943ab0981f0..601be7d4ba79 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -555,6 +555,11 @@ struct intel_engine_cs { * Idle is defined as active == 0, active is active > 0. */ ktime_t start; + + /** + * @rps: Utilisation at last RPS sampling. + */ + ktime_t rps; } stats; struct { diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 52151001d7ab..f723a9cdd24e 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -15,6 +15,10 @@ #include "intel_sideband.h" #include "../../../platform/x86/intel_ips.h" +#define BUSY_UP 95 /* % utilisation */ +#define BUSY_DOWN 85 /* % utilisation */ +#define BUSY_MAX_EI 20u /* ms */ + /* * Lock protecting IPS related data structures */ @@ -45,6 +49,99 @@ static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) intel_uncore_write_fw(uncore, reg, val); } +static void rps_timer(struct timer_list *t) +{ + struct intel_rps *rps = from_timer(rps, t, timer); + struct intel_engine_cs *engine; + enum intel_engine_id id; + s64 max_busy[3] = {}; + ktime_t dt, last; + + for_each_engine(engine, rps_to_gt(rps), id) { + s64 busy; + int i; + + dt = intel_engine_get_busy_time(engine); + last = engine->stats.rps; + engine->stats.rps = dt; + + busy = ktime_to_ns(ktime_sub(dt, last)); + for (i = 0; i < ARRAY_SIZE(max_busy); i++) { + if (busy > max_busy[i]) + swap(busy, max_busy[i]); + } + } + + dt = ktime_get(); + last = rps->pm_timestamp; + rps->pm_timestamp = dt; + + if (intel_rps_is_active(rps)) { + s64 busy; + int i; + + dt = ktime_sub(dt, last); + + /* + * Our goal is to evaluate each engine independently, so we run + * at the lowest clocks required to sustain the heaviest + * workload. However, a task may be split into sequential + * dependent operations across a set of engines, such that + * the independent contributions do not account for high load, + * but overall the task is GPU bound. For example, consider + * video decode on vcs followed by colour post-processing + * on vecs, followed by general post-processing on rcs. + * Since multi-engines being active does imply a single + * continuous workload across all engines, we hedge our + * bets by only contributing a factor of the distributed + * load into our busyness calculation. + */ + busy = max_busy[0]; + for (i = 1; i < ARRAY_SIZE(max_busy); i++) { + if (!max_busy[i]) + break; + + busy += div_u64(max_busy[i], 1 << i); + } + GT_TRACE(rps_to_gt(rps), + "busy:%lld [%d%%], max:[%lld, %lld, %lld]\n", + busy, (int)div64_u64(100 * busy, dt), + max_busy[0], max_busy[1], max_busy[2]); + + if (100 * busy > BUSY_UP * dt && + rps->cur_freq < rps->max_freq_softlimit) { + rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD; + rps->pm_interval = 1; + schedule_work(&rps->work); + } else if (100 * busy < BUSY_DOWN * dt && + rps->cur_freq > rps->min_freq_softlimit) { + rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD; + rps->pm_interval = 1; + schedule_work(&rps->work); + } else { + rps->last_adj = 0; + } + + mod_timer(&rps->timer, + jiffies + msecs_to_jiffies(rps->pm_interval)); + rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI); + } +} + +static void rps_start_timer(struct intel_rps *rps) +{ + rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); + rps->pm_interval = 1; + mod_timer(&rps->timer, jiffies + 1); +} + +static void rps_stop_timer(struct intel_rps *rps) +{ + del_timer_sync(&rps->timer); + rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); + cancel_work_sync(&rps->work); +} + static u32 rps_pm_mask(struct intel_rps *rps, u8 val) { u32 mask = 0; @@ -742,8 +839,11 @@ void intel_rps_unpark(struct intel_rps *rps) mutex_unlock(&rps->lock); + rps->pm_iir = 0; if (intel_rps_has_interrupts(rps)) rps_enable_interrupts(rps); + if (intel_rps_uses_timer(rps)) + rps_start_timer(rps); if (IS_GEN(rps_to_i915(rps), 5)) gen5_rps_update(rps); @@ -754,6 +854,8 @@ void intel_rps_park(struct intel_rps *rps) if (!intel_rps_clear_active(rps)) return; + if (intel_rps_uses_timer(rps)) + rps_stop_timer(rps); if (intel_rps_has_interrupts(rps)) rps_disable_interrupts(rps); @@ -1211,6 +1313,19 @@ static unsigned long __ips_gfx_val(struct intel_ips *ips) return ips->gfx_power + state2; } +static bool has_busy_stats(struct intel_rps *rps) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, rps_to_gt(rps), id) { + if (!intel_engine_supports_stats(engine)) + return false; + } + + return true; +} + void intel_rps_enable(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); @@ -1255,7 +1370,9 @@ void intel_rps_enable(struct intel_rps *rps) GEM_BUG_ON(rps->efficient_freq < rps->min_freq); GEM_BUG_ON(rps->efficient_freq > rps->max_freq); - if (INTEL_GEN(i915) >= 6) + if (has_busy_stats(rps)) + intel_rps_set_timer(rps); + else if (INTEL_GEN(i915) >= 6) intel_rps_set_interrupts(rps); else /* Ironlake currently uses intel_ips.ko */ {} @@ -1274,6 +1391,7 @@ void intel_rps_disable(struct intel_rps *rps) intel_rps_clear_enabled(rps); intel_rps_clear_interrupts(rps); + intel_rps_clear_timer(rps); if (INTEL_GEN(i915) >= 6) gen6_rps_disable(rps); @@ -1689,6 +1807,7 @@ void intel_rps_init_early(struct intel_rps *rps) mutex_init(&rps->power.mutex); INIT_WORK(&rps->work, rps_work); + timer_setup(&rps->timer, rps_timer, 0); atomic_set(&rps->num_waiters, 0); } diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 0ce6a0e492ea..af07fa5b7584 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -81,4 +81,19 @@ static inline void intel_rps_clear_interrupts(struct intel_rps *rps) clear_bit(INTEL_RPS_INTERRUPTS, &rps->flags); } +static inline bool intel_rps_uses_timer(const struct intel_rps *rps) +{ + return test_bit(INTEL_RPS_TIMER, &rps->flags); +} + +static inline void intel_rps_set_timer(struct intel_rps *rps) +{ + set_bit(INTEL_RPS_TIMER, &rps->flags); +} + +static inline void intel_rps_clear_timer(struct intel_rps *rps) +{ + clear_bit(INTEL_RPS_TIMER, &rps->flags); +} + #endif /* INTEL_RPS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h index 624e93108da4..38083f0402d9 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h @@ -35,6 +35,7 @@ enum { INTEL_RPS_ENABLED = 0, INTEL_RPS_ACTIVE, INTEL_RPS_INTERRUPTS, + INTEL_RPS_TIMER, }; struct intel_rps { @@ -44,8 +45,12 @@ struct intel_rps { * work, interrupts_enabled and pm_iir are protected by * dev_priv->irq_lock */ + struct timer_list timer; struct work_struct work; unsigned long flags; + + ktime_t pm_timestamp; + u32 pm_interval; u32 pm_iir; /* PM interrupt bits that should never be masked */

[CI,4/6] drm/i915/gt: Switch to manual evaluation of RPS

Commit Message

Patch