Message ID | 20211117224955.28999-3-vinay.belgaumkar@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915/gt: RPS tuning for light media playback | expand |
On 11/17/2021 2:49 PM, Vinay Belgaumkar wrote: > From: Chris Wilson <chris.p.wilson@intel.com> > > Currently, we inspect each engine individually and measure the occupancy > of that engine over the last evaluation interval. If that exceeds our > busyness thresholds, we decide to increase the GPU frequency. However, > under a load balancer, we should consider the occupancy of entire engine > groups, as work may be spread out across the group. In doing so, we > prefer wide over fast, as power consumption is approximately proportional to > the square of the frequency. However, since the load balancer is greedy, > the first idle engine gets all the work, and preferentially reuses the > last active engine, under light loads all work is assigned to one > engine, and so that engine appears very busy. But if the work happened > to overlap slightly, the workload would spread across multiple engines, > reducing each individual engine's runtime, and so reducing the rps > contribution, keeping the frequency low. Instead, when considering the > contribution, consider the contribution over the entire engine group > (capacity). 
> > Signed-off-by: Chris Wilson <chris.p.wilson@intel.com> > Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Reviewed-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_rps.c | 48 ++++++++++++++++++++--------- > 1 file changed, 34 insertions(+), 14 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c > index 07ff7ba7b2b7..3675ac93ded0 100644 > --- a/drivers/gpu/drm/i915/gt/intel_rps.c > +++ b/drivers/gpu/drm/i915/gt/intel_rps.c > @@ -7,6 +7,7 @@ > > #include "i915_drv.h" > #include "intel_breadcrumbs.h" > +#include "intel_engine_pm.h" > #include "intel_gt.h" > #include "intel_gt_clock_utils.h" > #include "intel_gt_irq.h" > @@ -65,26 +66,45 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) > static void rps_timer(struct timer_list *t) > { > struct intel_rps *rps = from_timer(rps, t, timer); > - struct intel_engine_cs *engine; > - ktime_t dt, last, timestamp; > - enum intel_engine_id id; > + struct intel_gt *gt = rps_to_gt(rps); > + ktime_t dt, last, timestamp = 0; > s64 max_busy[3] = {}; > + int i, j; > > - timestamp = 0; > - for_each_engine(engine, rps_to_gt(rps), id) { > - s64 busy; > - int i; > + /* Compare average occupancy over each engine group */ > + for (i = 0; i < ARRAY_SIZE(gt->engine_class); i++) { > + s64 busy = 0; > + int count = 0; > + > + for (j = 0; j < ARRAY_SIZE(gt->engine_class[i]); j++) { > + struct intel_engine_cs *engine; > > - dt = intel_engine_get_busy_time(engine, &timestamp); > - last = engine->stats.rps; > - engine->stats.rps = dt; > + engine = gt->engine_class[i][j]; > + if (!engine) > + continue; > > - busy = ktime_to_ns(ktime_sub(dt, last)); > - for (i = 0; i < ARRAY_SIZE(max_busy); i++) { > - if (busy > max_busy[i]) > - swap(busy, max_busy[i]); > + dt = intel_engine_get_busy_time(engine, &timestamp); > + last = engine->stats.rps; > + engine->stats.rps = dt; > + > + if 
(!intel_engine_pm_is_awake(engine)) > + continue; > + > + busy += ktime_to_ns(ktime_sub(dt, last)); > + count++; > + } > + > + if (count > 1) > + busy = div_u64(busy, count); > + if (busy <= max_busy[ARRAY_SIZE(max_busy) - 1]) > + continue; > + > + for (j = 0; j < ARRAY_SIZE(max_busy); j++) { > + if (busy > max_busy[j]) > + swap(busy, max_busy[j]); > } > } > + > last = rps->pm_timestamp; > rps->pm_timestamp = timestamp; > >
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 07ff7ba7b2b7..3675ac93ded0 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -7,6 +7,7 @@ #include "i915_drv.h" #include "intel_breadcrumbs.h" +#include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_clock_utils.h" #include "intel_gt_irq.h" @@ -65,26 +66,45 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) static void rps_timer(struct timer_list *t) { struct intel_rps *rps = from_timer(rps, t, timer); - struct intel_engine_cs *engine; - ktime_t dt, last, timestamp; - enum intel_engine_id id; + struct intel_gt *gt = rps_to_gt(rps); + ktime_t dt, last, timestamp = 0; s64 max_busy[3] = {}; + int i, j; - timestamp = 0; - for_each_engine(engine, rps_to_gt(rps), id) { - s64 busy; - int i; + /* Compare average occupancy over each engine group */ + for (i = 0; i < ARRAY_SIZE(gt->engine_class); i++) { + s64 busy = 0; + int count = 0; + + for (j = 0; j < ARRAY_SIZE(gt->engine_class[i]); j++) { + struct intel_engine_cs *engine; - dt = intel_engine_get_busy_time(engine, &timestamp); - last = engine->stats.rps; - engine->stats.rps = dt; + engine = gt->engine_class[i][j]; + if (!engine) + continue; - busy = ktime_to_ns(ktime_sub(dt, last)); - for (i = 0; i < ARRAY_SIZE(max_busy); i++) { - if (busy > max_busy[i]) - swap(busy, max_busy[i]); + dt = intel_engine_get_busy_time(engine, &timestamp); + last = engine->stats.rps; + engine->stats.rps = dt; + + if (!intel_engine_pm_is_awake(engine)) + continue; + + busy += ktime_to_ns(ktime_sub(dt, last)); + count++; + } + + if (count > 1) + busy = div_u64(busy, count); + if (busy <= max_busy[ARRAY_SIZE(max_busy) - 1]) + continue; + + for (j = 0; j < ARRAY_SIZE(max_busy); j++) { + if (busy > max_busy[j]) + swap(busy, max_busy[j]); } } + last = rps->pm_timestamp; rps->pm_timestamp = timestamp;