@@ -94,7 +94,7 @@ static int live_nop_switch(void *arg)
rq = i915_request_get(this);
i915_request_add(this);
}
- if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ if (i915_request_wait(rq, 0, HZ) < 0) {
pr_err("Failed to populated %d contexts\n", nctx);
intel_gt_set_wedged(&i915->gt);
i915_request_put(rq);
@@ -868,6 +868,8 @@ int intel_engines_init(struct intel_gt *gt)
setup = intel_guc_submission_setup;
else if (HAS_EXECLISTS(gt->i915))
setup = intel_execlists_submission_setup;
+ else if (INTEL_GEN(gt->i915) >= 5)
+ setup = intel_ring_scheduler_setup;
else
setup = intel_ring_submission_setup;
@@ -1081,9 +1081,7 @@ static bool gen6_rps_enable(struct intel_rps *rps)
intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
- rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
- GEN6_PM_RP_DOWN_THRESHOLD |
- GEN6_PM_RP_DOWN_TIMEOUT);
+ rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
return rps_reset(rps);
}
@@ -1391,7 +1389,7 @@ void intel_rps_enable(struct intel_rps *rps)
GEM_BUG_ON(rps->efficient_freq < rps->min_freq);
GEM_BUG_ON(rps->efficient_freq > rps->max_freq);
- if (has_busy_stats(rps))
+ if (has_busy_stats(rps) && !IS_VALLEYVIEW(i915))
intel_rps_set_timer(rps);
else if (INTEL_GEN(i915) >= 6)
intel_rps_set_interrupts(rps);
Switch over from FIFO global submission to the priority-sorted topographical scheduler. At the cost of more busy work on the CPU to keep the GPU supplied with the next packet of requests, this allows us to reorder requests around submission stalls and so allow low latency under load while maintaining fairness between clients. The downside is that we enable interrupts on all requests (unlike with execlists where we have an interrupt for context switches). This means that instead of receiving an interrupt for when we are waiting for completion, we are processing them all the time, with noticeable overhead of cpu time absorbed by the interrupt handler. The effect is most pronounced on CPU-throughput limited renderers like uxa, where performance can be degraded by 20% in the worst case. Nevertheless, this is a pathological example of an obsolete userspace driver. (There are also cases where uxa performs better by 20%, which is an interesting quirk...) The glxgears-not-a-benchmark (cpu throughput bound) is one such example of a performance hit, only affecting uxa. The expectation is that allowing request reordering will allow much smoother UX that greatly compensates for reduced throughput under high submission load (but low GPU load). This also enables the timer based RPS for better powersaving, with the exception of Valleyview whose PCU doesn't take kindly to our interference. References: 0f46832fab77 ("drm/i915: Mask USER interrupts on gen6 (until required)") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 ++ drivers/gpu/drm/i915/gt/intel_rps.c | 6 ++---- 3 files changed, 5 insertions(+), 5 deletions(-)