drm/i915: Apply rps waitboosting for dma_fence_wait_timeout()

Message ID	20190213092504.25709-1-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Wed, 13 Feb 2019 09:25:04 +0000 Message-Id: <20190213092504.25709-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH] drm/i915: Apply rps waitboosting for dma_fence_wait_timeout() Precedence: list Cc: Eero Tamminen <eero.t.tamminen@intel.com> Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>
Series	drm/i915: Apply rps waitboosting for dma_fence_wait_timeout() \| expand drm/i915: Apply rps waitboosting for dma_fence_wait_timeout()

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 13bdf326287b..2aeea977283f 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2020,11 +2020,9 @@ static const char *rps_power_to_str(unsigned int power) static int i915_rps_boost_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 act_freq = rps->cur_freq; intel_wakeref_t wakeref; - struct drm_file *file; with_intel_runtime_pm_if_in_use(dev_priv, wakeref) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { @@ -2058,22 +2056,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) intel_gpu_freq(dev_priv, rps->efficient_freq), intel_gpu_freq(dev_priv, rps->boost_freq)); - mutex_lock(&dev->filelist_mutex); - list_for_each_entry_reverse(file, &dev->filelist, lhead) { - struct drm_i915_file_private *file_priv = file->driver_priv; - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(file->pid, PIDTYPE_PID); - seq_printf(m, "%s [%d]: %d boosts\n", - task ? task->comm : "<unknown>", - task ? task->pid : -1, - atomic_read(&file_priv->rps_client.boosts)); - rcu_read_unlock(); - } - seq_printf(m, "Kernel (anonymous) boosts: %d\n", - atomic_read(&rps->boosts)); - mutex_unlock(&dev->filelist_mutex); + seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); if (INTEL_GEN(dev_priv) >= 6 && rps->enabled && diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 380b994fe5dc..17fe942eaafa 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -217,10 +217,6 @@ struct drm_i915_file_private { } mm; struct idr context_idr; - struct intel_rps_client { - atomic_t boosts; - } rps_client; - unsigned int bsd_engine; /* @@ -3056,8 +3052,7 @@ void i915_gem_resume(struct drm_i915_private *dev_priv); vm_fault_t i915_gem_fault(struct vm_fault *vmf); int i915_gem_object_wait(struct drm_i915_gem_object *obj, unsigned int flags, - long timeout, - struct intel_rps_client *rps); + long timeout); int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, const struct i915_sched_attr *attr); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ae1467a74a08..b421bc7a2e26 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -418,8 +418,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) static long i915_gem_object_wait_fence(struct dma_fence *fence, unsigned int flags, - long timeout, - struct intel_rps_client *rps_client) + long timeout) { struct i915_request *rq; @@ -437,27 +436,6 @@ i915_gem_object_wait_fence(struct dma_fence *fence, if (i915_request_completed(rq)) goto out; - /* - * This client is about to stall waiting for the GPU. In many cases - * this is undesirable and limits the throughput of the system, as - * many clients cannot continue processing user input/output whilst - * blocked. RPS autotuning may take tens of milliseconds to respond - * to the GPU load and thus incurs additional latency for the client. - * We can circumvent that by promoting the GPU frequency to maximum - * before we wait. This makes the GPU throttle up much more quickly - * (good for benchmarks and user experience, e.g. window animations), - * but at a cost of spending more power processing the workload - * (bad for battery). Not all clients even want their results - * immediately and for them we should just let the GPU select its own - * frequency to maximise efficiency. To prevent a single client from - * forcing the clocks too high for the whole system, we only allow - * each client to waitboost once in a busy period. - */ - if (rps_client && !i915_request_started(rq)) { - if (INTEL_GEN(rq->i915) >= 6) - gen6_rps_boost(rq, rps_client); - } - timeout = i915_request_wait(rq, flags, timeout); out: @@ -470,8 +448,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence, static long i915_gem_object_wait_reservation(struct reservation_object *resv, unsigned int flags, - long timeout, - struct intel_rps_client *rps_client) + long timeout) { unsigned int seq = __read_seqcount_begin(&resv->seq); struct dma_fence *excl; @@ -489,8 +466,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, for (i = 0; i < count; i++) { timeout = i915_gem_object_wait_fence(shared[i], - flags, timeout, - rps_client); + flags, timeout); if (timeout < 0) break; @@ -516,8 +492,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, } if (excl && timeout >= 0) - timeout = i915_gem_object_wait_fence(excl, flags, timeout, - rps_client); + timeout = i915_gem_object_wait_fence(excl, flags, timeout); dma_fence_put(excl); @@ -611,30 +586,19 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, * @obj: i915 gem object * @flags: how to wait (under a lock, for all rendering or just for writes etc) * @timeout: how long to wait - * @rps_client: client (user process) to charge for any waitboosting */ int i915_gem_object_wait(struct drm_i915_gem_object *obj, unsigned int flags, - long timeout, - struct intel_rps_client *rps_client) + long timeout) { might_sleep(); GEM_BUG_ON(timeout < 0); - timeout = i915_gem_object_wait_reservation(obj->resv, - flags, timeout, - rps_client); + timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout); return timeout < 0 ? timeout : 0; } -static struct intel_rps_client *to_rps_client(struct drm_file *file) -{ - struct drm_i915_file_private *fpriv = file->driver_priv; - - return &fpriv->rps_client; -} - static int i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, @@ -840,8 +804,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT, - NULL); + MAX_SCHEDULE_TIMEOUT); if (ret) return ret; @@ -893,8 +856,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED | I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT, - NULL); + MAX_SCHEDULE_TIMEOUT); if (ret) return ret; @@ -1156,8 +1118,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT, - to_rps_client(file)); + MAX_SCHEDULE_TIMEOUT); if (ret) goto out; @@ -1456,8 +1417,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT, - to_rps_client(file)); + MAX_SCHEDULE_TIMEOUT); if (ret) goto err; @@ -1555,8 +1515,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, I915_WAIT_INTERRUPTIBLE | I915_WAIT_PRIORITY | (write_domain ? I915_WAIT_ALL : 0), - MAX_SCHEDULE_TIMEOUT, - to_rps_client(file)); + MAX_SCHEDULE_TIMEOUT); if (err) goto out; @@ -1865,8 +1824,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) */ ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT, - NULL); + MAX_SCHEDULE_TIMEOUT); if (ret) goto err; @@ -3197,8 +3155,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) I915_WAIT_INTERRUPTIBLE | I915_WAIT_PRIORITY | I915_WAIT_ALL, - to_wait_timeout(args->timeout_ns), - to_rps_client(file)); + to_wait_timeout(args->timeout_ns)); if (args->timeout_ns > 0) { args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); @@ -3267,7 +3224,7 @@ wait_for_timelines(struct drm_i915_private *i915, * stalls, so allow the gpu to boost to maximum clocks. */ if (flags & I915_WAIT_FOR_IDLE_BOOST) - gen6_rps_boost(rq, NULL); + gen6_rps_boost(rq); timeout = i915_request_wait(rq, flags, timeout); i915_request_put(rq); @@ -3362,8 +3319,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED | (write ? I915_WAIT_ALL : 0), - MAX_SCHEDULE_TIMEOUT, - NULL); + MAX_SCHEDULE_TIMEOUT); if (ret) return ret; @@ -3425,8 +3381,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED | (write ? I915_WAIT_ALL : 0), - MAX_SCHEDULE_TIMEOUT, - NULL); + MAX_SCHEDULE_TIMEOUT); if (ret) return ret; @@ -3541,8 +3496,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED | I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT, - NULL); + MAX_SCHEDULE_TIMEOUT); if (ret) return ret; @@ -3680,8 +3634,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT, - to_rps_client(file)); + MAX_SCHEDULE_TIMEOUT); if (ret) goto out; @@ -3807,8 +3760,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED | (write ? I915_WAIT_ALL : 0), - MAX_SCHEDULE_TIMEOUT, - NULL); + MAX_SCHEDULE_TIMEOUT); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c2a5c48c7541..0acd6baa3c88 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -68,7 +68,9 @@ static signed long i915_fence_wait(struct dma_fence *fence, bool interruptible, signed long timeout) { - return i915_request_wait(to_request(fence), interruptible, timeout); + return i915_request_wait(to_request(fence), + interruptible | I915_WAIT_PRIORITY, + timeout); } static void i915_fence_release(struct dma_fence *fence) @@ -1136,8 +1138,23 @@ long i915_request_wait(struct i915_request *rq, if (__i915_spin_request(rq, state, 5)) goto out; - if (flags & I915_WAIT_PRIORITY) + /* + * This client is about to stall waiting for the GPU. In many cases + * this is undesirable and limits the throughput of the system, as + * many clients cannot continue processing user input/output whilst + * blocked. RPS autotuning may take tens of milliseconds to respond + * to the GPU load and thus incurs additional latency for the client. + * We can circumvent that by promoting the GPU frequency to maximum + * before we sleep. This makes the GPU throttle up much more quickly + * (good for benchmarks and user experience, e.g. window animations), + * but at a cost of spending more power processing the workload + * (bad for battery). + */ + if (flags & I915_WAIT_PRIORITY) { + if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6) + gen6_rps_boost(rq); i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); + } wait.tsk = current; if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake)) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5b8dabdb0e39..0a8913b2059e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13568,7 +13568,7 @@ static int do_rps_boost(struct wait_queue_entry *_wait, * vblank without our intervention, so leave RPS alone. */ if (!i915_request_started(rq)) - gen6_rps_boost(rq, NULL); + gen6_rps_boost(rq); i915_request_put(rq); drm_crtc_vblank_put(wait->crtc); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index fd50c962eaa3..568b7c004852 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -2264,7 +2264,7 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); void gen6_rps_busy(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); void gen6_rps_idle(struct drm_i915_private *dev_priv); -void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps); +void gen6_rps_boost(struct i915_request *rq); void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv); void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv); void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 454581b044ad..6021971de84b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6771,8 +6771,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->pcu_lock); } -void gen6_rps_boost(struct i915_request *rq, - struct intel_rps_client *rps_client) +void gen6_rps_boost(struct i915_request *rq) { struct intel_rps *rps = &rq->i915->gt_pm.rps; unsigned long flags; @@ -6801,7 +6800,7 @@ void gen6_rps_boost(struct i915_request *rq, if (READ_ONCE(rps->cur_freq) < rps->boost_freq) schedule_work(&rps->work); - atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts); + atomic_inc(&rps->boosts); } int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)

drm/i915: Apply rps waitboosting for dma_fence_wait_timeout()

Commit Message

Patch