Message ID | 20180730123954.6249-1-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v3] drm/i915: Limit C-states when waiting for the active request | expand |
Chris Wilson <chris@chris-wilson.co.uk> writes: > If we are waiting for the currently executing request, we have a good > idea that it will be completed in the very near future and so want to > cap the CPU_DMA_LATENCY to ensure that we wake up the client quickly. > > v2: Not allowed to block in kmalloc after setting TASK_INTERRUPTIBLE. > v3: Avoid the blocking notifier as well for TASK_INTERRUPTIBLE > > Testcase: igt/gem_sync/store-default > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > Cc: Eero Tamminen <eero.t.tamminen@intel.com> > Cc: Francisco Jerez <currojerez@riseup.net> > --- > drivers/gpu/drm/i915/i915_request.c | 38 +++++++++++++++++++++++++++++ > 1 file changed, 38 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index 5c2c93cbab12..7c7746ef0d1b 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -1258,6 +1258,28 @@ static bool __i915_wait_request_check_and_reset(struct i915_request *request) > return true; > } > > +struct pm_qos { > + struct pm_qos_request req; > + struct work_struct add, del; > +}; > + > +static void pm_qos_add(struct work_struct *work) > +{ > + struct pm_qos *pm_qos = container_of(work, typeof(*pm_qos), add); > + > + pm_qos_add_request(&pm_qos->req, PM_QOS_CPU_DMA_LATENCY, 50); > +} > + > +static void pm_qos_del(struct work_struct *work) > +{ > + struct pm_qos *pm_qos = container_of(work, typeof(*pm_qos), del); > + > + if (!cancel_work_sync(&pm_qos->add)) > + pm_qos_remove_request(&pm_qos->req); > + > + kfree(pm_qos); > +} > + > /** > * i915_request_wait - wait until execution of request has finished > * @rq: the request to wait upon > @@ -1286,6 +1308,7 @@ long i915_request_wait(struct i915_request *rq, > wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue; > DEFINE_WAIT_FUNC(reset, default_wake_function); 
> DEFINE_WAIT_FUNC(exec, default_wake_function); > + struct pm_qos *pm_qos = NULL; > struct intel_wait wait; > > might_sleep(); > @@ -1363,6 +1386,19 @@ long i915_request_wait(struct i915_request *rq, > break; > } > > + if (!pm_qos && > + i915_seqno_passed(intel_engine_get_seqno(rq->engine), > + wait.seqno - 1)) { > + pm_qos = kzalloc(sizeof(*pm_qos), > + GFP_NOWAIT | __GFP_NOWARN); > + if (pm_qos) { > + INIT_WORK(&pm_qos->add, pm_qos_add); > + INIT_WORK(&pm_qos->del, pm_qos_del); > + schedule_work_on(smp_processor_id(), > + &pm_qos->add); > + } My immediate thoughts are on why this and not preallocate pm_qos on init/fini and just add/remove requests on suitable spots? -Mika > + } > + > timeout = io_schedule_timeout(timeout); > > if (intel_wait_complete(&wait) && > @@ -1412,6 +1448,8 @@ long i915_request_wait(struct i915_request *rq, > if (flags & I915_WAIT_LOCKED) > remove_wait_queue(errq, &reset); > remove_wait_queue(&rq->execute, &exec); > + if (pm_qos) > + schedule_work(&pm_qos->del); > trace_i915_request_wait_end(rq); > > return timeout; > -- > 2.18.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Quoting Mika Kuoppala (2018-07-30 14:07:07) > Chris Wilson <chris@chris-wilson.co.uk> writes: > > > If we are waiting for the currently executing request, we have a good > > idea that it will be completed in the very near future and so want to > > cap the CPU_DMA_LATENCY to ensure that we wake up the client quickly. > > > > v2: Not allowed to block in kmalloc after setting TASK_INTERRUPTIBLE. > > v3: Avoid the blocking notifier as well for TASK_INTERRUPTIBLE > > > > Testcase: igt/gem_sync/store-default > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> > > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > > Cc: Eero Tamminen <eero.t.tamminen@intel.com> > > Cc: Francisco Jerez <currojerez@riseup.net> > > --- > > drivers/gpu/drm/i915/i915_request.c | 38 +++++++++++++++++++++++++++++ > > 1 file changed, 38 insertions(+) > > > > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > > index 5c2c93cbab12..7c7746ef0d1b 100644 > > --- a/drivers/gpu/drm/i915/i915_request.c > > +++ b/drivers/gpu/drm/i915/i915_request.c > > @@ -1258,6 +1258,28 @@ static bool __i915_wait_request_check_and_reset(struct i915_request *request) > > return true; > > } > > > > +struct pm_qos { > > + struct pm_qos_request req; > > + struct work_struct add, del; > > +}; > > + > > +static void pm_qos_add(struct work_struct *work) > > +{ > > + struct pm_qos *pm_qos = container_of(work, typeof(*pm_qos), add); > > + > > + pm_qos_add_request(&pm_qos->req, PM_QOS_CPU_DMA_LATENCY, 50); > > +} > > + > > +static void pm_qos_del(struct work_struct *work) > > +{ > > + struct pm_qos *pm_qos = container_of(work, typeof(*pm_qos), del); > > + > > + if (!cancel_work_sync(&pm_qos->add)) > > + pm_qos_remove_request(&pm_qos->req); > > + > > + kfree(pm_qos); > > +} > > + > > /** > > * i915_request_wait - wait until execution of request has finished > > * @rq: the request to wait upon > > @@ -1286,6 +1308,7 @@ long 
i915_request_wait(struct i915_request *rq, > > wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue; > > DEFINE_WAIT_FUNC(reset, default_wake_function); > > DEFINE_WAIT_FUNC(exec, default_wake_function); > > + struct pm_qos *pm_qos = NULL; > > struct intel_wait wait; > > > > might_sleep(); > > @@ -1363,6 +1386,19 @@ long i915_request_wait(struct i915_request *rq, > > break; > > } > > > > + if (!pm_qos && > > + i915_seqno_passed(intel_engine_get_seqno(rq->engine), > > + wait.seqno - 1)) { > > + pm_qos = kzalloc(sizeof(*pm_qos), > > + GFP_NOWAIT | __GFP_NOWARN); > > + if (pm_qos) { > > + INIT_WORK(&pm_qos->add, pm_qos_add); > > + INIT_WORK(&pm_qos->del, pm_qos_del); > > + schedule_work_on(smp_processor_id(), > > + &pm_qos->add); > > + } > > My immediate thoughts are on why this and not preallocate pm_qos > on init/fini and just add/remove requests on suitable spots? For simplicity, we want multiple requests. Now we could go the global route, to save on the kmalloc/kfree; I wasn't sure if that was worth it. The principal cost is on the kmalloc from the per-cpu slab, which we only need before sleeping after having already checked with a short spin that the request isn't likely to complete immediately. We would still need to use workers to avoid the blocking notifier, especially in client wakeup. The way it is structured should mean that only one waiter at a time on each engine will need to allocate, so shouldn't be frequent enough to worry about (certainly gem_sync --run store-default isn't too alarming, that's the test case that goes from 900us to 170us on bxt, and is hitting the kmalloc on every iteration, and has no more latency than if we purely busy-waited.) The only critical factor then is readability. Is it terrible? Maybe a small pm_qos = pm_qos_create(). And a better name than pm_qos so that we associate it with the wait: struct wait_dma_qos? -Chris
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 5c2c93cbab12..7c7746ef0d1b 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1258,6 +1258,28 @@ static bool __i915_wait_request_check_and_reset(struct i915_request *request) return true; } +struct pm_qos { + struct pm_qos_request req; + struct work_struct add, del; +}; + +static void pm_qos_add(struct work_struct *work) +{ + struct pm_qos *pm_qos = container_of(work, typeof(*pm_qos), add); + + pm_qos_add_request(&pm_qos->req, PM_QOS_CPU_DMA_LATENCY, 50); +} + +static void pm_qos_del(struct work_struct *work) +{ + struct pm_qos *pm_qos = container_of(work, typeof(*pm_qos), del); + + if (!cancel_work_sync(&pm_qos->add)) + pm_qos_remove_request(&pm_qos->req); + + kfree(pm_qos); +} + /** * i915_request_wait - wait until execution of request has finished * @rq: the request to wait upon @@ -1286,6 +1308,7 @@ long i915_request_wait(struct i915_request *rq, wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue; DEFINE_WAIT_FUNC(reset, default_wake_function); DEFINE_WAIT_FUNC(exec, default_wake_function); + struct pm_qos *pm_qos = NULL; struct intel_wait wait; might_sleep(); @@ -1363,6 +1386,19 @@ long i915_request_wait(struct i915_request *rq, break; } + if (!pm_qos && + i915_seqno_passed(intel_engine_get_seqno(rq->engine), + wait.seqno - 1)) { + pm_qos = kzalloc(sizeof(*pm_qos), + GFP_NOWAIT | __GFP_NOWARN); + if (pm_qos) { + INIT_WORK(&pm_qos->add, pm_qos_add); + INIT_WORK(&pm_qos->del, pm_qos_del); + schedule_work_on(smp_processor_id(), + &pm_qos->add); + } + } + timeout = io_schedule_timeout(timeout); if (intel_wait_complete(&wait) && @@ -1412,6 +1448,8 @@ long i915_request_wait(struct i915_request *rq, if (flags & I915_WAIT_LOCKED) remove_wait_queue(errq, &reset); remove_wait_queue(&rq->execute, &exec); + if (pm_qos) + schedule_work(&pm_qos->del); trace_i915_request_wait_end(rq); return timeout;
If we are waiting for the currently executing request, we have a good idea that it will be completed in the very near future and so want to cap the CPU_DMA_LATENCY to ensure that we wake up the client quickly. v2: Not allowed to block in kmalloc after setting TASK_INTERRUPTIBLE. v3: Avoid the blocking notifier as well for TASK_INTERRUPTIBLE Testcase: igt/gem_sync/store-default Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Eero Tamminen <eero.t.tamminen@intel.com> Cc: Francisco Jerez <currojerez@riseup.net> --- drivers/gpu/drm/i915/i915_request.c | 38 +++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+)