[17/27] drm/i915/gt: Expose busywait duration to sysfs

Message ID	20191112092854.869-17-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=YtjL=ZE=lists.freedesktop.org=intel-gfx-bounces@kernel.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org A044A21925 From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Tue, 12 Nov 2019 09:28:44 +0000 Message-Id: <20191112092854.869-17-chris@chris-wilson.co.uk> In-Reply-To: <20191112092854.869-1-chris@chris-wilson.co.uk> References: <20191112092854.869-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 17/27] drm/i915/gt: Expose busywait duration to sysfs Precedence: list Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>
Series	[01/27] drm/i915: Flush context free work on cleanup \| expand [01/27] drm/i915: Flush context free work on cleanup [02/27] drm/i915/gt: Try an extra flush on the Haswell blitter [03/27] drm/i915/gem: Silence sparse for RCU protection inside the constructor [04/27] drm/i915/selftests: Mock the engine sorting for easy validation [05/27] Revert "drm/i915: use a separate context for gpu relocs" [06/27] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request [07/27] drm/i915: Drop GEM context as a direct link from i915_request [08/27] drm/i915: Push the use-semaphore marker onto the intel_context [09/27] drm/i915: Remove i915->kernel_context [10/27] drm/i915: Move i915_gem_init_contexts() earlier [11/27] drm/i915/uc: Use an internal buffer for firmware images [12/27] drm/i915/gt: Pull GT initialisation under intel_gt_init() [13/27] drm/i915/gt: Merge engine init/setup loops [14/27] drm/i915/gt: Expose engine properties via sysfs [15/27] drm/i915/gt: Expose engine->mmio_base via sysfs [16/27] drm/i915/gt: Expose timeslice duration to sysfs [17/27] drm/i915/gt: Expose busywait duration to sysfs [18/27] drm/i915/gt: Expose reset stop timeout via sysfs [19/27] drm/i915/gt: Expose preempt reset timeout via sysfs [20/27] drm/i915/gt: Expose heartbeat interval via sysfs [21/27] drm/i915: Flush idle barriers when waiting [22/27] drm/i915: Allow userspace to specify ringsize on construction [23/27] drm/i915/gem: Honour O_NONBLOCK before throttling execbuf submissions [24/27] drm/i915/gt: Set unused mocs entry to follow PTE on tgl as on all others [25/27] drm/i915/gt: Tidy up debug-warns for the mocs control table [26/27] drm/i915/gt: Refactor mocs loops into single control macro [27/27] drm/i915/selftests: Add coverage of mocs registers

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index b87c8f485a24..b9060b023c51 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -35,9 +35,9 @@ config DRM_I915_PREEMPT_TIMEOUT May be 0 to disable the timeout. -config DRM_I915_SPIN_REQUEST - int "Busywait for request completion (us)" - default 5 # microseconds +config DRM_I915_MAX_REQUEST_BUSYWAIT + int "Busywait for request completion limit (ns)" + default 8000 # nanoseconds help Before sleeping waiting for a request (GPU operation) to complete, we may spend some time polling for its completion. As the IRQ may @@ -45,6 +45,9 @@ config DRM_I915_SPIN_REQUEST check if the request will complete in the time it would have taken us to enable the interrupt. + This is adjustable via + /sys/class/drm/card?/engine/*/max_busywait_duration_ns + May be 0 to disable the initial spin. In practice, we estimate the cost of enabling the interrupt (if currently disabled) to be a few microseconds. diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 54e785dc6a3b..79f70b305bed 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -311,6 +311,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->props.heartbeat_interval_ms = CONFIG_DRM_I915_HEARTBEAT_INTERVAL; + engine->props.max_busywait_duration_ns = + CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT; engine->props.preempt_timeout_ms = CONFIG_DRM_I915_PREEMPT_TIMEOUT; engine->props.stop_timeout_ms = diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c index b1bd768b13d7..6d87529c64a7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c @@ -142,6 +142,54 @@ all_caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) static struct kobj_attribute all_caps_attr = __ATTR(known_capabilities, 0444, all_caps_show, NULL); +static ssize_t +max_spin_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + unsigned long long duration; + int err; + + /* + * When waiting for a request, if is it currently being executed + * on the GPU, we busywait for a short while before sleeping. The + * premise is that most requests are short, and if it is already + * executing then there is a good chance that it will complete + * before we can setup the interrupt handler and go to sleep. + * We try to offset the cost of going to sleep, by first spinning + * on the request -- if it completed in less time than it would take + * to go sleep, process the interrupt and return back to the client, + * then we have saved the client some latency, albeit at the cost + * of spinning on an expensive CPU core. + * + * While we try to avoid waiting at all for a request that is unlikely + * to complete, deciding how long it is worth spinning is for is an + * arbitrary decision: trading off power vs latency. + */ + + err = kstrtoull(buf, 0, &duration); + if (err) + return err; + + if (duration > jiffies_to_nsecs(2)) + return -EINVAL; + + WRITE_ONCE(engine->props.max_busywait_duration_ns, duration); + + return count; +} + +static ssize_t +max_spin_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->props.max_busywait_duration_ns); +} + +static struct kobj_attribute max_spin_attr = +__ATTR(max_busywait_duration_ns, 0644, max_spin_show, max_spin_store); + static ssize_t timeslice_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -224,6 +272,7 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915) &mmio_attr.attr, &caps_attr.attr, &all_caps_attr.attr, + &max_spin_attr.attr, NULL }; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 6bdca3e7ae9f..926e50b6fbc9 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -516,6 +516,7 @@ struct intel_engine_cs { struct { unsigned long heartbeat_interval_ms; + unsigned long max_busywait_duration_ns; unsigned long preempt_timeout_ms; unsigned long stop_timeout_ms; unsigned long timeslice_duration_ms; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 20eeef386577..5b9d2110c573 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1303,7 +1303,7 @@ void i915_request_add(struct i915_request *rq) mutex_unlock(&tl->mutex); } -static unsigned long local_clock_us(unsigned int *cpu) +static unsigned long local_clock_ns(unsigned int *cpu) { unsigned long t; @@ -1320,7 +1320,7 @@ static unsigned long local_clock_us(unsigned int *cpu) * stop busywaiting, see busywait_stop(). */ *cpu = get_cpu(); - t = local_clock() >> 10; + t = local_clock(); put_cpu(); return t; @@ -1330,15 +1330,15 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) { unsigned int this_cpu; - if (time_after(local_clock_us(&this_cpu), timeout)) + if (time_after(local_clock_ns(&this_cpu), timeout)) return true; return this_cpu != cpu; } -static bool __i915_spin_request(const struct i915_request * const rq, - int state, unsigned long timeout_us) +static bool __i915_spin_request(const struct i915_request * const rq, int state) { + unsigned long timeout_ns; unsigned int cpu; /* @@ -1366,7 +1366,8 @@ static bool __i915_spin_request(const struct i915_request * const rq, * takes to sleep on a request, on the order of a microsecond. */ - timeout_us += local_clock_us(&cpu); + timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns); + timeout_ns += local_clock_ns(&cpu); do { if (i915_request_completed(rq)) return true; @@ -1374,7 +1375,7 @@ static bool __i915_spin_request(const struct i915_request * const rq, if (signal_pending_state(state, current)) break; - if (busywait_stop(timeout_us, cpu)) + if (busywait_stop(timeout_ns, cpu)) break; cpu_relax(); @@ -1460,8 +1461,8 @@ long i915_request_wait(struct i915_request *rq, * completion. That requires having a good predictor for the request * duration, which we currently lack. */ - if (IS_ACTIVE(CONFIG_DRM_I915_SPIN_REQUEST) && - __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) { + if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) && + __i915_spin_request(rq, state)) { dma_fence_signal(&rq->fence); goto out; }

[17/27] drm/i915/gt: Expose busywait duration to sysfs

Commit Message

Patch