diff mbox series

[30/33] drm/i915/gt: Expose reset stop timeout via sysfs

Message ID 20191212140459.1307617-30-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/33] drm/i915: Use EAGAIN for trylock failures | expand

Commit Message

Chris Wilson Dec. 12, 2019, 2:04 p.m. UTC
Allowing ourselves to sleep before a GPU reset after disabling
submission, even for a few milliseconds, gives an innocent context the
opportunity to clear the GPU before the reset occurs. However, how long
to sleep depends on the typical non-preemptible duration (a similar
problem to determining the ideal preempt-reset timeout or even the
heartbeat interval). As this seems to be a hard policy decision, punt it to
userspace.

The timeout can be adjusted using

	/sys/class/drm/card?/engine/*/stop_timeout_ms

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
---
 drivers/gpu/drm/i915/Kconfig.profile         |  3 ++
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 40 ++++++++++++++++++++
 2 files changed, 43 insertions(+)

Comments

Steve Carbonari Jan. 22, 2020, 9:34 p.m. UTC | #1
On Thu, Dec 12, 2019 at 02:04:56PM +0000, Chris Wilson wrote:
> When we allow ourselves to sleep before a GPU reset after disabling
> submission, even for a few milliseconds, gives an innocent context the
> opportunity to clear the GPU before the reset occurs. However, how long
> to sleep depends on the typical non-preemptible duration (a similar
> problem to determining the ideal preempt-reset timeout or even the
> heartbeat interval). As this seems of a hard policy decision, punt it to
> userspace.
> 
> The timeout can be adjusted using
> 
> 	/sys/class/drm/card?/engine/*/stop_timeout_ms
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Jon Bloomfield <jon.bloomfield@intel.com>

Code looks good.
Tested with other sysfs related patches in the series.
The stop_timeout_ms file exists and can be modified.

Reviewed-by: Steve Carbonari <steven.carbonari@intel.com>
Tested-by: Steve Carbonari <steven.carbonari@intel.com>

> ---
>  drivers/gpu/drm/i915/Kconfig.profile         |  3 ++
>  drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 40 ++++++++++++++++++++
>  2 files changed, 43 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
> index 9ee3b59685b9..5f4ec3aec1d2 100644
> --- a/drivers/gpu/drm/i915/Kconfig.profile
> +++ b/drivers/gpu/drm/i915/Kconfig.profile
> @@ -63,6 +63,9 @@ config DRM_I915_STOP_TIMEOUT
>  	  that the reset itself may take longer and so be more disruptive to
>  	  interactive or low latency workloads.
>  
> +	  This is adjustable via
> +	  /sys/class/drm/card?/engine/*/stop_timeout_ms
> +
>  config DRM_I915_TIMESLICE_DURATION
>  	int "Scheduling quantum for userspace batches (ms, jiffy granularity)"
>  	default 1 # milliseconds
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
> index 6d87529c64a7..2b65fed76435 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
> @@ -232,6 +232,45 @@ timeslice_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
>  static struct kobj_attribute timeslice_duration_attr =
>  __ATTR(timeslice_duration_ms, 0644, timeslice_show, timeslice_store);
>  
> +static ssize_t
> +stop_store(struct kobject *kobj, struct kobj_attribute *attr,
> +	   const char *buf, size_t count)
> +{
> +	struct intel_engine_cs *engine = kobj_to_engine(kobj);
> +	unsigned long long duration;
> +	int err;
> +
> +	/*
> +	 * When we allow ourselves to sleep before a GPU reset after disabling
> +	 * submission, even for a few milliseconds, gives an innocent context
> +	 * the opportunity to clear the GPU before the reset occurs. However,
> +	 * how long to sleep depends on the typical non-preemptible duration
> +	 * (a similar problem to determining the ideal preempt-reset timeout
> +	 * or even the heartbeat interval).
> +	 */
> +
> +	err = kstrtoull(buf, 0, &duration);
> +	if (err)
> +		return err;
> +
> +	if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
> +		return -EINVAL;
> +
> +	WRITE_ONCE(engine->props.stop_timeout_ms, duration);
> +	return count;
> +}
> +
> +static ssize_t
> +stop_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
> +{
> +	struct intel_engine_cs *engine = kobj_to_engine(kobj);
> +
> +	return sprintf(buf, "%lu\n", engine->props.stop_timeout_ms);
> +}
> +
> +static struct kobj_attribute stop_timeout_attr =
> +__ATTR(stop_timeout_ms, 0644, stop_show, stop_store);
> +
>  static void kobj_engine_release(struct kobject *kobj)
>  {
>  	kfree(kobj);
> @@ -273,6 +312,7 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
>  		&caps_attr.attr,
>  		&all_caps_attr.attr,
>  		&max_spin_attr.attr,
> +		&stop_timeout_attr.attr,
>  		NULL
>  	};
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 9ee3b59685b9..5f4ec3aec1d2 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -63,6 +63,9 @@  config DRM_I915_STOP_TIMEOUT
 	  that the reset itself may take longer and so be more disruptive to
 	  interactive or low latency workloads.
 
+	  This is adjustable via
+	  /sys/class/drm/card?/engine/*/stop_timeout_ms
+
 config DRM_I915_TIMESLICE_DURATION
 	int "Scheduling quantum for userspace batches (ms, jiffy granularity)"
 	default 1 # milliseconds
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
index 6d87529c64a7..2b65fed76435 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
@@ -232,6 +232,45 @@  timeslice_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
 static struct kobj_attribute timeslice_duration_attr =
 __ATTR(timeslice_duration_ms, 0644, timeslice_show, timeslice_store);
 
+static ssize_t
+stop_store(struct kobject *kobj, struct kobj_attribute *attr,
+	   const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	unsigned long long duration;
+	int err;
+
+	/*
+	 * Allowing ourselves to sleep before a GPU reset after disabling
+	 * submission, even for a few milliseconds, gives an innocent context
+	 * the opportunity to clear the GPU before the reset occurs. However,
+	 * how long to sleep depends on the typical non-preemptible duration
+	 * (a similar problem to determining the ideal preempt-reset timeout
+	 * or even the heartbeat interval).
+	 */
+
+	err = kstrtoull(buf, 0, &duration);
+	if (err)
+		return err;
+
+	if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+		return -EINVAL;
+
+	WRITE_ONCE(engine->props.stop_timeout_ms, duration);
+	return count;
+}
+
+static ssize_t
+stop_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%lu\n", engine->props.stop_timeout_ms);
+}
+
+static struct kobj_attribute stop_timeout_attr =
+__ATTR(stop_timeout_ms, 0644, stop_show, stop_store);
+
 static void kobj_engine_release(struct kobject *kobj)
 {
 	kfree(kobj);
@@ -273,6 +312,7 @@  void intel_engines_add_sysfs(struct drm_i915_private *i915)
 		&caps_attr.attr,
 		&all_caps_attr.attr,
 		&max_spin_attr.attr,
+		&stop_timeout_attr.attr,
 		NULL
 	};