diff mbox series

[v2,2/3] drm/i915/gt: Always send a pulse down the engine after disabling heartbeat

Message ID 20200928121255.21494-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [v2,1/3] drm/i915: Cancel outstanding work after disabling heartbeats on an engine | expand

Commit Message

Chris Wilson Sept. 28, 2020, 12:12 p.m. UTC
Currently, we check we can send a pulse prior to disabling the
heartbeat to verify that we can change the heartbeat, but since we may
re-evaluate execution upon changing the heartbeat interval we need another
pulse afterwards to refresh execution.

v2: Tvrtko asked if we could reduce the double pulse to a single, which
opened up a discussion of how we should handle the pulse-error after
attempting to change the property, and the desire to serialise
adjustment of the property with its validating pulse, and unwind upon
failure.

Fixes: 9a40bddd47ca ("drm/i915/gt: Expose heartbeat interval via sysfs")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org> # v5.7+
---
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 105 +++++++++++-------
 1 file changed, 66 insertions(+), 39 deletions(-)

Comments

Tvrtko Ursulin Sept. 28, 2020, 2:51 p.m. UTC | #1
On 28/09/2020 13:12, Chris Wilson wrote:
> Currently, we check we can send a pulse prior to disabling the
> heartbeat to verify that we can change the heartbeat, but since we may
> re-evaluate execution upon changing the heartbeat interval we need another
> pulse afterwards to refresh execution.
> 
> v2: Tvrtko asked if we could reduce the double pulse to a single, which
> opened up a discussion of how we should handle the pulse-error after
> attempting to change the property, and the desire to serialise
> adjustment of the property with its validating pulse, and unwind upon
> failure.
> 
> Fixes: 9a40bddd47ca ("drm/i915/gt: Expose heartbeat interval via sysfs")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: <stable@vger.kernel.org> # v5.7+
> ---
>   .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 105 +++++++++++-------
>   1 file changed, 66 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> index 8ffdf676c0a0..eda475f50fa7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> @@ -177,36 +177,81 @@ void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
>   	INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
>   }
>   
> +static int __intel_engine_pulse(struct intel_engine_cs *engine)
> +{
> +	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
> +	struct intel_context *ce = engine->kernel_context;
> +	struct i915_request *rq;
> +
> +	lockdep_assert_held(&ce->timeline->mutex);
> +	GEM_BUG_ON(intel_engine_has_preemption(engine));
> +	GEM_BUG_ON(intel_engine_pm_is_awake(engine));
> +
> +	intel_context_enter(ce);
> +	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
> +	intel_context_exit(ce);
> +	if (IS_ERR(rq))
> +		return PTR_ERR(rq);
> +
> +	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
> +	idle_pulse(engine, rq);
> +
> +	__i915_request_commit(rq);
> +	__i915_request_queue(rq, &attr);
> +	GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
> +
> +	return 0;
> +}
> +
> +static unsigned long set_heartbeat(struct intel_engine_cs *engine,
> +				   unsigned long delay)
> +{
> +	unsigned long old;
> +
> +	old = xchg(&engine->props.heartbeat_interval_ms, delay);
> +	if (delay)
> +		intel_engine_unpark_heartbeat(engine);
> +	else
> +		intel_engine_park_heartbeat(engine);
> +
> +	return old;
> +}
> +
>   int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
>   			       unsigned long delay)
>   {
> -	int err;
> +	struct intel_context *ce = engine->kernel_context;
> +	int err = 0;
>   
> -	/* Send one last pulse before to cleanup persistent hogs */
> -	if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) {
> -		err = intel_engine_pulse(engine);
> -		if (err)
> -			return err;
> -	}
> +	if (!delay && !intel_engine_has_preempt_reset(engine))
> +		return -ENODEV;
> +
> +	intel_engine_pm_get(engine);
> +
> +	err = mutex_lock_interruptible(&ce->timeline->mutex);
> +	if (err)
> +		return err;
>   
> -	WRITE_ONCE(engine->props.heartbeat_interval_ms, delay);
> +	if (delay != engine->props.heartbeat_interval_ms) {
> +		unsigned long saved = set_heartbeat(engine, delay);
>   
> -	if (intel_engine_pm_get_if_awake(engine)) {
> -		if (delay)
> -			intel_engine_unpark_heartbeat(engine);
> -		else
> -			intel_engine_park_heartbeat(engine);
> -		intel_engine_pm_put(engine);
> +		/* recheck current execution */
> +		if (intel_engine_has_preemption(engine)) {
> +			err = __intel_engine_pulse(engine);
> +			if (err)
> +				set_heartbeat(engine, saved);
> +		}
>   	}
>   
> -	return 0;
> +	mutex_unlock(&ce->timeline->mutex);
> +	intel_engine_pm_put(engine);
> +
> +	return err;
>   }
>   
>   int intel_engine_pulse(struct intel_engine_cs *engine)
>   {
> -	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
>   	struct intel_context *ce = engine->kernel_context;
> -	struct i915_request *rq;
>   	int err;
>   
>   	if (!intel_engine_has_preemption(engine))
> @@ -215,30 +260,12 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
>   	if (!intel_engine_pm_get_if_awake(engine))
>   		return 0;
>   
> -	if (mutex_lock_interruptible(&ce->timeline->mutex)) {
> -		err = -EINTR;
> -		goto out_rpm;
> +	err = -EINTR;
> +	if (!mutex_lock_interruptible(&ce->timeline->mutex)) {
> +		err = __intel_engine_pulse(engine);
> +		mutex_unlock(&ce->timeline->mutex);
>   	}
>   
> -	intel_context_enter(ce);
> -	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
> -	intel_context_exit(ce);
> -	if (IS_ERR(rq)) {
> -		err = PTR_ERR(rq);
> -		goto out_unlock;
> -	}
> -
> -	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
> -	idle_pulse(engine, rq);
> -
> -	__i915_request_commit(rq);
> -	__i915_request_queue(rq, &attr);
> -	GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
> -	err = 0;
> -
> -out_unlock:
> -	mutex_unlock(&ce->timeline->mutex);
> -out_rpm:
>   	intel_engine_pm_put(engine);
>   	return err;
>   }
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 8ffdf676c0a0..eda475f50fa7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -177,36 +177,81 @@  void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
 	INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
 }
 
+static int __intel_engine_pulse(struct intel_engine_cs *engine)
+{
+	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
+	struct intel_context *ce = engine->kernel_context;
+	struct i915_request *rq;
+
+	lockdep_assert_held(&ce->timeline->mutex);
+	GEM_BUG_ON(intel_engine_has_preemption(engine));
+	GEM_BUG_ON(intel_engine_pm_is_awake(engine));
+
+	intel_context_enter(ce);
+	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+	intel_context_exit(ce);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
+	idle_pulse(engine, rq);
+
+	__i915_request_commit(rq);
+	__i915_request_queue(rq, &attr);
+	GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
+
+	return 0;
+}
+
+static unsigned long set_heartbeat(struct intel_engine_cs *engine,
+				   unsigned long delay)
+{
+	unsigned long old;
+
+	old = xchg(&engine->props.heartbeat_interval_ms, delay);
+	if (delay)
+		intel_engine_unpark_heartbeat(engine);
+	else
+		intel_engine_park_heartbeat(engine);
+
+	return old;
+}
+
 int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
 			       unsigned long delay)
 {
-	int err;
+	struct intel_context *ce = engine->kernel_context;
+	int err = 0;
 
-	/* Send one last pulse before to cleanup persistent hogs */
-	if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) {
-		err = intel_engine_pulse(engine);
-		if (err)
-			return err;
-	}
+	if (!delay && !intel_engine_has_preempt_reset(engine))
+		return -ENODEV;
+
+	intel_engine_pm_get(engine);
+
+	err = mutex_lock_interruptible(&ce->timeline->mutex);
+	if (err)
+		return err;
 
-	WRITE_ONCE(engine->props.heartbeat_interval_ms, delay);
+	if (delay != engine->props.heartbeat_interval_ms) {
+		unsigned long saved = set_heartbeat(engine, delay);
 
-	if (intel_engine_pm_get_if_awake(engine)) {
-		if (delay)
-			intel_engine_unpark_heartbeat(engine);
-		else
-			intel_engine_park_heartbeat(engine);
-		intel_engine_pm_put(engine);
+		/* recheck current execution */
+		if (intel_engine_has_preemption(engine)) {
+			err = __intel_engine_pulse(engine);
+			if (err)
+				set_heartbeat(engine, saved);
+		}
 	}
 
-	return 0;
+	mutex_unlock(&ce->timeline->mutex);
+	intel_engine_pm_put(engine);
+
+	return err;
 }
 
 int intel_engine_pulse(struct intel_engine_cs *engine)
 {
-	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
 	struct intel_context *ce = engine->kernel_context;
-	struct i915_request *rq;
 	int err;
 
 	if (!intel_engine_has_preemption(engine))
@@ -215,30 +260,12 @@  int intel_engine_pulse(struct intel_engine_cs *engine)
 	if (!intel_engine_pm_get_if_awake(engine))
 		return 0;
 
-	if (mutex_lock_interruptible(&ce->timeline->mutex)) {
-		err = -EINTR;
-		goto out_rpm;
+	err = -EINTR;
+	if (!mutex_lock_interruptible(&ce->timeline->mutex)) {
+		err = __intel_engine_pulse(engine);
+		mutex_unlock(&ce->timeline->mutex);
 	}
 
-	intel_context_enter(ce);
-	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
-	intel_context_exit(ce);
-	if (IS_ERR(rq)) {
-		err = PTR_ERR(rq);
-		goto out_unlock;
-	}
-
-	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
-	idle_pulse(engine, rq);
-
-	__i915_request_commit(rq);
-	__i915_request_queue(rq, &attr);
-	GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
-	err = 0;
-
-out_unlock:
-	mutex_unlock(&ce->timeline->mutex);
-out_rpm:
 	intel_engine_pm_put(engine);
 	return err;
 }