diff mbox series

[v3] drm/i915/gt: Track the most recent pulse for the heartbeat

Message ID 20201006094653.7558-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [v3] drm/i915/gt: Track the most recent pulse for the heartbeat | expand

Commit Message

Chris Wilson Oct. 6, 2020, 9:46 a.m. UTC
Since we track the idle_pulse for flushing the barriers and avoid
re-emitting the pulse upon idling if no further action is required, this
also impacts the heartbeat. Before emitting a fresh heartbeat, we look
at the engine idle status and assume that if the pulse was the last
request emitted along the heartbeat, the engine is idling and a
heartbeat pulse is not required. This assumption fails, but we can reuse
the idle pulse as the heartbeat if we are yet to emit one, and so track
the status of that pulse for our engine health check.

This impacts tgl/rcs0 as we rely on the heartbeat for our healthcheck,
because the normal preemption detection mechanism is disabled by default.

Testcase: igt/gem_exec_schedule/preempt-hang/rcs0 #tgl
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

Comments

Mika Kuoppala Oct. 7, 2020, 8:40 a.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Since we track the idle_pulse for flushing the barriers and avoid
> re-emitting the pulse upon idling if no further action is required, this
> also impacts the heartbeat. Before emitting a fresh heartbeat, we look
> at the engine idle status and assume that if the pulse was the last
> request emitted along the heartbeat, the engine is idling and a
> heartbeat pulse not required. This assumption fails, but we can reuse
> the idle pulse as the heartbeat if we are yet to emit one, and so track
> the status of that pulse for our engine health check.
>
> This impacts tgl/rcs0 as we rely on the heartbeat for our healthcheck for
> the normal preemption detection mechanism is disabled by default.
>
> Testcase: igt/gem_exec_schedule/preempt-hang/rcs0 #tgl
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> index 5067d0524d4b..9060385cd69e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> @@ -41,6 +41,8 @@ static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
>  {
>  	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
>  	i915_request_add_active_barriers(rq);
> +	if (!engine->heartbeat.systole && intel_engine_has_heartbeat(engine))
> +		engine->heartbeat.systole = i915_request_get(rq);
>  }
>  
>  static void show_heartbeat(const struct i915_request *rq,
> @@ -144,8 +146,6 @@ static void heartbeat(struct work_struct *wrk)
>  		goto unlock;
>  
>  	idle_pulse(engine, rq);
> -	if (engine->i915->params.enable_hangcheck)
> -		engine->heartbeat.systole = i915_request_get(rq);
>  
>  	__i915_request_commit(rq);
>  	__i915_request_queue(rq, &attr);
> @@ -153,7 +153,7 @@ static void heartbeat(struct work_struct *wrk)
>  unlock:
>  	mutex_unlock(&ce->timeline->mutex);
>  out:
> -	if (!next_heartbeat(engine))
> +	if (!engine->i915->params.enable_hangcheck || !next_heartbeat(engine))
>  		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
>  	intel_engine_pm_put(engine);
>  }
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 5067d0524d4b..9060385cd69e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -41,6 +41,8 @@  static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
 {
 	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
 	i915_request_add_active_barriers(rq);
+	if (!engine->heartbeat.systole && intel_engine_has_heartbeat(engine))
+		engine->heartbeat.systole = i915_request_get(rq);
 }
 
 static void show_heartbeat(const struct i915_request *rq,
@@ -144,8 +146,6 @@  static void heartbeat(struct work_struct *wrk)
 		goto unlock;
 
 	idle_pulse(engine, rq);
-	if (engine->i915->params.enable_hangcheck)
-		engine->heartbeat.systole = i915_request_get(rq);
 
 	__i915_request_commit(rq);
 	__i915_request_queue(rq, &attr);
@@ -153,7 +153,7 @@  static void heartbeat(struct work_struct *wrk)
 unlock:
 	mutex_unlock(&ce->timeline->mutex);
 out:
-	if (!next_heartbeat(engine))
+	if (!engine->i915->params.enable_hangcheck || !next_heartbeat(engine))
 		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
 	intel_engine_pm_put(engine);
 }