@@ -368,8 +368,6 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
if (s_job->s_fence->parent &&
dma_fence_remove_callback(s_job->s_fence->parent,
&s_job->cb)) {
- dma_fence_put(s_job->s_fence->parent);
- s_job->s_fence->parent = NULL;
atomic_dec(&sched->hw_rq_count);
} else {
/*
@@ -396,6 +394,14 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
sched->ops->free_job(s_job);
}
}
+
+ /*
+ * Stop pending timer in flight as we rearm it in drm_sched_start. This
+ * avoids the pending timeout work in progress to fire right away after
+ * this TDR finished and before the newly restarted jobs had a
+ * chance to complete.
+ */
+ cancel_delayed_work(&sched->work_tdr);
}
EXPORT_SYMBOL(drm_sched_stop);
@@ -467,6 +473,7 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
dma_fence_set_error(&s_fence->finished, -ECANCELED);
+ dma_fence_put(s_job->s_fence->parent);
s_job->s_fence->parent = sched->ops->run_job(s_job);
atomic_inc(&sched->hw_rq_count);
}
For later driver's reference to see if the fence is signaled. v2: Move parent fence put to resubmit jobs. Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> --- drivers/gpu/drm/scheduler/sched_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-)