diff mbox series

[v6,11/16] drm/sched: Make timeout timer rearm conditional.

Message ID 20210510163625.407105-12-andrey.grodzovsky@amd.com (mailing list archive)
State New, archived
Headers show
Series RFC Support hot device unplug in amdgpu | expand

Commit Message

Andrey Grodzovsky May 10, 2021, 4:36 p.m. UTC
We don't want to rearm the timeout timer if the driver's timedout_job
hook reports that the device is gone (DRM_GPU_SCHED_STAT_ENODEV).

v5: Update drm_gpu_sched_stat values in code.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

Comments

Christian König May 11, 2021, 6:52 a.m. UTC | #1
Am 10.05.21 um 18:36 schrieb Andrey Grodzovsky:
> We don't want to rearm the timer if driver hook reports
> that the device is gone.
>
> v5: Update drm_gpu_sched_stat values in code.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/scheduler/sched_main.c | 11 +++++++----
>   1 file changed, 7 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index f4f474944169..8d1211e87101 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -314,6 +314,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
>   {
>   	struct drm_gpu_scheduler *sched;
>   	struct drm_sched_job *job;
> +	enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL;
>   
>   	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
>   
> @@ -331,7 +332,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
>   		list_del_init(&job->list);
>   		spin_unlock(&sched->job_list_lock);
>   
> -		job->sched->ops->timedout_job(job);
> +		status = job->sched->ops->timedout_job(job);
>   
>   		/*
>   		 * Guilty job did complete and hence needs to be manually removed
> @@ -345,9 +346,11 @@ static void drm_sched_job_timedout(struct work_struct *work)
>   		spin_unlock(&sched->job_list_lock);
>   	}
>   
> -	spin_lock(&sched->job_list_lock);
> -	drm_sched_start_timeout(sched);
> -	spin_unlock(&sched->job_list_lock);
> +	if (status != DRM_GPU_SCHED_STAT_ENODEV) {
> +		spin_lock(&sched->job_list_lock);
> +		drm_sched_start_timeout(sched);
> +		spin_unlock(&sched->job_list_lock);
> +	}
>   }
>   
>    /**
diff mbox series

Patch

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index f4f474944169..8d1211e87101 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -314,6 +314,7 @@  static void drm_sched_job_timedout(struct work_struct *work)
 {
 	struct drm_gpu_scheduler *sched;
 	struct drm_sched_job *job;
+	enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL;
 
 	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
 
@@ -331,7 +332,7 @@  static void drm_sched_job_timedout(struct work_struct *work)
 		list_del_init(&job->list);
 		spin_unlock(&sched->job_list_lock);
 
-		job->sched->ops->timedout_job(job);
+		status = job->sched->ops->timedout_job(job);
 
 		/*
 		 * Guilty job did complete and hence needs to be manually removed
@@ -345,9 +346,11 @@  static void drm_sched_job_timedout(struct work_struct *work)
 		spin_unlock(&sched->job_list_lock);
 	}
 
-	spin_lock(&sched->job_list_lock);
-	drm_sched_start_timeout(sched);
-	spin_unlock(&sched->job_list_lock);
+	if (status != DRM_GPU_SCHED_STAT_ENODEV) {
+		spin_lock(&sched->job_list_lock);
+		drm_sched_start_timeout(sched);
+		spin_unlock(&sched->job_list_lock);
+	}
 }
 
  /**