diff mbox series

[v2] drm/scheduler: remove timeout work_struct from drm_sched_job

Message ID 20180921185736.2066-1-nayan26deshmukh@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v2] drm/scheduler: remove timeout work_struct from drm_sched_job | expand

Commit Message

Nayan Deshmukh Sept. 21, 2018, 6:57 p.m. UTC
Having a delayed work item per job is redundant, as we only need one
per scheduler to track the timeout of the currently executing job.

v2: the first element of the ring mirror list is the currently
executing job, so we don't need an additional variable for it

Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Suggested-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 28 +++++++++++++++-------------
 include/drm/gpu_scheduler.h            |  6 +++---
 2 files changed, 18 insertions(+), 16 deletions(-)

Comments

Christian König Sept. 22, 2018, 7:23 a.m. UTC | #1
Am 21.09.2018 um 20:57 schrieb Nayan Deshmukh:
> having a delayed work item per job is redundant as we only need one
> per scheduler to track the time out the currently executing job.
>
> v2: the first element of the ring mirror list is the currently
> executing job so we don't need a additional variable for it
>
> Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
> Suggested-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/scheduler/sched_main.c | 28 +++++++++++++++-------------
>   include/drm/gpu_scheduler.h            |  6 +++---
>   2 files changed, 18 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 9ca741f3a0bc..88f6cff136f2 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -197,19 +197,16 @@ static void drm_sched_job_finish(struct work_struct *work)
>   	 * manages to find this job as the next job in the list, the fence
>   	 * signaled check below will prevent the timeout to be restarted.
>   	 */
> -	cancel_delayed_work_sync(&s_job->work_tdr);
> +	cancel_delayed_work_sync(&sched->work_tdr);
>   
>   	spin_lock(&sched->job_list_lock);
>   	/* queue TDR for next job */
> +	list_del(&s_job->node);
>   	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
> -	    !list_is_last(&s_job->node, &sched->ring_mirror_list)) {
> -		struct drm_sched_job *next = list_next_entry(s_job, node);
> -
> -		if (!dma_fence_is_signaled(&next->s_fence->finished))
> -			schedule_delayed_work(&next->work_tdr, sched->timeout);
> +	    !list_empty(&sched->ring_mirror_list)) {
> +		schedule_delayed_work(&sched->work_tdr, sched->timeout);
>   	}

You can remove the {} here as well.

>   	/* remove job from ring_mirror_list */

That comment should move with the list_del().

Apart from that it looks good to me,
Christian.

> -	list_del(&s_job->node);
>   	spin_unlock(&sched->job_list_lock);
>   
>   	dma_fence_put(&s_job->s_fence->finished);
> @@ -236,16 +233,21 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
>   	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
>   	    list_first_entry_or_null(&sched->ring_mirror_list,
>   				     struct drm_sched_job, node) == s_job)
> -		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
> +		schedule_delayed_work(&sched->work_tdr, sched->timeout);
>   	spin_unlock(&sched->job_list_lock);
>   }
>   
>   static void drm_sched_job_timedout(struct work_struct *work)
>   {
> -	struct drm_sched_job *job = container_of(work, struct drm_sched_job,
> -						 work_tdr.work);
> +	struct drm_gpu_scheduler *sched;
> +	struct drm_sched_job *job;
> +
> +	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
> +	job = list_first_entry_or_null(&sched->ring_mirror_list,
> +				       struct drm_sched_job, node);
>   
> -	job->sched->ops->timedout_job(job);
> +	if (job)
> +		job->sched->ops->timedout_job(job);
>   }
>   
>   /**
> @@ -315,7 +317,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
>   	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
>   					 struct drm_sched_job, node);
>   	if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
> -		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
> +		schedule_delayed_work(&sched->work_tdr, sched->timeout);
>   
>   	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
>   		struct drm_sched_fence *s_fence = s_job->s_fence;
> @@ -384,7 +386,6 @@ int drm_sched_job_init(struct drm_sched_job *job,
>   
>   	INIT_WORK(&job->finish_work, drm_sched_job_finish);
>   	INIT_LIST_HEAD(&job->node);
> -	INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);
>   
>   	return 0;
>   }
> @@ -575,6 +576,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>   	INIT_LIST_HEAD(&sched->ring_mirror_list);
>   	spin_lock_init(&sched->job_list_lock);
>   	atomic_set(&sched->hw_rq_count, 0);
> +	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
>   	atomic_set(&sched->num_jobs, 0);
>   	atomic64_set(&sched->job_id_count, 0);
>   
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index daec50f887b3..d87b268f1781 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
>    *               finished to remove the job from the
>    *               @drm_gpu_scheduler.ring_mirror_list.
>    * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list.
> - * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout
> - *            interval is over.
>    * @id: a unique id assigned to each job scheduled on the scheduler.
>    * @karma: increment on every hang caused by this job. If this exceeds the hang
>    *         limit of the scheduler then the job is marked guilty and will not
> @@ -195,7 +193,6 @@ struct drm_sched_job {
>   	struct dma_fence_cb		finish_cb;
>   	struct work_struct		finish_work;
>   	struct list_head		node;
> -	struct delayed_work		work_tdr;
>   	uint64_t			id;
>   	atomic_t			karma;
>   	enum drm_sched_priority		s_priority;
> @@ -259,6 +256,8 @@ struct drm_sched_backend_ops {
>    *                 finished.
>    * @hw_rq_count: the number of jobs currently in the hardware queue.
>    * @job_id_count: used to assign unique id to the each job.
> + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
> + *            timeout interval is over.
>    * @thread: the kthread on which the scheduler which run.
>    * @ring_mirror_list: the list of jobs which are currently in the job queue.
>    * @job_list_lock: lock to protect the ring_mirror_list.
> @@ -278,6 +277,7 @@ struct drm_gpu_scheduler {
>   	wait_queue_head_t		job_scheduled;
>   	atomic_t			hw_rq_count;
>   	atomic64_t			job_id_count;
> +	struct delayed_work		work_tdr;
>   	struct task_struct		*thread;
>   	struct list_head		ring_mirror_list;
>   	spinlock_t			job_list_lock;
diff mbox series

Patch

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 9ca741f3a0bc..88f6cff136f2 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -197,19 +197,16 @@  static void drm_sched_job_finish(struct work_struct *work)
 	 * manages to find this job as the next job in the list, the fence
 	 * signaled check below will prevent the timeout to be restarted.
 	 */
-	cancel_delayed_work_sync(&s_job->work_tdr);
+	cancel_delayed_work_sync(&sched->work_tdr);
 
 	spin_lock(&sched->job_list_lock);
 	/* queue TDR for next job */
+	list_del(&s_job->node);
 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
-	    !list_is_last(&s_job->node, &sched->ring_mirror_list)) {
-		struct drm_sched_job *next = list_next_entry(s_job, node);
-
-		if (!dma_fence_is_signaled(&next->s_fence->finished))
-			schedule_delayed_work(&next->work_tdr, sched->timeout);
+	    !list_empty(&sched->ring_mirror_list)) {
+		schedule_delayed_work(&sched->work_tdr, sched->timeout);
 	}
 	/* remove job from ring_mirror_list */
-	list_del(&s_job->node);
 	spin_unlock(&sched->job_list_lock);
 
 	dma_fence_put(&s_job->s_fence->finished);
@@ -236,16 +233,21 @@  static void drm_sched_job_begin(struct drm_sched_job *s_job)
 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
 	    list_first_entry_or_null(&sched->ring_mirror_list,
 				     struct drm_sched_job, node) == s_job)
-		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+		schedule_delayed_work(&sched->work_tdr, sched->timeout);
 	spin_unlock(&sched->job_list_lock);
 }
 
 static void drm_sched_job_timedout(struct work_struct *work)
 {
-	struct drm_sched_job *job = container_of(work, struct drm_sched_job,
-						 work_tdr.work);
+	struct drm_gpu_scheduler *sched;
+	struct drm_sched_job *job;
+
+	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
+	job = list_first_entry_or_null(&sched->ring_mirror_list,
+				       struct drm_sched_job, node);
 
-	job->sched->ops->timedout_job(job);
+	if (job)
+		job->sched->ops->timedout_job(job);
 }
 
 /**
@@ -315,7 +317,7 @@  void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
 	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
 					 struct drm_sched_job, node);
 	if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
-		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+		schedule_delayed_work(&sched->work_tdr, sched->timeout);
 
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
 		struct drm_sched_fence *s_fence = s_job->s_fence;
@@ -384,7 +386,6 @@  int drm_sched_job_init(struct drm_sched_job *job,
 
 	INIT_WORK(&job->finish_work, drm_sched_job_finish);
 	INIT_LIST_HEAD(&job->node);
-	INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);
 
 	return 0;
 }
@@ -575,6 +576,7 @@  int drm_sched_init(struct drm_gpu_scheduler *sched,
 	INIT_LIST_HEAD(&sched->ring_mirror_list);
 	spin_lock_init(&sched->job_list_lock);
 	atomic_set(&sched->hw_rq_count, 0);
+	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
 	atomic_set(&sched->num_jobs, 0);
 	atomic64_set(&sched->job_id_count, 0);
 
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index daec50f887b3..d87b268f1781 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -175,8 +175,6 @@  struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
  *               finished to remove the job from the
  *               @drm_gpu_scheduler.ring_mirror_list.
  * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list.
- * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout
- *            interval is over.
  * @id: a unique id assigned to each job scheduled on the scheduler.
  * @karma: increment on every hang caused by this job. If this exceeds the hang
  *         limit of the scheduler then the job is marked guilty and will not
@@ -195,7 +193,6 @@  struct drm_sched_job {
 	struct dma_fence_cb		finish_cb;
 	struct work_struct		finish_work;
 	struct list_head		node;
-	struct delayed_work		work_tdr;
 	uint64_t			id;
 	atomic_t			karma;
 	enum drm_sched_priority		s_priority;
@@ -259,6 +256,8 @@  struct drm_sched_backend_ops {
  *                 finished.
  * @hw_rq_count: the number of jobs currently in the hardware queue.
  * @job_id_count: used to assign unique id to the each job.
+ * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
+ *            timeout interval is over.
  * @thread: the kthread on which the scheduler which run.
  * @ring_mirror_list: the list of jobs which are currently in the job queue.
  * @job_list_lock: lock to protect the ring_mirror_list.
@@ -278,6 +277,7 @@  struct drm_gpu_scheduler {
 	wait_queue_head_t		job_scheduled;
 	atomic_t			hw_rq_count;
 	atomic64_t			job_id_count;
+	struct delayed_work		work_tdr;
 	struct task_struct		*thread;
 	struct list_head		ring_mirror_list;
 	spinlock_t			job_list_lock;