Message ID | 20180921185736.2066-1-nayan26deshmukh@gmail.com (mailing list archive)
---|---
State | New, archived
Series | [v2] drm/scheduler: remove timeout work_struct from drm_sched_job
On 21.09.2018 20:57, Nayan Deshmukh wrote:
> Having a delayed work item per job is redundant, as we only need one
> per scheduler to track the timeout of the currently executing job.
>
> v2: the first element of the ring mirror list is the currently
> executing job, so we don't need an additional variable for it.
>
> Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
> Suggested-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/scheduler/sched_main.c | 28 +++++++++++++++-------------
>  include/drm/gpu_scheduler.h            |  6 +++---
>  2 files changed, 18 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 9ca741f3a0bc..88f6cff136f2 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -197,19 +197,16 @@ static void drm_sched_job_finish(struct work_struct *work)
>  	 * manages to find this job as the next job in the list, the fence
>  	 * signaled check below will prevent the timeout to be restarted.
>  	 */
> -	cancel_delayed_work_sync(&s_job->work_tdr);
> +	cancel_delayed_work_sync(&sched->work_tdr);
>
>  	spin_lock(&sched->job_list_lock);
>  	/* queue TDR for next job */
> +	list_del(&s_job->node);
>  	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
> -	    !list_is_last(&s_job->node, &sched->ring_mirror_list)) {
> -		struct drm_sched_job *next = list_next_entry(s_job, node);
> -
> -		if (!dma_fence_is_signaled(&next->s_fence->finished))
> -			schedule_delayed_work(&next->work_tdr, sched->timeout);
> +	    !list_empty(&sched->ring_mirror_list)) {
> +		schedule_delayed_work(&sched->work_tdr, sched->timeout);
>  	}

You can remove the {} here as well.

>  	/* remove job from ring_mirror_list */

That comment should move with the list_del().

Apart from that it looks good to me,
Christian.

> -	list_del(&s_job->node);
>  	spin_unlock(&sched->job_list_lock);
>
>  	dma_fence_put(&s_job->s_fence->finished);
> @@ -236,16 +233,21 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
>  	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
>  	    list_first_entry_or_null(&sched->ring_mirror_list,
>  				     struct drm_sched_job, node) == s_job)
> -		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
> +		schedule_delayed_work(&sched->work_tdr, sched->timeout);
>  	spin_unlock(&sched->job_list_lock);
>  }
>
>  static void drm_sched_job_timedout(struct work_struct *work)
>  {
> -	struct drm_sched_job *job = container_of(work, struct drm_sched_job,
> -						 work_tdr.work);
> +	struct drm_gpu_scheduler *sched;
> +	struct drm_sched_job *job;
> +
> +	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
> +	job = list_first_entry_or_null(&sched->ring_mirror_list,
> +				       struct drm_sched_job, node);
>
> -	job->sched->ops->timedout_job(job);
> +	if (job)
> +		job->sched->ops->timedout_job(job);
>  }
>
>  /**
> @@ -315,7 +317,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
>  	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
>  					 struct drm_sched_job, node);
>  	if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
> -		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
> +		schedule_delayed_work(&sched->work_tdr, sched->timeout);
>
>  	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
>  		struct drm_sched_fence *s_fence = s_job->s_fence;
> @@ -384,7 +386,6 @@ int drm_sched_job_init(struct drm_sched_job *job,
>
>  	INIT_WORK(&job->finish_work, drm_sched_job_finish);
>  	INIT_LIST_HEAD(&job->node);
> -	INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);
>
>  	return 0;
>  }
> @@ -575,6 +576,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>  	INIT_LIST_HEAD(&sched->ring_mirror_list);
>  	spin_lock_init(&sched->job_list_lock);
>  	atomic_set(&sched->hw_rq_count, 0);
> +	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
>  	atomic_set(&sched->num_jobs, 0);
>  	atomic64_set(&sched->job_id_count, 0);
>
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index daec50f887b3..d87b268f1781 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
>   *               finished to remove the job from the
>   *               @drm_gpu_scheduler.ring_mirror_list.
>   * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list.
> - * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout
> - *            interval is over.
>   * @id: a unique id assigned to each job scheduled on the scheduler.
>   * @karma: increment on every hang caused by this job. If this exceeds the hang
>   *         limit of the scheduler then the job is marked guilty and will not
> @@ -195,7 +193,6 @@ struct drm_sched_job {
>  	struct dma_fence_cb		finish_cb;
>  	struct work_struct		finish_work;
>  	struct list_head		node;
> -	struct delayed_work		work_tdr;
>  	uint64_t			id;
>  	atomic_t			karma;
>  	enum drm_sched_priority		s_priority;
> @@ -259,6 +256,8 @@ struct drm_sched_backend_ops {
>   *                finished.
>   * @hw_rq_count: the number of jobs currently in the hardware queue.
>   * @job_id_count: used to assign unique id to the each job.
> + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
> + *            timeout interval is over.
>   * @thread: the kthread on which the scheduler which run.
>   * @ring_mirror_list: the list of jobs which are currently in the job queue.
>   * @job_list_lock: lock to protect the ring_mirror_list.
> @@ -278,6 +277,7 @@ struct drm_gpu_scheduler {
>  	wait_queue_head_t		job_scheduled;
>  	atomic_t			hw_rq_count;
>  	atomic64_t			job_id_count;
> +	struct delayed_work		work_tdr;
>  	struct task_struct		*thread;
>  	struct list_head		ring_mirror_list;
>  	spinlock_t			job_list_lock;
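To make the scheme concrete, here is a minimal, self-contained sketch of the pattern the patch adopts: a single delayed work item per scheduler, always armed for the job at the head of the mirror list. The toy_* names and the pr_err() recovery stub are hypothetical; only the list, spinlock, and workqueue primitives match what the patch actually uses.

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/printk.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

/* Stripped-down scheduler: one shared TDR timer instead of a
 * delayed work item embedded in every job. */
struct toy_sched {
	struct list_head	ring_mirror_list; /* in-flight jobs, oldest first */
	spinlock_t		job_list_lock;
	long			timeout;          /* in jiffies */
	struct delayed_work	work_tdr;         /* the single shared timer */
};

struct toy_job {
	struct list_head	node;             /* link into ring_mirror_list */
};

/* The handler only ever looks at the head of the list: the oldest
 * unfinished job is, by construction, the one whose timeout expired. */
static void toy_sched_timedout(struct work_struct *work)
{
	struct toy_sched *sched =
		container_of(work, struct toy_sched, work_tdr.work);
	struct toy_job *job =
		list_first_entry_or_null(&sched->ring_mirror_list,
					 struct toy_job, node);

	if (job)
		pr_err("toy_sched: job %p timed out\n", job);
}

static void toy_sched_init(struct toy_sched *sched, long timeout)
{
	INIT_LIST_HEAD(&sched->ring_mirror_list);
	spin_lock_init(&sched->job_list_lock);
	sched->timeout = timeout;
	INIT_DELAYED_WORK(&sched->work_tdr, toy_sched_timedout);
}

/* On completion, drop the finished job and re-arm the shared timer
 * for the new head, if any job is still in flight. */
static void toy_job_finish(struct toy_sched *sched, struct toy_job *job)
{
	cancel_delayed_work_sync(&sched->work_tdr);

	spin_lock(&sched->job_list_lock);
	list_del(&job->node);
	if (!list_empty(&sched->ring_mirror_list))
		schedule_delayed_work(&sched->work_tdr, sched->timeout);
	spin_unlock(&sched->job_list_lock);
}

The per-scheduler timer is sufficient because jobs on a ring complete in submission order, so only the job at the head of the mirror list can be the first one to exceed its timeout.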
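For completeness, one reading of how the locked section of drm_sched_job_finish() would look with both review comments addressed (braces around the single-statement if dropped, and the "remove job from ring_mirror_list" comment moved up with the list_del()); the actual follow-up revision may of course differ:

	spin_lock(&sched->job_list_lock);
	/* remove job from ring_mirror_list */
	list_del(&s_job->node);
	/* queue TDR for next job */
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !list_empty(&sched->ring_mirror_list))
		schedule_delayed_work(&sched->work_tdr, sched->timeout);
	spin_unlock(&sched->job_list_lock);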