Message ID | 20180925170902.3676-1-nayan26deshmukh@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2] drm/scheduler: remove timeout work_struct from drm_sched_job | expand |
Am 25.09.2018 um 19:09 schrieb Nayan Deshmukh: > having a delayed work item per job is redundant as we only need one > per scheduler to track the time out the currently executing job. > > v2: the first element of the ring mirror list is the currently > executing job so we don't need a additional variable for it > > Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com> > Suggested-by: Christian König <christian.koenig@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Going to push that into our branch on the next best occasion. Christian. > --- > drivers/gpu/drm/scheduler/sched_main.c | 31 ++++++++++++++++--------------- > include/drm/gpu_scheduler.h | 6 +++--- > 2 files changed, 19 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c > index 9ca741f3a0bc..4e8505d51795 100644 > --- a/drivers/gpu/drm/scheduler/sched_main.c > +++ b/drivers/gpu/drm/scheduler/sched_main.c > @@ -197,19 +197,15 @@ static void drm_sched_job_finish(struct work_struct *work) > * manages to find this job as the next job in the list, the fence > * signaled check below will prevent the timeout to be restarted. > */ > - cancel_delayed_work_sync(&s_job->work_tdr); > + cancel_delayed_work_sync(&sched->work_tdr); > > spin_lock(&sched->job_list_lock); > - /* queue TDR for next job */ > - if (sched->timeout != MAX_SCHEDULE_TIMEOUT && > - !list_is_last(&s_job->node, &sched->ring_mirror_list)) { > - struct drm_sched_job *next = list_next_entry(s_job, node); > - > - if (!dma_fence_is_signaled(&next->s_fence->finished)) > - schedule_delayed_work(&next->work_tdr, sched->timeout); > - } > /* remove job from ring_mirror_list */ > list_del(&s_job->node); > + /* queue TDR for next job */ > + if (sched->timeout != MAX_SCHEDULE_TIMEOUT && > + !list_empty(&sched->ring_mirror_list)) > + schedule_delayed_work(&sched->work_tdr, sched->timeout); > spin_unlock(&sched->job_list_lock); > > dma_fence_put(&s_job->s_fence->finished); > @@ -236,16 +232,21 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job) > if (sched->timeout != MAX_SCHEDULE_TIMEOUT && > list_first_entry_or_null(&sched->ring_mirror_list, > struct drm_sched_job, node) == s_job) > - schedule_delayed_work(&s_job->work_tdr, sched->timeout); > + schedule_delayed_work(&sched->work_tdr, sched->timeout); > spin_unlock(&sched->job_list_lock); > } > > static void drm_sched_job_timedout(struct work_struct *work) > { > - struct drm_sched_job *job = container_of(work, struct drm_sched_job, > - work_tdr.work); > + struct drm_gpu_scheduler *sched; > + struct drm_sched_job *job; > + > + sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); > + job = list_first_entry_or_null(&sched->ring_mirror_list, > + struct drm_sched_job, node); > > - job->sched->ops->timedout_job(job); > + if (job) > + job->sched->ops->timedout_job(job); > } > > /** > @@ -315,7 +316,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) > s_job = list_first_entry_or_null(&sched->ring_mirror_list, > struct drm_sched_job, node); > if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT) > - schedule_delayed_work(&s_job->work_tdr, sched->timeout); > + schedule_delayed_work(&sched->work_tdr, sched->timeout); > > list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { > struct drm_sched_fence *s_fence = s_job->s_fence; > @@ -384,7 +385,6 @@ int drm_sched_job_init(struct drm_sched_job *job, > > INIT_WORK(&job->finish_work, drm_sched_job_finish); > INIT_LIST_HEAD(&job->node); > - 
INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout); > > return 0; > } > @@ -575,6 +575,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, > INIT_LIST_HEAD(&sched->ring_mirror_list); > spin_lock_init(&sched->job_list_lock); > atomic_set(&sched->hw_rq_count, 0); > + INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); > atomic_set(&sched->num_jobs, 0); > atomic64_set(&sched->job_id_count, 0); > > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h > index daec50f887b3..d87b268f1781 100644 > --- a/include/drm/gpu_scheduler.h > +++ b/include/drm/gpu_scheduler.h > @@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); > * finished to remove the job from the > * @drm_gpu_scheduler.ring_mirror_list. > * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list. > - * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout > - * interval is over. > * @id: a unique id assigned to each job scheduled on the scheduler. > * @karma: increment on every hang caused by this job. If this exceeds the hang > * limit of the scheduler then the job is marked guilty and will not > @@ -195,7 +193,6 @@ struct drm_sched_job { > struct dma_fence_cb finish_cb; > struct work_struct finish_work; > struct list_head node; > - struct delayed_work work_tdr; > uint64_t id; > atomic_t karma; > enum drm_sched_priority s_priority; > @@ -259,6 +256,8 @@ struct drm_sched_backend_ops { > * finished. > * @hw_rq_count: the number of jobs currently in the hardware queue. > * @job_id_count: used to assign unique id to the each job. > + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the > + * timeout interval is over. > * @thread: the kthread on which the scheduler which run. > * @ring_mirror_list: the list of jobs which are currently in the job queue. > * @job_list_lock: lock to protect the ring_mirror_list. > @@ -278,6 +277,7 @@ struct drm_gpu_scheduler { > wait_queue_head_t job_scheduled; > atomic_t hw_rq_count; > atomic64_t job_id_count; > + struct delayed_work work_tdr; > struct task_struct *thread; > struct list_head ring_mirror_list; > spinlock_t job_list_lock;
Hi Nayan, Am Mittwoch, den 26.09.2018, 02:09 +0900 schrieb Nayan Deshmukh: > having a delayed work item per job is redundant as we only need one > per scheduler to track the time out the currently executing job. > > v2: the first element of the ring mirror list is the currently > executing job so we don't need a additional variable for it > > Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com> > Suggested-by: Christian König <christian.koenig@amd.com> > --- > drivers/gpu/drm/scheduler/sched_main.c | 31 ++++++++++++++++--------------- > include/drm/gpu_scheduler.h | 6 +++--- > 2 files changed, 19 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c > index 9ca741f3a0bc..4e8505d51795 100644 > --- a/drivers/gpu/drm/scheduler/sched_main.c > +++ b/drivers/gpu/drm/scheduler/sched_main.c > @@ -197,19 +197,15 @@ static void drm_sched_job_finish(struct work_struct *work) > * manages to find this job as the next job in the list, the fence > * signaled check below will prevent the timeout to be restarted. > */ > - cancel_delayed_work_sync(&s_job->work_tdr); > + cancel_delayed_work_sync(&sched->work_tdr); > > spin_lock(&sched->job_list_lock); > - /* queue TDR for next job */ > - if (sched->timeout != MAX_SCHEDULE_TIMEOUT && > - !list_is_last(&s_job->node, &sched->ring_mirror_list)) { > - struct drm_sched_job *next = list_next_entry(s_job, node); > - > - if (!dma_fence_is_signaled(&next->s_fence->finished)) > - schedule_delayed_work(&next->work_tdr, sched->timeout); > - } > /* remove job from ring_mirror_list */ > list_del(&s_job->node); > + /* queue TDR for next job */ > + if (sched->timeout != MAX_SCHEDULE_TIMEOUT && > + !list_empty(&sched->ring_mirror_list)) > + schedule_delayed_work(&sched->work_tdr, sched->timeout); > spin_unlock(&sched->job_list_lock); > > dma_fence_put(&s_job->s_fence->finished); > @@ -236,16 +232,21 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job) > if (sched->timeout != MAX_SCHEDULE_TIMEOUT && > list_first_entry_or_null(&sched->ring_mirror_list, > struct drm_sched_job, node) == s_job) > - schedule_delayed_work(&s_job->work_tdr, sched->timeout); > + schedule_delayed_work(&sched->work_tdr, sched->timeout); > spin_unlock(&sched->job_list_lock); > } > > static void drm_sched_job_timedout(struct work_struct *work) > { > - struct drm_sched_job *job = container_of(work, struct drm_sched_job, > - work_tdr.work); > + struct drm_gpu_scheduler *sched; > + struct drm_sched_job *job; > + > + sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); > + job = list_first_entry_or_null(&sched->ring_mirror_list, > + struct drm_sched_job, node); > > - job->sched->ops->timedout_job(job); > + if (job) > + job->sched->ops->timedout_job(job); I don't think this is fully robust. Jobs are only removed from the ring_mirror_list once the job_finish worker has run. If execution of this worker is delayed for any reason (though it's really unlikely for a delay as long as the job timeout to happen) you are blaming the wrong job here. So I think what you need to to is find the first job in the ring mirror list with an unsignaled finish fence to robustly find the stuck job. 
Regards, Lucas > } > > /** > @@ -315,7 +316,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) > s_job = list_first_entry_or_null(&sched->ring_mirror_list, > struct drm_sched_job, node); > if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT) > - schedule_delayed_work(&s_job->work_tdr, sched->timeout); > + schedule_delayed_work(&sched->work_tdr, sched->timeout); > > list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { > struct drm_sched_fence *s_fence = s_job->s_fence; > @@ -384,7 +385,6 @@ int drm_sched_job_init(struct drm_sched_job *job, > > INIT_WORK(&job->finish_work, drm_sched_job_finish); > INIT_LIST_HEAD(&job->node); > - INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout); > > return 0; > } > @@ -575,6 +575,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, > INIT_LIST_HEAD(&sched->ring_mirror_list); > spin_lock_init(&sched->job_list_lock); > atomic_set(&sched->hw_rq_count, 0); > + INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); > atomic_set(&sched->num_jobs, 0); > atomic64_set(&sched->job_id_count, 0); > > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h > index daec50f887b3..d87b268f1781 100644 > --- a/include/drm/gpu_scheduler.h > +++ b/include/drm/gpu_scheduler.h > @@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); > * finished to remove the job from the > * @drm_gpu_scheduler.ring_mirror_list. > * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list. > - * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout > - * interval is over. > * @id: a unique id assigned to each job scheduled on the scheduler. > * @karma: increment on every hang caused by this job. If this exceeds the hang > * limit of the scheduler then the job is marked guilty and will not > @@ -195,7 +193,6 @@ struct drm_sched_job { > struct dma_fence_cb finish_cb; > struct work_struct finish_work; > struct list_head node; > - struct delayed_work work_tdr; > uint64_t id; > atomic_t karma; > enum drm_sched_priority s_priority; > @@ -259,6 +256,8 @@ struct drm_sched_backend_ops { > * finished. > * @hw_rq_count: the number of jobs currently in the hardware queue. > * @job_id_count: used to assign unique id to the each job. > + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the > + * timeout interval is over. > * @thread: the kthread on which the scheduler which run. > * @ring_mirror_list: the list of jobs which are currently in the job queue. > * @job_list_lock: lock to protect the ring_mirror_list. > @@ -278,6 +277,7 @@ struct drm_gpu_scheduler { > wait_queue_head_t job_scheduled; > atomic_t hw_rq_count; > atomic64_t job_id_count; > + struct delayed_work work_tdr; > struct task_struct *thread; > struct list_head ring_mirror_list; > spinlock_t job_list_lock;
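
To make Lucas' suggestion concrete, below is a minimal sketch of such a lookup. It reuses only fields that already appear in the code quoted in this thread (ring_mirror_list, job_list_lock, node, s_fence->finished); the helper name is made up for illustration, and the job is returned after the lock is dropped, so this only shows the selection logic, not a complete fix.

#include <drm/gpu_scheduler.h>
#include <linux/dma-fence.h>
#include <linux/list.h>
#include <linux/spinlock.h>

/* Hypothetical helper: return the first job in the mirror list whose
 * finish fence has not signaled yet, i.e. the job most likely to be
 * the one actually stuck on the hardware. */
static struct drm_sched_job *
drm_sched_find_stuck_job(struct drm_gpu_scheduler *sched)
{
        struct drm_sched_job *s_job;

        spin_lock(&sched->job_list_lock);
        list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
                if (!dma_fence_is_signaled(&s_job->s_fence->finished)) {
                        spin_unlock(&sched->job_list_lock);
                        return s_job;
                }
        }
        spin_unlock(&sched->job_list_lock);

        return NULL; /* every job in the list has already finished */
}
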
Am 26.09.2018 um 09:39 schrieb Lucas Stach: > Hi Nayan, > > Am Mittwoch, den 26.09.2018, 02:09 +0900 schrieb Nayan Deshmukh: >> having a delayed work item per job is redundant as we only need one >> per scheduler to track the time out the currently executing job. >> >> v2: the first element of the ring mirror list is the currently >> executing job so we don't need a additional variable for it >> >> Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com> >> Suggested-by: Christian König <christian.koenig@amd.com> >> --- >> drivers/gpu/drm/scheduler/sched_main.c | 31 ++++++++++++++++--------------- >> include/drm/gpu_scheduler.h | 6 +++--- >> 2 files changed, 19 insertions(+), 18 deletions(-) >> >> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c >> index 9ca741f3a0bc..4e8505d51795 100644 >> --- a/drivers/gpu/drm/scheduler/sched_main.c >> +++ b/drivers/gpu/drm/scheduler/sched_main.c >> @@ -197,19 +197,15 @@ static void drm_sched_job_finish(struct work_struct *work) >> * manages to find this job as the next job in the list, the fence >> * signaled check below will prevent the timeout to be restarted. >> */ >> - cancel_delayed_work_sync(&s_job->work_tdr); >> + cancel_delayed_work_sync(&sched->work_tdr); >> >> spin_lock(&sched->job_list_lock); >> - /* queue TDR for next job */ >> - if (sched->timeout != MAX_SCHEDULE_TIMEOUT && >> - !list_is_last(&s_job->node, &sched->ring_mirror_list)) { >> - struct drm_sched_job *next = list_next_entry(s_job, node); >> - >> - if (!dma_fence_is_signaled(&next->s_fence->finished)) >> - schedule_delayed_work(&next->work_tdr, sched->timeout); >> - } >> /* remove job from ring_mirror_list */ >> list_del(&s_job->node); >> + /* queue TDR for next job */ >> + if (sched->timeout != MAX_SCHEDULE_TIMEOUT && >> + !list_empty(&sched->ring_mirror_list)) >> + schedule_delayed_work(&sched->work_tdr, sched->timeout); >> spin_unlock(&sched->job_list_lock); >> >> dma_fence_put(&s_job->s_fence->finished); >> @@ -236,16 +232,21 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job) >> if (sched->timeout != MAX_SCHEDULE_TIMEOUT && >> list_first_entry_or_null(&sched->ring_mirror_list, >> struct drm_sched_job, node) == s_job) >> - schedule_delayed_work(&s_job->work_tdr, sched->timeout); >> + schedule_delayed_work(&sched->work_tdr, sched->timeout); >> spin_unlock(&sched->job_list_lock); >> } >> >> static void drm_sched_job_timedout(struct work_struct *work) >> { >> - struct drm_sched_job *job = container_of(work, struct drm_sched_job, >> - work_tdr.work); >> + struct drm_gpu_scheduler *sched; >> + struct drm_sched_job *job; >> + >> + sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); >> + job = list_first_entry_or_null(&sched->ring_mirror_list, >> + struct drm_sched_job, node); >> >> - job->sched->ops->timedout_job(job); >> + if (job) >> + job->sched->ops->timedout_job(job); > I don't think this is fully robust. Jobs are only removed from the > ring_mirror_list once the job_finish worker has run. If execution of > this worker is delayed for any reason (though it's really unlikely for > a delay as long as the job timeout to happen) you are blaming the wrong > job here. > > So I think what you need to to is find the first job in the ring mirror > list with an unsignaled finish fence to robustly find the stuck job. Yeah, that is a known problem I've pointed out as well. The issue is we have bug reports that this happened before the patch, but I'm not 100% sure how. 
My suggestion is to move a good part of the logic from drm_sched_hw_job_reset() and drm_sched_job_recovery() into drm_sched_job_timedout(). E.g. we first call dma_fence_remove_callback() for each job and actually check the return value if the fence was already signaled. If we find a signaled fence we abort and add the callback back to the ones where we removed it. Nayan do you want to take care of this or should I take a look? Regards, Christian. > > Regards, > Lucas > >> } >> >> /** >> @@ -315,7 +316,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) >> s_job = list_first_entry_or_null(&sched->ring_mirror_list, >> struct drm_sched_job, node); >> if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT) >> - schedule_delayed_work(&s_job->work_tdr, sched->timeout); >> + schedule_delayed_work(&sched->work_tdr, sched->timeout); >> >> list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { >> struct drm_sched_fence *s_fence = s_job->s_fence; >> @@ -384,7 +385,6 @@ int drm_sched_job_init(struct drm_sched_job *job, >> >> INIT_WORK(&job->finish_work, drm_sched_job_finish); >> INIT_LIST_HEAD(&job->node); >> - INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout); >> >> return 0; >> } >> @@ -575,6 +575,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, >> INIT_LIST_HEAD(&sched->ring_mirror_list); >> spin_lock_init(&sched->job_list_lock); >> atomic_set(&sched->hw_rq_count, 0); >> + INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); >> atomic_set(&sched->num_jobs, 0); >> atomic64_set(&sched->job_id_count, 0); >> >> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h >> index daec50f887b3..d87b268f1781 100644 >> --- a/include/drm/gpu_scheduler.h >> +++ b/include/drm/gpu_scheduler.h >> @@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); >> * finished to remove the job from the >> * @drm_gpu_scheduler.ring_mirror_list. >> * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list. >> - * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout >> - * interval is over. >> * @id: a unique id assigned to each job scheduled on the scheduler. >> * @karma: increment on every hang caused by this job. If this exceeds the hang >> * limit of the scheduler then the job is marked guilty and will not >> @@ -195,7 +193,6 @@ struct drm_sched_job { >> struct dma_fence_cb finish_cb; >> struct work_struct finish_work; >> struct list_head node; >> - struct delayed_work work_tdr; >> uint64_t id; >> atomic_t karma; >> enum drm_sched_priority s_priority; >> @@ -259,6 +256,8 @@ struct drm_sched_backend_ops { >> * finished. >> * @hw_rq_count: the number of jobs currently in the hardware queue. >> * @job_id_count: used to assign unique id to the each job. >> + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the >> + * timeout interval is over. >> * @thread: the kthread on which the scheduler which run. >> * @ring_mirror_list: the list of jobs which are currently in the job queue. >> * @job_list_lock: lock to protect the ring_mirror_list. 
>> @@ -278,6 +277,7 @@ struct drm_gpu_scheduler { >> wait_queue_head_t job_scheduled; >> atomic_t hw_rq_count; >> atomic64_t job_id_count; >> + struct delayed_work work_tdr; >> struct task_struct *thread; >> struct list_head ring_mirror_list; >> spinlock_t job_list_lock; > _______________________________________________ > dri-devel mailing list > dri-devel@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel
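
For readers following the discussion, here is a rough sketch of the flow Christian outlines above, not the actual rework: remove the hardware fence callback of each job, use the return value of dma_fence_remove_callback() to spot an already-signaled fence, and if one is found treat the timeout as spurious and re-arm the callbacks that were removed. The function name is hypothetical, drm_sched_process_job() is assumed to be the scheduler's existing fence callback, and locking, hw_rq_count bookkeeping and the real reset/recovery path are deliberately left out.

#include <drm/gpu_scheduler.h>
#include <linux/dma-fence.h>
#include <linux/list.h>
#include <linux/workqueue.h>

static void drm_sched_job_timedout_sketch(struct work_struct *work)
{
        struct drm_gpu_scheduler *sched =
                container_of(work, struct drm_gpu_scheduler, work_tdr.work);
        struct drm_sched_job *s_job, *stop = NULL;

        /* Pass 1: detach the hardware fence callbacks; a failed removal
         * means that fence already signaled, so the hang is not real. */
        list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
                struct drm_sched_fence *s_fence = s_job->s_fence;

                if (s_fence->parent &&
                    !dma_fence_remove_callback(s_fence->parent, &s_fence->cb)) {
                        stop = s_job;
                        break;
                }
        }

        if (!stop) {
                /* All fences are still pending: a real hang, reset and
                 * recover starting from the head of ring_mirror_list. */
                return;
        }

        /* Pass 2: spurious timeout, re-add the callbacks removed above. */
        list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
                struct drm_sched_fence *s_fence = s_job->s_fence;

                if (s_job == stop)
                        break;
                if (s_fence->parent &&
                    dma_fence_add_callback(s_fence->parent, &s_fence->cb,
                                           drm_sched_process_job))
                        /* Signaled in the meantime: finish the job by hand. */
                        drm_sched_process_job(s_fence->parent, &s_fence->cb);
        }
}
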
Hi Christian, On Wed, Sep 26, 2018, 10:13 AM Christian König < ckoenig.leichtzumerken@gmail.com> wrote: > Am 26.09.2018 um 09:39 schrieb Lucas Stach: > > Hi Nayan, > > > > Am Mittwoch, den 26.09.2018, 02:09 +0900 schrieb Nayan Deshmukh: > >> having a delayed work item per job is redundant as we only need one > >> per scheduler to track the time out the currently executing job. > >> > >> v2: the first element of the ring mirror list is the currently > >> executing job so we don't need a additional variable for it > >> > >> Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com> > >> Suggested-by: Christian König <christian.koenig@amd.com> > >> --- > >> drivers/gpu/drm/scheduler/sched_main.c | 31 > ++++++++++++++++--------------- > >> include/drm/gpu_scheduler.h | 6 +++--- > >> 2 files changed, 19 insertions(+), 18 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/scheduler/sched_main.c > b/drivers/gpu/drm/scheduler/sched_main.c > >> index 9ca741f3a0bc..4e8505d51795 100644 > >> --- a/drivers/gpu/drm/scheduler/sched_main.c > >> +++ b/drivers/gpu/drm/scheduler/sched_main.c > >> @@ -197,19 +197,15 @@ static void drm_sched_job_finish(struct > work_struct *work) > >> * manages to find this job as the next job in the list, the fence > >> * signaled check below will prevent the timeout to be restarted. > >> */ > >> - cancel_delayed_work_sync(&s_job->work_tdr); > >> + cancel_delayed_work_sync(&sched->work_tdr); > >> > >> spin_lock(&sched->job_list_lock); > >> - /* queue TDR for next job */ > >> - if (sched->timeout != MAX_SCHEDULE_TIMEOUT && > >> - !list_is_last(&s_job->node, &sched->ring_mirror_list)) { > >> - struct drm_sched_job *next = list_next_entry(s_job, node); > >> - > >> - if (!dma_fence_is_signaled(&next->s_fence->finished)) > >> - schedule_delayed_work(&next->work_tdr, > sched->timeout); > >> - } > >> /* remove job from ring_mirror_list */ > >> list_del(&s_job->node); > >> + /* queue TDR for next job */ > >> + if (sched->timeout != MAX_SCHEDULE_TIMEOUT && > >> + !list_empty(&sched->ring_mirror_list)) > >> + schedule_delayed_work(&sched->work_tdr, sched->timeout); > >> spin_unlock(&sched->job_list_lock); > >> > >> dma_fence_put(&s_job->s_fence->finished); > >> @@ -236,16 +232,21 @@ static void drm_sched_job_begin(struct > drm_sched_job *s_job) > >> if (sched->timeout != MAX_SCHEDULE_TIMEOUT && > >> list_first_entry_or_null(&sched->ring_mirror_list, > >> struct drm_sched_job, node) == s_job) > >> - schedule_delayed_work(&s_job->work_tdr, sched->timeout); > >> + schedule_delayed_work(&sched->work_tdr, sched->timeout); > >> spin_unlock(&sched->job_list_lock); > >> } > >> > >> static void drm_sched_job_timedout(struct work_struct *work) > >> { > >> - struct drm_sched_job *job = container_of(work, struct > drm_sched_job, > >> - work_tdr.work); > >> + struct drm_gpu_scheduler *sched; > >> + struct drm_sched_job *job; > >> + > >> + sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work > ); > >> + job = list_first_entry_or_null(&sched->ring_mirror_list, > >> + struct drm_sched_job, node); > >> > >> - job->sched->ops->timedout_job(job); > >> + if (job) > >> + job->sched->ops->timedout_job(job); > > I don't think this is fully robust. Jobs are only removed from the > > ring_mirror_list once the job_finish worker has run. If execution of > > this worker is delayed for any reason (though it's really unlikely for > > a delay as long as the job timeout to happen) you are blaming the wrong > > job here. 
> > > > So I think what you need to to is find the first job in the ring mirror > > list with an unsignaled finish fence to robustly find the stuck job. > > Yeah, that is a known problem I've pointed out as well. > > The issue is we have bug reports that this happened before the patch, > but I'm not 100% sure how. > > My suggestion is to move a good part of the logic from > drm_sched_hw_job_reset() and drm_sched_job_recovery() into > drm_sched_job_timedout(). > > E.g. we first call dma_fence_remove_callback() for each job and actually > check the return value if the fence was already signaled. > > If we find a signaled fence we abort and add the callback back to the > ones where we removed it. > > Nayan do you want to take care of this or should I take a look? > I can take care of it. Regards, Nayan > > Regards, > Christian. > > > > > Regards, > > Lucas > > > >> } > >> > >> /** > >> @@ -315,7 +316,7 @@ void drm_sched_job_recovery(struct > drm_gpu_scheduler *sched) > >> s_job = list_first_entry_or_null(&sched->ring_mirror_list, > >> struct drm_sched_job, node); > >> if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT) > >> - schedule_delayed_work(&s_job->work_tdr, sched->timeout); > >> + schedule_delayed_work(&sched->work_tdr, sched->timeout); > >> > >> list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, > node) { > >> struct drm_sched_fence *s_fence = s_job->s_fence; > >> @@ -384,7 +385,6 @@ int drm_sched_job_init(struct drm_sched_job *job, > >> > >> INIT_WORK(&job->finish_work, drm_sched_job_finish); > >> INIT_LIST_HEAD(&job->node); > >> - INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout); > >> > >> return 0; > >> } > >> @@ -575,6 +575,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, > >> INIT_LIST_HEAD(&sched->ring_mirror_list); > >> spin_lock_init(&sched->job_list_lock); > >> atomic_set(&sched->hw_rq_count, 0); > >> + INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); > >> atomic_set(&sched->num_jobs, 0); > >> atomic64_set(&sched->job_id_count, 0); > >> > >> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h > >> index daec50f887b3..d87b268f1781 100644 > >> --- a/include/drm/gpu_scheduler.h > >> +++ b/include/drm/gpu_scheduler.h > >> @@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct > dma_fence *f); > >> * finished to remove the job from the > >> * @drm_gpu_scheduler.ring_mirror_list. > >> * @node: used to append this struct to the > @drm_gpu_scheduler.ring_mirror_list. > >> - * @work_tdr: schedules a delayed call to @drm_sched_job_timedout > after the timeout > >> - * interval is over. > >> * @id: a unique id assigned to each job scheduled on the scheduler. > >> * @karma: increment on every hang caused by this job. If this > exceeds the hang > >> * limit of the scheduler then the job is marked guilty and > will not > >> @@ -195,7 +193,6 @@ struct drm_sched_job { > >> struct dma_fence_cb finish_cb; > >> struct work_struct finish_work; > >> struct list_head node; > >> - struct delayed_work work_tdr; > >> uint64_t id; > >> atomic_t karma; > >> enum drm_sched_priority s_priority; > >> @@ -259,6 +256,8 @@ struct drm_sched_backend_ops { > >> * finished. > >> * @hw_rq_count: the number of jobs currently in the hardware queue. > >> * @job_id_count: used to assign unique id to the each job. > >> + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout > after the > >> + * timeout interval is over. > >> * @thread: the kthread on which the scheduler which run. 
> >> * @ring_mirror_list: the list of jobs which are currently in the job > queue. > >> * @job_list_lock: lock to protect the ring_mirror_list. > >> @@ -278,6 +277,7 @@ struct drm_gpu_scheduler { > >> wait_queue_head_t job_scheduled; > >> atomic_t hw_rq_count; > >> atomic64_t job_id_count; > >> + struct delayed_work work_tdr; > >> struct task_struct *thread; > >> struct list_head ring_mirror_list; > >> spinlock_t job_list_lock; > > _______________________________________________ > > dri-devel mailing list > > dri-devel@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/dri-devel >
Hi Christian, On Thu, Sep 27, 2018 at 12:55 AM Nayan Deshmukh <nayan26deshmukh@gmail.com> wrote: > > Hi Christian, > > > On Wed, Sep 26, 2018, 10:13 AM Christian König <ckoenig.leichtzumerken@gmail.com> wrote: >> >> Am 26.09.2018 um 09:39 schrieb Lucas Stach: >> > Hi Nayan, >> > >> > Am Mittwoch, den 26.09.2018, 02:09 +0900 schrieb Nayan Deshmukh: >> >> having a delayed work item per job is redundant as we only need one >> >> per scheduler to track the time out the currently executing job. >> >> >> >> v2: the first element of the ring mirror list is the currently >> >> executing job so we don't need a additional variable for it >> >> >> >> Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com> >> >> Suggested-by: Christian König <christian.koenig@amd.com> >> >> --- >> >> drivers/gpu/drm/scheduler/sched_main.c | 31 ++++++++++++++++--------------- >> >> include/drm/gpu_scheduler.h | 6 +++--- >> >> 2 files changed, 19 insertions(+), 18 deletions(-) >> >> >> >> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c >> >> index 9ca741f3a0bc..4e8505d51795 100644 >> >> --- a/drivers/gpu/drm/scheduler/sched_main.c >> >> +++ b/drivers/gpu/drm/scheduler/sched_main.c >> >> @@ -197,19 +197,15 @@ static void drm_sched_job_finish(struct work_struct *work) >> >> * manages to find this job as the next job in the list, the fence >> >> * signaled check below will prevent the timeout to be restarted. >> >> */ >> >> - cancel_delayed_work_sync(&s_job->work_tdr); >> >> + cancel_delayed_work_sync(&sched->work_tdr); >> >> >> >> spin_lock(&sched->job_list_lock); >> >> - /* queue TDR for next job */ >> >> - if (sched->timeout != MAX_SCHEDULE_TIMEOUT && >> >> - !list_is_last(&s_job->node, &sched->ring_mirror_list)) { >> >> - struct drm_sched_job *next = list_next_entry(s_job, node); >> >> - >> >> - if (!dma_fence_is_signaled(&next->s_fence->finished)) >> >> - schedule_delayed_work(&next->work_tdr, sched->timeout); >> >> - } >> >> /* remove job from ring_mirror_list */ >> >> list_del(&s_job->node); >> >> + /* queue TDR for next job */ >> >> + if (sched->timeout != MAX_SCHEDULE_TIMEOUT && >> >> + !list_empty(&sched->ring_mirror_list)) >> >> + schedule_delayed_work(&sched->work_tdr, sched->timeout); >> >> spin_unlock(&sched->job_list_lock); >> >> >> >> dma_fence_put(&s_job->s_fence->finished); >> >> @@ -236,16 +232,21 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job) >> >> if (sched->timeout != MAX_SCHEDULE_TIMEOUT && >> >> list_first_entry_or_null(&sched->ring_mirror_list, >> >> struct drm_sched_job, node) == s_job) >> >> - schedule_delayed_work(&s_job->work_tdr, sched->timeout); >> >> + schedule_delayed_work(&sched->work_tdr, sched->timeout); >> >> spin_unlock(&sched->job_list_lock); >> >> } >> >> >> >> static void drm_sched_job_timedout(struct work_struct *work) >> >> { >> >> - struct drm_sched_job *job = container_of(work, struct drm_sched_job, >> >> - work_tdr.work); >> >> + struct drm_gpu_scheduler *sched; >> >> + struct drm_sched_job *job; >> >> + >> >> + sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); >> >> + job = list_first_entry_or_null(&sched->ring_mirror_list, >> >> + struct drm_sched_job, node); >> >> >> >> - job->sched->ops->timedout_job(job); >> >> + if (job) >> >> + job->sched->ops->timedout_job(job); >> > I don't think this is fully robust. Jobs are only removed from the >> > ring_mirror_list once the job_finish worker has run. 
If execution of >> > this worker is delayed for any reason (though it's really unlikely for >> > a delay as long as the job timeout to happen) you are blaming the wrong >> > job here. >> > >> > So I think what you need to to is find the first job in the ring mirror >> > list with an unsignaled finish fence to robustly find the stuck job. >> >> Yeah, that is a known problem I've pointed out as well. >> >> The issue is we have bug reports that this happened before the patch, >> but I'm not 100% sure how. >> >> My suggestion is to move a good part of the logic from >> drm_sched_hw_job_reset() and drm_sched_job_recovery() into >> drm_sched_job_timedout(). >> >> E.g. we first call dma_fence_remove_callback() for each job and actually >> check the return value if the fence was already signaled. >> We can move this part to drm_sched_job_timedout(). >> If we find a signaled fence we abort and add the callback back to the >> ones where we removed it. >> I was not able to find the abort part. In drm_sched_hw_job_reset() we don't take a action if the parent fence was already signaled. We cannot shift a lot from drm_sched_job_recovery() to drm_sched_job_timedout(). The only part that seems shiftable is the one where we cancel the guilty job. Regards, Nayan >> Nayan do you want to take care of this or should I take a look? > > I can take care of it. > > Regards, > Nayan >> >> >> Regards, >> Christian. >> >> > >> > Regards, >> > Lucas >> > >> >> } >> >> >> >> /** >> >> @@ -315,7 +316,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) >> >> s_job = list_first_entry_or_null(&sched->ring_mirror_list, >> >> struct drm_sched_job, node); >> >> if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT) >> >> - schedule_delayed_work(&s_job->work_tdr, sched->timeout); >> >> + schedule_delayed_work(&sched->work_tdr, sched->timeout); >> >> >> >> list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { >> >> struct drm_sched_fence *s_fence = s_job->s_fence; >> >> @@ -384,7 +385,6 @@ int drm_sched_job_init(struct drm_sched_job *job, >> >> >> >> INIT_WORK(&job->finish_work, drm_sched_job_finish); >> >> INIT_LIST_HEAD(&job->node); >> >> - INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout); >> >> >> >> return 0; >> >> } >> >> @@ -575,6 +575,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, >> >> INIT_LIST_HEAD(&sched->ring_mirror_list); >> >> spin_lock_init(&sched->job_list_lock); >> >> atomic_set(&sched->hw_rq_count, 0); >> >> + INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); >> >> atomic_set(&sched->num_jobs, 0); >> >> atomic64_set(&sched->job_id_count, 0); >> >> >> >> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h >> >> index daec50f887b3..d87b268f1781 100644 >> >> --- a/include/drm/gpu_scheduler.h >> >> +++ b/include/drm/gpu_scheduler.h >> >> @@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); >> >> * finished to remove the job from the >> >> * @drm_gpu_scheduler.ring_mirror_list. >> >> * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list. >> >> - * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout >> >> - * interval is over. >> >> * @id: a unique id assigned to each job scheduled on the scheduler. >> >> * @karma: increment on every hang caused by this job. 
If this exceeds the hang >> >> * limit of the scheduler then the job is marked guilty and will not >> >> @@ -195,7 +193,6 @@ struct drm_sched_job { >> >> struct dma_fence_cb finish_cb; >> >> struct work_struct finish_work; >> >> struct list_head node; >> >> - struct delayed_work work_tdr; >> >> uint64_t id; >> >> atomic_t karma; >> >> enum drm_sched_priority s_priority; >> >> @@ -259,6 +256,8 @@ struct drm_sched_backend_ops { >> >> * finished. >> >> * @hw_rq_count: the number of jobs currently in the hardware queue. >> >> * @job_id_count: used to assign unique id to the each job. >> >> + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the >> >> + * timeout interval is over. >> >> * @thread: the kthread on which the scheduler which run. >> >> * @ring_mirror_list: the list of jobs which are currently in the job queue. >> >> * @job_list_lock: lock to protect the ring_mirror_list. >> >> @@ -278,6 +277,7 @@ struct drm_gpu_scheduler { >> >> wait_queue_head_t job_scheduled; >> >> atomic_t hw_rq_count; >> >> atomic64_t job_id_count; >> >> + struct delayed_work work_tdr; >> >> struct task_struct *thread; >> >> struct list_head ring_mirror_list; >> >> spinlock_t job_list_lock; >> > _______________________________________________ >> > dri-devel mailing list >> > dri-devel@lists.freedesktop.org >> > https://lists.freedesktop.org/mailman/listinfo/dri-devel >>
Am 04.10.2018 um 18:32 schrieb Nayan Deshmukh: > Hi Christian, > > On Thu, Sep 27, 2018 at 12:55 AM Nayan Deshmukh > <nayan26deshmukh@gmail.com> wrote: >> Hi Christian, >> >> >> On Wed, Sep 26, 2018, 10:13 AM Christian König <ckoenig.leichtzumerken@gmail.com> wrote: >>> Am 26.09.2018 um 09:39 schrieb Lucas Stach: >>>> Hi Nayan, >>>> >>>> Am Mittwoch, den 26.09.2018, 02:09 +0900 schrieb Nayan Deshmukh: >>>>> having a delayed work item per job is redundant as we only need one >>>>> per scheduler to track the time out the currently executing job. >>>>> >>>>> v2: the first element of the ring mirror list is the currently >>>>> executing job so we don't need a additional variable for it >>>>> >>>>> Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com> >>>>> Suggested-by: Christian König <christian.koenig@amd.com> >>>>> --- >>>>> drivers/gpu/drm/scheduler/sched_main.c | 31 ++++++++++++++++--------------- >>>>> include/drm/gpu_scheduler.h | 6 +++--- >>>>> 2 files changed, 19 insertions(+), 18 deletions(-) >>>>> >>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c >>>>> index 9ca741f3a0bc..4e8505d51795 100644 >>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c >>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c >>>>> @@ -197,19 +197,15 @@ static void drm_sched_job_finish(struct work_struct *work) >>>>> * manages to find this job as the next job in the list, the fence >>>>> * signaled check below will prevent the timeout to be restarted. >>>>> */ >>>>> - cancel_delayed_work_sync(&s_job->work_tdr); >>>>> + cancel_delayed_work_sync(&sched->work_tdr); >>>>> >>>>> spin_lock(&sched->job_list_lock); >>>>> - /* queue TDR for next job */ >>>>> - if (sched->timeout != MAX_SCHEDULE_TIMEOUT && >>>>> - !list_is_last(&s_job->node, &sched->ring_mirror_list)) { >>>>> - struct drm_sched_job *next = list_next_entry(s_job, node); >>>>> - >>>>> - if (!dma_fence_is_signaled(&next->s_fence->finished)) >>>>> - schedule_delayed_work(&next->work_tdr, sched->timeout); >>>>> - } >>>>> /* remove job from ring_mirror_list */ >>>>> list_del(&s_job->node); >>>>> + /* queue TDR for next job */ >>>>> + if (sched->timeout != MAX_SCHEDULE_TIMEOUT && >>>>> + !list_empty(&sched->ring_mirror_list)) >>>>> + schedule_delayed_work(&sched->work_tdr, sched->timeout); >>>>> spin_unlock(&sched->job_list_lock); >>>>> >>>>> dma_fence_put(&s_job->s_fence->finished); >>>>> @@ -236,16 +232,21 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job) >>>>> if (sched->timeout != MAX_SCHEDULE_TIMEOUT && >>>>> list_first_entry_or_null(&sched->ring_mirror_list, >>>>> struct drm_sched_job, node) == s_job) >>>>> - schedule_delayed_work(&s_job->work_tdr, sched->timeout); >>>>> + schedule_delayed_work(&sched->work_tdr, sched->timeout); >>>>> spin_unlock(&sched->job_list_lock); >>>>> } >>>>> >>>>> static void drm_sched_job_timedout(struct work_struct *work) >>>>> { >>>>> - struct drm_sched_job *job = container_of(work, struct drm_sched_job, >>>>> - work_tdr.work); >>>>> + struct drm_gpu_scheduler *sched; >>>>> + struct drm_sched_job *job; >>>>> + >>>>> + sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); >>>>> + job = list_first_entry_or_null(&sched->ring_mirror_list, >>>>> + struct drm_sched_job, node); >>>>> >>>>> - job->sched->ops->timedout_job(job); >>>>> + if (job) >>>>> + job->sched->ops->timedout_job(job); >>>> I don't think this is fully robust. Jobs are only removed from the >>>> ring_mirror_list once the job_finish worker has run. 
If execution of >>>> this worker is delayed for any reason (though it's really unlikely for >>>> a delay as long as the job timeout to happen) you are blaming the wrong >>>> job here. >>>> >>>> So I think what you need to to is find the first job in the ring mirror >>>> list with an unsignaled finish fence to robustly find the stuck job. >>> Yeah, that is a known problem I've pointed out as well. >>> >>> The issue is we have bug reports that this happened before the patch, >>> but I'm not 100% sure how. >>> >>> My suggestion is to move a good part of the logic from >>> drm_sched_hw_job_reset() and drm_sched_job_recovery() into >>> drm_sched_job_timedout(). >>> >>> E.g. we first call dma_fence_remove_callback() for each job and actually >>> check the return value if the fence was already signaled. >>> > We can move this part to drm_sched_job_timedout(). > >>> If we find a signaled fence we abort and add the callback back to the >>> ones where we removed it. >>> > I was not able to find the abort part. In drm_sched_hw_job_reset() we > don't take a action if the parent fence was already signaled. That is a bug a swell. > We cannot shift a lot from drm_sched_job_recovery() to > drm_sched_job_timedout(). The only part that seems shiftable is the > one where we cancel the guilty job. Actually I wanted to completely rework that part since it is rather unreliable as well. Considering how buggy all of that is and how important it is to get it right I think I will rather do it myself. Regards, Christian. > > Regards, > Nayan >>> Nayan do you want to take care of this or should I take a look? >> I can take care of it. >> >> Regards, >> Nayan >>> >>> Regards, >>> Christian. >>> >>>> Regards, >>>> Lucas >>>> >>>>> } >>>>> >>>>> /** >>>>> @@ -315,7 +316,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) >>>>> s_job = list_first_entry_or_null(&sched->ring_mirror_list, >>>>> struct drm_sched_job, node); >>>>> if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT) >>>>> - schedule_delayed_work(&s_job->work_tdr, sched->timeout); >>>>> + schedule_delayed_work(&sched->work_tdr, sched->timeout); >>>>> >>>>> list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { >>>>> struct drm_sched_fence *s_fence = s_job->s_fence; >>>>> @@ -384,7 +385,6 @@ int drm_sched_job_init(struct drm_sched_job *job, >>>>> >>>>> INIT_WORK(&job->finish_work, drm_sched_job_finish); >>>>> INIT_LIST_HEAD(&job->node); >>>>> - INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout); >>>>> >>>>> return 0; >>>>> } >>>>> @@ -575,6 +575,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, >>>>> INIT_LIST_HEAD(&sched->ring_mirror_list); >>>>> spin_lock_init(&sched->job_list_lock); >>>>> atomic_set(&sched->hw_rq_count, 0); >>>>> + INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); >>>>> atomic_set(&sched->num_jobs, 0); >>>>> atomic64_set(&sched->job_id_count, 0); >>>>> >>>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h >>>>> index daec50f887b3..d87b268f1781 100644 >>>>> --- a/include/drm/gpu_scheduler.h >>>>> +++ b/include/drm/gpu_scheduler.h >>>>> @@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); >>>>> * finished to remove the job from the >>>>> * @drm_gpu_scheduler.ring_mirror_list. >>>>> * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list. >>>>> - * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout >>>>> - * interval is over. 
>>>>> * @id: a unique id assigned to each job scheduled on the scheduler. >>>>> * @karma: increment on every hang caused by this job. If this exceeds the hang >>>>> * limit of the scheduler then the job is marked guilty and will not >>>>> @@ -195,7 +193,6 @@ struct drm_sched_job { >>>>> struct dma_fence_cb finish_cb; >>>>> struct work_struct finish_work; >>>>> struct list_head node; >>>>> - struct delayed_work work_tdr; >>>>> uint64_t id; >>>>> atomic_t karma; >>>>> enum drm_sched_priority s_priority; >>>>> @@ -259,6 +256,8 @@ struct drm_sched_backend_ops { >>>>> * finished. >>>>> * @hw_rq_count: the number of jobs currently in the hardware queue. >>>>> * @job_id_count: used to assign unique id to the each job. >>>>> + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the >>>>> + * timeout interval is over. >>>>> * @thread: the kthread on which the scheduler which run. >>>>> * @ring_mirror_list: the list of jobs which are currently in the job queue. >>>>> * @job_list_lock: lock to protect the ring_mirror_list. >>>>> @@ -278,6 +277,7 @@ struct drm_gpu_scheduler { >>>>> wait_queue_head_t job_scheduled; >>>>> atomic_t hw_rq_count; >>>>> atomic64_t job_id_count; >>>>> + struct delayed_work work_tdr; >>>>> struct task_struct *thread; >>>>> struct list_head ring_mirror_list; >>>>> spinlock_t job_list_lock; >>>> _______________________________________________ >>>> dri-devel mailing list >>>> dri-devel@lists.freedesktop.org >>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel > _______________________________________________ > dri-devel mailing list > dri-devel@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 9ca741f3a0bc..4e8505d51795 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -197,19 +197,15 @@ static void drm_sched_job_finish(struct work_struct *work)
	 * manages to find this job as the next job in the list, the fence
	 * signaled check below will prevent the timeout to be restarted.
	 */
-	cancel_delayed_work_sync(&s_job->work_tdr);
+	cancel_delayed_work_sync(&sched->work_tdr);

	spin_lock(&sched->job_list_lock);
-	/* queue TDR for next job */
-	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
-	    !list_is_last(&s_job->node, &sched->ring_mirror_list)) {
-		struct drm_sched_job *next = list_next_entry(s_job, node);
-
-		if (!dma_fence_is_signaled(&next->s_fence->finished))
-			schedule_delayed_work(&next->work_tdr, sched->timeout);
-	}
	/* remove job from ring_mirror_list */
	list_del(&s_job->node);
+	/* queue TDR for next job */
+	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
+	    !list_empty(&sched->ring_mirror_list))
+		schedule_delayed_work(&sched->work_tdr, sched->timeout);
	spin_unlock(&sched->job_list_lock);

	dma_fence_put(&s_job->s_fence->finished);
@@ -236,16 +232,21 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    list_first_entry_or_null(&sched->ring_mirror_list,
				     struct drm_sched_job, node) == s_job)
-		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+		schedule_delayed_work(&sched->work_tdr, sched->timeout);
	spin_unlock(&sched->job_list_lock);
 }

 static void drm_sched_job_timedout(struct work_struct *work)
 {
-	struct drm_sched_job *job = container_of(work, struct drm_sched_job,
-						 work_tdr.work);
+	struct drm_gpu_scheduler *sched;
+	struct drm_sched_job *job;
+
+	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
+	job = list_first_entry_or_null(&sched->ring_mirror_list,
+				       struct drm_sched_job, node);

-	job->sched->ops->timedout_job(job);
+	if (job)
+		job->sched->ops->timedout_job(job);
 }

 /**
@@ -315,7 +316,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
					 struct drm_sched_job, node);
	if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
-		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+		schedule_delayed_work(&sched->work_tdr, sched->timeout);

	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
		struct drm_sched_fence *s_fence = s_job->s_fence;
@@ -384,7 +385,6 @@ int drm_sched_job_init(struct drm_sched_job *job,

	INIT_WORK(&job->finish_work, drm_sched_job_finish);
	INIT_LIST_HEAD(&job->node);
-	INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);

	return 0;
 }
@@ -575,6 +575,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
	INIT_LIST_HEAD(&sched->ring_mirror_list);
	spin_lock_init(&sched->job_list_lock);
	atomic_set(&sched->hw_rq_count, 0);
+	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
	atomic_set(&sched->num_jobs, 0);
	atomic64_set(&sched->job_id_count, 0);

diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index daec50f887b3..d87b268f1781 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
 *            finished to remove the job from the
 *            @drm_gpu_scheduler.ring_mirror_list.
 * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list.
- * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout
- *            interval is over.
 * @id: a unique id assigned to each job scheduled on the scheduler.
 * @karma: increment on every hang caused by this job. If this exceeds the hang
 *         limit of the scheduler then the job is marked guilty and will not
@@ -195,7 +193,6 @@ struct drm_sched_job {
	struct dma_fence_cb		finish_cb;
	struct work_struct		finish_work;
	struct list_head		node;
-	struct delayed_work		work_tdr;
	uint64_t			id;
	atomic_t			karma;
	enum drm_sched_priority		s_priority;
@@ -259,6 +256,8 @@ struct drm_sched_backend_ops {
 *              finished.
 * @hw_rq_count: the number of jobs currently in the hardware queue.
 * @job_id_count: used to assign unique id to the each job.
+ * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
+ *            timeout interval is over.
 * @thread: the kthread on which the scheduler which run.
 * @ring_mirror_list: the list of jobs which are currently in the job queue.
 * @job_list_lock: lock to protect the ring_mirror_list.
@@ -278,6 +277,7 @@ struct drm_gpu_scheduler {
	wait_queue_head_t		job_scheduled;
	atomic_t			hw_rq_count;
	atomic64_t			job_id_count;
+	struct delayed_work		work_tdr;
	struct task_struct		*thread;
	struct list_head		ring_mirror_list;
	spinlock_t			job_list_lock;
Having a delayed work item per job is redundant, as we only need one per scheduler to track the timeout of the currently executing job.

v2: the first element of the ring mirror list is the currently executing job, so we don't need an additional variable for it.

Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Suggested-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 31 ++++++++++++++++---------------
 include/drm/gpu_scheduler.h            |  6 +++---
 2 files changed, 19 insertions(+), 18 deletions(-)
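
For reference, the lifecycle of the single per-scheduler timeout this patch establishes, distilled from the diff above into one comment block (simplified; locking and error handling omitted):

/*
 * drm_sched_init():         INIT_DELAYED_WORK(&sched->work_tdr,
 *                           drm_sched_job_timedout) - one timer per scheduler.
 * drm_sched_job_begin():    if the new job is the head of ring_mirror_list,
 *                           schedule_delayed_work(&sched->work_tdr, timeout).
 * drm_sched_job_finish():   cancel_delayed_work_sync(&sched->work_tdr), drop
 *                           the finished job from the list, then re-arm the
 *                           work if the list is still non-empty.
 * drm_sched_job_timedout(): blame the current head of ring_mirror_list, if
 *                           any, and call ops->timedout_job() on it.
 */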