[2/3] drm/scheduler: add counter for total jobs in scheduler

Message ID 20180712063643.8030-3-nayan26deshmukh@gmail.com (mailing list archive)
State New, archived

Commit Message

Nayan Deshmukh July 12, 2018, 6:36 a.m. UTC
Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
---
 drivers/gpu/drm/scheduler/gpu_scheduler.c | 3 +++
 include/drm/gpu_scheduler.h               | 2 ++
 2 files changed, 5 insertions(+)

Comments

Zhang, Jerry(Junwei) Aug. 2, 2018, 5:01 a.m. UTC | #1
On 07/12/2018 02:36 PM, Nayan Deshmukh wrote:
> Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
> ---
>   drivers/gpu/drm/scheduler/gpu_scheduler.c | 3 +++
>   include/drm/gpu_scheduler.h               | 2 ++
>   2 files changed, 5 insertions(+)
>
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> index 429b1328653a..3dc1a4f07e3f 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> @@ -538,6 +538,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
>   	trace_drm_sched_job(sched_job, entity);
>
>   	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
> +	atomic_inc(&entity->sched->num_jobs);

Shall we use hw_rq_count directly or merge them together?

Regards,
Jerry
>
>   	/* first job wakes up scheduler */
>   	if (first) {
> @@ -818,6 +819,7 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
>
>   	dma_fence_get(&s_fence->finished);
>   	atomic_dec(&sched->hw_rq_count);
> +	atomic_dec(&sched->num_jobs);
>   	drm_sched_fence_finished(s_fence);
>
>   	trace_drm_sched_process_job(s_fence);
> @@ -935,6 +937,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>   	INIT_LIST_HEAD(&sched->ring_mirror_list);
>   	spin_lock_init(&sched->job_list_lock);
>   	atomic_set(&sched->hw_rq_count, 0);
> +	atomic_set(&sched->num_jobs, 0);
>   	atomic64_set(&sched->job_id_count, 0);
>
>   	/* Each scheduler will run on a seperate kernel thread */
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 43e93d6077cf..605bd4ad2397 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -257,6 +257,7 @@ struct drm_sched_backend_ops {
>    * @job_list_lock: lock to protect the ring_mirror_list.
>    * @hang_limit: once the hangs by a job crosses this limit then it is marked
>    *              guilty and it will be considered for scheduling further.
> + * @num_jobs: the number of jobs in queue in the scheduler
>    *
>    * One scheduler is implemented for each hardware ring.
>    */
> @@ -274,6 +275,7 @@ struct drm_gpu_scheduler {
>   	struct list_head		ring_mirror_list;
>   	spinlock_t			job_list_lock;
>   	int				hang_limit;
> +	atomic_t                        num_jobs;
>   };
>
>   int drm_sched_init(struct drm_gpu_scheduler *sched,
>
Nayan Deshmukh Aug. 2, 2018, 5:50 a.m. UTC | #2
On Thu, Aug 2, 2018 at 10:31 AM Zhang, Jerry (Junwei) <Jerry.Zhang@amd.com>
wrote:

> On 07/12/2018 02:36 PM, Nayan Deshmukh wrote:
> > Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
> > ---
> >   drivers/gpu/drm/scheduler/gpu_scheduler.c | 3 +++
> >   include/drm/gpu_scheduler.h               | 2 ++
> >   2 files changed, 5 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> > index 429b1328653a..3dc1a4f07e3f 100644
> > --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> > +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> > @@ -538,6 +538,7 @@ void drm_sched_entity_push_job(struct drm_sched_job
> *sched_job,
> >       trace_drm_sched_job(sched_job, entity);
> >
> >       first = spsc_queue_push(&entity->job_queue,
> &sched_job->queue_node);
> > +     atomic_inc(&entity->sched->num_jobs);
>
> Shall we use hw_rq_count directly or merge them together?
>
hw_rq_count is the number of jobs that are currently in the hardware queue,
whereas num_jobs is the number of jobs in the software queue. num_jobs gives
a better idea of the load on a scheduler, which is why I added that field
and used it to pick the scheduler with the least load.
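
For illustration, the follow-up patch picks the least-loaded scheduler along
roughly these lines (a sketch only; the helper name, parameters and exact
loop are illustrative, not the final code of the series):

static struct drm_gpu_scheduler *
pick_least_loaded_sched(struct drm_gpu_scheduler **sched_list,
			unsigned int num_scheds)
{
	struct drm_gpu_scheduler *least_loaded = NULL;
	int min_jobs = INT_MAX;
	unsigned int i;

	for (i = 0; i < num_scheds; i++) {
		/* num_jobs covers software- and hardware-queued jobs */
		int jobs = atomic_read(&sched_list[i]->num_jobs);

		if (jobs < min_jobs) {
			min_jobs = jobs;
			least_loaded = sched_list[i];
		}
	}

	return least_loaded;
}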

Regards,
Nayan

>
> Regards,
> Jerry
> >
> >       /* first job wakes up scheduler */
> >       if (first) {
> > @@ -818,6 +819,7 @@ static void drm_sched_process_job(struct dma_fence
> *f, struct dma_fence_cb *cb)
> >
> >       dma_fence_get(&s_fence->finished);
> >       atomic_dec(&sched->hw_rq_count);
> > +     atomic_dec(&sched->num_jobs);
> >       drm_sched_fence_finished(s_fence);
> >
> >       trace_drm_sched_process_job(s_fence);
> > @@ -935,6 +937,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
> >       INIT_LIST_HEAD(&sched->ring_mirror_list);
> >       spin_lock_init(&sched->job_list_lock);
> >       atomic_set(&sched->hw_rq_count, 0);
> > +     atomic_set(&sched->num_jobs, 0);
> >       atomic64_set(&sched->job_id_count, 0);
> >
> >       /* Each scheduler will run on a seperate kernel thread */
> > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> > index 43e93d6077cf..605bd4ad2397 100644
> > --- a/include/drm/gpu_scheduler.h
> > +++ b/include/drm/gpu_scheduler.h
> > @@ -257,6 +257,7 @@ struct drm_sched_backend_ops {
> >    * @job_list_lock: lock to protect the ring_mirror_list.
> >    * @hang_limit: once the hangs by a job crosses this limit then it is
> marked
> >    *              guilty and it will be considered for scheduling
> further.
> > + * @num_jobs: the number of jobs in queue in the scheduler
> >    *
> >    * One scheduler is implemented for each hardware ring.
> >    */
> > @@ -274,6 +275,7 @@ struct drm_gpu_scheduler {
> >       struct list_head                ring_mirror_list;
> >       spinlock_t                      job_list_lock;
> >       int                             hang_limit;
> > +     atomic_t                        num_jobs;
> >   };
> >
> >   int drm_sched_init(struct drm_gpu_scheduler *sched,
> >
>
<div dir="ltr"><br><br><div class="gmail_quote"><div dir="ltr">On Thu, Aug 2, 2018 at 10:31 AM Zhang, Jerry (Junwei) &lt;<a href="mailto:Jerry.Zhang@amd.com">Jerry.Zhang@amd.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">On 07/12/2018 02:36 PM, Nayan Deshmukh wrote:<br>
&gt; Signed-off-by: Nayan Deshmukh &lt;<a href="mailto:nayan26deshmukh@gmail.com" target="_blank">nayan26deshmukh@gmail.com</a>&gt;<br>
&gt; ---<br>
&gt;   drivers/gpu/drm/scheduler/gpu_scheduler.c | 3 +++<br>
&gt;   include/drm/gpu_scheduler.h               | 2 ++<br>
&gt;   2 files changed, 5 insertions(+)<br>
&gt;<br>
&gt; diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
&gt; index 429b1328653a..3dc1a4f07e3f 100644<br>
&gt; --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
&gt; +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
&gt; @@ -538,6 +538,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,<br>
&gt;       trace_drm_sched_job(sched_job, entity);<br>
&gt;<br>
&gt;       first = spsc_queue_push(&amp;entity-&gt;job_queue, &amp;sched_job-&gt;queue_node);<br>
&gt; +     atomic_inc(&amp;entity-&gt;sched-&gt;num_jobs);<br>
<br>
Shall we use hw_rq_count directly or merge them together?<br></blockquote><div>hw_rq_count is the number of jobs that are currently in the hardware queue as compared to num_jobs which is the number of jobs in the software queue. num_jobs provides a give a better idea of the load on a scheduler that&#39;s why I added that field and used it to decide the scheduler with the least load. <br><br></div><div>Regards,<br></div><div>Nayan <br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
Regards,<br>
Jerry<br>
&gt;<br>
&gt;       /* first job wakes up scheduler */<br>
&gt;       if (first) {<br>
&gt; @@ -818,6 +819,7 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)<br>
&gt;<br>
&gt;       dma_fence_get(&amp;s_fence-&gt;finished);<br>
&gt;       atomic_dec(&amp;sched-&gt;hw_rq_count);<br>
&gt; +     atomic_dec(&amp;sched-&gt;num_jobs);<br>
&gt;       drm_sched_fence_finished(s_fence);<br>
&gt;<br>
&gt;       trace_drm_sched_process_job(s_fence);<br>
&gt; @@ -935,6 +937,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,<br>
&gt;       INIT_LIST_HEAD(&amp;sched-&gt;ring_mirror_list);<br>
&gt;       spin_lock_init(&amp;sched-&gt;job_list_lock);<br>
&gt;       atomic_set(&amp;sched-&gt;hw_rq_count, 0);<br>
&gt; +     atomic_set(&amp;sched-&gt;num_jobs, 0);<br>
&gt;       atomic64_set(&amp;sched-&gt;job_id_count, 0);<br>
&gt;<br>
&gt;       /* Each scheduler will run on a seperate kernel thread */<br>
&gt; diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h<br>
&gt; index 43e93d6077cf..605bd4ad2397 100644<br>
&gt; --- a/include/drm/gpu_scheduler.h<br>
&gt; +++ b/include/drm/gpu_scheduler.h<br>
&gt; @@ -257,6 +257,7 @@ struct drm_sched_backend_ops {<br>
&gt;    * @job_list_lock: lock to protect the ring_mirror_list.<br>
&gt;    * @hang_limit: once the hangs by a job crosses this limit then it is marked<br>
&gt;    *              guilty and it will be considered for scheduling further.<br>
&gt; + * @num_jobs: the number of jobs in queue in the scheduler<br>
&gt;    *<br>
&gt;    * One scheduler is implemented for each hardware ring.<br>
&gt;    */<br>
&gt; @@ -274,6 +275,7 @@ struct drm_gpu_scheduler {<br>
&gt;       struct list_head                ring_mirror_list;<br>
&gt;       spinlock_t                      job_list_lock;<br>
&gt;       int                             hang_limit;<br>
&gt; +     atomic_t                        num_jobs;<br>
&gt;   };<br>
&gt;<br>
&gt;   int drm_sched_init(struct drm_gpu_scheduler *sched,<br>
&gt;<br>
</blockquote></div></div>
Zhang, Jerry(Junwei) Aug. 2, 2018, 5:59 a.m. UTC | #3
On 08/02/2018 01:50 PM, Nayan Deshmukh wrote:
>
>
> On Thu, Aug 2, 2018 at 10:31 AM Zhang, Jerry (Junwei) <Jerry.Zhang@amd.com <mailto:Jerry.Zhang@amd.com>> wrote:
>
>     On 07/12/2018 02:36 PM, Nayan Deshmukh wrote:
>      > Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com <mailto:nayan26deshmukh@gmail.com>>
>      > ---
>      >   drivers/gpu/drm/scheduler/gpu_scheduler.c | 3 +++
>      >   include/drm/gpu_scheduler.h               | 2 ++
>      >   2 files changed, 5 insertions(+)
>      >
>      > diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>      > index 429b1328653a..3dc1a4f07e3f 100644
>      > --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>      > +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>      > @@ -538,6 +538,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
>      >       trace_drm_sched_job(sched_job, entity);
>      >
>      >       first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
>      > +     atomic_inc(&entity->sched->num_jobs);
>
>     Shall we use hw_rq_count directly or merge them together?
>
> hw_rq_count is the number of jobs that are currently in the hardware queue as compared to num_jobs which is the number of jobs in the software queue. num_jobs provides a give a better idea of the load
> on a scheduler that's why I added that field and used it to decide the scheduler with the least load.

Thanks for your explanation.

Then it may be more reasonable to move atomic_dec(&sched->num_jobs) after
drm_sched_fence_scheduled() or just before atomic_inc(&sched->hw_rq_count).
What do you think?
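
Roughly what I mean, assuming the relevant part of the drm_sched_main() loop
still looks more or less like below (just a sketch, not a tested change);
num_jobs would then only count jobs still waiting in the software queue:

	sched_job = drm_sched_entity_pop_job(entity);
	if (!sched_job)
		continue;

	s_fence = sched_job->s_fence;

	/* job leaves the software queue here ... */
	atomic_dec(&sched->num_jobs);
	/* ... and enters the hardware queue */
	atomic_inc(&sched->hw_rq_count);
	drm_sched_job_begin(sched_job);

	fence = sched->ops->run_job(sched_job);
	drm_sched_fence_scheduled(s_fence);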

Regards,
Jerry

>
> Regards,
> Nayan
>
>
>     Regards,
>     Jerry
>      >
>      >       /* first job wakes up scheduler */
>      >       if (first) {
>      > @@ -818,6 +819,7 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
>      >
>      >       dma_fence_get(&s_fence->finished);
>      >       atomic_dec(&sched->hw_rq_count);
>      > +     atomic_dec(&sched->num_jobs);
>      >       drm_sched_fence_finished(s_fence);
>      >
>      >       trace_drm_sched_process_job(s_fence);
>      > @@ -935,6 +937,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>      >       INIT_LIST_HEAD(&sched->ring_mirror_list);
>      >       spin_lock_init(&sched->job_list_lock);
>      >       atomic_set(&sched->hw_rq_count, 0);
>      > +     atomic_set(&sched->num_jobs, 0);
>      >       atomic64_set(&sched->job_id_count, 0);
>      >
>      >       /* Each scheduler will run on a seperate kernel thread */
>      > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>      > index 43e93d6077cf..605bd4ad2397 100644
>      > --- a/include/drm/gpu_scheduler.h
>      > +++ b/include/drm/gpu_scheduler.h
>      > @@ -257,6 +257,7 @@ struct drm_sched_backend_ops {
>      >    * @job_list_lock: lock to protect the ring_mirror_list.
>      >    * @hang_limit: once the hangs by a job crosses this limit then it is marked
>      >    *              guilty and it will be considered for scheduling further.
>      > + * @num_jobs: the number of jobs in queue in the scheduler
>      >    *
>      >    * One scheduler is implemented for each hardware ring.
>      >    */
>      > @@ -274,6 +275,7 @@ struct drm_gpu_scheduler {
>      >       struct list_head                ring_mirror_list;
>      >       spinlock_t                      job_list_lock;
>      >       int                             hang_limit;
>      > +     atomic_t                        num_jobs;
>      >   };
>      >
>      >   int drm_sched_init(struct drm_gpu_scheduler *sched,
>      >
>
Nayan Deshmukh Aug. 2, 2018, 6:08 a.m. UTC | #4
On Thu, Aug 2, 2018 at 11:29 AM Zhang, Jerry (Junwei) <Jerry.Zhang@amd.com>
wrote:

> On 08/02/2018 01:50 PM, Nayan Deshmukh wrote:
> >
> >
> > On Thu, Aug 2, 2018 at 10:31 AM Zhang, Jerry (Junwei) <
> Jerry.Zhang@amd.com <mailto:Jerry.Zhang@amd.com>> wrote:
> >
> >     On 07/12/2018 02:36 PM, Nayan Deshmukh wrote:
> >      > Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com <mailto:
> nayan26deshmukh@gmail.com>>
> >      > ---
> >      >   drivers/gpu/drm/scheduler/gpu_scheduler.c | 3 +++
> >      >   include/drm/gpu_scheduler.h               | 2 ++
> >      >   2 files changed, 5 insertions(+)
> >      >
> >      > diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> >      > index 429b1328653a..3dc1a4f07e3f 100644
> >      > --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> >      > +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> >      > @@ -538,6 +538,7 @@ void drm_sched_entity_push_job(struct
> drm_sched_job *sched_job,
> >      >       trace_drm_sched_job(sched_job, entity);
> >      >
> >      >       first = spsc_queue_push(&entity->job_queue,
> &sched_job->queue_node);
> >      > +     atomic_inc(&entity->sched->num_jobs);
> >
> >     Shall we use hw_rq_count directly or merge them together?
> >
> > hw_rq_count is the number of jobs that are currently in the hardware
> queue as compared to num_jobs which is the number of jobs in the software
> queue. num_jobs provides a give a better idea of the load
> > on a scheduler that's why I added that field and used it to decide the
> scheduler with the least load.
>
> Thanks for your explanation.
>
> Then may be more reasonable to move atomic_dec(&sched->num_jobs) after
> drm_sched_fence_scheduled() or just before atomic_inc(&sched->hw_rq_count).
> How do you think that?
>
For now num_jobs also includes jobs that have already been pushed to the
hardware queue. If I move the decrement to just before
atomic_inc(&sched->hw_rq_count), then I also need to handle the case where a
job is reset and update the counter properly. I went for the simpler
implementation, as I felt num_jobs still correctly represents the load on
the scheduler.

But the idea that you suggested can be a potential improvement over what I
have done.
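
To make the semantics concrete: with the patch as posted, num_jobs counts
every job pushed via drm_sched_entity_push_job() that has not yet completed,
so a debug helper could report the split roughly like this (illustrative
only, not part of the patch):

static void dump_sched_load(struct drm_gpu_scheduler *sched)
{
	int total = atomic_read(&sched->num_jobs);
	int in_hw = atomic_read(&sched->hw_rq_count);

	/* total covers both queues; the difference approximates the software queue */
	pr_info("%s: %d jobs total (~%d in software queue, %d in hardware queue)\n",
		sched->name, total, total - in_hw, in_hw);
}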

Regards,
Nayan

> Regards,
> Jerry
>
> >
> > Regards,
> > Nayan
> >
> >
> >     Regards,
> >     Jerry
> >      >
> >      >       /* first job wakes up scheduler */
> >      >       if (first) {
> >      > @@ -818,6 +819,7 @@ static void drm_sched_process_job(struct
> dma_fence *f, struct dma_fence_cb *cb)
> >      >
> >      >       dma_fence_get(&s_fence->finished);
> >      >       atomic_dec(&sched->hw_rq_count);
> >      > +     atomic_dec(&sched->num_jobs);
> >      >       drm_sched_fence_finished(s_fence);
> >      >
> >      >       trace_drm_sched_process_job(s_fence);
> >      > @@ -935,6 +937,7 @@ int drm_sched_init(struct drm_gpu_scheduler
> *sched,
> >      >       INIT_LIST_HEAD(&sched->ring_mirror_list);
> >      >       spin_lock_init(&sched->job_list_lock);
> >      >       atomic_set(&sched->hw_rq_count, 0);
> >      > +     atomic_set(&sched->num_jobs, 0);
> >      >       atomic64_set(&sched->job_id_count, 0);
> >      >
> >      >       /* Each scheduler will run on a seperate kernel thread */
> >      > diff --git a/include/drm/gpu_scheduler.h
> b/include/drm/gpu_scheduler.h
> >      > index 43e93d6077cf..605bd4ad2397 100644
> >      > --- a/include/drm/gpu_scheduler.h
> >      > +++ b/include/drm/gpu_scheduler.h
> >      > @@ -257,6 +257,7 @@ struct drm_sched_backend_ops {
> >      >    * @job_list_lock: lock to protect the ring_mirror_list.
> >      >    * @hang_limit: once the hangs by a job crosses this limit then
> it is marked
> >      >    *              guilty and it will be considered for scheduling
> further.
> >      > + * @num_jobs: the number of jobs in queue in the scheduler
> >      >    *
> >      >    * One scheduler is implemented for each hardware ring.
> >      >    */
> >      > @@ -274,6 +275,7 @@ struct drm_gpu_scheduler {
> >      >       struct list_head                ring_mirror_list;
> >      >       spinlock_t                      job_list_lock;
> >      >       int                             hang_limit;
> >      > +     atomic_t                        num_jobs;
> >      >   };
> >      >
> >      >   int drm_sched_init(struct drm_gpu_scheduler *sched,
> >      >
> >
>
<div dir="ltr"><br><br><div class="gmail_quote"><div dir="ltr">On Thu, Aug 2, 2018 at 11:29 AM Zhang, Jerry (Junwei) &lt;<a href="mailto:Jerry.Zhang@amd.com">Jerry.Zhang@amd.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">On 08/02/2018 01:50 PM, Nayan Deshmukh wrote:<br>
&gt;<br>
&gt;<br>
&gt; On Thu, Aug 2, 2018 at 10:31 AM Zhang, Jerry (Junwei) &lt;<a href="mailto:Jerry.Zhang@amd.com" target="_blank">Jerry.Zhang@amd.com</a> &lt;mailto:<a href="mailto:Jerry.Zhang@amd.com" target="_blank">Jerry.Zhang@amd.com</a>&gt;&gt; wrote:<br>
&gt;<br>
&gt;     On 07/12/2018 02:36 PM, Nayan Deshmukh wrote:<br>
&gt;      &gt; Signed-off-by: Nayan Deshmukh &lt;<a href="mailto:nayan26deshmukh@gmail.com" target="_blank">nayan26deshmukh@gmail.com</a> &lt;mailto:<a href="mailto:nayan26deshmukh@gmail.com" target="_blank">nayan26deshmukh@gmail.com</a>&gt;&gt;<br>
&gt;      &gt; ---<br>
&gt;      &gt;   drivers/gpu/drm/scheduler/gpu_scheduler.c | 3 +++<br>
&gt;      &gt;   include/drm/gpu_scheduler.h               | 2 ++<br>
&gt;      &gt;   2 files changed, 5 insertions(+)<br>
&gt;      &gt;<br>
&gt;      &gt; diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
&gt;      &gt; index 429b1328653a..3dc1a4f07e3f 100644<br>
&gt;      &gt; --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
&gt;      &gt; +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
&gt;      &gt; @@ -538,6 +538,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,<br>
&gt;      &gt;       trace_drm_sched_job(sched_job, entity);<br>
&gt;      &gt;<br>
&gt;      &gt;       first = spsc_queue_push(&amp;entity-&gt;job_queue, &amp;sched_job-&gt;queue_node);<br>
&gt;      &gt; +     atomic_inc(&amp;entity-&gt;sched-&gt;num_jobs);<br>
&gt;<br>
&gt;     Shall we use hw_rq_count directly or merge them together?<br>
&gt;<br>
&gt; hw_rq_count is the number of jobs that are currently in the hardware queue as compared to num_jobs which is the number of jobs in the software queue. num_jobs provides a give a better idea of the load<br>
&gt; on a scheduler that&#39;s why I added that field and used it to decide the scheduler with the least load.<br>
<br>
Thanks for your explanation.<br>
<br>
Then may be more reasonable to move atomic_dec(&amp;sched-&gt;num_jobs) after drm_sched_fence_scheduled() or just before atomic_inc(&amp;sched-&gt;hw_rq_count).<br>
How do you think that?<br>
<br></blockquote><div>For now num_jobs also includes jobs that have been pushed to the hardware queue. If I shift it before atomic_inc(&amp;sched-&gt;hw_rq_count) then I also need to handle cases when the job was reset and update the counter properly. I went for the easier implementation as I felt that num_jobs will correctly represent the load on the scheduler. <br><br></div><div>But the idea that you suggested can be a potential improvement over what I have done.<br><br></div><div>Regards,<br></div><div>Nayan <br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Regards,<br>
Jerry<br>
<br>
&gt;<br>
&gt; Regards,<br>
&gt; Nayan<br>
&gt;<br>
&gt;<br>
&gt;     Regards,<br>
&gt;     Jerry<br>
&gt;      &gt;<br>
&gt;      &gt;       /* first job wakes up scheduler */<br>
&gt;      &gt;       if (first) {<br>
&gt;      &gt; @@ -818,6 +819,7 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)<br>
&gt;      &gt;<br>
&gt;      &gt;       dma_fence_get(&amp;s_fence-&gt;finished);<br>
&gt;      &gt;       atomic_dec(&amp;sched-&gt;hw_rq_count);<br>
&gt;      &gt; +     atomic_dec(&amp;sched-&gt;num_jobs);<br>
&gt;      &gt;       drm_sched_fence_finished(s_fence);<br>
&gt;      &gt;<br>
&gt;      &gt;       trace_drm_sched_process_job(s_fence);<br>
&gt;      &gt; @@ -935,6 +937,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,<br>
&gt;      &gt;       INIT_LIST_HEAD(&amp;sched-&gt;ring_mirror_list);<br>
&gt;      &gt;       spin_lock_init(&amp;sched-&gt;job_list_lock);<br>
&gt;      &gt;       atomic_set(&amp;sched-&gt;hw_rq_count, 0);<br>
&gt;      &gt; +     atomic_set(&amp;sched-&gt;num_jobs, 0);<br>
&gt;      &gt;       atomic64_set(&amp;sched-&gt;job_id_count, 0);<br>
&gt;      &gt;<br>
&gt;      &gt;       /* Each scheduler will run on a seperate kernel thread */<br>
&gt;      &gt; diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h<br>
&gt;      &gt; index 43e93d6077cf..605bd4ad2397 100644<br>
&gt;      &gt; --- a/include/drm/gpu_scheduler.h<br>
&gt;      &gt; +++ b/include/drm/gpu_scheduler.h<br>
&gt;      &gt; @@ -257,6 +257,7 @@ struct drm_sched_backend_ops {<br>
&gt;      &gt;    * @job_list_lock: lock to protect the ring_mirror_list.<br>
&gt;      &gt;    * @hang_limit: once the hangs by a job crosses this limit then it is marked<br>
&gt;      &gt;    *              guilty and it will be considered for scheduling further.<br>
&gt;      &gt; + * @num_jobs: the number of jobs in queue in the scheduler<br>
&gt;      &gt;    *<br>
&gt;      &gt;    * One scheduler is implemented for each hardware ring.<br>
&gt;      &gt;    */<br>
&gt;      &gt; @@ -274,6 +275,7 @@ struct drm_gpu_scheduler {<br>
&gt;      &gt;       struct list_head                ring_mirror_list;<br>
&gt;      &gt;       spinlock_t                      job_list_lock;<br>
&gt;      &gt;       int                             hang_limit;<br>
&gt;      &gt; +     atomic_t                        num_jobs;<br>
&gt;      &gt;   };<br>
&gt;      &gt;<br>
&gt;      &gt;   int drm_sched_init(struct drm_gpu_scheduler *sched,<br>
&gt;      &gt;<br>
&gt;<br>
</blockquote></div></div>
Zhang, Jerry(Junwei) Aug. 2, 2018, 6:16 a.m. UTC | #5
On 08/02/2018 02:08 PM, Nayan Deshmukh wrote:
>
>
> On Thu, Aug 2, 2018 at 11:29 AM Zhang, Jerry (Junwei) <Jerry.Zhang@amd.com <mailto:Jerry.Zhang@amd.com>> wrote:
>
>     On 08/02/2018 01:50 PM, Nayan Deshmukh wrote:
>      >
>      >
>      > On Thu, Aug 2, 2018 at 10:31 AM Zhang, Jerry (Junwei) <Jerry.Zhang@amd.com <mailto:Jerry.Zhang@amd.com> <mailto:Jerry.Zhang@amd.com <mailto:Jerry.Zhang@amd.com>>> wrote:
>      >
>      >     On 07/12/2018 02:36 PM, Nayan Deshmukh wrote:
>      >      > Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com <mailto:nayan26deshmukh@gmail.com> <mailto:nayan26deshmukh@gmail.com <mailto:nayan26deshmukh@gmail.com>>>
>      >      > ---
>      >      >   drivers/gpu/drm/scheduler/gpu_scheduler.c | 3 +++
>      >      >   include/drm/gpu_scheduler.h               | 2 ++
>      >      >   2 files changed, 5 insertions(+)
>      >      >
>      >      > diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>      >      > index 429b1328653a..3dc1a4f07e3f 100644
>      >      > --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>      >      > +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>      >      > @@ -538,6 +538,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
>      >      >       trace_drm_sched_job(sched_job, entity);
>      >      >
>      >      >       first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
>      >      > +     atomic_inc(&entity->sched->num_jobs);
>      >
>      >     Shall we use hw_rq_count directly or merge them together?
>      >
>      > hw_rq_count is the number of jobs that are currently in the hardware queue as compared to num_jobs which is the number of jobs in the software queue. num_jobs provides a give a better idea of
>     the load
>      > on a scheduler that's why I added that field and used it to decide the scheduler with the least load.
>
>     Thanks for your explanation.
>
>     Then may be more reasonable to move atomic_dec(&sched->num_jobs) after drm_sched_fence_scheduled() or just before atomic_inc(&sched->hw_rq_count).
>     How do you think that?
>
> For now num_jobs also includes jobs that have been pushed to the hardware queue. If I shift it before atomic_inc(&sched->hw_rq_count) then I also need to handle cases when the job was reset and update
> the counter properly. I went for the easier implementation as I felt that num_jobs will correctly represent the load on the scheduler.

Thanks for the additional info.
Yes, it's not a simple change to get right; I just wanted to clarify its real
meaning, since it overlaps a little with hw_rq_count.
Fine for now ;)

>
> But the idea that you suggested can be a potential improvement over what I have done.

Thanks.

Regards,
Jerry
>
> Regards,
> Nayan
>
>     Regards,
>     Jerry
>
>      >
>      > Regards,
>      > Nayan
>      >
>      >
>      >     Regards,
>      >     Jerry
>      >      >
>      >      >       /* first job wakes up scheduler */
>      >      >       if (first) {
>      >      > @@ -818,6 +819,7 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
>      >      >
>      >      >       dma_fence_get(&s_fence->finished);
>      >      >       atomic_dec(&sched->hw_rq_count);
>      >      > +     atomic_dec(&sched->num_jobs);
>      >      >       drm_sched_fence_finished(s_fence);
>      >      >
>      >      >       trace_drm_sched_process_job(s_fence);
>      >      > @@ -935,6 +937,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>      >      >       INIT_LIST_HEAD(&sched->ring_mirror_list);
>      >      >       spin_lock_init(&sched->job_list_lock);
>      >      >       atomic_set(&sched->hw_rq_count, 0);
>      >      > +     atomic_set(&sched->num_jobs, 0);
>      >      >       atomic64_set(&sched->job_id_count, 0);
>      >      >
>      >      >       /* Each scheduler will run on a seperate kernel thread */
>      >      > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>      >      > index 43e93d6077cf..605bd4ad2397 100644
>      >      > --- a/include/drm/gpu_scheduler.h
>      >      > +++ b/include/drm/gpu_scheduler.h
>      >      > @@ -257,6 +257,7 @@ struct drm_sched_backend_ops {
>      >      >    * @job_list_lock: lock to protect the ring_mirror_list.
>      >      >    * @hang_limit: once the hangs by a job crosses this limit then it is marked
>      >      >    *              guilty and it will be considered for scheduling further.
>      >      > + * @num_jobs: the number of jobs in queue in the scheduler
>      >      >    *
>      >      >    * One scheduler is implemented for each hardware ring.
>      >      >    */
>      >      > @@ -274,6 +275,7 @@ struct drm_gpu_scheduler {
>      >      >       struct list_head                ring_mirror_list;
>      >      >       spinlock_t                      job_list_lock;
>      >      >       int                             hang_limit;
>      >      > +     atomic_t                        num_jobs;
>      >      >   };
>      >      >
>      >      >   int drm_sched_init(struct drm_gpu_scheduler *sched,
>      >      >
>      >
>

Patch

diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index 429b1328653a..3dc1a4f07e3f 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -538,6 +538,7 @@  void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
 	trace_drm_sched_job(sched_job, entity);
 
 	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
+	atomic_inc(&entity->sched->num_jobs);
 
 	/* first job wakes up scheduler */
 	if (first) {
@@ -818,6 +819,7 @@  static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
 
 	dma_fence_get(&s_fence->finished);
 	atomic_dec(&sched->hw_rq_count);
+	atomic_dec(&sched->num_jobs);
 	drm_sched_fence_finished(s_fence);
 
 	trace_drm_sched_process_job(s_fence);
@@ -935,6 +937,7 @@  int drm_sched_init(struct drm_gpu_scheduler *sched,
 	INIT_LIST_HEAD(&sched->ring_mirror_list);
 	spin_lock_init(&sched->job_list_lock);
 	atomic_set(&sched->hw_rq_count, 0);
+	atomic_set(&sched->num_jobs, 0);
 	atomic64_set(&sched->job_id_count, 0);
 
 	/* Each scheduler will run on a seperate kernel thread */
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 43e93d6077cf..605bd4ad2397 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -257,6 +257,7 @@  struct drm_sched_backend_ops {
  * @job_list_lock: lock to protect the ring_mirror_list.
  * @hang_limit: once the hangs by a job crosses this limit then it is marked
  *              guilty and it will be considered for scheduling further.
+ * @num_jobs: the number of jobs in queue in the scheduler
  *
  * One scheduler is implemented for each hardware ring.
  */
@@ -274,6 +275,7 @@  struct drm_gpu_scheduler {
 	struct list_head		ring_mirror_list;
 	spinlock_t			job_list_lock;
 	int				hang_limit;
+	atomic_t                        num_jobs;
 };
 
 int drm_sched_init(struct drm_gpu_scheduler *sched,