diff mbox series

[4/4] drm/scheduler: move idle entities to scheduler with less load

Message ID 20180731103736.7813-5-nayan26deshmukh@gmail.com (mailing list archive)
State New, archived
Headers show
Series drm/scheduler: add dynamic balancing | expand

Commit Message

Nayan Deshmukh July 31, 2018, 10:37 a.m. UTC
This is the first attempt to move entities between schedulers to
have dynamic load balancing. We only move entities with no jobs for
now, as moving the ones with jobs will lead to other complications,
such as ensuring that the other scheduler does not remove a job from
the current entity while we are moving it.

Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
---
 drivers/gpu/drm/scheduler/gpu_scheduler.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

Comments

Christian König July 31, 2018, 11:32 a.m. UTC | #1
Am 31.07.2018 um 12:37 schrieb Nayan Deshmukh:
> This is the first attempt to move entities between schedulers to
> have dynamic load balancing. We just move entities with no jobs for
> now as moving the ones with jobs will lead to other compilcations
> like ensuring that the other scheduler does not remove a job from
> the current entity while we are moving.
>
> Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
> ---
>   drivers/gpu/drm/scheduler/gpu_scheduler.c | 25 ++++++++++++++++++++-----
>   1 file changed, 20 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> index c67f65ad8f15..f665a84d48ef 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> @@ -540,6 +540,8 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>   	if (!sched_job)
>   		return NULL;
>   
> +	sched_job->sched = sched;
> +	sched_job->s_fence->sched = sched;
>   	while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
>   		if (drm_sched_entity_add_dependency_cb(entity))
>   			return NULL;
> @@ -570,16 +572,29 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>   void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
>   			       struct drm_sched_entity *entity)
>   {
> -	struct drm_gpu_scheduler *sched = sched_job->sched;
> -	bool first = false;
> +	struct drm_gpu_scheduler *sched = entity->rq->sched;

Is the local "sched" variable actually still used?

Might be a good idea to drop that since we are potentially changing the
scheduler/rq.


> +	struct drm_sched_rq *rq = entity->rq;
> +	bool first = false, reschedule, idle;
>   
> -	trace_drm_sched_job(sched_job, entity);
> +	idle = entity->last_scheduled == NULL ||
> +		dma_fence_is_signaled(entity->last_scheduled);
> +	first = spsc_queue_count(&entity->job_queue) == 0;
> +	reschedule = idle && first && (entity->num_rq_list > 1);
> +
> +	if (reschedule) {
> +		rq = drm_sched_entity_get_free_sched(entity);
> +		spin_lock(&entity->rq_lock);
> +		drm_sched_rq_remove_entity(entity->rq, entity);
> +		entity->rq = rq;
> +		spin_unlock(&entity->rq_lock);
> +	}
>   
> +	trace_drm_sched_job(sched_job, entity);
>   	atomic_inc(&entity->rq->sched->num_jobs);
>   	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
>   
>   	/* first job wakes up scheduler */
> -	if (first) {
> +	if (first || reschedule) {

You can drop that extra check since we can only reschedule when there
weren't any jobs in the entity.

Christian.

>   		/* Add the entity to the run queue */
>   		spin_lock(&entity->rq_lock);
>   		if (!entity->rq) {
> @@ -589,7 +604,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
>   		}
>   		drm_sched_rq_add_entity(entity->rq, entity);
>   		spin_unlock(&entity->rq_lock);
> -		drm_sched_wakeup(sched);
> +		drm_sched_wakeup(entity->rq->sched);
>   	}
>   }
>   EXPORT_SYMBOL(drm_sched_entity_push_job);
Nayan Deshmukh July 31, 2018, 11:38 a.m. UTC | #2
On Tue, Jul 31, 2018 at 5:02 PM Christian König <
ckoenig.leichtzumerken@gmail.com> wrote:

> Am 31.07.2018 um 12:37 schrieb Nayan Deshmukh:
> > This is the first attempt to move entities between schedulers to
> > have dynamic load balancing. We just move entities with no jobs for
> > now as moving the ones with jobs will lead to other compilcations
> > like ensuring that the other scheduler does not remove a job from
> > the current entity while we are moving.
> >
> > Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
> > ---
> >   drivers/gpu/drm/scheduler/gpu_scheduler.c | 25
> ++++++++++++++++++++-----
> >   1 file changed, 20 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> > index c67f65ad8f15..f665a84d48ef 100644
> > --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> > +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> > @@ -540,6 +540,8 @@ drm_sched_entity_pop_job(struct drm_sched_entity
> *entity)
> >       if (!sched_job)
> >               return NULL;
> >
> > +     sched_job->sched = sched;
> > +     sched_job->s_fence->sched = sched;
> >       while ((entity->dependency = sched->ops->dependency(sched_job,
> entity)))
> >               if (drm_sched_entity_add_dependency_cb(entity))
> >                       return NULL;
> > @@ -570,16 +572,29 @@ drm_sched_entity_pop_job(struct drm_sched_entity
> *entity)
> >   void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
> >                              struct drm_sched_entity *entity)
> >   {
> > -     struct drm_gpu_scheduler *sched = sched_job->sched;
> > -     bool first = false;
> > +     struct drm_gpu_scheduler *sched = entity->rq->sched;
>
> Is the local "sched" variable actually still used?
>
> Might be a good idea to drop that since we potentially changing the
> scheduler/rq.
>
Yes dropping it is a good idea to avoid confusion. I had kept it for
debugging purpose but forgot to remove it in the end.


>
> > +     struct drm_sched_rq *rq = entity->rq;
> > +     bool first = false, reschedule, idle;
> >
> > -     trace_drm_sched_job(sched_job, entity);
> > +     idle = entity->last_scheduled == NULL ||
> > +             dma_fence_is_signaled(entity->last_scheduled);
> > +     first = spsc_queue_count(&entity->job_queue) == 0;
> > +     reschedule = idle && first && (entity->num_rq_list > 1);
> > +
> > +     if (reschedule) {
> > +             rq = drm_sched_entity_get_free_sched(entity);
> > +             spin_lock(&entity->rq_lock);
> > +             drm_sched_rq_remove_entity(entity->rq, entity);
> > +             entity->rq = rq;
> > +             spin_unlock(&entity->rq_lock);
> > +     }
> >
> > +     trace_drm_sched_job(sched_job, entity);
> >       atomic_inc(&entity->rq->sched->num_jobs);
> >       first = spsc_queue_push(&entity->job_queue,
> &sched_job->queue_node);
> >
> >       /* first job wakes up scheduler */
> > -     if (first) {
> > +     if (first || reschedule) {
>
> You can drop that extra check since we can only rescheduler when there
> wasn't any jobs in the entity.

Will fix it in v2.

>
> Christian.
>
> >               /* Add the entity to the run queue */
> >               spin_lock(&entity->rq_lock);
> >               if (!entity->rq) {
> > @@ -589,7 +604,7 @@ void drm_sched_entity_push_job(struct drm_sched_job
> *sched_job,
> >               }
> >               drm_sched_rq_add_entity(entity->rq, entity);
> >               spin_unlock(&entity->rq_lock);
> > -             drm_sched_wakeup(sched);
> > +             drm_sched_wakeup(entity->rq->sched);
> >       }
> >   }
> >   EXPORT_SYMBOL(drm_sched_entity_push_job);
>
>
<div dir="ltr"><br><br><div class="gmail_quote"><div dir="ltr">On Tue, Jul 31, 2018 at 5:02 PM Christian König &lt;<a href="mailto:ckoenig.leichtzumerken@gmail.com">ckoenig.leichtzumerken@gmail.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Am 31.07.2018 um 12:37 schrieb Nayan Deshmukh:<br>
&gt; This is the first attempt to move entities between schedulers to<br>
&gt; have dynamic load balancing. We just move entities with no jobs for<br>
&gt; now as moving the ones with jobs will lead to other compilcations<br>
&gt; like ensuring that the other scheduler does not remove a job from<br>
&gt; the current entity while we are moving.<br>
&gt;<br>
&gt; Signed-off-by: Nayan Deshmukh &lt;<a href="mailto:nayan26deshmukh@gmail.com" target="_blank">nayan26deshmukh@gmail.com</a>&gt;<br>
&gt; ---<br>
&gt;   drivers/gpu/drm/scheduler/gpu_scheduler.c | 25 ++++++++++++++++++++-----<br>
&gt;   1 file changed, 20 insertions(+), 5 deletions(-)<br>
&gt;<br>
&gt; diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
&gt; index c67f65ad8f15..f665a84d48ef 100644<br>
&gt; --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
&gt; +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
&gt; @@ -540,6 +540,8 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity)<br>
&gt;       if (!sched_job)<br>
&gt;               return NULL;<br>
&gt;   <br>
&gt; +     sched_job-&gt;sched = sched;<br>
&gt; +     sched_job-&gt;s_fence-&gt;sched = sched;<br>
&gt;       while ((entity-&gt;dependency = sched-&gt;ops-&gt;dependency(sched_job, entity)))<br>
&gt;               if (drm_sched_entity_add_dependency_cb(entity))<br>
&gt;                       return NULL;<br>
&gt; @@ -570,16 +572,29 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity)<br>
&gt;   void drm_sched_entity_push_job(struct drm_sched_job *sched_job,<br>
&gt;                              struct drm_sched_entity *entity)<br>
&gt;   {<br>
&gt; -     struct drm_gpu_scheduler *sched = sched_job-&gt;sched;<br>
&gt; -     bool first = false;<br>
&gt; +     struct drm_gpu_scheduler *sched = entity-&gt;rq-&gt;sched;<br>
<br>
Is the local &quot;sched&quot; variable actually still used?<br>
<br>
Might be a good idea to drop that since we potentially changing the <br>
scheduler/rq.<br></blockquote><div>Yes dropping it is a good idea to avoid confusion. I had kept it for debugging purpose but forgot to remove it in the end. <br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
&gt; +     struct drm_sched_rq *rq = entity-&gt;rq;<br>
&gt; +     bool first = false, reschedule, idle;<br>
&gt;   <br>
&gt; -     trace_drm_sched_job(sched_job, entity);<br>
&gt; +     idle = entity-&gt;last_scheduled == NULL ||<br>
&gt; +             dma_fence_is_signaled(entity-&gt;last_scheduled);<br>
&gt; +     first = spsc_queue_count(&amp;entity-&gt;job_queue) == 0;<br>
&gt; +     reschedule = idle &amp;&amp; first &amp;&amp; (entity-&gt;num_rq_list &gt; 1);<br>
&gt; +<br>
&gt; +     if (reschedule) {<br>
&gt; +             rq = drm_sched_entity_get_free_sched(entity);<br>
&gt; +             spin_lock(&amp;entity-&gt;rq_lock);<br>
&gt; +             drm_sched_rq_remove_entity(entity-&gt;rq, entity);<br>
&gt; +             entity-&gt;rq = rq;<br>
&gt; +             spin_unlock(&amp;entity-&gt;rq_lock);<br>
&gt; +     }<br>
&gt;   <br>
&gt; +     trace_drm_sched_job(sched_job, entity);<br>
&gt;       atomic_inc(&amp;entity-&gt;rq-&gt;sched-&gt;num_jobs);<br>
&gt;       first = spsc_queue_push(&amp;entity-&gt;job_queue, &amp;sched_job-&gt;queue_node);<br>
&gt;   <br>
&gt;       /* first job wakes up scheduler */<br>
&gt; -     if (first) {<br>
&gt; +     if (first || reschedule) {<br>
<br>
You can drop that extra check since we can only rescheduler when there <br>
wasn&#39;t any jobs in the entity. </blockquote><div>Will fix it in v2. <br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
Christian.<br>
<br>
&gt;               /* Add the entity to the run queue */<br>
&gt;               spin_lock(&amp;entity-&gt;rq_lock);<br>
&gt;               if (!entity-&gt;rq) {<br>
&gt; @@ -589,7 +604,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,<br>
&gt;               }<br>
&gt;               drm_sched_rq_add_entity(entity-&gt;rq, entity);<br>
&gt;               spin_unlock(&amp;entity-&gt;rq_lock);<br>
&gt; -             drm_sched_wakeup(sched);<br>
&gt; +             drm_sched_wakeup(entity-&gt;rq-&gt;sched);<br>
&gt;       }<br>
&gt;   }<br>
&gt;   EXPORT_SYMBOL(drm_sched_entity_push_job);<br>
<br>
</blockquote></div></div>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index c67f65ad8f15..f665a84d48ef 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -540,6 +540,8 @@  drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 	if (!sched_job)
 		return NULL;
 
+	sched_job->sched = sched;
+	sched_job->s_fence->sched = sched;
 	while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
 		if (drm_sched_entity_add_dependency_cb(entity))
 			return NULL;
@@ -570,16 +572,29 @@  drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
 			       struct drm_sched_entity *entity)
 {
-	struct drm_gpu_scheduler *sched = sched_job->sched;
-	bool first = false;
+	struct drm_gpu_scheduler *sched = entity->rq->sched;
+	struct drm_sched_rq *rq = entity->rq;
+	bool first = false, reschedule, idle;
 
-	trace_drm_sched_job(sched_job, entity);
+	idle = entity->last_scheduled == NULL ||
+		dma_fence_is_signaled(entity->last_scheduled);
+	first = spsc_queue_count(&entity->job_queue) == 0;
+	reschedule = idle && first && (entity->num_rq_list > 1);
+
+	if (reschedule) {
+		rq = drm_sched_entity_get_free_sched(entity);
+		spin_lock(&entity->rq_lock);
+		drm_sched_rq_remove_entity(entity->rq, entity);
+		entity->rq = rq;
+		spin_unlock(&entity->rq_lock);
+	}
 
+	trace_drm_sched_job(sched_job, entity);
 	atomic_inc(&entity->rq->sched->num_jobs);
 	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
 
 	/* first job wakes up scheduler */
-	if (first) {
+	if (first || reschedule) {
 		/* Add the entity to the run queue */
 		spin_lock(&entity->rq_lock);
 		if (!entity->rq) {
@@ -589,7 +604,7 @@  void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
 		}
 		drm_sched_rq_add_entity(entity->rq, entity);
 		spin_unlock(&entity->rq_lock);
-		drm_sched_wakeup(sched);
+		drm_sched_wakeup(entity->rq->sched);
 	}
 }
 EXPORT_SYMBOL(drm_sched_entity_push_job);