Message ID | 20180731103736.7813-5-nayan26deshmukh@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Series | drm/scheduler: add dynamic balancing |
On 31.07.2018 at 12:37, Nayan Deshmukh wrote:
> This is the first attempt to move entities between schedulers to
> have dynamic load balancing. We just move entities with no jobs for
> now as moving the ones with jobs will lead to other complications
> like ensuring that the other scheduler does not remove a job from
> the current entity while we are moving.
>
> Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
> ---
>  drivers/gpu/drm/scheduler/gpu_scheduler.c | 25 ++++++++++++++++++++-----
>  1 file changed, 20 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> index c67f65ad8f15..f665a84d48ef 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> @@ -540,6 +540,8 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>  	if (!sched_job)
>  		return NULL;
>
> +	sched_job->sched = sched;
> +	sched_job->s_fence->sched = sched;
>  	while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
>  		if (drm_sched_entity_add_dependency_cb(entity))
>  			return NULL;
> @@ -570,16 +572,29 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>  void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
>  			       struct drm_sched_entity *entity)
>  {
> -	struct drm_gpu_scheduler *sched = sched_job->sched;
> -	bool first = false;
> +	struct drm_gpu_scheduler *sched = entity->rq->sched;

Is the local "sched" variable actually still used?

Might be a good idea to drop that since we are potentially changing the
scheduler/rq.

> +	struct drm_sched_rq *rq = entity->rq;
> +	bool first = false, reschedule, idle;
>
> -	trace_drm_sched_job(sched_job, entity);
> +	idle = entity->last_scheduled == NULL ||
> +	       dma_fence_is_signaled(entity->last_scheduled);
> +	first = spsc_queue_count(&entity->job_queue) == 0;
> +	reschedule = idle && first && (entity->num_rq_list > 1);
> +
> +	if (reschedule) {
> +		rq = drm_sched_entity_get_free_sched(entity);
> +		spin_lock(&entity->rq_lock);
> +		drm_sched_rq_remove_entity(entity->rq, entity);
> +		entity->rq = rq;
> +		spin_unlock(&entity->rq_lock);
> +	}
>
> +	trace_drm_sched_job(sched_job, entity);
>  	atomic_inc(&entity->rq->sched->num_jobs);
>  	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
>
>  	/* first job wakes up scheduler */
> -	if (first) {
> +	if (first || reschedule) {

You can drop that extra check since we can only reschedule when there
weren't any jobs in the entity.

Christian.

>  		/* Add the entity to the run queue */
>  		spin_lock(&entity->rq_lock);
>  		if (!entity->rq) {
> @@ -589,7 +604,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
>  		}
>  		drm_sched_rq_add_entity(entity->rq, entity);
>  		spin_unlock(&entity->rq_lock);
> -		drm_sched_wakeup(sched);
> +		drm_sched_wakeup(entity->rq->sched);
>  	}
>  }
>  EXPORT_SYMBOL(drm_sched_entity_push_job);
On Tue, Jul 31, 2018 at 5:02 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
> On 31.07.2018 at 12:37, Nayan Deshmukh wrote:
> > This is the first attempt to move entities between schedulers to
> > have dynamic load balancing. We just move entities with no jobs for
> > now as moving the ones with jobs will lead to other complications
> > like ensuring that the other scheduler does not remove a job from
> > the current entity while we are moving.
> >
> > Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
> > ---
> >  drivers/gpu/drm/scheduler/gpu_scheduler.c | 25 ++++++++++++++++++++-----
> >  1 file changed, 20 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> > index c67f65ad8f15..f665a84d48ef 100644
> > --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> > +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> > @@ -540,6 +540,8 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity)
> >  	if (!sched_job)
> >  		return NULL;
> >
> > +	sched_job->sched = sched;
> > +	sched_job->s_fence->sched = sched;
> >  	while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
> >  		if (drm_sched_entity_add_dependency_cb(entity))
> >  			return NULL;
> > @@ -570,16 +572,29 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity)
> >  void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
> >  			       struct drm_sched_entity *entity)
> >  {
> > -	struct drm_gpu_scheduler *sched = sched_job->sched;
> > -	bool first = false;
> > +	struct drm_gpu_scheduler *sched = entity->rq->sched;
>
> Is the local "sched" variable actually still used?
>
> Might be a good idea to drop that since we are potentially changing the
> scheduler/rq.

Yes, dropping it is a good idea to avoid confusion. I had kept it for
debugging purposes but forgot to remove it in the end.

> > +	struct drm_sched_rq *rq = entity->rq;
> > +	bool first = false, reschedule, idle;
> >
> > -	trace_drm_sched_job(sched_job, entity);
> > +	idle = entity->last_scheduled == NULL ||
> > +	       dma_fence_is_signaled(entity->last_scheduled);
> > +	first = spsc_queue_count(&entity->job_queue) == 0;
> > +	reschedule = idle && first && (entity->num_rq_list > 1);
> > +
> > +	if (reschedule) {
> > +		rq = drm_sched_entity_get_free_sched(entity);
> > +		spin_lock(&entity->rq_lock);
> > +		drm_sched_rq_remove_entity(entity->rq, entity);
> > +		entity->rq = rq;
> > +		spin_unlock(&entity->rq_lock);
> > +	}
> >
> > +	trace_drm_sched_job(sched_job, entity);
> >  	atomic_inc(&entity->rq->sched->num_jobs);
> >  	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
> >
> >  	/* first job wakes up scheduler */
> > -	if (first) {
> > +	if (first || reschedule) {
>
> You can drop that extra check since we can only reschedule when there
> weren't any jobs in the entity.

Will fix it in v2.

> Christian.
>
> >  		/* Add the entity to the run queue */
> >  		spin_lock(&entity->rq_lock);
> >  		if (!entity->rq) {
> > @@ -589,7 +604,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
> >  		}
> >  		drm_sched_rq_add_entity(entity->rq, entity);
> >  		spin_unlock(&entity->rq_lock);
> > -		drm_sched_wakeup(sched);
> > +		drm_sched_wakeup(entity->rq->sched);
> >  	}
> >  }
> >  EXPORT_SYMBOL(drm_sched_entity_push_job);
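Putting both review comments together, drm_sched_entity_push_job() might look as follows in v2. This is an illustrative sketch reconstructed from the discussion above, not the actual v2 patch; the killed-entity handling elided by the diff context is left as a placeholder:

```c
/* Sketch of drm_sched_entity_push_job() with both review comments
 * applied: the unused local "sched" is dropped, and the wakeup
 * condition checks only "first". The "|| reschedule" check is
 * redundant because a reschedule only happens when the job queue was
 * empty, in which case spsc_queue_push() returns true anyway.
 */
void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
			       struct drm_sched_entity *entity)
{
	struct drm_sched_rq *rq = entity->rq;
	bool first, reschedule, idle;

	/* Only rebalance an idle entity with an empty job queue and
	 * more than one run queue to choose from.
	 */
	idle = entity->last_scheduled == NULL ||
	       dma_fence_is_signaled(entity->last_scheduled);
	first = spsc_queue_count(&entity->job_queue) == 0;
	reschedule = idle && first && (entity->num_rq_list > 1);

	if (reschedule) {
		rq = drm_sched_entity_get_free_sched(entity);
		spin_lock(&entity->rq_lock);
		drm_sched_rq_remove_entity(entity->rq, entity);
		entity->rq = rq;
		spin_unlock(&entity->rq_lock);
	}

	trace_drm_sched_job(sched_job, entity);
	atomic_inc(&entity->rq->sched->num_jobs);
	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);

	/* first job wakes up scheduler */
	if (first) {
		/* Add the entity to the run queue */
		spin_lock(&entity->rq_lock);
		if (!entity->rq) {
			/* ... killed-entity handling as in the existing code ... */
			spin_unlock(&entity->rq_lock);
			return;
		}
		drm_sched_rq_add_entity(entity->rq, entity);
		spin_unlock(&entity->rq_lock);
		drm_sched_wakeup(entity->rq->sched);
	}
}
```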
```diff
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index c67f65ad8f15..f665a84d48ef 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -540,6 +540,8 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 	if (!sched_job)
 		return NULL;
 
+	sched_job->sched = sched;
+	sched_job->s_fence->sched = sched;
 	while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
 		if (drm_sched_entity_add_dependency_cb(entity))
 			return NULL;
@@ -570,16 +572,29 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
 			       struct drm_sched_entity *entity)
 {
-	struct drm_gpu_scheduler *sched = sched_job->sched;
-	bool first = false;
+	struct drm_gpu_scheduler *sched = entity->rq->sched;
+	struct drm_sched_rq *rq = entity->rq;
+	bool first = false, reschedule, idle;
 
-	trace_drm_sched_job(sched_job, entity);
+	idle = entity->last_scheduled == NULL ||
+	       dma_fence_is_signaled(entity->last_scheduled);
+	first = spsc_queue_count(&entity->job_queue) == 0;
+	reschedule = idle && first && (entity->num_rq_list > 1);
+
+	if (reschedule) {
+		rq = drm_sched_entity_get_free_sched(entity);
+		spin_lock(&entity->rq_lock);
+		drm_sched_rq_remove_entity(entity->rq, entity);
+		entity->rq = rq;
+		spin_unlock(&entity->rq_lock);
+	}
 
+	trace_drm_sched_job(sched_job, entity);
 	atomic_inc(&entity->rq->sched->num_jobs);
 	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
 
 	/* first job wakes up scheduler */
-	if (first) {
+	if (first || reschedule) {
 		/* Add the entity to the run queue */
 		spin_lock(&entity->rq_lock);
 		if (!entity->rq) {
@@ -589,7 +604,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
 		}
 		drm_sched_rq_add_entity(entity->rq, entity);
 		spin_unlock(&entity->rq_lock);
-		drm_sched_wakeup(sched);
+		drm_sched_wakeup(entity->rq->sched);
 	}
 }
 EXPORT_SYMBOL(drm_sched_entity_push_job);
```
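The rebalancing above leans on drm_sched_entity_get_free_sched(), which is introduced earlier in this series and not shown in this patch. A minimal sketch of what such a helper might look like, assuming it walks the entity's rq_list and returns the run queue whose scheduler currently has the fewest queued jobs (the same num_jobs counter this patch increments):

```c
/* Hypothetical sketch of the least-loaded-scheduler helper used above.
 * Assumes the entity carries rq_list/num_rq_list and every scheduler
 * tracks its queued jobs in an atomic num_jobs counter.
 */
static struct drm_sched_rq *
drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
{
	struct drm_sched_rq *rq = NULL;
	unsigned int min_jobs = UINT_MAX, num_jobs;
	int i;

	for (i = 0; i < entity->num_rq_list; ++i) {
		/* Pick the run queue whose scheduler has the fewest jobs. */
		num_jobs = atomic_read(&entity->rq_list[i]->sched->num_jobs);
		if (num_jobs < min_jobs) {
			min_jobs = num_jobs;
			rq = entity->rq_list[i];
		}
	}

	return rq;
}
```

Reading num_jobs with atomic_read() is inherently racy, but that is acceptable here: the result only needs to be a reasonable load estimate at push time, not an exact count.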
This is the first attempt to move entities between schedulers to
have dynamic load balancing. We just move entities with no jobs for
now as moving the ones with jobs will lead to other complications
like ensuring that the other scheduler does not remove a job from
the current entity while we are moving.

Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
---
 drivers/gpu/drm/scheduler/gpu_scheduler.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)