[v3] sched/deadline: Fix the intention to re-evalute tick dependency for offline cpu
diff mbox

Message ID 20160831092125.GA10626@twins.programming.kicks-ass.net
State New
Headers show

Commit Message

Peter Zijlstra Aug. 31, 2016, 9:21 a.m. UTC
On Fri, Aug 19, 2016 at 09:56:59PM +0800, Wanpeng Li wrote:
> 2016-08-19 21:25 GMT+08:00 Peter Zijlstra <peterz@infradead.org>:
> > On Fri, Aug 12, 2016 at 05:24:03PM +0800, Wanpeng Li wrote:
> >
> >> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> >> index d091f4a..ce0fb00 100644
> >> --- a/kernel/sched/deadline.c
> >> +++ b/kernel/sched/deadline.c
> >> @@ -641,6 +641,11 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
> >>               goto unlock;
> >>       }
> >>
> >> +#ifdef CONFIG_SMP
> >> +     if (unlikely(!rq->online))
> >> +             goto offline;
> >> +#endif
> >> +
> >>       enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
> >>       if (dl_task(rq->curr))
> >>               check_preempt_curr_dl(rq, p, 0);
> >> @@ -648,6 +653,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
> >>               resched_curr(rq);
> >>
> >>  #ifdef CONFIG_SMP
> >> +offline:
> >>       /*
> >>        * Perform balancing operations here; after the replenishments.  We
> >>        * cannot drop rq->lock before this, otherwise the assertion in
> >> @@ -659,6 +665,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
> >>        * XXX figure out if select_task_rq_dl() deals with offline cpus.
> >>        */
> >>       if (unlikely(!rq->online)) {
> >> +             replenish_dl_entity(dl_se, dl_se);
> >>               lockdep_unpin_lock(&rq->lock, rf.cookie);
> >>               rq = dl_task_offline_migration(rq, p);
> >
> > So I don't like this, even if it magically works. With this we end up
> > calling dl_task_offline_migration() -> deactivate_task() while the task
> > isn't on the runqueue at all.
> 
> So how about v1, it also works :), https://lkml.org/lkml/2016/8/10/898

Does something like so work? Notice what we have preemption disabled (by
virtue of having pi_lock and rq->lock taken) and thus cannot hotplug.
Therefore if we notice a rq not being online, it must stay that way,
equally any online rq must also stay that way.

This means we can fold the two online tests you had and simply do the rq
switch beforehand.

Completely untested...

---
 kernel/sched/deadline.c | 46 ++++++++++++++++++----------------------------
 1 file changed, 18 insertions(+), 28 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Wanpeng Li Aug. 31, 2016, 10:31 a.m. UTC | #1
2016-08-31 17:21 GMT+08:00 Peter Zijlstra <peterz@infradead.org>:
> On Fri, Aug 19, 2016 at 09:56:59PM +0800, Wanpeng Li wrote:
>> 2016-08-19 21:25 GMT+08:00 Peter Zijlstra <peterz@infradead.org>:
>> > On Fri, Aug 12, 2016 at 05:24:03PM +0800, Wanpeng Li wrote:
>> >
>> >> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
>> >> index d091f4a..ce0fb00 100644
>> >> --- a/kernel/sched/deadline.c
>> >> +++ b/kernel/sched/deadline.c
>> >> @@ -641,6 +641,11 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
>> >>               goto unlock;
>> >>       }
>> >>
>> >> +#ifdef CONFIG_SMP
>> >> +     if (unlikely(!rq->online))
>> >> +             goto offline;
>> >> +#endif
>> >> +
>> >>       enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
>> >>       if (dl_task(rq->curr))
>> >>               check_preempt_curr_dl(rq, p, 0);
>> >> @@ -648,6 +653,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
>> >>               resched_curr(rq);
>> >>
>> >>  #ifdef CONFIG_SMP
>> >> +offline:
>> >>       /*
>> >>        * Perform balancing operations here; after the replenishments.  We
>> >>        * cannot drop rq->lock before this, otherwise the assertion in
>> >> @@ -659,6 +665,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
>> >>        * XXX figure out if select_task_rq_dl() deals with offline cpus.
>> >>        */
>> >>       if (unlikely(!rq->online)) {
>> >> +             replenish_dl_entity(dl_se, dl_se);
>> >>               lockdep_unpin_lock(&rq->lock, rf.cookie);
>> >>               rq = dl_task_offline_migration(rq, p);
>> >
>> > So I don't like this, even if it magically works. With this we end up
>> > calling dl_task_offline_migration() -> deactivate_task() while the task
>> > isn't on the runqueue at all.
>>
>> So how about v1, it also works :), https://lkml.org/lkml/2016/8/10/898
>
> Does something like so work? Notice what we have preemption disabled (by
> virtue of having pi_lock and rq->lock taken) and thus cannot hotplug.
> Therefore if we notice a rq not being online, it must stay that way,
> equally any online rq must also stay that way.
>
> This means we can fold the two online tests you had and simply do the rq
> switch beforehand.

Thanks for the proposal Peterz! It works and I just sent out v4. :)

Regards,
Wanpeng Li

>
> Completely untested...
>
> ---
>  kernel/sched/deadline.c | 46 ++++++++++++++++++----------------------------
>  1 file changed, 18 insertions(+), 28 deletions(-)
>
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index d091f4a95416..bcade08772a8 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -243,10 +243,8 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
>  static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
>  {
>         struct rq *later_rq = NULL;
> -       bool fallback = false;
>
>         later_rq = find_lock_later_rq(p, rq);
> -
>         if (!later_rq) {
>                 int cpu;
>
> @@ -254,7 +252,6 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
>                  * If we cannot preempt any rq, fall back to pick any
>                  * online cpu.
>                  */
> -               fallback = true;
>                 cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p));
>                 if (cpu >= nr_cpu_ids) {
>                         /*
> @@ -274,16 +271,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
>                 double_lock_balance(rq, later_rq);
>         }
>
> -       /*
> -        * By now the task is replenished and enqueued; migrate it.
> -        */
> -       deactivate_task(rq, p, 0);
>         set_task_cpu(p, later_rq->cpu);
> -       activate_task(later_rq, p, 0);
> -
> -       if (!fallback)
> -               resched_curr(later_rq);
> -
>         double_unlock_balance(later_rq, rq);
>
>         return later_rq;
> @@ -641,29 +629,31 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
>                 goto unlock;
>         }
>
> -       enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
> -       if (dl_task(rq->curr))
> -               check_preempt_curr_dl(rq, p, 0);
> -       else
> -               resched_curr(rq);
> -
>  #ifdef CONFIG_SMP
> -       /*
> -        * Perform balancing operations here; after the replenishments.  We
> -        * cannot drop rq->lock before this, otherwise the assertion in
> -        * start_dl_timer() about not missing updates is not true.
> -        *
> -        * If we find that the rq the task was on is no longer available, we
> -        * need to select a new rq.
> -        *
> -        * XXX figure out if select_task_rq_dl() deals with offline cpus.
> -        */
>         if (unlikely(!rq->online)) {
> +               /*
> +                * If the runqueue is no longer available, migrate the
> +                * task elsewhere. This necessarily changes rq.
> +                */
>                 lockdep_unpin_lock(&rq->lock, rf.cookie);
>                 rq = dl_task_offline_migration(rq, p);
>                 rf.cookie = lockdep_pin_lock(&rq->lock);
> +
> +               /*
> +                * Now that the task has been migrated to the new RQ and we
> +                * have that locked, proceed as normal and enqueue the task
> +                * there.
> +                */
>         }
> +#endif
>
> +       enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
> +       if (dl_task(rq->curr))
> +               check_preempt_curr_dl(rq, p, 0);
> +       else
> +               resched_curr(rq);
> +
> +#ifdef CONFIG_SMP
>         /*
>          * Queueing this task back might have overloaded rq, check if we need
>          * to kick someone away.

Patch
diff mbox

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index d091f4a95416..bcade08772a8 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -243,10 +243,8 @@  static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
 static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
 {
 	struct rq *later_rq = NULL;
-	bool fallback = false;
 
 	later_rq = find_lock_later_rq(p, rq);
-
 	if (!later_rq) {
 		int cpu;
 
@@ -254,7 +252,6 @@  static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
 		 * If we cannot preempt any rq, fall back to pick any
 		 * online cpu.
 		 */
-		fallback = true;
 		cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p));
 		if (cpu >= nr_cpu_ids) {
 			/*
@@ -274,16 +271,7 @@  static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
 		double_lock_balance(rq, later_rq);
 	}
 
-	/*
-	 * By now the task is replenished and enqueued; migrate it.
-	 */
-	deactivate_task(rq, p, 0);
 	set_task_cpu(p, later_rq->cpu);
-	activate_task(later_rq, p, 0);
-
-	if (!fallback)
-		resched_curr(later_rq);
-
 	double_unlock_balance(later_rq, rq);
 
 	return later_rq;
@@ -641,29 +629,31 @@  static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 		goto unlock;
 	}
 
-	enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
-	if (dl_task(rq->curr))
-		check_preempt_curr_dl(rq, p, 0);
-	else
-		resched_curr(rq);
-
 #ifdef CONFIG_SMP
-	/*
-	 * Perform balancing operations here; after the replenishments.  We
-	 * cannot drop rq->lock before this, otherwise the assertion in
-	 * start_dl_timer() about not missing updates is not true.
-	 *
-	 * If we find that the rq the task was on is no longer available, we
-	 * need to select a new rq.
-	 *
-	 * XXX figure out if select_task_rq_dl() deals with offline cpus.
-	 */
 	if (unlikely(!rq->online)) {
+		/*
+		 * If the runqueue is no longer available, migrate the
+		 * task elsewhere. This necessarily changes rq.
+		 */
 		lockdep_unpin_lock(&rq->lock, rf.cookie);
 		rq = dl_task_offline_migration(rq, p);
 		rf.cookie = lockdep_pin_lock(&rq->lock);
+
+		/*
+		 * Now that the task has been migrated to the new RQ and we
+		 * have that locked, proceed as normal and enqueue the task
+		 * there.
+		 */
 	}
+#endif
 
+	enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
+	if (dl_task(rq->curr))
+		check_preempt_curr_dl(rq, p, 0);
+	else
+		resched_curr(rq);
+
+#ifdef CONFIG_SMP
 	/*
 	 * Queueing this task back might have overloaded rq, check if we need
 	 * to kick someone away.