diff mbox

[07/24] xen: sched: don't rate limit context switches in case of yields

Message ID 147145430260.25877.10888566175316568591.stgit@Solace.fritz.box (mailing list archive)
State New, archived
Headers show

Commit Message

Dario Faggioli Aug. 17, 2016, 5:18 p.m. UTC
In both Credit1 and Credit2, if a vcpu yields, let it...
well... yield!

In fact, context switch rate limiting has been primarily
introduced to avoid too heavy context switch rate due to
interrupts, and, in general, asynchronous events.

If a vcpu "voluntarily" yields, we really should let it
give up the cpu for a while. For instance, the reason may
be that it's about to start spinning, and there's little
point in forcing a vcpu to spin for (potentially) the
entire rate-limiting period.

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
---
Cc: George Dunlap <george.dunlap@eu.citrix.com>
Cc: Anshul Makkar <anshul.makkar@citrix.com>
---
 xen/common/sched_credit.c  |   20 +++++++++++--------
 xen/common/sched_credit2.c |   47 +++++++++++++++++++++++---------------------
 2 files changed, 37 insertions(+), 30 deletions(-)

Comments

George Dunlap Sept. 20, 2016, 1:32 p.m. UTC | #1
On 17/08/16 18:18, Dario Faggioli wrote:
> In both Credit1 and Credit2, if a vcpu yields, let it...
> well... yield!
> 
> In fact, context switch rate limiting has been primarily
> introduced to avoid too heavy context switch rate due to
> interrupts, and, in general, asynchronous events.
> 
> In a vcpu "voluntarily" yields, we really should let it
> give up the cpu for a while. For instance, the reason may
> be that it's about to start spinning, and there's few
> point in forcing a vcpu to spin for (potentially) the
> entire rate-limiting period.
> 
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
> ---
> Cc: George Dunlap <george.dunlap@eu.citrix.com>
> Cc: Anshul Makkar <anshul.makkar@citrix.com>
> ---
>  xen/common/sched_credit.c  |   20 +++++++++++--------
>  xen/common/sched_credit2.c |   47 +++++++++++++++++++++++---------------------
>  2 files changed, 37 insertions(+), 30 deletions(-)
> 
> diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
> index 3f439a0..ca04732 100644
> --- a/xen/common/sched_credit.c
> +++ b/xen/common/sched_credit.c
> @@ -1771,9 +1771,18 @@ csched_schedule(
>       *   cpu and steal it.
>       */
>  
> -    /* If we have schedule rate limiting enabled, check to see
> -     * how long we've run for. */
> -    if ( !tasklet_work_scheduled
> +    /*
> +     * If we have schedule rate limiting enabled, check to see
> +     * how long we've run for.
> +     *
> +     * If scurr is yielding, however, we don't let rate limiting kick in.
> +     * In fact, it may be the case that scurr is about to spin, and there's
> +     * no point forcing it to do so until rate limiting expires.
> +     *
> +     * While there, take the chance for clearing the yield flag at once.
> +     */
> +    if ( !test_and_clear_bit(CSCHED_FLAG_VCPU_YIELD, &scurr->flags)

It looks like YIELD is implemented by putting it lower in the runqueue
in __runqueue_insert().  But here you're clearing the flag before the
insert happens -- won't this effectively disable yield() for credit1?


> diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
> index 569174b..c8e0ee7 100644
> --- a/xen/common/sched_credit2.c
> +++ b/xen/common/sched_credit2.c
> @@ -2267,36 +2267,40 @@ runq_candidate(struct csched2_runqueue_data *rqd,
>      struct list_head *iter;
>      struct csched2_vcpu *snext = NULL;
>      struct csched2_private *prv = CSCHED2_PRIV(per_cpu(scheduler, cpu));
> -    int yield_bias = 0;
> -
> -    /* Default to current if runnable, idle otherwise */
> -    if ( vcpu_runnable(scurr->vcpu) )
> -    {
> -        /*
> -         * The way we actually take yields into account is like this:
> -         * if scurr is yielding, when comparing its credits with other
> -         * vcpus in the runqueue, act like those other vcpus had yield_bias
> -         * more credits.
> -         */
> -        if ( unlikely(scurr->flags & CSFLAG_vcpu_yield) )
> -            yield_bias = CSCHED2_YIELD_BIAS;
> -
> -        snext = scurr;
> -    }
> -    else
> -        snext = CSCHED2_VCPU(idle_vcpu[cpu]);
> +    /*
> +     * The way we actually take yields into account is like this:
> +     * if scurr is yielding, when comparing its credits with other vcpus in
> +     * the runqueue, act like those other vcpus had yield_bias more credits.
> +     */
> +    int yield_bias = __test_and_clear_bit(__CSFLAG_vcpu_yield, &scurr->flags) ?
> +                     CSCHED2_YIELD_BIAS : 0;
>  
>      /*
>       * Return the current vcpu if it has executed for less than ratelimit.
>       * Adjuststment for the selected vcpu's credit and decision
>       * for how long it will run will be taken in csched2_runtime.
> +     *
> +     * Note that, if scurr is yielding, we don't let rate limiting kick in.
> +     * In fact, it may be the case that scurr is about to spin, and there's
> +     * no point forcing it to do so until rate limiting expires.
> +     *
> +     * To check whether we are yielding, it's enough to look at yield_bias
> +     * (as CSCHED2_YIELD_BIAS can't be zero). Also, note that the yield flag
> +     * has been cleared already above.
>       */
> -    if ( prv->ratelimit_us && !is_idle_vcpu(scurr->vcpu) &&
> +    if ( !yield_bias &&
> +         prv->ratelimit_us && !is_idle_vcpu(scurr->vcpu) &&
>           vcpu_runnable(scurr->vcpu) &&
>           (now - scurr->vcpu->runstate.state_entry_time) <
>            MICROSECS(prv->ratelimit_us) )
>          return scurr;
>  
> +    /* Default to current if runnable, idle otherwise */
> +    if ( vcpu_runnable(scurr->vcpu) )
> +        snext = scurr;
> +    else
> +        snext = CSCHED2_VCPU(idle_vcpu[cpu]);

This looks good, but the code re-organization probably goes better in
the previous patch.  Since you're re-sending anyway, would you mind
moving it there?

I'm not sure the credit2 yield-ratelimit needs to be in a separate
patch; since you're implementing yield in credit2 from scratch you could
just implement it all in one go.  But since you have a patch for credit1
anyway, I think whichever way is fine.

> +
>      list_for_each( iter, &rqd->runq )
>      {
>          struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, runq_elem);
> @@ -2423,7 +2427,8 @@ csched2_schedule(
>       */
>      if ( tasklet_work_scheduled )
>      {
> -        trace_var(TRC_CSCHED2_SCHED_TASKLET, 1, 0,  NULL);
> +        __clear_bit(__CSFLAG_vcpu_yield, &scurr->flags);
> +        trace_var(TRC_CSCHED2_SCHED_TASKLET, 1, 0, NULL);
>          snext = CSCHED2_VCPU(idle_vcpu[cpu]);
>      }
>      else
> @@ -2436,8 +2441,6 @@ csched2_schedule(
>           && vcpu_runnable(current) )
>          __set_bit(__CSFLAG_delayed_runq_add, &scurr->flags);
>  
> -    __clear_bit(__CSFLAG_vcpu_yield, &scurr->flags);
> -

This should probably go in the previous patch though.

Thanks,
 -George
Dario Faggioli Sept. 29, 2016, 4:46 p.m. UTC | #2
On Tue, 2016-09-20 at 14:32 +0100, George Dunlap wrote:
> On 17/08/16 18:18, Dario Faggioli wrote:
> > diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
> > @@ -1771,9 +1771,18 @@ csched_schedule(
> >       *   cpu and steal it.
> >       */
> >  
> > -    /* If we have schedule rate limiting enabled, check to see
> > -     * how long we've run for. */
> > -    if ( !tasklet_work_scheduled
> > +    /*
> > +     * If we have schedule rate limiting enabled, check to see
> > +     * how long we've run for.
> > +     *
> > +     * If scurr is yielding, however, we don't let rate limiting
> > kick in.
> > +     * In fact, it may be the case that scurr is about to spin,
> > and there's
> > +     * no point forcing it to do so until rate limiting expires.
> > +     *
> > +     * While there, take the chance for clearing the yield flag at
> > once.
> > +     */
> > +    if ( !test_and_clear_bit(CSCHED_FLAG_VCPU_YIELD, &scurr-
> > >flags)
> 
> It looks like YIELD is implemented by putting it lower in the
> runqueue
> in __runqueue_insert().  But here you're clearing the flag before the
> insert happens -- won't this effectively disable yield() for credit1?
> 
Yes, I think you're right... I'm not sure how I thought it would work.
:-O

Thanks for pointing this out, will fix in v2.

> > diff --git a/xen/common/sched_credit2.c
> > b/xen/common/sched_credit2.c
> > index 569174b..c8e0ee7 100644
> > --- a/xen/common/sched_credit2.c
> > +++ b/xen/common/sched_credit2.c
> > @@ -2267,36 +2267,40 @@ runq_candidate(struct csched2_runqueue_data
> > *rqd,
> > [...]
> This looks good, but the code re-organization probably goes better in
> the previous patch.  Since you're re-sending anyway, would you mind
> moving it there?
> 
> I'm not sure the credit2 yield-ratelimit needs to be in a separate
> patch; since you're implementing yield in credit2 from scratch you
> could
> just implement it all in one go.  But since you have a patch for
> credit1
> anyway, I think whichever way is fine.
> 
Ok, yes, I'm moving all the Credit2 bits from this patch to the one
that is actually implementing yield in Credit2 from scratch, and
leaving this as the patch that makes Credit1 stop ratelimiting upon
yield.

Thanks and Regards,
Dario
diff mbox

Patch

diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index 3f439a0..ca04732 100644
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -1771,9 +1771,18 @@  csched_schedule(
      *   cpu and steal it.
      */
 
-    /* If we have schedule rate limiting enabled, check to see
-     * how long we've run for. */
-    if ( !tasklet_work_scheduled
+    /*
+     * If we have schedule rate limiting enabled, check to see
+     * how long we've run for.
+     *
+     * If scurr is yielding, however, we don't let rate limiting kick in.
+     * In fact, it may be the case that scurr is about to spin, and there's
+     * no point forcing it to do so until rate limiting expires.
+     *
+     * While there, take the chance for clearing the yield flag at once.
+     */
+    if ( !test_and_clear_bit(CSCHED_FLAG_VCPU_YIELD, &scurr->flags)
+         && !tasklet_work_scheduled
          && prv->ratelimit_us
          && vcpu_runnable(current)
          && !is_idle_vcpu(current)
@@ -1808,11 +1817,6 @@  csched_schedule(
     }
 
     /*
-     * Clear YIELD flag before scheduling out
-     */
-    clear_bit(CSCHED_FLAG_VCPU_YIELD, &scurr->flags);
-
-    /*
      * SMP Load balance:
      *
      * If the next highest priority local runnable VCPU has already eaten
diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 569174b..c8e0ee7 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -2267,36 +2267,40 @@  runq_candidate(struct csched2_runqueue_data *rqd,
     struct list_head *iter;
     struct csched2_vcpu *snext = NULL;
     struct csched2_private *prv = CSCHED2_PRIV(per_cpu(scheduler, cpu));
-    int yield_bias = 0;
-
-    /* Default to current if runnable, idle otherwise */
-    if ( vcpu_runnable(scurr->vcpu) )
-    {
-        /*
-         * The way we actually take yields into account is like this:
-         * if scurr is yielding, when comparing its credits with other
-         * vcpus in the runqueue, act like those other vcpus had yield_bias
-         * more credits.
-         */
-        if ( unlikely(scurr->flags & CSFLAG_vcpu_yield) )
-            yield_bias = CSCHED2_YIELD_BIAS;
-
-        snext = scurr;
-    }
-    else
-        snext = CSCHED2_VCPU(idle_vcpu[cpu]);
+    /*
+     * The way we actually take yields into account is like this:
+     * if scurr is yielding, when comparing its credits with other vcpus in
+     * the runqueue, act like those other vcpus had yield_bias more credits.
+     */
+    int yield_bias = __test_and_clear_bit(__CSFLAG_vcpu_yield, &scurr->flags) ?
+                     CSCHED2_YIELD_BIAS : 0;
 
     /*
      * Return the current vcpu if it has executed for less than ratelimit.
      * Adjuststment for the selected vcpu's credit and decision
      * for how long it will run will be taken in csched2_runtime.
+     *
+     * Note that, if scurr is yielding, we don't let rate limiting kick in.
+     * In fact, it may be the case that scurr is about to spin, and there's
+     * no point forcing it to do so until rate limiting expires.
+     *
+     * To check whether we are yielding, it's enough to look at yield_bias
+     * (as CSCHED2_YIELD_BIAS can't be zero). Also, note that the yield flag
+     * has been cleared already above.
      */
-    if ( prv->ratelimit_us && !is_idle_vcpu(scurr->vcpu) &&
+    if ( !yield_bias &&
+         prv->ratelimit_us && !is_idle_vcpu(scurr->vcpu) &&
          vcpu_runnable(scurr->vcpu) &&
          (now - scurr->vcpu->runstate.state_entry_time) <
           MICROSECS(prv->ratelimit_us) )
         return scurr;
 
+    /* Default to current if runnable, idle otherwise */
+    if ( vcpu_runnable(scurr->vcpu) )
+        snext = scurr;
+    else
+        snext = CSCHED2_VCPU(idle_vcpu[cpu]);
+
     list_for_each( iter, &rqd->runq )
     {
         struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, runq_elem);
@@ -2423,7 +2427,8 @@  csched2_schedule(
      */
     if ( tasklet_work_scheduled )
     {
-        trace_var(TRC_CSCHED2_SCHED_TASKLET, 1, 0,  NULL);
+        __clear_bit(__CSFLAG_vcpu_yield, &scurr->flags);
+        trace_var(TRC_CSCHED2_SCHED_TASKLET, 1, 0, NULL);
         snext = CSCHED2_VCPU(idle_vcpu[cpu]);
     }
     else
@@ -2436,8 +2441,6 @@  csched2_schedule(
          && vcpu_runnable(current) )
         __set_bit(__CSFLAG_delayed_runq_add, &scurr->flags);
 
-    __clear_bit(__CSFLAG_vcpu_yield, &scurr->flags);
-
     ret.migrated = 0;
 
     /* Accounting for non-idle tasks */