
[6/7] xen: credit2: optimize runq_candidate() a little bit

Message ID 149762245830.11899.18267597682474657110.stgit@Solace.fritz.box (mailing list archive)
State New, archived

Commit Message

Dario Faggioli June 16, 2017, 2:14 p.m. UTC
Factor all the checks for whether current is
the idle vcpu into a single check at the top
of the function, and mark that check as
unlikely().

In fact, if current is idle, all the logic
for dealing with yielding, context-switch
rate limiting and soft-affinity is just pure
overhead, and we are better off going
straight to scanning the runq and picking up
a vcpu from there.

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
---
Cc: George Dunlap <george.dunlap@citrix.com>
Cc: Anshul Makkar <anshul.makkar@citrix.com>
---
 xen/common/sched_credit2.c |   22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)
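
For readers skimming the patch: unlikely() is a branch-prediction hint
(Xen defines it on top of GCC's __builtin_expect(), in
xen/include/xen/compiler.h), and the patch pairs it with a goto so the
idle case becomes a cheap early exit past the yield/ratelimit/
soft-affinity logic. Below is a minimal, self-contained sketch of that
pattern; pick_candidate() and its types are illustrative stand-ins,
not the real scheduler code.

#include <stdbool.h>
#include <stdio.h>

/* Essentially how Xen defines the hint. */
#define unlikely(x) __builtin_expect(!!(x), 0)

/* Illustrative stand-in for runq_candidate()'s control flow. */
static int pick_candidate(bool current_is_idle)
{
    int candidate = 0;  /* default: the idle candidate */

    /*
     * Hoisted, hinted check: if current is idle, the yield flag,
     * rate limiting and soft-affinity logic below cannot matter,
     * so skip straight to the runqueue scan.
     */
    if ( unlikely(current_is_idle) )
        goto check_runq;

    /* ... yield flag, rate limiting, soft-affinity handling ... */
    candidate = 1;  /* keep current */

 check_runq:
    /* ... scan the runqueue for a better candidate ... */
    return candidate;
}

int main(void)
{
    printf("candidate: %d\n", pick_candidate(true));
    return 0;
}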

Comments

George Dunlap July 25, 2017, 11:25 a.m. UTC | #1
On 06/16/2017 03:14 PM, Dario Faggioli wrote:
> Factor all the checks for whether current is
> the idle vcpu into a single check at the top
> of the function, and mark that check as
> unlikely().
> 
> In fact, if current is idle, all the logic
> for dealing with yielding, context-switch
> rate limiting and soft-affinity is just pure
> overhead, and we are better off going
> straight to scanning the runq and picking up
> a vcpu from there.
> 
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>

Overall looks good -- great idea.  Just one comment...

> ---
> Cc: George Dunlap <george.dunlap@citrix.com>
> Cc: Anshul Makkar <anshul.makkar@citrix.com>
> ---
>  xen/common/sched_credit2.c |   22 +++++++++++++---------
>  1 file changed, 13 insertions(+), 9 deletions(-)
> 
> diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
> index 5d8f25c..bbda790 100644
> --- a/xen/common/sched_credit2.c
> +++ b/xen/common/sched_credit2.c
> @@ -2634,13 +2634,20 @@ runq_candidate(struct csched2_runqueue_data *rqd,
>                 unsigned int *skipped)
>  {
>      struct list_head *iter;
> -    struct csched2_vcpu *snext = NULL;
> +    struct csched2_vcpu *snext = csched2_vcpu(idle_vcpu[cpu]);
>      struct csched2_private *prv = csched2_priv(per_cpu(scheduler, cpu));
> -    bool yield = __test_and_clear_bit(__CSFLAG_vcpu_yield, &scurr->flags);
> -    bool soft_aff_preempt = false;
> +    bool yield = false, soft_aff_preempt = false;
>  
>      *skipped = 0;
>  
> +    if ( unlikely(is_idle_vcpu(scurr->vcpu)) )
> +    {
> +        snext = scurr;

So first of all, since we set snext above, this should (in theory) be
redundant.  However...

> +        goto check_runq;
> +    }
> +
> +    yield = __test_and_clear_bit(__CSFLAG_vcpu_yield, &scurr->flags);
> +
>      /*
>       * Return the current vcpu if it has executed for less than ratelimit.
>      * Adjustment for the selected vcpu's credit and decision
> @@ -2650,8 +2657,7 @@ runq_candidate(struct csched2_runqueue_data *rqd,
>       * In fact, it may be the case that scurr is about to spin, and there's
>       * no point forcing it to do so until rate limiting expires.
>       */
> -    if ( !yield && prv->ratelimit_us && !is_idle_vcpu(scurr->vcpu) &&
> -         vcpu_runnable(scurr->vcpu) &&
> +    if ( !yield && prv->ratelimit_us && vcpu_runnable(scurr->vcpu) &&
>           (now - scurr->vcpu->runstate.state_entry_time) <
>            MICROSECS(prv->ratelimit_us) )
>      {
> @@ -2672,8 +2678,7 @@ runq_candidate(struct csched2_runqueue_data *rqd,
>      }
>  
>      /* If scurr has a soft-affinity, let's check whether cpu is part of it */
> -    if ( !is_idle_vcpu(scurr->vcpu) &&
> -         has_soft_affinity(scurr->vcpu, scurr->vcpu->cpu_hard_affinity) )
> +    if ( has_soft_affinity(scurr->vcpu, scurr->vcpu->cpu_hard_affinity) )
>      {
>          affinity_balance_cpumask(scurr->vcpu, BALANCE_SOFT_AFFINITY,
>                                   cpumask_scratch);
> @@ -2709,9 +2714,8 @@ runq_candidate(struct csched2_runqueue_data *rqd,
>       */
>      if ( vcpu_runnable(scurr->vcpu) && !soft_aff_preempt )
>          snext = scurr;
> -    else
> -        snext = csched2_vcpu(idle_vcpu[cpu]);

...does moving this up to the declaration buy us anything?

Personally I'm not a fan of the "Set everything at declaration as much
as possible" style (although Jan seems to be).  I think it's easier to
understand what happens here if we leave it as-is rather than having to
go up and see what it is.

I don't feel terribly strongly about it; but I think if we set it at
declaration, we *shouldn't* have the redundant assignment in the "idle
bypass" stanza.  (And if it makes you feel uncomfortable to remove it, I
think that's evidence that "set at declaration" is sub-optimal in this
case.)

Either option (leave unset at declaration and set it explicitly on both
paths, or set it at declaration and leave it unset on all paths except
the 'snext = scurr' here) can have my Reviewed-by.

 -George
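
To make the two options concrete, here is a hypothetical, simplified
rendering of what George describes; struct vcpu, pick_a()/pick_b() and
the keep_scurr flag (standing in for the "runnable and not
soft-affinity preempted" condition) are illustrative, not the real
credit2 types and helpers.

#include <stdbool.h>
#include <stddef.h>

struct vcpu { bool is_idle; };

/* Option A: leave snext unset at declaration, and set it explicitly
 * on both paths (including the idle-bypass one). */
static struct vcpu *pick_a(struct vcpu *scurr, struct vcpu *idle,
                           bool keep_scurr)
{
    struct vcpu *snext = NULL;

    if ( scurr->is_idle )
    {
        snext = scurr;
        goto check_runq;
    }

    if ( keep_scurr )
        snext = scurr;
    else
        snext = idle;

 check_runq:
    /* ... scan the runqueue, possibly replacing snext ... */
    return snext;
}

/* Option B: initialize snext to the idle vcpu at declaration, and
 * assign it again only where a better choice is actually made. */
static struct vcpu *pick_b(struct vcpu *scurr, struct vcpu *idle,
                           bool keep_scurr)
{
    struct vcpu *snext = idle;

    if ( scurr->is_idle )
        goto check_runq;  /* snext is already the idle vcpu */

    if ( keep_scurr )
        snext = scurr;

 check_runq:
    /* ... scan the runqueue, possibly replacing snext ... */
    return snext;
}

int main(void)
{
    struct vcpu idle = { true }, cur = { false };

    /* Both options agree here; exit status 0 means they matched. */
    return pick_a(&cur, &idle, true) == pick_b(&cur, &idle, true) ? 0 : 1;
}

Either way the later runqueue scan can still override the choice, which
is why the two variants are behaviorally equivalent and George offers
his Reviewed-by for either.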

Patch

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 5d8f25c..bbda790 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -2634,13 +2634,20 @@ runq_candidate(struct csched2_runqueue_data *rqd,
                unsigned int *skipped)
 {
     struct list_head *iter;
-    struct csched2_vcpu *snext = NULL;
+    struct csched2_vcpu *snext = csched2_vcpu(idle_vcpu[cpu]);
     struct csched2_private *prv = csched2_priv(per_cpu(scheduler, cpu));
-    bool yield = __test_and_clear_bit(__CSFLAG_vcpu_yield, &scurr->flags);
-    bool soft_aff_preempt = false;
+    bool yield = false, soft_aff_preempt = false;
 
     *skipped = 0;
 
+    if ( unlikely(is_idle_vcpu(scurr->vcpu)) )
+    {
+        snext = scurr;
+        goto check_runq;
+    }
+
+    yield = __test_and_clear_bit(__CSFLAG_vcpu_yield, &scurr->flags);
+
     /*
      * Return the current vcpu if it has executed for less than ratelimit.
     * Adjustment for the selected vcpu's credit and decision
@@ -2650,8 +2657,7 @@ runq_candidate(struct csched2_runqueue_data *rqd,
      * In fact, it may be the case that scurr is about to spin, and there's
      * no point forcing it to do so until rate limiting expires.
      */
-    if ( !yield && prv->ratelimit_us && !is_idle_vcpu(scurr->vcpu) &&
-         vcpu_runnable(scurr->vcpu) &&
+    if ( !yield && prv->ratelimit_us && vcpu_runnable(scurr->vcpu) &&
          (now - scurr->vcpu->runstate.state_entry_time) <
           MICROSECS(prv->ratelimit_us) )
     {
@@ -2672,8 +2678,7 @@ runq_candidate(struct csched2_runqueue_data *rqd,
     }
 
     /* If scurr has a soft-affinity, let's check whether cpu is part of it */
-    if ( !is_idle_vcpu(scurr->vcpu) &&
-         has_soft_affinity(scurr->vcpu, scurr->vcpu->cpu_hard_affinity) )
+    if ( has_soft_affinity(scurr->vcpu, scurr->vcpu->cpu_hard_affinity) )
     {
         affinity_balance_cpumask(scurr->vcpu, BALANCE_SOFT_AFFINITY,
                                  cpumask_scratch);
@@ -2709,9 +2714,8 @@ runq_candidate(struct csched2_runqueue_data *rqd,
      */
     if ( vcpu_runnable(scurr->vcpu) && !soft_aff_preempt )
         snext = scurr;
-    else
-        snext = csched2_vcpu(idle_vcpu[cpu]);
 
+ check_runq:
     list_for_each( iter, &rqd->runq )
     {
         struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, runq_elem);