diff mbox series

[RFC,07/49] xen/sched: fix credit2 smt idle handling

Message ID 20190329150934.17694-8-jgross@suse.com (mailing list archive)
State Superseded
Headers show
Series xen: add core scheduling support | expand

Commit Message

Jürgen Groß March 29, 2019, 3:08 p.m. UTC
Credit2's smt_idle_mask_set() and smt_idle_mask_clear() are used to
identify idle cores where vcpus can be moved to. A core is thought to
be idle when all siblings are known to have the idle vcpu running on
them.

Unfortunately the information of a vcpu running on a cpu is per
runqueue. So in case not all siblings are in the same runqueue a core
will never be regarded to be idle, as the sibling not in the runqueue
is never known to run the idle vcpu.

Use a credit2 specific cpumask of siblings with only those cpus
being marked which are in the same runqueue as the cpu in question.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- use credit2 per-cpu specific sibling mask
---
 xen/common/sched_credit2.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

Comments

Dario Faggioli March 29, 2019, 6:22 p.m. UTC | #1
On Fri, 2019-03-29 at 16:08 +0100, Juergen Gross wrote:
> Credit2's smt_idle_mask_set() and smt_idle_mask_clear() are used to
> identify idle cores where vcpus can be moved to. A core is thought to
> be idle when all siblings are known to have the idle vcpu running on
> them.
> 
> Unfortunately the information of a vcpu running on a cpu is per
> runqueue. So in case not all siblings are in the same runqueue a core
> will never be regarded to be idle, as the sibling not in the runqueue
> is never known to run the idle vcpu.
> 
> Use a credit2 specific cpumask of siblings with only those cpus
> being marked which are in the same runqueue as the cpu in question.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
> V2:
> - use credit2 per-cpu specific sibling mask
> ---
>
FYI, I've sent my RoB to (I think) patches 1 to 7 in the other threads
where they've been posted.

Regards,
Dario
diff mbox series

Patch

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 543dc3664d..6958b265fc 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -504,6 +504,7 @@  struct csched2_private {
  * Physical CPU
  */
 struct csched2_pcpu {
+    cpumask_t sibling_mask;            /* Siblings in the same runqueue      */
     int runq_id;
 };
 
@@ -656,7 +657,7 @@  static inline
 void smt_idle_mask_set(unsigned int cpu, const cpumask_t *idlers,
                        cpumask_t *mask)
 {
-    const cpumask_t *cpu_siblings = per_cpu(cpu_sibling_mask, cpu);
+    const cpumask_t *cpu_siblings = &csched2_pcpu(cpu)->sibling_mask;
 
     if ( cpumask_subset(cpu_siblings, idlers) )
         cpumask_or(mask, mask, cpu_siblings);
@@ -668,10 +669,10 @@  void smt_idle_mask_set(unsigned int cpu, const cpumask_t *idlers,
 static inline
 void smt_idle_mask_clear(unsigned int cpu, cpumask_t *mask)
 {
-    const cpumask_t *cpu_siblings = per_cpu(cpu_sibling_mask, cpu);
+    const cpumask_t *cpu_siblings = &csched2_pcpu(cpu)->sibling_mask;
 
     if ( cpumask_subset(cpu_siblings, mask) )
-        cpumask_andnot(mask, mask, per_cpu(cpu_sibling_mask, cpu));
+        cpumask_andnot(mask, mask, cpu_siblings);
 }
 
 /*
@@ -3793,6 +3794,7 @@  init_pdata(struct csched2_private *prv, struct csched2_pcpu *spc,
            unsigned int cpu)
 {
     struct csched2_runqueue_data *rqd;
+    unsigned int rcpu;
 
     ASSERT(rw_is_write_locked(&prv->lock));
     ASSERT(!cpumask_test_cpu(cpu, &prv->initialized));
@@ -3810,12 +3812,23 @@  init_pdata(struct csched2_private *prv, struct csched2_pcpu *spc,
         printk(XENLOG_INFO " First cpu on runqueue, activating\n");
         activate_runqueue(prv, spc->runq_id);
     }
-    
+
     __cpumask_set_cpu(cpu, &rqd->idle);
     __cpumask_set_cpu(cpu, &rqd->active);
     __cpumask_set_cpu(cpu, &prv->initialized);
     __cpumask_set_cpu(cpu, &rqd->smt_idle);
 
+    /* On the boot cpu we are called before cpu_sibling_mask has been set up. */
+    if ( cpu == 0 && system_state < SYS_STATE_active )
+        __cpumask_set_cpu(cpu, &csched2_pcpu(cpu)->sibling_mask);
+    else
+        for_each_cpu ( rcpu, per_cpu(cpu_sibling_mask, cpu) )
+            if ( cpumask_test_cpu(rcpu, &rqd->active) )
+            {
+                __cpumask_set_cpu(cpu, &csched2_pcpu(rcpu)->sibling_mask);
+                __cpumask_set_cpu(rcpu, &csched2_pcpu(cpu)->sibling_mask);
+            }
+
     if ( cpumask_weight(&rqd->active) == 1 )
         rqd->pick_bias = cpu;
 
@@ -3897,6 +3910,7 @@  csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
     struct csched2_private *prv = csched2_priv(ops);
     struct csched2_runqueue_data *rqd;
     struct csched2_pcpu *spc = pcpu;
+    unsigned int rcpu;
 
     write_lock_irqsave(&prv->lock, flags);
 
@@ -3923,6 +3937,9 @@  csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
 
     printk(XENLOG_INFO "Removing cpu %d from runqueue %d\n", cpu, spc->runq_id);
 
+    for_each_cpu ( rcpu, &rqd->active )
+        __cpumask_clear_cpu(cpu, &csched2_pcpu(rcpu)->sibling_mask);
+
     __cpumask_clear_cpu(cpu, &rqd->idle);
     __cpumask_clear_cpu(cpu, &rqd->smt_idle);
     __cpumask_clear_cpu(cpu, &rqd->active);