
[v2,31/48] xen/sched: add support for multiple vcpus per sched unit where missing

Message ID 20190809145833.1020-32-jgross@suse.com (mailing list archive)
State Superseded
Series xen: add core scheduling support

Commit Message

Jürgen Groß Aug. 9, 2019, 2:58 p.m. UTC
In several places support for multiple vcpus per sched unit is still
missing. Add that missing support (with the exception of initial
allocation) and the missing helpers for it.
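
One example of such a helper, taken from the diff below, is
unit_running(), which reports how many of a unit's vcpus are currently
in the running state:

static inline unsigned int unit_running(const struct sched_unit *unit)
{
    return unit->runstate_cnt[RUNSTATE_running];
}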

Signed-off-by: Juergen Gross <jgross@suse.com>
---
RFC V2:
- fix vcpu_runstate_helper()
V1:
- add special handling for idle unit in unit_runnable() and
  unit_runnable_state()
V2:
- handle affinity_broken correctly (Jan Beulich)
---
 xen/common/domain.c        |  5 ++-
 xen/common/schedule.c      | 36 ++++++++++---------
 xen/include/xen/sched-if.h | 88 +++++++++++++++++++++++++++++++++++++---------
 3 files changed, 95 insertions(+), 34 deletions(-)

Comments

Jan Beulich Sept. 11, 2019, 10:43 a.m. UTC | #1
On 09.08.2019 16:58, Juergen Gross wrote:
> V1:
> - add special handling for idle unit in unit_runnable() and
>   unit_runnable_state()

Why was this done? Isn't vcpu_runnable() going to always return
true for idle vCPU-s?

> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -1255,7 +1255,10 @@ int vcpu_reset(struct vcpu *v)
>      v->async_exception_mask = 0;
>      memset(v->async_exception_state, 0, sizeof(v->async_exception_state));
>  #endif
> -    v->affinity_broken = 0;
> +    if ( v->affinity_broken & VCPU_AFFINITY_OVERRIDE )
> +        vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE);
> +    if ( v->affinity_broken & VCPU_AFFINITY_WAIT )
> +        vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_WAIT);

Shouldn't this live in an earlier patch? I guess the same question
is applicable to almost everything here, as also already mentioned
e.g. in the previous patch.

>  static inline void sched_set_res(struct sched_unit *unit,
>                                   struct sched_resource *res)
>  {
> -    unit->vcpu_list->processor = res->processor;
> +    int cpu = cpumask_first(res->cpus);

unsigned int

>  static inline void sched_set_pause_flags_atomic(struct sched_unit *unit,
>                                                  unsigned int bit)
>  {
> -    set_bit(bit, &unit->vcpu_list->pause_flags);
> +    struct vcpu *v;
> +
> +    for_each_sched_unit_vcpu ( unit, v )
> +        set_bit(bit, &v->pause_flags);
>  }
>  
>  static inline void sched_clear_pause_flags_atomic(struct sched_unit *unit,
>                                                    unsigned int bit)
>  {
> -    clear_bit(bit, &unit->vcpu_list->pause_flags);
> +    struct vcpu *v;
> +
> +    for_each_sched_unit_vcpu ( unit, v )
> +        clear_bit(bit, &v->pause_flags);
>  }

The atomicity is (necessarily) limited here - it's atomic for an
individual vCPU, but not atomic for a unit as a whole. If this
is indeed a useful hybrid, then I think it would be nice if there
was a comment next to these functions clarifying under what
conditions use of them is correct without it also being correct
to use their non-atomic counterparts (e.g. due to use of an
external lock).

Jan
Jürgen Groß Sept. 13, 2019, 3:01 p.m. UTC | #2
On 11.09.19 12:43, Jan Beulich wrote:
> On 09.08.2019 16:58, Juergen Gross wrote:
>> V1:
>> - add special handling for idle unit in unit_runnable() and
>>    unit_runnable_state()
> 
> Why was this done? Isn't vcpu_runnable() going to always return
> true for idle vCPU-s?

The problem is the for_each_sched_unit_vcpu() loop handling.

The loop ends as soon as vcpu->sched_unit != unit, but this can be
true for idle vcpus when they are temporarily active on behalf of a
normal domain's unit.
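
To make that concrete, a rough sketch of the loop shape in question
(an illustrative approximation, not necessarily the exact macro from
the series):

#define for_each_sched_unit_vcpu(u, v)                        \
    for ( (v) = (u)->vcpu_list;                               \
          (v) != NULL && (v)->sched_unit == (u);              \
          (v) = (v)->next_in_list )

For an idle vcpu temporarily running on behalf of another unit,
v->sched_unit points at that other unit, so the loop body is never
entered and unit_runnable() would wrongly report the idle unit as not
runnable. Hence the explicit is_idle_unit() short-circuit.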

> 
>> --- a/xen/common/domain.c
>> +++ b/xen/common/domain.c
>> @@ -1255,7 +1255,10 @@ int vcpu_reset(struct vcpu *v)
>>       v->async_exception_mask = 0;
>>       memset(v->async_exception_state, 0, sizeof(v->async_exception_state));
>>   #endif
>> -    v->affinity_broken = 0;
>> +    if ( v->affinity_broken & VCPU_AFFINITY_OVERRIDE )
>> +        vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE);
>> +    if ( v->affinity_broken & VCPU_AFFINITY_WAIT )
>> +        vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_WAIT);
> 
> Shouldn't this live in an earlier patch? I guess the same question
> is applicable to almost everything here, as also already mentioned
> e.g. in the previous patch.

Hmm, this _is_ a missing part for multiple vcpus per unit.

I wouldn't know which other patch would be more suitable.

> 
>>   static inline void sched_set_res(struct sched_unit *unit,
>>                                    struct sched_resource *res)
>>   {
>> -    unit->vcpu_list->processor = res->processor;
>> +    int cpu = cpumask_first(res->cpus);
> 
> unsigned int

Yes.

> 
>>   static inline void sched_set_pause_flags_atomic(struct sched_unit *unit,
>>                                                   unsigned int bit)
>>   {
>> -    set_bit(bit, &unit->vcpu_list->pause_flags);
>> +    struct vcpu *v;
>> +
>> +    for_each_sched_unit_vcpu ( unit, v )
>> +        set_bit(bit, &v->pause_flags);
>>   }
>>   
>>   static inline void sched_clear_pause_flags_atomic(struct sched_unit *unit,
>>                                                     unsigned int bit)
>>   {
>> -    clear_bit(bit, &unit->vcpu_list->pause_flags);
>> +    struct vcpu *v;
>> +
>> +    for_each_sched_unit_vcpu ( unit, v )
>> +        clear_bit(bit, &v->pause_flags);
>>   }
> 
> The atomicity is (necessarily) limited here - it's atomic for an
> individual vCPU, but not atomic for a unit as a whole. If this
> is indeed a useful hybrid, then I think it would be nice if there
> was a comment next to these functions clarifying under what
> conditions use of them is correct without it also being correct
> to use their non-atomic counterparts (e.g. due to use of an
> external lock).

Okay, I'll add that.
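
For illustration, it could look something like this (wording not
final):

/*
 * Note: set_bit() / clear_bit() are atomic per vCPU only; the unit's
 * vCPUs are updated one after the other, so the unit as a whole is
 * not modified atomically.  Callers needing a consistent view across
 * the whole unit must provide their own serialisation (e.g. by
 * holding the scheduler lock of the unit's sched_resource).
 */
static inline void sched_set_pause_flags_atomic(struct sched_unit *unit,
                                                unsigned int bit)
{
    struct vcpu *v;

    for_each_sched_unit_vcpu ( unit, v )
        set_bit(bit, &v->pause_flags);
}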


Juergen

Patch

diff --git a/xen/common/domain.c b/xen/common/domain.c
index 3797f954f5..0c763ceb25 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -1255,7 +1255,10 @@  int vcpu_reset(struct vcpu *v)
     v->async_exception_mask = 0;
     memset(v->async_exception_state, 0, sizeof(v->async_exception_state));
 #endif
-    v->affinity_broken = 0;
+    if ( v->affinity_broken & VCPU_AFFINITY_OVERRIDE )
+        vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE);
+    if ( v->affinity_broken & VCPU_AFFINITY_WAIT )
+        vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_WAIT);
     clear_bit(_VPF_blocked, &v->pause_flags);
     clear_bit(_VPF_in_reset, &v->pause_flags);
 
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 2ecb76e3b9..1f45fc7373 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -240,8 +240,9 @@  static inline void vcpu_runstate_change(
     s_time_t delta;
     struct sched_unit *unit = v->sched_unit;
 
-    ASSERT(v->runstate.state != new_state);
     ASSERT(spin_is_locked(get_sched_res(v->processor)->schedule_lock));
+    if ( v->runstate.state == new_state )
+        return;
 
     vcpu_urgent_count_update(v);
 
@@ -263,15 +264,16 @@  static inline void vcpu_runstate_change(
 static inline void sched_unit_runstate_change(struct sched_unit *unit,
     bool running, s_time_t new_entry_time)
 {
-    struct vcpu *v = unit->vcpu_list;
+    struct vcpu *v;
 
-    if ( running )
-        vcpu_runstate_change(v, v->new_state, new_entry_time);
-    else
-        vcpu_runstate_change(v,
-            ((v->pause_flags & VPF_blocked) ? RUNSTATE_blocked :
-             (vcpu_runnable(v) ? RUNSTATE_runnable : RUNSTATE_offline)),
-            new_entry_time);
+    for_each_sched_unit_vcpu ( unit, v )
+        if ( running )
+            vcpu_runstate_change(v, v->new_state, new_entry_time);
+        else
+            vcpu_runstate_change(v,
+                ((v->pause_flags & VPF_blocked) ? RUNSTATE_blocked :
+                 (vcpu_runnable(v) ? RUNSTATE_runnable : RUNSTATE_offline)),
+                new_entry_time);
 }
 
 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
@@ -1035,9 +1037,9 @@  int cpu_disable_scheduler(unsigned int cpu)
             if ( cpumask_empty(&online_affinity) &&
                  cpumask_test_cpu(cpu, unit->cpu_hard_affinity) )
             {
-                if ( unit->vcpu_list->affinity_broken )
+                if ( sched_check_affinity_broken(unit) )
                 {
-                    /* The vcpu is temporarily pinned, can't move it. */
+                    /* The unit is temporarily pinned, can't move it. */
                     unit_schedule_unlock_irqrestore(lock, flags, unit);
                     ret = -EADDRINUSE;
                     break;
@@ -1395,17 +1397,17 @@  int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason)
             ret = 0;
             v->affinity_broken &= ~reason;
         }
-        if ( !ret && !v->affinity_broken )
+        if ( !ret && !sched_check_affinity_broken(unit) )
             sched_set_affinity(v, unit->cpu_hard_affinity_saved, NULL);
     }
     else if ( cpu < nr_cpu_ids )
     {
         if ( (v->affinity_broken & reason) ||
-             (v->affinity_broken && v->processor != cpu) )
+             (sched_check_affinity_broken(unit) && v->processor != cpu) )
             ret = -EBUSY;
         else if ( cpumask_test_cpu(cpu, VCPU2ONLINE(v)) )
         {
-            if ( !v->affinity_broken )
+            if ( !sched_check_affinity_broken(unit) )
             {
                 cpumask_copy(unit->cpu_hard_affinity_saved,
                              unit->cpu_hard_affinity);
@@ -1701,14 +1703,14 @@  static void sched_switch_units(struct sched_resource *sd,
              (next->vcpu_list->runstate.state == RUNSTATE_runnable) ?
              (now - next->state_entry_time) : 0, prev->next_time);
 
-    ASSERT(prev->vcpu_list->runstate.state == RUNSTATE_running);
+    ASSERT(unit_running(prev));
 
     TRACE_4D(TRC_SCHED_SWITCH, prev->domain->domain_id, prev->unit_id,
              next->domain->domain_id, next->unit_id);
 
     sched_unit_runstate_change(prev, false, now);
 
-    ASSERT(next->vcpu_list->runstate.state != RUNSTATE_running);
+    ASSERT(!unit_running(next));
     sched_unit_runstate_change(next, true, now);
 
     /*
@@ -1829,7 +1831,7 @@  void sched_context_switched(struct vcpu *vprev, struct vcpu *vnext)
             while ( atomic_read(&next->rendezvous_out_cnt) )
                 cpu_relax();
     }
-    else if ( vprev != vnext )
+    else if ( vprev != vnext && sched_granularity == 1 )
         context_saved(vprev);
 }
 
diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h
index aa896f49ef..e1d61a05b7 100644
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -67,32 +67,70 @@  static inline bool is_idle_unit(const struct sched_unit *unit)
 
 static inline bool is_unit_online(const struct sched_unit *unit)
 {
-    return is_vcpu_online(unit->vcpu_list);
+    struct vcpu *v;
+
+    for_each_sched_unit_vcpu ( unit, v )
+        if ( is_vcpu_online(v) )
+            return true;
+
+    return false;
+}
+
+static inline unsigned int unit_running(const struct sched_unit *unit)
+{
+    return unit->runstate_cnt[RUNSTATE_running];
 }
 
 static inline bool unit_runnable(const struct sched_unit *unit)
 {
-    return vcpu_runnable(unit->vcpu_list);
+    struct vcpu *v;
+
+    if ( is_idle_unit(unit) )
+        return true;
+
+    for_each_sched_unit_vcpu ( unit, v )
+        if ( vcpu_runnable(v) )
+            return true;
+
+    return false;
 }
 
 static inline bool unit_runnable_state(const struct sched_unit *unit)
 {
     struct vcpu *v;
-    bool runnable;
+    bool runnable, ret = false;
+
+    if ( is_idle_unit(unit) )
+        return true;
+
+    for_each_sched_unit_vcpu ( unit, v )
+    {
+        runnable = vcpu_runnable(v);
+
+        v->new_state = runnable ? RUNSTATE_running
+                                : (v->pause_flags & VPF_blocked)
+                                  ? RUNSTATE_blocked : RUNSTATE_offline;
 
-    v = unit->vcpu_list;
-    runnable = vcpu_runnable(v);
+        if ( runnable )
+            ret = true;
+    }
 
-    v->new_state = runnable ? RUNSTATE_running
-                            : (v->pause_flags & VPF_blocked)
-                              ? RUNSTATE_blocked : RUNSTATE_offline;
-    return runnable;
+    return ret;
 }
 
 static inline void sched_set_res(struct sched_unit *unit,
                                  struct sched_resource *res)
 {
-    unit->vcpu_list->processor = res->processor;
+    int cpu = cpumask_first(res->cpus);
+    struct vcpu *v;
+
+    for_each_sched_unit_vcpu ( unit, v )
+    {
+        ASSERT(cpu < nr_cpu_ids);
+        v->processor = cpu;
+        cpu = cpumask_next(cpu, res->cpus);
+    }
+
     unit->res = res;
 }
 
@@ -104,25 +142,37 @@  static inline unsigned int sched_unit_cpu(struct sched_unit *unit)
 static inline void sched_set_pause_flags(struct sched_unit *unit,
                                          unsigned int bit)
 {
-    __set_bit(bit, &unit->vcpu_list->pause_flags);
+    struct vcpu *v;
+
+    for_each_sched_unit_vcpu ( unit, v )
+        __set_bit(bit, &v->pause_flags);
 }
 
 static inline void sched_clear_pause_flags(struct sched_unit *unit,
                                            unsigned int bit)
 {
-    __clear_bit(bit, &unit->vcpu_list->pause_flags);
+    struct vcpu *v;
+
+    for_each_sched_unit_vcpu ( unit, v )
+        __clear_bit(bit, &v->pause_flags);
 }
 
 static inline void sched_set_pause_flags_atomic(struct sched_unit *unit,
                                                 unsigned int bit)
 {
-    set_bit(bit, &unit->vcpu_list->pause_flags);
+    struct vcpu *v;
+
+    for_each_sched_unit_vcpu ( unit, v )
+        set_bit(bit, &v->pause_flags);
 }
 
 static inline void sched_clear_pause_flags_atomic(struct sched_unit *unit,
                                                   unsigned int bit)
 {
-    clear_bit(bit, &unit->vcpu_list->pause_flags);
+    struct vcpu *v;
+
+    for_each_sched_unit_vcpu ( unit, v )
+        clear_bit(bit, &v->pause_flags);
 }
 
 static inline struct sched_unit *sched_idle_unit(unsigned int cpu)
@@ -448,12 +498,18 @@  static inline int sched_adjust_cpupool(const struct scheduler *s,
 
 static inline void sched_unit_pause_nosync(struct sched_unit *unit)
 {
-    vcpu_pause_nosync(unit->vcpu_list);
+    struct vcpu *v;
+
+    for_each_sched_unit_vcpu ( unit, v )
+        vcpu_pause_nosync(v);
 }
 
 static inline void sched_unit_unpause(struct sched_unit *unit)
 {
-    vcpu_unpause(unit->vcpu_list);
+    struct vcpu *v;
+
+    for_each_sched_unit_vcpu ( unit, v )
+        vcpu_unpause(v);
 }
 
 #define REGISTER_SCHEDULER(x) static const struct scheduler *x##_entry \