[v2,2/9] xen/sched: make sched-if.h really scheduler private

Message ID: 20200108152328.27194-3-jgross@suse.com (mailing list archive)
State: New, archived
Series: xen: scheduler cleanups

Commit Message

Juergen Gross Jan. 8, 2020, 3:23 p.m. UTC
include/xen/sched-if.h should be private to scheduler code, so move it
to common/sched/private.h and move the remaining use cases to
cpupool.c and core.c.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Dario Faggioli <dfaggioli@suse.com>
---
V2:
- rename to private.h (Andrew Cooper)
---
 xen/arch/x86/dom0_build.c                          |   5 +-
 xen/common/domain.c                                |  70 --------
 xen/common/domctl.c                                | 135 +--------------
 xen/common/sched/arinc653.c                        |   3 +-
 xen/common/sched/core.c                            | 191 ++++++++++++++++++++-
 xen/common/sched/cpupool.c                         |  13 +-
 xen/common/sched/credit.c                          |   2 +-
 xen/common/sched/credit2.c                         |   3 +-
 xen/common/sched/null.c                            |   3 +-
 .../xen/sched-if.h => common/sched/private.h}      |   3 -
 xen/common/sched/rt.c                              |   3 +-
 xen/include/xen/domain.h                           |   3 +
 xen/include/xen/sched.h                            |   7 +
 13 files changed, 228 insertions(+), 213 deletions(-)
 rename xen/{include/xen/sched-if.h => common/sched/private.h} (99%)

Comments

Jan Beulich Jan. 14, 2020, 2:27 p.m. UTC | #1
On 08.01.2020 16:23, Juergen Gross wrote:
> @@ -234,16 +233,6 @@ void domctl_lock_release(void)
>      spin_unlock(&current->domain->hypercall_deadlock_mutex);
>  }
>  
> -static inline
> -int vcpuaffinity_params_invalid(const struct xen_domctl_vcpuaffinity *vcpuaff)
> -{
> -    return vcpuaff->flags == 0 ||
> -           ((vcpuaff->flags & XEN_VCPUAFFINITY_HARD) &&
> -            guest_handle_is_null(vcpuaff->cpumap_hard.bitmap)) ||
> -           ((vcpuaff->flags & XEN_VCPUAFFINITY_SOFT) &&
> -            guest_handle_is_null(vcpuaff->cpumap_soft.bitmap));
> -}

I'd like to suggest keeping this and ...

> @@ -608,122 +597,8 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>  
>      case XEN_DOMCTL_setvcpuaffinity:
>      case XEN_DOMCTL_getvcpuaffinity:
> -    {
> -        struct vcpu *v;
> -        const struct sched_unit *unit;
> -        struct xen_domctl_vcpuaffinity *vcpuaff = &op->u.vcpuaffinity;
> -
> -        ret = -EINVAL;
> -        if ( vcpuaff->vcpu >= d->max_vcpus )
> -            break;
> -
> -        ret = -ESRCH;
> -        if ( (v = d->vcpu[vcpuaff->vcpu]) == NULL )
> -            break;
> -
> -        unit = v->sched_unit;
> -        ret = -EINVAL;
> -        if ( vcpuaffinity_params_invalid(vcpuaff) )
> -            break;

... everything up to here (except the [too early] unit assignment),
as not being scheduler specific at all. The remainder then would
better become two distinct functions, eliminating the need to pass
op->cmd (and presumably passing "v" instead of "d"). If, otoh, the
decision (supported by others) is to move everything, then I think
it would be appropriate to make at least some adjustments: The code
above should be converted to use domain_vcpu(), and e.g. ...

> -        if ( op->cmd == XEN_DOMCTL_setvcpuaffinity )
> -        {
> -            cpumask_var_t new_affinity, old_affinity;
> -            cpumask_t *online = cpupool_domain_master_cpumask(v->domain);

... this should use "d".
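
Purely for illustration (not part of the posted patch), the suggested shape
might look roughly like this; the names of the two split helpers are made up
here:

    struct xen_domctl_vcpuaffinity *vcpuaff = &op->u.vcpuaffinity;
    /* domain_vcpu() already covers the vcpu-id range check. */
    struct vcpu *v = domain_vcpu(d, vcpuaff->vcpu);

    ret = -ESRCH;
    if ( v == NULL )
        break;

    ret = -EINVAL;
    if ( vcpuaffinity_params_invalid(vcpuaff) )
        break;

    /* Scheduler-specific part, one function per sub-op, taking the vcpu. */
    ret = (op->cmd == XEN_DOMCTL_setvcpuaffinity)
          ? vcpu_set_affinity_domctl(v, vcpuaff)   /* hypothetical name */
          : vcpu_get_affinity_domctl(v, vcpuaff);  /* hypothetical name */
    break;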

> @@ -875,6 +876,16 @@ int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op)
>      return ret;
>  }
>  
> +int cpupool_get_id(const struct domain *d)

I find plain int odd for something like an ID, but I can see why
this is.
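
(Presumably because the "no pool" case is reported with a negative sentinel,
i.e. roughly

    #define CPUPOOLID_NONE    (-1)    /* sketch of the sentinel, not an exact quote */

so a signed type is needed.)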

> +cpumask_t *cpupool_valid_cpus(struct cpupool *pool)

const twice?

Jan
Juergen Gross Jan. 14, 2020, 2:33 p.m. UTC | #2
On 14.01.20 15:27, Jan Beulich wrote:
> On 08.01.2020 16:23, Juergen Gross wrote:
>> @@ -234,16 +233,6 @@ void domctl_lock_release(void)
>>       spin_unlock(&current->domain->hypercall_deadlock_mutex);
>>   }
>>   
>> -static inline
>> -int vcpuaffinity_params_invalid(const struct xen_domctl_vcpuaffinity *vcpuaff)
>> -{
>> -    return vcpuaff->flags == 0 ||
>> -           ((vcpuaff->flags & XEN_VCPUAFFINITY_HARD) &&
>> -            guest_handle_is_null(vcpuaff->cpumap_hard.bitmap)) ||
>> -           ((vcpuaff->flags & XEN_VCPUAFFINITY_SOFT) &&
>> -            guest_handle_is_null(vcpuaff->cpumap_soft.bitmap));
>> -}
> 
> I'd like to suggest keeping this and ...
> 
>> @@ -608,122 +597,8 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>>   
>>       case XEN_DOMCTL_setvcpuaffinity:
>>       case XEN_DOMCTL_getvcpuaffinity:
>> -    {
>> -        struct vcpu *v;
>> -        const struct sched_unit *unit;
>> -        struct xen_domctl_vcpuaffinity *vcpuaff = &op->u.vcpuaffinity;
>> -
>> -        ret = -EINVAL;
>> -        if ( vcpuaff->vcpu >= d->max_vcpus )
>> -            break;
>> -
>> -        ret = -ESRCH;
>> -        if ( (v = d->vcpu[vcpuaff->vcpu]) == NULL )
>> -            break;
>> -
>> -        unit = v->sched_unit;
>> -        ret = -EINVAL;
>> -        if ( vcpuaffinity_params_invalid(vcpuaff) )
>> -            break;
> 
> ... everything up to here (except the [too early] unit assignment),
> as not being scheduler specific at all. The remainder then would
> better become two distinct functions, eliminating the need to pass
> op->cmd (and presumably passing "v" instead of "d"). If, otoh, the
> decision (supported by others) is to move everything, then I think
> it would be appropriate to make at least some adjustments: The code
> above should be converted to use domain_vcpu(), and e.g. ...

Either would be fine with me.

> 
>> -        if ( op->cmd == XEN_DOMCTL_setvcpuaffinity )
>> -        {
>> -            cpumask_var_t new_affinity, old_affinity;
>> -            cpumask_t *online = cpupool_domain_master_cpumask(v->domain);
> 
> ... this should use "d".

Yes.

> 
>> @@ -875,6 +876,16 @@ int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op)
>>       return ret;
>>   }
>>   
>> +int cpupool_get_id(const struct domain *d)
> 
> I find plain int odd for something like an ID, but I can see why
> this is.
> 
>> +cpumask_t *cpupool_valid_cpus(struct cpupool *pool)
> 
> const twice?

See patch 9.


Juergen
Jan Beulich Jan. 14, 2020, 2:39 p.m. UTC | #3
On 14.01.2020 15:33, Jürgen Groß wrote:
> On 14.01.20 15:27, Jan Beulich wrote:
>> On 08.01.2020 16:23, Juergen Gross wrote:
>>> +cpumask_t *cpupool_valid_cpus(struct cpupool *pool)
>>
>> const twice?
> 
> See patch 9.

Well, in such a case either justify the omission in the description,
or introduce the function with const here and drop them there. As
things are, no reviewer should really let this pass uncommented.

Jan
Juergen Gross Jan. 14, 2020, 2:50 p.m. UTC | #4
On 14.01.20 15:39, Jan Beulich wrote:
> On 14.01.2020 15:33, Jürgen Groß wrote:
>> On 14.01.20 15:27, Jan Beulich wrote:
>>> On 08.01.2020 16:23, Juergen Gross wrote:
>>>> +cpumask_t *cpupool_valid_cpus(struct cpupool *pool)
>>>
>>> const twice?
>>
>> See patch 9.
> 
> Well, in such a case either justify the omission in the description,
> or introduce the function with const here and drop them there. As
> things are, no reviewer should really let this pass uncommented.

Oh, sorry, you are right. When writing my reply I believed I had merely moved
those functions. The introduction should of course already have the const
qualifiers.
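
I.e. the introduction here would presumably already read (sketch only):

    const cpumask_t *cpupool_valid_cpus(const struct cpupool *pool);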


Juergen

Patch

diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
index 28b964e018..56c2dee0fc 100644
--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -9,7 +9,6 @@ 
 #include <xen/libelf.h>
 #include <xen/pfn.h>
 #include <xen/sched.h>
-#include <xen/sched-if.h>
 #include <xen/softirq.h>
 
 #include <asm/amd.h>
@@ -227,9 +226,9 @@  unsigned int __init dom0_max_vcpus(void)
         dom0_nodes = node_online_map;
     for_each_node_mask ( node, dom0_nodes )
         cpumask_or(&dom0_cpus, &dom0_cpus, &node_to_cpumask(node));
-    cpumask_and(&dom0_cpus, &dom0_cpus, cpupool0->cpu_valid);
+    cpumask_and(&dom0_cpus, &dom0_cpus, cpupool_valid_cpus(cpupool0));
     if ( cpumask_empty(&dom0_cpus) )
-        cpumask_copy(&dom0_cpus, cpupool0->cpu_valid);
+        cpumask_copy(&dom0_cpus, cpupool_valid_cpus(cpupool0));
 
     max_vcpus = cpumask_weight(&dom0_cpus);
     if ( opt_dom0_max_vcpus_min > max_vcpus )
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 0b1103fdb2..71a7c2776f 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -10,7 +10,6 @@ 
 #include <xen/ctype.h>
 #include <xen/err.h>
 #include <xen/sched.h>
-#include <xen/sched-if.h>
 #include <xen/domain.h>
 #include <xen/mm.h>
 #include <xen/event.h>
@@ -565,75 +564,6 @@  void __init setup_system_domains(void)
 #endif
 }
 
-void domain_update_node_affinity(struct domain *d)
-{
-    cpumask_var_t dom_cpumask, dom_cpumask_soft;
-    cpumask_t *dom_affinity;
-    const cpumask_t *online;
-    struct sched_unit *unit;
-    unsigned int cpu;
-
-    /* Do we have vcpus already? If not, no need to update node-affinity. */
-    if ( !d->vcpu || !d->vcpu[0] )
-        return;
-
-    if ( !zalloc_cpumask_var(&dom_cpumask) )
-        return;
-    if ( !zalloc_cpumask_var(&dom_cpumask_soft) )
-    {
-        free_cpumask_var(dom_cpumask);
-        return;
-    }
-
-    online = cpupool_domain_master_cpumask(d);
-
-    spin_lock(&d->node_affinity_lock);
-
-    /*
-     * If d->auto_node_affinity is true, let's compute the domain's
-     * node-affinity and update d->node_affinity accordingly. if false,
-     * just leave d->auto_node_affinity alone.
-     */
-    if ( d->auto_node_affinity )
-    {
-        /*
-         * We want the narrowest possible set of pcpus (to get the narowest
-         * possible set of nodes). What we need is the cpumask of where the
-         * domain can run (the union of the hard affinity of all its vcpus),
-         * and the full mask of where it would prefer to run (the union of
-         * the soft affinity of all its various vcpus). Let's build them.
-         */
-        for_each_sched_unit ( d, unit )
-        {
-            cpumask_or(dom_cpumask, dom_cpumask, unit->cpu_hard_affinity);
-            cpumask_or(dom_cpumask_soft, dom_cpumask_soft,
-                       unit->cpu_soft_affinity);
-        }
-        /* Filter out non-online cpus */
-        cpumask_and(dom_cpumask, dom_cpumask, online);
-        ASSERT(!cpumask_empty(dom_cpumask));
-        /* And compute the intersection between hard, online and soft */
-        cpumask_and(dom_cpumask_soft, dom_cpumask_soft, dom_cpumask);
-
-        /*
-         * If not empty, the intersection of hard, soft and online is the
-         * narrowest set we want. If empty, we fall back to hard&online.
-         */
-        dom_affinity = cpumask_empty(dom_cpumask_soft) ?
-                           dom_cpumask : dom_cpumask_soft;
-
-        nodes_clear(d->node_affinity);
-        for_each_cpu ( cpu, dom_affinity )
-            node_set(cpu_to_node(cpu), d->node_affinity);
-    }
-
-    spin_unlock(&d->node_affinity_lock);
-
-    free_cpumask_var(dom_cpumask_soft);
-    free_cpumask_var(dom_cpumask);
-}
-
-
 int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity)
 {
     /* Being disjoint with the system is just wrong. */
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 650310e874..8b819f56e5 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -11,7 +11,6 @@ 
 #include <xen/err.h>
 #include <xen/mm.h>
 #include <xen/sched.h>
-#include <xen/sched-if.h>
 #include <xen/domain.h>
 #include <xen/event.h>
 #include <xen/grant_table.h>
@@ -65,9 +64,9 @@  static int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
     return err;
 }
 
-static int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
-                                   const struct xenctl_bitmap *xenctl_bitmap,
-                                   unsigned int nbits)
+int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
+                            const struct xenctl_bitmap *xenctl_bitmap,
+                            unsigned int nbits)
 {
     unsigned int guest_bytes, copy_bytes;
     int err = 0;
@@ -200,7 +199,7 @@  void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
     info->shared_info_frame = mfn_to_gmfn(d, virt_to_mfn(d->shared_info));
     BUG_ON(SHARED_M2P(info->shared_info_frame));
 
-    info->cpupool = d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE;
+    info->cpupool = cpupool_get_id(d);
 
     memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));
 
@@ -234,16 +233,6 @@  void domctl_lock_release(void)
     spin_unlock(&current->domain->hypercall_deadlock_mutex);
 }
 
-static inline
-int vcpuaffinity_params_invalid(const struct xen_domctl_vcpuaffinity *vcpuaff)
-{
-    return vcpuaff->flags == 0 ||
-           ((vcpuaff->flags & XEN_VCPUAFFINITY_HARD) &&
-            guest_handle_is_null(vcpuaff->cpumap_hard.bitmap)) ||
-           ((vcpuaff->flags & XEN_VCPUAFFINITY_SOFT) &&
-            guest_handle_is_null(vcpuaff->cpumap_soft.bitmap));
-}
-
 void vnuma_destroy(struct vnuma_info *vnuma)
 {
     if ( vnuma )
@@ -608,122 +597,8 @@  long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
 
     case XEN_DOMCTL_setvcpuaffinity:
     case XEN_DOMCTL_getvcpuaffinity:
-    {
-        struct vcpu *v;
-        const struct sched_unit *unit;
-        struct xen_domctl_vcpuaffinity *vcpuaff = &op->u.vcpuaffinity;
-
-        ret = -EINVAL;
-        if ( vcpuaff->vcpu >= d->max_vcpus )
-            break;
-
-        ret = -ESRCH;
-        if ( (v = d->vcpu[vcpuaff->vcpu]) == NULL )
-            break;
-
-        unit = v->sched_unit;
-        ret = -EINVAL;
-        if ( vcpuaffinity_params_invalid(vcpuaff) )
-            break;
-
-        if ( op->cmd == XEN_DOMCTL_setvcpuaffinity )
-        {
-            cpumask_var_t new_affinity, old_affinity;
-            cpumask_t *online = cpupool_domain_master_cpumask(v->domain);
-
-            /*
-             * We want to be able to restore hard affinity if we are trying
-             * setting both and changing soft affinity (which happens later,
-             * when hard affinity has been succesfully chaged already) fails.
-             */
-            if ( !alloc_cpumask_var(&old_affinity) )
-            {
-                ret = -ENOMEM;
-                break;
-            }
-            cpumask_copy(old_affinity, unit->cpu_hard_affinity);
-
-            if ( !alloc_cpumask_var(&new_affinity) )
-            {
-                free_cpumask_var(old_affinity);
-                ret = -ENOMEM;
-                break;
-            }
-
-            /* Undo a stuck SCHED_pin_override? */
-            if ( vcpuaff->flags & XEN_VCPUAFFINITY_FORCE )
-                vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE);
-
-            ret = 0;
-
-            /*
-             * We both set a new affinity and report back to the caller what
-             * the scheduler will be effectively using.
-             */
-            if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD )
-            {
-                ret = xenctl_bitmap_to_bitmap(cpumask_bits(new_affinity),
-                                              &vcpuaff->cpumap_hard,
-                                              nr_cpu_ids);
-                if ( !ret )
-                    ret = vcpu_set_hard_affinity(v, new_affinity);
-                if ( ret )
-                    goto setvcpuaffinity_out;
-
-                /*
-                 * For hard affinity, what we return is the intersection of
-                 * cpupool's online mask and the new hard affinity.
-                 */
-                cpumask_and(new_affinity, online, unit->cpu_hard_affinity);
-                ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_hard,
-                                               new_affinity);
-            }
-            if ( vcpuaff->flags & XEN_VCPUAFFINITY_SOFT )
-            {
-                ret = xenctl_bitmap_to_bitmap(cpumask_bits(new_affinity),
-                                              &vcpuaff->cpumap_soft,
-                                              nr_cpu_ids);
-                if ( !ret)
-                    ret = vcpu_set_soft_affinity(v, new_affinity);
-                if ( ret )
-                {
-                    /*
-                     * Since we're returning error, the caller expects nothing
-                     * happened, so we rollback the changes to hard affinity
-                     * (if any).
-                     */
-                    if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD )
-                        vcpu_set_hard_affinity(v, old_affinity);
-                    goto setvcpuaffinity_out;
-                }
-
-                /*
-                 * For soft affinity, we return the intersection between the
-                 * new soft affinity, the cpupool's online map and the (new)
-                 * hard affinity.
-                 */
-                cpumask_and(new_affinity, new_affinity, online);
-                cpumask_and(new_affinity, new_affinity,
-                            unit->cpu_hard_affinity);
-                ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_soft,
-                                               new_affinity);
-            }
-
- setvcpuaffinity_out:
-            free_cpumask_var(new_affinity);
-            free_cpumask_var(old_affinity);
-        }
-        else
-        {
-            if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD )
-                ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_hard,
-                                               unit->cpu_hard_affinity);
-            if ( vcpuaff->flags & XEN_VCPUAFFINITY_SOFT )
-                ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_soft,
-                                               unit->cpu_soft_affinity);
-        }
+        ret = vcpu_affinity_domctl(d, op->cmd, &op->u.vcpuaffinity);
         break;
-    }
 
     case XEN_DOMCTL_scheduler_op:
         ret = sched_adjust(d, &op->u.scheduler_op);
diff --git a/xen/common/sched/arinc653.c b/xen/common/sched/arinc653.c
index 565575c326..8895d92b5e 100644
--- a/xen/common/sched/arinc653.c
+++ b/xen/common/sched/arinc653.c
@@ -26,7 +26,6 @@ 
 
 #include <xen/lib.h>
 #include <xen/sched.h>
-#include <xen/sched-if.h>
 #include <xen/timer.h>
 #include <xen/softirq.h>
 #include <xen/time.h>
@@ -35,6 +34,8 @@ 
 #include <xen/guest_access.h>
 #include <public/sysctl.h>
 
+#include "private.h"
+
 /**************************************************************************
  * Private Macros                                                         *
  **************************************************************************/
diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index 4d8eb4c617..2fae959e90 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -23,7 +23,6 @@ 
 #include <xen/time.h>
 #include <xen/timer.h>
 #include <xen/perfc.h>
-#include <xen/sched-if.h>
 #include <xen/softirq.h>
 #include <xen/trace.h>
 #include <xen/mm.h>
@@ -38,6 +37,8 @@ 
 #include <xsm/xsm.h>
 #include <xen/err.h>
 
+#include "private.h"
+
 #ifdef CONFIG_XEN_GUEST
 #include <asm/guest.h>
 #else
@@ -1607,6 +1608,194 @@  int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason)
     return ret;
 }
 
+static inline
+int vcpuaffinity_params_invalid(const struct xen_domctl_vcpuaffinity *vcpuaff)
+{
+    return vcpuaff->flags == 0 ||
+           ((vcpuaff->flags & XEN_VCPUAFFINITY_HARD) &&
+            guest_handle_is_null(vcpuaff->cpumap_hard.bitmap)) ||
+           ((vcpuaff->flags & XEN_VCPUAFFINITY_SOFT) &&
+            guest_handle_is_null(vcpuaff->cpumap_soft.bitmap));
+}
+
+int vcpu_affinity_domctl(struct domain *d, uint32_t cmd,
+                         struct xen_domctl_vcpuaffinity *vcpuaff)
+{
+    struct vcpu *v;
+    const struct sched_unit *unit;
+    int ret = 0;
+
+    if ( vcpuaff->vcpu >= d->max_vcpus )
+        return -EINVAL;
+
+    if ( (v = d->vcpu[vcpuaff->vcpu]) == NULL )
+        return -ESRCH;
+
+    if ( vcpuaffinity_params_invalid(vcpuaff) )
+        return -EINVAL;
+
+    unit = v->sched_unit;
+
+    if ( cmd == XEN_DOMCTL_setvcpuaffinity )
+    {
+        cpumask_var_t new_affinity, old_affinity;
+        cpumask_t *online = cpupool_domain_master_cpumask(v->domain);
+
+        /*
+         * We want to be able to restore hard affinity if we are trying
+         * setting both and changing soft affinity (which happens later,
+         * when hard affinity has been succesfully chaged already) fails.
+         */
+        if ( !alloc_cpumask_var(&old_affinity) )
+            return -ENOMEM;
+
+        cpumask_copy(old_affinity, unit->cpu_hard_affinity);
+
+        if ( !alloc_cpumask_var(&new_affinity) )
+        {
+            free_cpumask_var(old_affinity);
+            return -ENOMEM;
+        }
+
+        /* Undo a stuck SCHED_pin_override? */
+        if ( vcpuaff->flags & XEN_VCPUAFFINITY_FORCE )
+            vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE);
+
+        ret = 0;
+
+        /*
+         * We both set a new affinity and report back to the caller what
+         * the scheduler will be effectively using.
+         */
+        if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD )
+        {
+            ret = xenctl_bitmap_to_bitmap(cpumask_bits(new_affinity),
+                                          &vcpuaff->cpumap_hard, nr_cpu_ids);
+            if ( !ret )
+                ret = vcpu_set_hard_affinity(v, new_affinity);
+            if ( ret )
+                goto setvcpuaffinity_out;
+
+            /*
+             * For hard affinity, what we return is the intersection of
+             * cpupool's online mask and the new hard affinity.
+             */
+            cpumask_and(new_affinity, online, unit->cpu_hard_affinity);
+            ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_hard, new_affinity);
+        }
+        if ( vcpuaff->flags & XEN_VCPUAFFINITY_SOFT )
+        {
+            ret = xenctl_bitmap_to_bitmap(cpumask_bits(new_affinity),
+                                          &vcpuaff->cpumap_soft, nr_cpu_ids);
+            if ( !ret)
+                ret = vcpu_set_soft_affinity(v, new_affinity);
+            if ( ret )
+            {
+                /*
+                 * Since we're returning error, the caller expects nothing
+                 * happened, so we rollback the changes to hard affinity
+                 * (if any).
+                 */
+                if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD )
+                    vcpu_set_hard_affinity(v, old_affinity);
+                goto setvcpuaffinity_out;
+            }
+
+            /*
+             * For soft affinity, we return the intersection between the
+             * new soft affinity, the cpupool's online map and the (new)
+             * hard affinity.
+             */
+            cpumask_and(new_affinity, new_affinity, online);
+            cpumask_and(new_affinity, new_affinity, unit->cpu_hard_affinity);
+            ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_soft, new_affinity);
+        }
+
+ setvcpuaffinity_out:
+        free_cpumask_var(new_affinity);
+        free_cpumask_var(old_affinity);
+    }
+    else
+    {
+        if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD )
+            ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_hard,
+                                           unit->cpu_hard_affinity);
+        if ( vcpuaff->flags & XEN_VCPUAFFINITY_SOFT )
+            ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_soft,
+                                           unit->cpu_soft_affinity);
+    }
+
+    return ret;
+}
+
+void domain_update_node_affinity(struct domain *d)
+{
+    cpumask_var_t dom_cpumask, dom_cpumask_soft;
+    cpumask_t *dom_affinity;
+    const cpumask_t *online;
+    struct sched_unit *unit;
+    unsigned int cpu;
+
+    /* Do we have vcpus already? If not, no need to update node-affinity. */
+    if ( !d->vcpu || !d->vcpu[0] )
+        return;
+
+    if ( !zalloc_cpumask_var(&dom_cpumask) )
+        return;
+    if ( !zalloc_cpumask_var(&dom_cpumask_soft) )
+    {
+        free_cpumask_var(dom_cpumask);
+        return;
+    }
+
+    online = cpupool_domain_master_cpumask(d);
+
+    spin_lock(&d->node_affinity_lock);
+
+    /*
+     * If d->auto_node_affinity is true, let's compute the domain's
+     * node-affinity and update d->node_affinity accordingly. if false,
+     * just leave d->auto_node_affinity alone.
+     */
+    if ( d->auto_node_affinity )
+    {
+        /*
+         * We want the narrowest possible set of pcpus (to get the narowest
+         * possible set of nodes). What we need is the cpumask of where the
+         * domain can run (the union of the hard affinity of all its vcpus),
+         * and the full mask of where it would prefer to run (the union of
+         * the soft affinity of all its various vcpus). Let's build them.
+         */
+        for_each_sched_unit ( d, unit )
+        {
+            cpumask_or(dom_cpumask, dom_cpumask, unit->cpu_hard_affinity);
+            cpumask_or(dom_cpumask_soft, dom_cpumask_soft,
+                       unit->cpu_soft_affinity);
+        }
+        /* Filter out non-online cpus */
+        cpumask_and(dom_cpumask, dom_cpumask, online);
+        ASSERT(!cpumask_empty(dom_cpumask));
+        /* And compute the intersection between hard, online and soft */
+        cpumask_and(dom_cpumask_soft, dom_cpumask_soft, dom_cpumask);
+
+        /*
+         * If not empty, the intersection of hard, soft and online is the
+         * narrowest set we want. If empty, we fall back to hard&online.
+         */
+        dom_affinity = cpumask_empty(dom_cpumask_soft) ?
+                           dom_cpumask : dom_cpumask_soft;
+
+        nodes_clear(d->node_affinity);
+        for_each_cpu ( cpu, dom_affinity )
+            node_set(cpu_to_node(cpu), d->node_affinity);
+    }
+
+    spin_unlock(&d->node_affinity_lock);
+
+    free_cpumask_var(dom_cpumask_soft);
+    free_cpumask_var(dom_cpumask);
+}
+
 typedef long ret_t;
 
 #endif /* !COMPAT */
diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
index d66b541a94..7b31ab0d61 100644
--- a/xen/common/sched/cpupool.c
+++ b/xen/common/sched/cpupool.c
@@ -16,11 +16,12 @@ 
 #include <xen/cpumask.h>
 #include <xen/percpu.h>
 #include <xen/sched.h>
-#include <xen/sched-if.h>
 #include <xen/warning.h>
 #include <xen/keyhandler.h>
 #include <xen/cpu.h>
 
+#include "private.h"
+
 #define for_each_cpupool(ptr)    \
     for ((ptr) = &cpupool_list; *(ptr) != NULL; (ptr) = &((*(ptr))->next))
 
@@ -875,6 +876,16 @@  int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op)
     return ret;
 }
 
+int cpupool_get_id(const struct domain *d)
+{
+    return d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE;
+}
+
+cpumask_t *cpupool_valid_cpus(struct cpupool *pool)
+{
+    return pool->cpu_valid;
+}
+
 void dump_runq(unsigned char key)
 {
     unsigned long    flags;
diff --git a/xen/common/sched/credit.c b/xen/common/sched/credit.c
index aa41a3301b..4329d9df56 100644
--- a/xen/common/sched/credit.c
+++ b/xen/common/sched/credit.c
@@ -15,7 +15,6 @@ 
 #include <xen/delay.h>
 #include <xen/event.h>
 #include <xen/time.h>
-#include <xen/sched-if.h>
 #include <xen/softirq.h>
 #include <asm/atomic.h>
 #include <asm/div64.h>
@@ -24,6 +23,7 @@ 
 #include <xen/trace.h>
 #include <xen/err.h>
 
+#include "private.h"
 
 /*
  * Locking:
diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
index f7c477053c..65e8ab052e 100644
--- a/xen/common/sched/credit2.c
+++ b/xen/common/sched/credit2.c
@@ -18,7 +18,6 @@ 
 #include <xen/event.h>
 #include <xen/time.h>
 #include <xen/perfc.h>
-#include <xen/sched-if.h>
 #include <xen/softirq.h>
 #include <asm/div64.h>
 #include <xen/errno.h>
@@ -26,6 +25,8 @@ 
 #include <xen/cpu.h>
 #include <xen/keyhandler.h>
 
+#include "private.h"
+
 /* Meant only for helping developers during debugging. */
 /* #define d2printk printk */
 #define d2printk(x...)
diff --git a/xen/common/sched/null.c b/xen/common/sched/null.c
index 3f3418c9b1..b99f1e3c65 100644
--- a/xen/common/sched/null.c
+++ b/xen/common/sched/null.c
@@ -29,10 +29,11 @@ 
  */
 
 #include <xen/sched.h>
-#include <xen/sched-if.h>
 #include <xen/softirq.h>
 #include <xen/trace.h>
 
+#include "private.h"
+
 /*
  * null tracing events. Check include/public/trace.h for more details.
  */
diff --git a/xen/include/xen/sched-if.h b/xen/common/sched/private.h
similarity index 99%
rename from xen/include/xen/sched-if.h
rename to xen/common/sched/private.h
index b0ac54e63d..a702fd23b1 100644
--- a/xen/include/xen/sched-if.h
+++ b/xen/common/sched/private.h
@@ -12,9 +12,6 @@ 
 #include <xen/err.h>
 #include <xen/rcupdate.h>
 
-/* A global pointer to the initial cpupool (POOL0). */
-extern struct cpupool *cpupool0;
-
 /* cpus currently in no cpupool */
 extern cpumask_t cpupool_free_cpus;
 
diff --git a/xen/common/sched/rt.c b/xen/common/sched/rt.c
index b2b29481f3..8203b63a9d 100644
--- a/xen/common/sched/rt.c
+++ b/xen/common/sched/rt.c
@@ -20,7 +20,6 @@ 
 #include <xen/time.h>
 #include <xen/timer.h>
 #include <xen/perfc.h>
-#include <xen/sched-if.h>
 #include <xen/softirq.h>
 #include <asm/atomic.h>
 #include <xen/errno.h>
@@ -31,6 +30,8 @@ 
 #include <xen/err.h>
 #include <xen/guest_access.h>
 
+#include "private.h"
+
 /*
  * TODO:
  *
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index 1cb205d977..7e51d361de 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -27,6 +27,9 @@  struct xen_domctl_getdomaininfo;
 void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info);
 void arch_get_domain_info(const struct domain *d,
                           struct xen_domctl_getdomaininfo *info);
+int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
+                            const struct xenctl_bitmap *xenctl_bitmap,
+                            unsigned int nbits);
 
 /*
  * Arch-specifics.
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index cc942a3621..d3adc69ab9 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -50,6 +50,9 @@  DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t);
 /* A global pointer to the hardware domain (usually DOM0). */
 extern struct domain *hardware_domain;
 
+/* A global pointer to the initial cpupool (POOL0). */
+extern struct cpupool *cpupool0;
+
 #ifdef CONFIG_LATE_HWDOM
 extern domid_t hardware_domid;
 #else
@@ -931,6 +934,8 @@  int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason);
 int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity);
 int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t *affinity);
 void restore_vcpu_affinity(struct domain *d);
+int vcpu_affinity_domctl(struct domain *d, uint32_t cmd,
+                         struct xen_domctl_vcpuaffinity *vcpuaff);
 
 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
 uint64_t get_cpu_idle_time(unsigned int cpu);
@@ -1068,6 +1073,8 @@  int cpupool_add_domain(struct domain *d, int poolid);
 void cpupool_rm_domain(struct domain *d);
 int cpupool_move_domain(struct domain *d, struct cpupool *c);
 int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op);
+int cpupool_get_id(const struct domain *d);
+cpumask_t *cpupool_valid_cpus(struct cpupool *pool);
 void schedule_dump(struct cpupool *c);
 extern void dump_runq(unsigned char key);