diff mbox series

[3/8] x86/domctl: Implement XEN_DOMCTL_set_cpu_policy

Message ID 20190911200504.5693-4-andrew.cooper3@citrix.com (mailing list archive)
State Superseded
Headers show
Series x86/cpuid: Switch to using XEN_DOMCTL_set_cpu_policy | expand

Commit Message

Andrew Cooper Sept. 11, 2019, 8:04 p.m. UTC
This hypercall allows the toolstack to present one combined CPUID and MSR
policy for a domain.  Xen can then audit the whole policy in one go, which is
necessary for the auditing to be correct.

Reuse the existing set_cpuid XSM access vector, as this is logically the same
operation.

As x86_cpu_policies_are_compatible() is still only a stub, retain the call to
recalculate_cpuid_policy() to discard unsafe toolstack settings.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Ian Jackson <Ian.Jackson@eu.citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 tools/libxc/include/xenctrl.h       |  5 +++
 tools/libxc/xc_cpuid_x86.c          | 49 +++++++++++++++++++++++
 xen/arch/x86/domctl.c               | 80 +++++++++++++++++++++++++++++++++++++
 xen/include/public/domctl.h         | 15 +++++--
 xen/xsm/flask/hooks.c               |  1 +
 xen/xsm/flask/policy/access_vectors |  1 +
 6 files changed, 147 insertions(+), 4 deletions(-)

Comments

Jan Beulich Sept. 12, 2019, 8:06 a.m. UTC | #1
On 11.09.2019 22:04, Andrew Cooper wrote:
> --- a/tools/libxc/xc_cpuid_x86.c
> +++ b/tools/libxc/xc_cpuid_x86.c
> @@ -229,6 +229,55 @@ int xc_get_domain_cpu_policy(xc_interface *xch, uint32_t domid,
>      return ret;
>  }
>  
> +int xc_set_domain_cpu_policy(xc_interface *xch, uint32_t domid,
> +                             uint32_t nr_leaves, xen_cpuid_leaf_t *leaves,
> +                             uint32_t nr_msrs, xen_msr_entry_t *msrs,
> +                             uint32_t *err_leaf_p, uint32_t *err_subleaf_p,
> +                             uint32_t *err_msr_idx_p)
> +{
> +    DECLARE_DOMCTL;
> +    DECLARE_HYPERCALL_BOUNCE(leaves,
> +                             nr_leaves * sizeof(*leaves),
> +                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
> +    DECLARE_HYPERCALL_BOUNCE(msrs,
> +                             nr_msrs * sizeof(*msrs),
> +                             XC_HYPERCALL_BUFFER_BOUNCE_IN);

With both being IN, the respective function parameters should imo
be pointers to const.

> +    int ret;
> +
> +    if ( xc_hypercall_bounce_pre(xch, leaves) )
> +        return -1;
> +
> +    if ( xc_hypercall_bounce_pre(xch, msrs) )
> +        return -1;
> +
> +    domctl.cmd = XEN_DOMCTL_set_cpu_policy;
> +    domctl.domain = domid;
> +    domctl.u.cpu_policy.nr_leaves = nr_leaves;
> +    set_xen_guest_handle(domctl.u.cpu_policy.cpuid_policy, leaves);
> +    domctl.u.cpu_policy.nr_msrs = nr_msrs;
> +    set_xen_guest_handle(domctl.u.cpu_policy.msr_policy, msrs);
> +    domctl.u.cpu_policy.err_leaf = ~0;
> +    domctl.u.cpu_policy.err_subleaf = ~0;
> +    domctl.u.cpu_policy.err_msr_idx = ~0;

The fields are marked as OUT-only in the public header, which implies
no initialization should be needed here, as the hypercall would
overwrite the fields in any event.

> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -294,6 +294,65 @@ static int update_domain_cpuid_info(struct domain *d,
>      return 0;
>  }
>  
> +static int update_domain_cpu_policy(struct domain *d,
> +                                    xen_domctl_cpu_policy_t *xdpc)
> +{
> +    struct cpu_policy new = {};
> +    const struct cpu_policy *sys = is_pv_domain(d)
> +        ? &system_policies[XEN_SYSCTL_cpu_policy_pv_max]
> +        : &system_policies[XEN_SYSCTL_cpu_policy_hvm_max];
> +    struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS;
> +    int ret = -ENOMEM;
> +
> +    /* Start by copying the domain's existing policies. */
> +    if ( !(new.cpuid = xmemdup(d->arch.cpuid)) ||
> +         !(new.msr   = xmemdup(d->arch.msr)) )

To avoid the redundant initialization, this could as well be the
initializer of the variable.

> @@ -1476,6 +1535,27 @@ long arch_do_domctl(
>          copyback = true;
>          break;
>  
> +    case XEN_DOMCTL_set_cpu_policy:
> +        if ( d == currd ) /* No domain_pause() */
> +        {
> +            ret = -EINVAL;
> +            break;
> +        }
> +
> +        domain_pause(d);
> +
> +        if ( d->creation_finished )
> +            ret = -EEXIST; /* No changing once the domain is running. */
> +        else
> +        {
> +            ret = update_domain_cpu_policy(d, &domctl->u.cpu_policy);
> +            if ( ret ) /* Copy domctl->u.cpu_policy.err_* to guest. */
> +                copyback = true;

Due to the OUT in the public header I think it would be better to
always copy this back (making sure the invalid markers are in place
in case of success). But I guess we're not very consistent with
honoring OUT like this.

> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -658,17 +658,23 @@ struct xen_domctl_cpuid {
>  };
>  
>  /*
> - * XEN_DOMCTL_get_cpu_policy (x86 specific)
> + * XEN_DOMCTL_{get,set}_cpu_policy (x86 specific)
>   *
> - * Query the CPUID and MSR policies for a specific domain.
> + * Query or set the CPUID and MSR policies for a specific domain.
>   */
>  struct xen_domctl_cpu_policy {
>      uint32_t nr_leaves; /* IN/OUT: Number of leaves in/written to
>                           * 'cpuid_policy'. */
>      uint32_t nr_msrs;   /* IN/OUT: Number of MSRs in/written to
>                           * 'msr_domain_policy' */
> -    XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_policy; /* OUT */
> -    XEN_GUEST_HANDLE_64(xen_msr_entry_t) msr_policy;    /* OUT */
> +    XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_policy; /* IN/OUT */
> +    XEN_GUEST_HANDLE_64(xen_msr_entry_t) msr_policy;    /* IN/OUT */
> +    uint32_t err_leaf, err_subleaf; /* OUT, set_policy only.  If not ~0,
> +                                     * indicates the leaf/subleaf which
> +                                     * auditing objected to. */
> +    uint32_t err_msr_idx;           /* OUT, set_policy only.  If not ~0,
> +                                     * indicates the MSR idx which
> +                                     * auditing objected to. */
>  };
>  typedef struct xen_domctl_cpu_policy xen_domctl_cpu_policy_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpu_policy_t);

I know you're not liking the concept, but XEN_DOMCTL_INTERFACE_VERSION
hasn't been bumped in this release cycle yet, and hence a binary
incompatible change like this one needs to bump it. With at least this
last aspect taken care of, hypervisor parts
Reviewed-by: Jan Beulich <jbeulich@suse.com>

Jan
Andrew Cooper Sept. 12, 2019, 1:15 p.m. UTC | #2
On 12/09/2019 09:06, Jan Beulich wrote:
> On 11.09.2019 22:04, Andrew Cooper wrote:
>> --- a/tools/libxc/xc_cpuid_x86.c
>> +++ b/tools/libxc/xc_cpuid_x86.c
>> @@ -229,6 +229,55 @@ int xc_get_domain_cpu_policy(xc_interface *xch, uint32_t domid,
>>      return ret;
>>  }
>>  
>> +int xc_set_domain_cpu_policy(xc_interface *xch, uint32_t domid,
>> +                             uint32_t nr_leaves, xen_cpuid_leaf_t *leaves,
>> +                             uint32_t nr_msrs, xen_msr_entry_t *msrs,
>> +                             uint32_t *err_leaf_p, uint32_t *err_subleaf_p,
>> +                             uint32_t *err_msr_idx_p)
>> +{
>> +    DECLARE_DOMCTL;
>> +    DECLARE_HYPERCALL_BOUNCE(leaves,
>> +                             nr_leaves * sizeof(*leaves),
>> +                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
>> +    DECLARE_HYPERCALL_BOUNCE(msrs,
>> +                             nr_msrs * sizeof(*msrs),
>> +                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
> With both being IN, the respective function parameters should imo
> be pointers to const.

Ok.

>
>> +    int ret;
>> +
>> +    if ( xc_hypercall_bounce_pre(xch, leaves) )
>> +        return -1;
>> +
>> +    if ( xc_hypercall_bounce_pre(xch, msrs) )
>> +        return -1;
>> +
>> +    domctl.cmd = XEN_DOMCTL_set_cpu_policy;
>> +    domctl.domain = domid;
>> +    domctl.u.cpu_policy.nr_leaves = nr_leaves;
>> +    set_xen_guest_handle(domctl.u.cpu_policy.cpuid_policy, leaves);
>> +    domctl.u.cpu_policy.nr_msrs = nr_msrs;
>> +    set_xen_guest_handle(domctl.u.cpu_policy.msr_policy, msrs);
>> +    domctl.u.cpu_policy.err_leaf = ~0;
>> +    domctl.u.cpu_policy.err_subleaf = ~0;
>> +    domctl.u.cpu_policy.err_msr_idx = ~0;
> The fields are marked OUT only in the public header, which implies
> no initialization should be needed here, as the hypercall would
> overwrite the fields in any event.

See below.

>
>> --- a/xen/arch/x86/domctl.c
>> +++ b/xen/arch/x86/domctl.c
>> @@ -294,6 +294,65 @@ static int update_domain_cpuid_info(struct domain *d,
>>      return 0;
>>  }
>>  
>> +static int update_domain_cpu_policy(struct domain *d,
>> +                                    xen_domctl_cpu_policy_t *xdpc)
>> +{
>> +    struct cpu_policy new = {};
>> +    const struct cpu_policy *sys = is_pv_domain(d)
>> +        ? &system_policies[XEN_SYSCTL_cpu_policy_pv_max]
>> +        : &system_policies[XEN_SYSCTL_cpu_policy_hvm_max];
>> +    struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS;
>> +    int ret = -ENOMEM;
>> +
>> +    /* Start by copying the domain's existing policies. */
>> +    if ( !(new.cpuid = xmemdup(d->arch.cpuid)) ||
>> +         !(new.msr   = xmemdup(d->arch.msr)) )
> To avoid the redundant initialization, this could as well be the
> initializer of the variable.

I'm not sure that is the wisest course of action.  We wouldn't want to
proactively perform the memory allocation if new logic needs to appear
ahead of this.

In this example, the compiler ought to be able to do DSE (dead store
elimination) to get rid of the first assignment.

>
>> @@ -1476,6 +1535,27 @@ long arch_do_domctl(
>>          copyback = true;
>>          break;
>>  
>> +    case XEN_DOMCTL_set_cpu_policy:
>> +        if ( d == currd ) /* No domain_pause() */
>> +        {
>> +            ret = -EINVAL;
>> +            break;
>> +        }
>> +
>> +        domain_pause(d);
>> +
>> +        if ( d->creation_finished )
>> +            ret = -EEXIST; /* No changing once the domain is running. */
>> +        else
>> +        {
>> +            ret = update_domain_cpu_policy(d, &domctl->u.cpu_policy);
>> +            if ( ret ) /* Copy domctl->u.cpu_policy.err_* to guest. */
>> +                copyback = true;
> Due to the OUT in the public header I think it would be better to
> always copy this back (making sure the invalid markers are in place
> in case of success). But I guess we're not very consistent with
> honoring OUT like this.

This doesn't work, because an early ESRCH/EBUSY won't fill in the
err_* fields even with copyback being changed here.

This is why xc_set_domain_cpu_policy() fills the values to begin with.

>
>> --- a/xen/include/public/domctl.h
>> +++ b/xen/include/public/domctl.h
>> @@ -658,17 +658,23 @@ struct xen_domctl_cpuid {
>>  };
>>  
>>  /*
>> - * XEN_DOMCTL_get_cpu_policy (x86 specific)
>> + * XEN_DOMCTL_{get,set}_cpu_policy (x86 specific)
>>   *
>> - * Query the CPUID and MSR policies for a specific domain.
>> + * Query or set the CPUID and MSR policies for a specific domain.
>>   */
>>  struct xen_domctl_cpu_policy {
>>      uint32_t nr_leaves; /* IN/OUT: Number of leaves in/written to
>>                           * 'cpuid_policy'. */
>>      uint32_t nr_msrs;   /* IN/OUT: Number of MSRs in/written to
>>                           * 'msr_domain_policy' */
>> -    XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_policy; /* OUT */
>> -    XEN_GUEST_HANDLE_64(xen_msr_entry_t) msr_policy;    /* OUT */
>> +    XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_policy; /* IN/OUT */
>> +    XEN_GUEST_HANDLE_64(xen_msr_entry_t) msr_policy;    /* IN/OUT */
>> +    uint32_t err_leaf, err_subleaf; /* OUT, set_policy only.  If not ~0,
>> +                                     * indicates the leaf/subleaf which
>> +                                     * auditing objected to. */
>> +    uint32_t err_msr_idx;           /* OUT, set_policy only.  If not ~0,
>> +                                     * indicates the MSR idx which
>> +                                     * auditing objected to. */
>>  };
>>  typedef struct xen_domctl_cpu_policy xen_domctl_cpu_policy_t;
>>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpu_policy_t);
> I know you're not liking the concept, but XEN_DOMCTL_INTERFACE_VERSION
> hasn't been bumped in this release cycle yet, and hence a binary
> incompatible change like this one needs to.

Oh.  The lack of bump had escaped me.

>  With at least this last
> aspect taken care of, hypervisor parts
> Reviewed-by: Jan Beulich <jbeulich@suse.com>

Thanks,

~Andrew
Jan Beulich Sept. 12, 2019, 1:20 p.m. UTC | #3
On 12.09.2019 15:15, Andrew Cooper wrote:
> On 12/09/2019 09:06, Jan Beulich wrote:
>> On 11.09.2019 22:04, Andrew Cooper wrote:
>>> --- a/xen/arch/x86/domctl.c
>>> +++ b/xen/arch/x86/domctl.c
>>> @@ -294,6 +294,65 @@ static int update_domain_cpuid_info(struct domain *d,
>>>      return 0;
>>>  }
>>>  
>>> +static int update_domain_cpu_policy(struct domain *d,
>>> +                                    xen_domctl_cpu_policy_t *xdpc)
>>> +{
>>> +    struct cpu_policy new = {};
>>> +    const struct cpu_policy *sys = is_pv_domain(d)
>>> +        ? &system_policies[XEN_SYSCTL_cpu_policy_pv_max]
>>> +        : &system_policies[XEN_SYSCTL_cpu_policy_hvm_max];
>>> +    struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS;
>>> +    int ret = -ENOMEM;
>>> +
>>> +    /* Start by copying the domain's existing policies. */
>>> +    if ( !(new.cpuid = xmemdup(d->arch.cpuid)) ||
>>> +         !(new.msr   = xmemdup(d->arch.msr)) )
>> To avoid the redundant initialization, this could as well be the
>> initializer of the variable.
> 
> I'm not sure that is the wisest course of action.  We wouldn't want to
> proactively perform the memory allocation if new logic needs to appear
> ahead of this.
> 
> In this example, the compiler ought to be able to do DSE to get rid of
> the first assignment.

Okay. I said "could" in the first place to make clear this
really is just an option to consider.

>>> @@ -1476,6 +1535,27 @@ long arch_do_domctl(
>>>          copyback = true;
>>>          break;
>>>  
>>> +    case XEN_DOMCTL_set_cpu_policy:
>>> +        if ( d == currd ) /* No domain_pause() */
>>> +        {
>>> +            ret = -EINVAL;
>>> +            break;
>>> +        }
>>> +
>>> +        domain_pause(d);
>>> +
>>> +        if ( d->creation_finished )
>>> +            ret = -EEXIST; /* No changing once the domain is running. */
>>> +        else
>>> +        {
>>> +            ret = update_domain_cpu_policy(d, &domctl->u.cpu_policy);
>>> +            if ( ret ) /* Copy domctl->u.cpu_policy.err_* to guest. */
>>> +                copyback = true;
>> Due to the OUT in the public header I think it would be better to
>> always copy this back (making sure the invalid markers are in place
>> in case of success). But I guess we're not very consistent with
>> honoring OUT like this.
> 
> This doesn't work, because an early ESRCH/EBUSY won't fill in the
> pointers even with copyback being changed here.
> 
> This is why xc_set_domain_cpu_policy() fills the values to begin with.

Oh, right. Perhaps the public header comments then want refining,
since ...

>>> --- a/xen/include/public/domctl.h
>>> +++ b/xen/include/public/domctl.h
>>> @@ -658,17 +658,23 @@ struct xen_domctl_cpuid {
>>>  };
>>>  
>>>  /*
>>> - * XEN_DOMCTL_get_cpu_policy (x86 specific)
>>> + * XEN_DOMCTL_{get,set}_cpu_policy (x86 specific)
>>>   *
>>> - * Query the CPUID and MSR policies for a specific domain.
>>> + * Query or set the CPUID and MSR policies for a specific domain.
>>>   */
>>>  struct xen_domctl_cpu_policy {
>>>      uint32_t nr_leaves; /* IN/OUT: Number of leaves in/written to
>>>                           * 'cpuid_policy'. */
>>>      uint32_t nr_msrs;   /* IN/OUT: Number of MSRs in/written to
>>>                           * 'msr_domain_policy' */
>>> -    XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_policy; /* OUT */
>>> -    XEN_GUEST_HANDLE_64(xen_msr_entry_t) msr_policy;    /* OUT */
>>> +    XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_policy; /* IN/OUT */
>>> +    XEN_GUEST_HANDLE_64(xen_msr_entry_t) msr_policy;    /* IN/OUT */
>>> +    uint32_t err_leaf, err_subleaf; /* OUT, set_policy only.  If not ~0,
>>> +                                     * indicates the leaf/subleaf which
>>> +                                     * auditing objected to. */
>>> +    uint32_t err_msr_idx;           /* OUT, set_policy only.  If not ~0,
>>> +                                     * indicates the MSR idx which
>>> +                                     * auditing objected to. */

... what is being said here isn't true in the case you mention
if the caller didn't set the fields accordingly.

Jan
Andrew Cooper Sept. 12, 2019, 4:34 p.m. UTC | #4
On 12/09/2019 14:15, Andrew Cooper wrote:
> On 12/09/2019 09:06, Jan Beulich wrote:
>> On 11.09.2019 22:04, Andrew Cooper wrote:
>>> --- a/tools/libxc/xc_cpuid_x86.c
>>> +++ b/tools/libxc/xc_cpuid_x86.c
>>> @@ -229,6 +229,55 @@ int xc_get_domain_cpu_policy(xc_interface *xch, uint32_t domid,
>>>      return ret;
>>>  }
>>>  
>>> +int xc_set_domain_cpu_policy(xc_interface *xch, uint32_t domid,
>>> +                             uint32_t nr_leaves, xen_cpuid_leaf_t *leaves,
>>> +                             uint32_t nr_msrs, xen_msr_entry_t *msrs,
>>> +                             uint32_t *err_leaf_p, uint32_t *err_subleaf_p,
>>> +                             uint32_t *err_msr_idx_p)
>>> +{
>>> +    DECLARE_DOMCTL;
>>> +    DECLARE_HYPERCALL_BOUNCE(leaves,
>>> +                             nr_leaves * sizeof(*leaves),
>>> +                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
>>> +    DECLARE_HYPERCALL_BOUNCE(msrs,
>>> +                             nr_msrs * sizeof(*msrs),
>>> +                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
>> With both being IN, the respective function parameters should imo
>> be pointers to const.
> Ok.

Sadly not.  It turns out that this is incompatible with the internals of
DECLARE_HYPERCALL_BOUNCE().

~Andrew
diff mbox series

Patch

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 7559e1bc69..e47778535d 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2530,6 +2530,11 @@  int xc_get_system_cpu_policy(xc_interface *xch, uint32_t index,
 int xc_get_domain_cpu_policy(xc_interface *xch, uint32_t domid,
                              uint32_t *nr_leaves, xen_cpuid_leaf_t *leaves,
                              uint32_t *nr_msrs, xen_msr_entry_t *msrs);
+int xc_set_domain_cpu_policy(xc_interface *xch, uint32_t domid,
+                             uint32_t nr_leaves, xen_cpuid_leaf_t *leaves,
+                             uint32_t nr_msrs, xen_msr_entry_t *msrs,
+                             uint32_t *err_leaf_p, uint32_t *err_subleaf_p,
+                             uint32_t *err_msr_idx_p);
 
 uint32_t xc_get_cpu_featureset_size(void);
 
diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c
index b829336082..33b9e9fc85 100644
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -229,6 +229,77 @@  int xc_get_domain_cpu_policy(xc_interface *xch, uint32_t domid,
     return ret;
 }
 
+/*
+ * Set the CPUID and MSR policies of domain 'domid' with a single
+ * XEN_DOMCTL_set_cpu_policy hypercall.
+ *
+ * 'leaves' and 'msrs' are input arrays of 'nr_leaves' and 'nr_msrs' entries.
+ * The err_*_p pointers are optional and may be NULL.  On failure, each
+ * non-NULL pointer receives the value reported back in the domctl, or ~0 if
+ * nothing was reported (including failures before the hypercall is issued).
+ * On success they are left untouched.
+ *
+ * Returns 0 on success, -1 if a bounce buffer could not be set up, or
+ * otherwise the result of do_domctl().
+ */
+int xc_set_domain_cpu_policy(xc_interface *xch, uint32_t domid,
+                             uint32_t nr_leaves, xen_cpuid_leaf_t *leaves,
+                             uint32_t nr_msrs, xen_msr_entry_t *msrs,
+                             uint32_t *err_leaf_p, uint32_t *err_subleaf_p,
+                             uint32_t *err_msr_idx_p)
+{
+    DECLARE_DOMCTL;
+    DECLARE_HYPERCALL_BOUNCE(leaves,
+                             nr_leaves * sizeof(*leaves),
+                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
+    DECLARE_HYPERCALL_BOUNCE(msrs,
+                             nr_msrs * sizeof(*msrs),
+                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
+    int ret = -1;
+
+    /*
+     * Pre-fill the error fields with the "nothing reported" marker: they are
+     * only copied back by Xen on some failure paths, and must also be sane
+     * when we fail before issuing the hypercall.
+     */
+    domctl.u.cpu_policy.err_leaf = ~0;
+    domctl.u.cpu_policy.err_subleaf = ~0;
+    domctl.u.cpu_policy.err_msr_idx = ~0;
+
+    if ( xc_hypercall_bounce_pre(xch, leaves) )
+        goto out;
+
+    /* Release the 'leaves' bounce buffer if bouncing 'msrs' fails. */
+    if ( xc_hypercall_bounce_pre(xch, msrs) )
+        goto out_leaves;
+
+    domctl.cmd = XEN_DOMCTL_set_cpu_policy;
+    domctl.domain = domid;
+    domctl.u.cpu_policy.nr_leaves = nr_leaves;
+    set_xen_guest_handle(domctl.u.cpu_policy.cpuid_policy, leaves);
+    domctl.u.cpu_policy.nr_msrs = nr_msrs;
+    set_xen_guest_handle(domctl.u.cpu_policy.msr_policy, msrs);
+
+    ret = do_domctl(xch, &domctl);
+
+    xc_hypercall_bounce_post(xch, msrs);
+ out_leaves:
+    xc_hypercall_bounce_post(xch, leaves);
+
+ out:
+    if ( ret )
+    {
+        if ( err_leaf_p )
+            *err_leaf_p = domctl.u.cpu_policy.err_leaf;
+        if ( err_subleaf_p )
+            *err_subleaf_p = domctl.u.cpu_policy.err_subleaf;
+        if ( err_msr_idx_p )
+            *err_msr_idx_p = domctl.u.cpu_policy.err_msr_idx;
+    }
+
+    return ret;
+}
+
 struct cpuid_domain_info
 {
     unsigned int vendor; /* X86_VENDOR_* */
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index d15ae066c3..99bc2fb10d 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -294,6 +294,65 @@  static int update_domain_cpuid_info(struct domain *d,
     return 0;
 }
 
+static int update_domain_cpu_policy(struct domain *d,
+                                    xen_domctl_cpu_policy_t *xdpc)
+{
+    struct cpu_policy new = {}; /* Working copies, freed at 'out'. */
+    const struct cpu_policy *sys = is_pv_domain(d)
+        ? &system_policies[XEN_SYSCTL_cpu_policy_pv_max]
+        : &system_policies[XEN_SYSCTL_cpu_policy_hvm_max];
+    struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS;
+    int ret = -ENOMEM; /* Result if either xmemdup() below fails. */
+    /* Merge xdpc into a copy of d's policies, audit it, install on success. */
+    /* Start by copying the domain's existing policies. */
+    if ( !(new.cpuid = xmemdup(d->arch.cpuid)) ||
+         !(new.msr   = xmemdup(d->arch.msr)) )
+        goto out;
+
+    /* Merge the toolstack-provided data into the copies. */
+    if ( (ret = x86_cpuid_copy_from_buffer(
+              new.cpuid, xdpc->cpuid_policy, xdpc->nr_leaves,
+              &err.leaf, &err.subleaf)) ||
+         (ret = x86_msr_copy_from_buffer(
+              new.msr, xdpc->msr_policy, xdpc->nr_msrs, &err.msr)) )
+        goto out;
+
+    /* Trim any newly-stale out-of-range leaves. */
+    x86_cpuid_policy_clear_out_of_range_leaves(new.cpuid);
+
+    /* Audit the combined dataset against the PV/HVM max policy in 'sys'. */
+    ret = x86_cpu_policies_are_compatible(sys, &new, &err);
+    if ( ret )
+        goto out;
+
+    /*
+     * Audit was successful.  Replace existing policies, leaving the old
+     * policies (now held in 'new') to be freed at 'out'.
+     */
+    SWAP(new.cpuid, d->arch.cpuid);
+    SWAP(new.msr,   d->arch.msr);
+
+    /* TODO: Drop when x86_cpu_policies_are_compatible() is completed. */
+    recalculate_cpuid_policy(d);
+
+    /* Recalculate relevant dom/vcpu state now the policy has changed. */
+    domain_cpu_policy_changed(d);
+
+ out:
+    /* Free whichever cpuid/msr structs are not installed in struct domain. */
+    xfree(new.cpuid);
+    xfree(new.msr);
+
+    if ( ret ) /* Only report the error indices to the caller on failure. */
+    {
+        xdpc->err_leaf    = err.leaf;
+        xdpc->err_subleaf = err.subleaf;
+        xdpc->err_msr_idx = err.msr;
+    }
+
+    return ret;
+}
+
 static int vcpu_set_vmce(struct vcpu *v,
                          const struct xen_domctl_ext_vcpucontext *evc)
 {
@@ -1476,6 +1535,27 @@  long arch_do_domctl(
         copyback = true;
         break;
 
+    case XEN_DOMCTL_set_cpu_policy:
+        if ( d == currd ) /* No domain_pause() */
+        {
+            ret = -EINVAL;
+            break;
+        }
+
+        domain_pause(d);
+
+        if ( d->creation_finished )
+            ret = -EEXIST; /* No changing once the domain is running. */
+        else
+        {
+            ret = update_domain_cpu_policy(d, &domctl->u.cpu_policy);
+            if ( ret ) /* Copy domctl->u.cpu_policy.err_* to guest. */
+                copyback = true;
+        }
+
+        domain_unpause(d);
+        break;
+
     default:
         ret = iommu_do_domctl(domctl, d, u_domctl);
         break;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 77f546cbb8..0471d3c680 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -658,17 +658,23 @@  struct xen_domctl_cpuid {
 };
 
 /*
- * XEN_DOMCTL_get_cpu_policy (x86 specific)
+ * XEN_DOMCTL_{get,set}_cpu_policy (x86 specific)
  *
- * Query the CPUID and MSR policies for a specific domain.
+ * Query or set the CPUID and MSR policies for a specific domain.
  */
 struct xen_domctl_cpu_policy {
     uint32_t nr_leaves; /* IN/OUT: Number of leaves in/written to
                          * 'cpuid_policy'. */
     uint32_t nr_msrs;   /* IN/OUT: Number of MSRs in/written to
                          * 'msr_domain_policy' */
-    XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_policy; /* OUT */
-    XEN_GUEST_HANDLE_64(xen_msr_entry_t) msr_policy;    /* OUT */
+    XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_policy; /* get: OUT, set: IN */
+    XEN_GUEST_HANDLE_64(xen_msr_entry_t) msr_policy;    /* get: OUT, set: IN */
+    uint32_t err_leaf, err_subleaf; /* OUT, set_policy only.  If not ~0, the
+                                     * leaf/subleaf auditing objected to.
+                                     * Only written on error: preset to ~0. */
+    uint32_t err_msr_idx;           /* OUT, set_policy only.  If not ~0, the
+                                     * MSR idx auditing objected to.
+                                     * Only written on error: preset to ~0. */
 };
 typedef struct xen_domctl_cpu_policy xen_domctl_cpu_policy_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpu_policy_t);
@@ -1193,6 +1199,7 @@  struct xen_domctl {
 /* #define XEN_DOMCTL_set_gnttab_limits          80 - Moved into XEN_DOMCTL_createdomain */
 #define XEN_DOMCTL_vuart_op                      81
 #define XEN_DOMCTL_get_cpu_policy                82
+#define XEN_DOMCTL_set_cpu_policy                83
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index 6800f2d9a0..b23772786a 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -715,6 +715,7 @@  static int flask_domctl(struct domain *d, int cmd)
     case XEN_DOMCTL_set_virq_handler:
         return current_has_perm(d, SECCLASS_DOMAIN, DOMAIN__SET_VIRQ_HANDLER);
 
+    case XEN_DOMCTL_set_cpu_policy:
     case XEN_DOMCTL_set_cpuid:
         return current_has_perm(d, SECCLASS_DOMAIN2, DOMAIN2__SET_CPUID);
 
diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
index 76f3d60ddd..6f3f9493f8 100644
--- a/xen/xsm/flask/policy/access_vectors
+++ b/xen/xsm/flask/policy/access_vectors
@@ -207,6 +207,7 @@  class domain2
 #  source = the domain making the hypercall
 #  target = the new target domain
     set_as_target
+# XEN_DOMCTL_set_cpu_policy
 # XEN_DOMCTL_set_cpuid
     set_cpuid
 # XEN_DOMCTL_gettscinfo