diff mbox

[v3,15/28] x86/cpu: Sysctl and common infrastructure for levelling context switching

Message ID 1458056124-8024-16-git-send-email-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andrew Cooper March 15, 2016, 3:35 p.m. UTC
A toolstack needs to know how much control Xen has over the visible cpuid
values in PV guests.  Provide an explicit mechanism to query what Xen is
capable of.

This interface will currently report no capabilities.  This change is
scaffolding for future patches, which will introduce detection and switching
logic, after which the interface will report hardware capabilities correctly.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>

v3:
 * Reintroduce XEN_SYSCTL_get_levelling_caps (requested by Joao for some
   libvirt development he has planned).
 * Rename to XEN_SYSCTL_get_cpu_levelling_caps, and rename the constants to
   match the Xen command line options.
v2:
 * s/cpumasks/cpuidmasks/
---
 xen/arch/x86/cpu/common.c        |  6 ++++++
 xen/arch/x86/sysctl.c            |  6 ++++++
 xen/include/asm-x86/cpufeature.h |  1 +
 xen/include/asm-x86/processor.h  | 31 +++++++++++++++++++++++++++++++
 xen/include/public/sysctl.h      | 23 +++++++++++++++++++++++
 5 files changed, 67 insertions(+)

Comments

Joao Martins March 15, 2016, 5:35 p.m. UTC | #1
On 03/15/2016 03:35 PM, Andrew Cooper wrote:
> A toolstack needs to know how much control Xen has over the visible cpuid
> values in PV guests.  Provide an explicit mechanism to query what Xen is
> capable of.
> 
> This interface will currently report no capabilities.  This change is
> scaffolding for future patches, which will introduce detection and switching
> logic, after which the interface will report hardware capabilities correctly.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> CC: Jan Beulich <JBeulich@suse.com>
> 
> v3:
>  * Reintroduce XEN_SYSCTL_get_levelling_caps (requested by Joao for some
>    libvirt development he has planned).
Hey Andrew,

Perhaps we got mixed in between threads but just to clarify: the exposure of
XEN_SYSCTL_get_levelling_caps isn't related to libvirt but rather related to
libxc cpuid policy[0] for supporting CPU topology on PV guests in the purpose of
the series I proposed here[1]. The libvirt related one was about the exposure of
featuresets (Patch 21) and/or stabilization of hw_caps format (Patch 2). My
apologies, if there was something I said that lead you into error.

Joao

[0] http://lists.xenproject.org/archives/html/xen-devel/2016-03/msg00397.html
[1] http://lists.xenproject.org/archives/html/xen-devel/2016-02/msg03115.html

>  * Rename to XEN_SYSCTL_get_cpu_levelling_caps, and rename the constants to
>    match the Xen command line options.
> v2:
>  * s/cpumasks/cpuidmasks/
> ---
>  xen/arch/x86/cpu/common.c        |  6 ++++++
>  xen/arch/x86/sysctl.c            |  6 ++++++
>  xen/include/asm-x86/cpufeature.h |  1 +
>  xen/include/asm-x86/processor.h  | 31 +++++++++++++++++++++++++++++++
>  xen/include/public/sysctl.h      | 23 +++++++++++++++++++++++
>  5 files changed, 67 insertions(+)
> 
> diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
> index a325dc0..dd32a94 100644
> --- a/xen/arch/x86/cpu/common.c
> +++ b/xen/arch/x86/cpu/common.c
> @@ -36,6 +36,12 @@ integer_param("cpuid_mask_ext_ecx", opt_cpuid_mask_ext_ecx);
>  unsigned int opt_cpuid_mask_ext_edx = ~0u;
>  integer_param("cpuid_mask_ext_edx", opt_cpuid_mask_ext_edx);
>  
> +unsigned int __initdata expected_levelling_cap;
> +unsigned int __read_mostly levelling_caps;
> +
> +DEFINE_PER_CPU(struct cpuidmasks, cpuidmasks);
> +struct cpuidmasks __read_mostly cpuidmask_defaults;
> +
>  const struct cpu_dev *__read_mostly cpu_devs[X86_VENDOR_NUM] = {};
>  
>  unsigned int paddr_bits __read_mostly = 36;
> diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
> index 58cbd70..f68cbec 100644
> --- a/xen/arch/x86/sysctl.c
> +++ b/xen/arch/x86/sysctl.c
> @@ -190,6 +190,12 @@ long arch_do_sysctl(
>          }
>          break;
>  
> +    case XEN_SYSCTL_get_cpu_levelling_caps:
> +        sysctl->u.cpu_levelling_caps.caps = levelling_caps;
> +        if ( __copy_field_to_guest(u_sysctl, sysctl, u.cpu_levelling_caps.caps) )
> +            ret = -EFAULT;
> +        break;
> +
>      default:
>          ret = -ENOSYS;
>          break;
> diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
> index ed29d6f..50414e3 100644
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -81,6 +81,7 @@
>  #define cpu_has_xsavec		boot_cpu_has(X86_FEATURE_XSAVEC)
>  #define cpu_has_xgetbv1		boot_cpu_has(X86_FEATURE_XGETBV1)
>  #define cpu_has_xsaves		boot_cpu_has(X86_FEATURE_XSAVES)
> +#define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
>  
>  enum _cache_type {
>      CACHE_TYPE_NULL = 0,
> diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
> index ba907ee..9aa93db 100644
> --- a/xen/include/asm-x86/processor.h
> +++ b/xen/include/asm-x86/processor.h
> @@ -618,6 +618,37 @@ void microcode_set_module(unsigned int);
>  int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void), unsigned long len);
>  int microcode_resume_cpu(unsigned int cpu);
>  
> +#define LCAP_faulting XEN_SYSCTL_CPU_LEVELCAP_faulting
> +#define LCAP_1cd      (XEN_SYSCTL_CPU_LEVELCAP_ecx |        \
> +                       XEN_SYSCTL_CPU_LEVELCAP_edx)
> +#define LCAP_e1cd     (XEN_SYSCTL_CPU_LEVELCAP_extd_ecx |   \
> +                       XEN_SYSCTL_CPU_LEVELCAP_extd_edx)
> +#define LCAP_Da1      XEN_SYSCTL_CPU_LEVELCAP_xsave_eax
> +#define LCAP_6c       XEN_SYSCTL_CPU_LEVELCAP_themal_ecx
> +#define LCAP_7ab0     (XEN_SYSCTL_CPU_LEVELCAP_l7s0_eax |   \
> +                       XEN_SYSCTL_CPU_LEVELCAP_17s0_ebx)
> +
> +/*
> + * Expected levelling capabilities (given cpuid vendor/family information),
> + * and levelling capabilities actually available (given MSR probing).
> + */
> +extern unsigned int expected_levelling_cap, levelling_caps;
> +
> +struct cpuidmasks
> +{
> +    uint64_t _1cd;
> +    uint64_t e1cd;
> +    uint64_t Da1;
> +    uint64_t _6c;
> +    uint64_t _7ab0;
> +};
> +
> +/* Per CPU shadows of masking MSR values, for lazy context switching. */
> +DECLARE_PER_CPU(struct cpuidmasks, cpuidmasks);
> +
> +/* Default masking MSR values, calculated at boot. */
> +extern struct cpuidmasks cpuidmask_defaults;
> +
>  enum get_cpu_vendor {
>     gcv_host_early,
>     gcv_host_late,
> diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
> index 96680eb..0dacca2 100644
> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -766,6 +766,27 @@ struct xen_sysctl_tmem_op {
>  typedef struct xen_sysctl_tmem_op xen_sysctl_tmem_op_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tmem_op_t);
>  
> +/*
> + * XEN_SYSCTL_get_cpu_levelling_caps (x86 specific)
> + *
> + * Return hardware capabilities concerning masking or faulting of the cpuid
> + * instruction for PV guests.
> + */
> +struct xen_sysctl_cpu_levelling_caps {
> +#define XEN_SYSCTL_CPU_LEVELCAP_faulting   (1ul <<  0) /* CPUID faulting    */
> +#define XEN_SYSCTL_CPU_LEVELCAP_ecx        (1ul <<  1) /* 0x00000001.ecx    */
> +#define XEN_SYSCTL_CPU_LEVELCAP_edx        (1ul <<  2) /* 0x00000001.edx    */
> +#define XEN_SYSCTL_CPU_LEVELCAP_extd_ecx   (1ul <<  3) /* 0x80000001.ecx    */
> +#define XEN_SYSCTL_CPU_LEVELCAP_extd_edx   (1ul <<  4) /* 0x80000001.edx    */
> +#define XEN_SYSCTL_CPU_LEVELCAP_xsave_eax  (1ul <<  5) /* 0x0000000D:1.eax  */
> +#define XEN_SYSCTL_CPU_LEVELCAP_themal_ecx (1ul <<  6) /* 0x00000006.ecx    */
> +#define XEN_SYSCTL_CPU_LEVELCAP_l7s0_eax   (1ul <<  7) /* 0x00000007:0.eax  */
> +#define XEN_SYSCTL_CPU_LEVELCAP_17s0_ebx   (1ul <<  8) /* 0x00000007:0.ebx  */
> +    uint32_t caps;
> +};
> +typedef struct xen_sysctl_cpu_levelling_caps xen_sysctl_cpu_levelling_caps_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_levelling_caps_t);
> +
>  struct xen_sysctl {
>      uint32_t cmd;
>  #define XEN_SYSCTL_readconsole                    1
> @@ -791,6 +812,7 @@ struct xen_sysctl {
>  #define XEN_SYSCTL_pcitopoinfo                   22
>  #define XEN_SYSCTL_psr_cat_op                    23
>  #define XEN_SYSCTL_tmem_op                       24
> +#define XEN_SYSCTL_get_cpu_levelling_caps        25
>      uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
>      union {
>          struct xen_sysctl_readconsole       readconsole;
> @@ -816,6 +838,7 @@ struct xen_sysctl {
>          struct xen_sysctl_psr_cmt_op        psr_cmt_op;
>          struct xen_sysctl_psr_cat_op        psr_cat_op;
>          struct xen_sysctl_tmem_op           tmem_op;
> +        struct xen_sysctl_cpu_levelling_caps cpu_levelling_caps;
>          uint8_t                             pad[128];
>      } u;
>  };
>
Andrew Cooper March 15, 2016, 7:29 p.m. UTC | #2
On 15/03/16 17:35, Joao Martins wrote:
>
> On 03/15/2016 03:35 PM, Andrew Cooper wrote:
>> A toolstack needs to know how much control Xen has over the visible cpuid
>> values in PV guests.  Provide an explicit mechanism to query what Xen is
>> capable of.
>>
>> This interface will currently report no capabilities.  This change is
>> scaffolding for future patches, which will introduce detection and switching
>> logic, after which the interface will report hardware capabilities correctly.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> ---
>> CC: Jan Beulich <JBeulich@suse.com>
>>
>> v3:
>>  * Reintroduce XEN_SYSCTL_get_levelling_caps (requested by Joao for some
>>    libvirt development he has planned).
> Hey Andrew,
>
> Perhaps we got mixed in between threads but just to clarify: the exposure of
> XEN_SYSCTL_get_levelling_caps isn't related to libvirt but rather related to
> libxc cpuid policy[0] for supporting CPU topology on PV guests in the purpose of
> the series I proposed here[1]. The libvirt related one was about the exposure of
> featuresets (Patch 21) and/or stabilization of hw_caps format (Patch 2). My
> apologies, if there was something I said that lead you into error.
>
> Joao
>
> [0] http://lists.xenproject.org/archives/html/xen-devel/2016-03/msg00397.html
> [1] http://lists.xenproject.org/archives/html/xen-devel/2016-02/msg03115.html

Sorry - my mistake.  Either way, you still need this hypercall and
hopefully it is in a suitable form for you to use.

~Andrew
Joao Martins March 15, 2016, 7:34 p.m. UTC | #3
On 03/15/2016 07:29 PM, Andrew Cooper wrote:
> On 15/03/16 17:35, Joao Martins wrote:
>>
>> On 03/15/2016 03:35 PM, Andrew Cooper wrote:
>>> A toolstack needs to know how much control Xen has over the visible cpuid
>>> values in PV guests.  Provide an explicit mechanism to query what Xen is
>>> capable of.
>>>
>>> This interface will currently report no capabilities.  This change is
>>> scaffolding for future patches, which will introduce detection and switching
>>> logic, after which the interface will report hardware capabilities correctly.
>>>
>>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>> ---
>>> CC: Jan Beulich <JBeulich@suse.com>
>>>
>>> v3:
>>>  * Reintroduce XEN_SYSCTL_get_levelling_caps (requested by Joao for some
>>>    libvirt development he has planned).
>> Hey Andrew,
>>
>> Perhaps we got mixed in between threads but just to clarify: the exposure of
>> XEN_SYSCTL_get_levelling_caps isn't related to libvirt but rather related to
>> libxc cpuid policy[0] for supporting CPU topology on PV guests in the purpose of
>> the series I proposed here[1]. The libvirt related one was about the exposure of
>> featuresets (Patch 21) and/or stabilization of hw_caps format (Patch 2). My
>> apologies, if there was something I said that lead you into error.
>>
>> Joao
>>
>> [0] http://lists.xenproject.org/archives/html/xen-devel/2016-03/msg00397.html
>> [1] http://lists.xenproject.org/archives/html/xen-devel/2016-02/msg03115.html
> 
> Sorry - my mistake.  Either way, you still need this hypercall and
> hopefully it is in a suitable form for you to use.
OK, Thanks a lot!
Jan Beulich March 21, 2016, 4:23 p.m. UTC | #4
>>> On 15.03.16 at 16:35, <andrew.cooper3@citrix.com> wrote:
> --- a/xen/arch/x86/cpu/common.c
> +++ b/xen/arch/x86/cpu/common.c
> @@ -36,6 +36,12 @@ integer_param("cpuid_mask_ext_ecx", 
> opt_cpuid_mask_ext_ecx);
>  unsigned int opt_cpuid_mask_ext_edx = ~0u;
>  integer_param("cpuid_mask_ext_edx", opt_cpuid_mask_ext_edx);
>  
> +unsigned int __initdata expected_levelling_cap;

Especially for an __initdata item to be non-static, its use(s) should
be visible in a patch adding such a variable.

> --- a/xen/include/asm-x86/processor.h
> +++ b/xen/include/asm-x86/processor.h
> @@ -618,6 +618,37 @@ void microcode_set_module(unsigned int);
>  int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void), unsigned long len);
>  int microcode_resume_cpu(unsigned int cpu);
>  
> +#define LCAP_faulting XEN_SYSCTL_CPU_LEVELCAP_faulting
> +#define LCAP_1cd      (XEN_SYSCTL_CPU_LEVELCAP_ecx |        \
> +                       XEN_SYSCTL_CPU_LEVELCAP_edx)
> +#define LCAP_e1cd     (XEN_SYSCTL_CPU_LEVELCAP_extd_ecx |   \
> +                       XEN_SYSCTL_CPU_LEVELCAP_extd_edx)
> +#define LCAP_Da1      XEN_SYSCTL_CPU_LEVELCAP_xsave_eax
> +#define LCAP_6c       XEN_SYSCTL_CPU_LEVELCAP_themal_ecx
> +#define LCAP_7ab0     (XEN_SYSCTL_CPU_LEVELCAP_l7s0_eax |   \
> +                       XEN_SYSCTL_CPU_LEVELCAP_17s0_ebx)
> +
> +/*
> + * Expected levelling capabilities (given cpuid vendor/family information),
> + * and levelling capabilities actually available (given MSR probing).
> + */
> +extern unsigned int expected_levelling_cap, levelling_caps;
> +
> +struct cpuidmasks
> +{
> +    uint64_t _1cd;
> +    uint64_t e1cd;
> +    uint64_t Da1;
> +    uint64_t _6c;
> +    uint64_t _7ab0;
> +};
> +
> +/* Per CPU shadows of masking MSR values, for lazy context switching. */
> +DECLARE_PER_CPU(struct cpuidmasks, cpuidmasks);
> +
> +/* Default masking MSR values, calculated at boot. */
> +extern struct cpuidmasks cpuidmask_defaults;

Would much/all of this not better go into cpuid.h?

> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -766,6 +766,27 @@ struct xen_sysctl_tmem_op {
>  typedef struct xen_sysctl_tmem_op xen_sysctl_tmem_op_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tmem_op_t);
>  
> +/*
> + * XEN_SYSCTL_get_cpu_levelling_caps (x86 specific)
> + *
> + * Return hardware capabilities concerning masking or faulting of the cpuid
> + * instruction for PV guests.
> + */
> +struct xen_sysctl_cpu_levelling_caps {
> +#define XEN_SYSCTL_CPU_LEVELCAP_faulting   (1ul <<  0) /* CPUID faulting    */
> +#define XEN_SYSCTL_CPU_LEVELCAP_ecx        (1ul <<  1) /* 0x00000001.ecx    */
> +#define XEN_SYSCTL_CPU_LEVELCAP_edx        (1ul <<  2) /* 0x00000001.edx    */
> +#define XEN_SYSCTL_CPU_LEVELCAP_extd_ecx   (1ul <<  3) /* 0x80000001.ecx    */
> +#define XEN_SYSCTL_CPU_LEVELCAP_extd_edx   (1ul <<  4) /* 0x80000001.edx    */
> +#define XEN_SYSCTL_CPU_LEVELCAP_xsave_eax  (1ul <<  5) /* 0x0000000D:1.eax  */
> +#define XEN_SYSCTL_CPU_LEVELCAP_themal_ecx (1ul <<  6) /* 0x00000006.ecx    */

thermal

> +#define XEN_SYSCTL_CPU_LEVELCAP_l7s0_eax   (1ul <<  7) /* 0x00000007:0.eax  */
> +#define XEN_SYSCTL_CPU_LEVELCAP_17s0_ebx   (1ul <<  8) /* 0x00000007:0.ebx  */

There appears to be a 1 here when I think an l is meant.

Jan
Andrew Cooper March 22, 2016, 3:57 p.m. UTC | #5
On 21/03/16 16:23, Jan Beulich wrote:
>>>> On 15.03.16 at 16:35, <andrew.cooper3@citrix.com> wrote:
>> --- a/xen/arch/x86/cpu/common.c
>> +++ b/xen/arch/x86/cpu/common.c
>> @@ -36,6 +36,12 @@ integer_param("cpuid_mask_ext_ecx", 
>> opt_cpuid_mask_ext_ecx);
>>  unsigned int opt_cpuid_mask_ext_edx = ~0u;
>>  integer_param("cpuid_mask_ext_edx", opt_cpuid_mask_ext_edx);
>>  
>> +unsigned int __initdata expected_levelling_cap;
> Especially for an __initdata item to be non-static, its use(s) should
> be visible in a patch adding such a variable.

From the commit message:
> This interface will currently report no capabilities.  This change is
> scaffolding for future patches, which will introduce detection and switching
> logic, after which the interface will report hardware capabilities correctly.

This is specifically to reduce the complexity of the following patches. 
How much more clearly do you want this stating?

~Andrew
Jan Beulich March 22, 2016, 4:16 p.m. UTC | #6
>>> On 22.03.16 at 16:57, <andrew.cooper3@citrix.com> wrote:
> On 21/03/16 16:23, Jan Beulich wrote:
>>>>> On 15.03.16 at 16:35, <andrew.cooper3@citrix.com> wrote:
>>> --- a/xen/arch/x86/cpu/common.c
>>> +++ b/xen/arch/x86/cpu/common.c
>>> @@ -36,6 +36,12 @@ integer_param("cpuid_mask_ext_ecx", 
>>> opt_cpuid_mask_ext_ecx);
>>>  unsigned int opt_cpuid_mask_ext_edx = ~0u;
>>>  integer_param("cpuid_mask_ext_edx", opt_cpuid_mask_ext_edx);
>>>  
>>> +unsigned int __initdata expected_levelling_cap;
>> Especially for an __initdata item to be non-static, its use(s) should
>> be visible in a patch adding such a variable.
> 
> From the commit message:
>> This interface will currently report no capabilities.  This change is
>> scaffolding for future patches, which will introduce detection and switching
>> logic, after which the interface will report hardware capabilities 
> correctly.
> 
> This is specifically to reduce the complexity of the following patches. 
> How much more clearly do you want this stating?

The problem isn't that this wasn't stated, but that (as said) namely
for __initdata objects, which put restrictions on where use of them
is valid, introducing them in one patch and adding uses only later is
difficult to review. Quite a bit more difficult than if such variables
got added only once needed (which "bloats" the affected patch by
just a handful of lines at most).

But anyway, I've got through it, and I'm not insisting on this needing
to change. I merely wanted to point out that while there are cases
where it's reasonable to add unused objects or functions to make
later stuff easier to review, I don't think this is such a case (at least
not as far as the variables here are concerned).

Jan
diff mbox

Patch

diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index a325dc0..dd32a94 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -36,6 +36,12 @@  integer_param("cpuid_mask_ext_ecx", opt_cpuid_mask_ext_ecx);
 unsigned int opt_cpuid_mask_ext_edx = ~0u;
 integer_param("cpuid_mask_ext_edx", opt_cpuid_mask_ext_edx);
 
+unsigned int __initdata expected_levelling_cap;
+unsigned int __read_mostly levelling_caps;
+
+DEFINE_PER_CPU(struct cpuidmasks, cpuidmasks);
+struct cpuidmasks __read_mostly cpuidmask_defaults;
+
 const struct cpu_dev *__read_mostly cpu_devs[X86_VENDOR_NUM] = {};
 
 unsigned int paddr_bits __read_mostly = 36;
diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
index 58cbd70..f68cbec 100644
--- a/xen/arch/x86/sysctl.c
+++ b/xen/arch/x86/sysctl.c
@@ -190,6 +190,12 @@  long arch_do_sysctl(
         }
         break;
 
+    case XEN_SYSCTL_get_cpu_levelling_caps:
+        sysctl->u.cpu_levelling_caps.caps = levelling_caps;
+        if ( __copy_field_to_guest(u_sysctl, sysctl, u.cpu_levelling_caps.caps) )
+            ret = -EFAULT;
+        break;
+
     default:
         ret = -ENOSYS;
         break;
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index ed29d6f..50414e3 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -81,6 +81,7 @@ 
 #define cpu_has_xsavec		boot_cpu_has(X86_FEATURE_XSAVEC)
 #define cpu_has_xgetbv1		boot_cpu_has(X86_FEATURE_XGETBV1)
 #define cpu_has_xsaves		boot_cpu_has(X86_FEATURE_XSAVES)
+#define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 
 enum _cache_type {
     CACHE_TYPE_NULL = 0,
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index ba907ee..9aa93db 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -618,6 +618,37 @@  void microcode_set_module(unsigned int);
 int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void), unsigned long len);
 int microcode_resume_cpu(unsigned int cpu);
 
+#define LCAP_faulting XEN_SYSCTL_CPU_LEVELCAP_faulting
+#define LCAP_1cd      (XEN_SYSCTL_CPU_LEVELCAP_ecx |        \
+                       XEN_SYSCTL_CPU_LEVELCAP_edx)
+#define LCAP_e1cd     (XEN_SYSCTL_CPU_LEVELCAP_extd_ecx |   \
+                       XEN_SYSCTL_CPU_LEVELCAP_extd_edx)
+#define LCAP_Da1      XEN_SYSCTL_CPU_LEVELCAP_xsave_eax
+#define LCAP_6c       XEN_SYSCTL_CPU_LEVELCAP_themal_ecx
+#define LCAP_7ab0     (XEN_SYSCTL_CPU_LEVELCAP_l7s0_eax |   \
+                       XEN_SYSCTL_CPU_LEVELCAP_17s0_ebx)
+
+/*
+ * Expected levelling capabilities (given cpuid vendor/family information),
+ * and levelling capabilities actually available (given MSR probing).
+ */
+extern unsigned int expected_levelling_cap, levelling_caps;
+
+struct cpuidmasks
+{
+    uint64_t _1cd;
+    uint64_t e1cd;
+    uint64_t Da1;
+    uint64_t _6c;
+    uint64_t _7ab0;
+};
+
+/* Per CPU shadows of masking MSR values, for lazy context switching. */
+DECLARE_PER_CPU(struct cpuidmasks, cpuidmasks);
+
+/* Default masking MSR values, calculated at boot. */
+extern struct cpuidmasks cpuidmask_defaults;
+
 enum get_cpu_vendor {
    gcv_host_early,
    gcv_host_late,
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 96680eb..0dacca2 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -766,6 +766,27 @@  struct xen_sysctl_tmem_op {
 typedef struct xen_sysctl_tmem_op xen_sysctl_tmem_op_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tmem_op_t);
 
+/*
+ * XEN_SYSCTL_get_cpu_levelling_caps (x86 specific)
+ *
+ * Return hardware capabilities concerning masking or faulting of the cpuid
+ * instruction for PV guests.
+ */
+struct xen_sysctl_cpu_levelling_caps {
+#define XEN_SYSCTL_CPU_LEVELCAP_faulting   (1ul <<  0) /* CPUID faulting    */
+#define XEN_SYSCTL_CPU_LEVELCAP_ecx        (1ul <<  1) /* 0x00000001.ecx    */
+#define XEN_SYSCTL_CPU_LEVELCAP_edx        (1ul <<  2) /* 0x00000001.edx    */
+#define XEN_SYSCTL_CPU_LEVELCAP_extd_ecx   (1ul <<  3) /* 0x80000001.ecx    */
+#define XEN_SYSCTL_CPU_LEVELCAP_extd_edx   (1ul <<  4) /* 0x80000001.edx    */
+#define XEN_SYSCTL_CPU_LEVELCAP_xsave_eax  (1ul <<  5) /* 0x0000000D:1.eax  */
+#define XEN_SYSCTL_CPU_LEVELCAP_themal_ecx (1ul <<  6) /* 0x00000006.ecx    */
+#define XEN_SYSCTL_CPU_LEVELCAP_l7s0_eax   (1ul <<  7) /* 0x00000007:0.eax  */
+#define XEN_SYSCTL_CPU_LEVELCAP_17s0_ebx   (1ul <<  8) /* 0x00000007:0.ebx  */
+    uint32_t caps;
+};
+typedef struct xen_sysctl_cpu_levelling_caps xen_sysctl_cpu_levelling_caps_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_levelling_caps_t);
+
 struct xen_sysctl {
     uint32_t cmd;
 #define XEN_SYSCTL_readconsole                    1
@@ -791,6 +812,7 @@  struct xen_sysctl {
 #define XEN_SYSCTL_pcitopoinfo                   22
 #define XEN_SYSCTL_psr_cat_op                    23
 #define XEN_SYSCTL_tmem_op                       24
+#define XEN_SYSCTL_get_cpu_levelling_caps        25
     uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
     union {
         struct xen_sysctl_readconsole       readconsole;
@@ -816,6 +838,7 @@  struct xen_sysctl {
         struct xen_sysctl_psr_cmt_op        psr_cmt_op;
         struct xen_sysctl_psr_cat_op        psr_cat_op;
         struct xen_sysctl_tmem_op           tmem_op;
+        struct xen_sysctl_cpu_levelling_caps cpu_levelling_caps;
         uint8_t                             pad[128];
     } u;
 };