diff mbox

[13/19] x86/mce_intel: detect and enable LMCE on Intel host

Message ID 20170217063936.13208-14-haozhong.zhang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Haozhong Zhang Feb. 17, 2017, 6:39 a.m. UTC
Enable LMCE if it's supported by the host CPU. If the Xen boot parameter
"mce_fb = 1" is present, LMCE is forcibly disabled.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
Cc: Christoph Egger <chegger@amazon.de>
Cc: Liu Jinsong <jinsong.liu@alibaba-inc.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
---
 xen/arch/x86/cpu/mcheck/mce.h       |  1 +
 xen/arch/x86/cpu/mcheck/mce_intel.c | 44 ++++++++++++++++++++++++++++++++-----
 xen/arch/x86/cpu/mcheck/x86_mca.h   |  5 +++++
 xen/include/asm-x86/msr-index.h     |  2 ++
 4 files changed, 46 insertions(+), 6 deletions(-)

Comments

Jan Beulich Feb. 22, 2017, 3:10 p.m. UTC | #1
>>> On 17.02.17 at 07:39, <haozhong.zhang@intel.com> wrote:
> --- a/xen/arch/x86/cpu/mcheck/mce.h
> +++ b/xen/arch/x86/cpu/mcheck/mce.h
> @@ -38,6 +38,7 @@ enum mcheck_type {
>  };
>  
>  extern uint8_t cmci_apic_vector;
> +extern bool lmce_support;
>  
>  /* Init functions */
>  enum mcheck_type amd_mcheck_init(struct cpuinfo_x86 *c);
> diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c 
> b/xen/arch/x86/cpu/mcheck/mce_intel.c
> index 9e5ee3d..b4cc41a 100644
> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c
> +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
> @@ -29,6 +29,9 @@ boolean_param("mce_fb", mce_force_broadcast);
>  
>  static int __read_mostly nr_intel_ext_msrs;
>  
> +/* If mce_force_broadcast == 1, lmce_support will be disabled forcibly. */
> +bool __read_mostly lmce_support = 0;

false (but really there's no need for an initializer here)

> @@ -677,10 +680,34 @@ static int mce_is_broadcast(struct cpuinfo_x86 *c)
>      return 0;
>  }
>  
> +static bool intel_enable_lmce(void)
> +{
> +    uint64_t msr_content;
> +
> +    /*
> +     * Section "Enabling Local Machine Check" in Intel SDM Vol 3
> +     * requires software must ensure the LOCK bit and LMCE_ON bit
> +     * of MSR_IA32_FEATURE_CONTROL are set before setting
> +     * MSR_IA32_MCG_EXT_CTL.LMCE_EN.
> +     */
> +
> +    if ( rdmsr_safe(MSR_IA32_FEATURE_CONTROL, msr_content) )
> +        return 0;

false (and so on further down)

> +    if ( msr_content &
> +         (IA32_FEATURE_CONTROL_LOCK | IA32_FEATURE_CONTROL_LMCE_ON) )

This checks whether at least one of the bits is on, which isn't in
line with the comment.

>  static void intel_init_mca(struct cpuinfo_x86 *c)
>  {
> -    bool_t broadcast, cmci = 0, ser = 0;
> +    bool_t broadcast, cmci = 0, ser = 0, lmce = 0;

Please use the opportunity to change to plain bool (and false).

> @@ -700,26 +727,31 @@ static void intel_init_mca(struct cpuinfo_x86 *c)
>  
>      first = mce_firstbank(c);
>  
> +    if ( !mce_force_broadcast && (msr_content & MCG_LMCE_P) )

Please make all your additions match the prevailing coding style in
this file (which admittedly is neither ours nor Linux's, but a mix).

> +        lmce = intel_enable_lmce();
> +
>      if (smp_processor_id() == 0)
>      {
>          dprintk(XENLOG_INFO, "MCA Capability: BCAST %x SER %x"
> -                " CMCI %x firstbank %x extended MCE MSR %x\n",
> -                broadcast, ser, cmci, first, ext_num);
> +                " CMCI %x firstbank %x extended MCE MSR %x LMCE %x\n",
> +                broadcast, ser, cmci, first, ext_num, lmce);

Please can you switch over to not printing booleans as numbers
here, but simply omitting the respective string from the output if
a feature is not there? Only actual numbers should be printed as
such.

Jan
Haozhong Zhang Feb. 23, 2017, 3:16 a.m. UTC | #2
On 02/22/17 08:10 -0700, Jan Beulich wrote:
> >>> On 17.02.17 at 07:39, <haozhong.zhang@intel.com> wrote:
> > --- a/xen/arch/x86/cpu/mcheck/mce.h
> > +++ b/xen/arch/x86/cpu/mcheck/mce.h
> > @@ -38,6 +38,7 @@ enum mcheck_type {
> >  };
> >  
> >  extern uint8_t cmci_apic_vector;
> > +extern bool lmce_support;
> >  
> >  /* Init functions */
> >  enum mcheck_type amd_mcheck_init(struct cpuinfo_x86 *c);
> > diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c 
> > b/xen/arch/x86/cpu/mcheck/mce_intel.c
> > index 9e5ee3d..b4cc41a 100644
> > --- a/xen/arch/x86/cpu/mcheck/mce_intel.c
> > +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
> > @@ -29,6 +29,9 @@ boolean_param("mce_fb", mce_force_broadcast);
> >  
> >  static int __read_mostly nr_intel_ext_msrs;
> >  
> > +/* If mce_force_broadcast == 1, lmce_support will be disabled forcibly. */
> > +bool __read_mostly lmce_support = 0;
> 
> false (but really there's no need for an initializer here)
> 
> > @@ -677,10 +680,34 @@ static int mce_is_broadcast(struct cpuinfo_x86 *c)
> >      return 0;
> >  }
> >  
> > +static bool intel_enable_lmce(void)
> > +{
> > +    uint64_t msr_content;
> > +
> > +    /*
> > +     * Section "Enabling Local Machine Check" in Intel SDM Vol 3
> > +     * requires software must ensure the LOCK bit and LMCE_ON bit
> > +     * of MSR_IA32_FEATURE_CONTROL are set before setting
> > +     * MSR_IA32_MCG_EXT_CTL.LMCE_EN.
> > +     */
> > +
> > +    if ( rdmsr_safe(MSR_IA32_FEATURE_CONTROL, msr_content) )
> > +        return 0;
> 
> false (and so on further down)

I'll fix these boolean issues here and in the other patches.

> 
> > +    if ( msr_content &
> > +         (IA32_FEATURE_CONTROL_LOCK | IA32_FEATURE_CONTROL_LMCE_ON) )
> 
> This checks whether at least one of the bits is on, which isn't in
> line with the comment.
>

I'll fix this check.

> >  static void intel_init_mca(struct cpuinfo_x86 *c)
> >  {
> > -    bool_t broadcast, cmci = 0, ser = 0;
> > +    bool_t broadcast, cmci = 0, ser = 0, lmce = 0;
> 
> Please use the opportunity to change to plain bool (and false).

sure

> 
> > @@ -700,26 +727,31 @@ static void intel_init_mca(struct cpuinfo_x86 *c)
> >  
> >      first = mce_firstbank(c);
> >  
> > +    if ( !mce_force_broadcast && (msr_content & MCG_LMCE_P) )
> 
> Please make all your additions match the prevailing coding style in
> this file (which admittedly is neither ours nor Linux'es, but a mix).

The problem is that the existing style in this file is not consistent.
Both if ( cond ) and if (cond) are used in this file. I chose to use
Xen style in the new code.

> 
> > +        lmce = intel_enable_lmce();
> > +
> >      if (smp_processor_id() == 0)
> >      {
> >          dprintk(XENLOG_INFO, "MCA Capability: BCAST %x SER %x"
> > -                " CMCI %x firstbank %x extended MCE MSR %x\n",
> > -                broadcast, ser, cmci, first, ext_num);
> > +                " CMCI %x firstbank %x extended MCE MSR %x LMCE %x\n",
> > +                broadcast, ser, cmci, first, ext_num, lmce);
> 
> Please can you switch over to not printing booleans as numbers
> here, but simply omitting the respective string from the output if
> a feature is not there? Only actual numbers should be printed as
> such.

sure

Thanks,
Haozhong
Jan Beulich Feb. 23, 2017, 7:45 a.m. UTC | #3
>>> On 23.02.17 at 04:16, <haozhong.zhang@intel.com> wrote:
> On 02/22/17 08:10 -0700, Jan Beulich wrote:
>> >>> On 17.02.17 at 07:39, <haozhong.zhang@intel.com> wrote:
>> > @@ -700,26 +727,31 @@ static void intel_init_mca(struct cpuinfo_x86 *c)
>> >  
>> >      first = mce_firstbank(c);
>> >  
>> > +    if ( !mce_force_broadcast && (msr_content & MCG_LMCE_P) )
>> 
>> Please make all your additions match the prevailing coding style in
>> this file (which admittedly is neither ours nor Linux'es, but a mix).
> 
> The problem is the existing style in this file is not consistent. Both
> if ( cond ) and if (cond) are being used in this file. I chose to use
> Xen style in the new code.

Well, as said - the file isn't cleanly using one style. In such a case,
rather than writing a function that mixes styles, you should try to
match the surrounding code's style (unless you feel up to making a
patch to convert the entire file to uniform style).

Jan
diff mbox

Patch

diff --git a/xen/arch/x86/cpu/mcheck/mce.h b/xen/arch/x86/cpu/mcheck/mce.h
index 2c033af..461141a 100644
--- a/xen/arch/x86/cpu/mcheck/mce.h
+++ b/xen/arch/x86/cpu/mcheck/mce.h
@@ -38,6 +38,7 @@  enum mcheck_type {
 };
 
 extern uint8_t cmci_apic_vector;
+extern bool lmce_support;
 
 /* Init functions */
 enum mcheck_type amd_mcheck_init(struct cpuinfo_x86 *c);
diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c b/xen/arch/x86/cpu/mcheck/mce_intel.c
index 9e5ee3d..b4cc41a 100644
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -29,6 +29,9 @@  boolean_param("mce_fb", mce_force_broadcast);
 
 static int __read_mostly nr_intel_ext_msrs;
 
+/* If mce_force_broadcast == 1, lmce_support will be disabled forcibly. */
+bool __read_mostly lmce_support = 0;
+
 /* Intel SDM define bit15~bit0 of IA32_MCi_STATUS as the MC error code */
 #define INTEL_MCCOD_MASK 0xFFFF
 
@@ -677,10 +680,34 @@  static int mce_is_broadcast(struct cpuinfo_x86 *c)
     return 0;
 }
 
+static bool intel_enable_lmce(void)
+{
+    uint64_t msr_content;
+
+    /*
+     * Section "Enabling Local Machine Check" in Intel SDM Vol 3
+     * requires software must ensure the LOCK bit and LMCE_ON bit
+     * of MSR_IA32_FEATURE_CONTROL are set before setting
+     * MSR_IA32_MCG_EXT_CTL.LMCE_EN.
+     */
+
+    if ( rdmsr_safe(MSR_IA32_FEATURE_CONTROL, msr_content) )
+        return 0;
+
+    if ( msr_content &
+         (IA32_FEATURE_CONTROL_LOCK | IA32_FEATURE_CONTROL_LMCE_ON) )
+    {
+        wrmsrl(MSR_IA32_MCG_EXT_CTL, MCG_EXT_CTL_LMCE_EN);
+        return 1;
+    }
+
+    return 0;
+}
+
 /* Check and init MCA */
 static void intel_init_mca(struct cpuinfo_x86 *c)
 {
-    bool_t broadcast, cmci = 0, ser = 0;
+    bool_t broadcast, cmci = 0, ser = 0, lmce = 0;
     int ext_num = 0, first;
     uint64_t msr_content;
 
@@ -700,26 +727,31 @@  static void intel_init_mca(struct cpuinfo_x86 *c)
 
     first = mce_firstbank(c);
 
+    if ( !mce_force_broadcast && (msr_content & MCG_LMCE_P) )
+        lmce = intel_enable_lmce();
+
     if (smp_processor_id() == 0)
     {
         dprintk(XENLOG_INFO, "MCA Capability: BCAST %x SER %x"
-                " CMCI %x firstbank %x extended MCE MSR %x\n",
-                broadcast, ser, cmci, first, ext_num);
+                " CMCI %x firstbank %x extended MCE MSR %x LMCE %x\n",
+                broadcast, ser, cmci, first, ext_num, lmce);
 
         mce_broadcast = broadcast;
         cmci_support = cmci;
         ser_support = ser;
         nr_intel_ext_msrs = ext_num;
         firstbank = first;
+        lmce_support = lmce;
     }
     else if (cmci != cmci_support || ser != ser_support ||
              broadcast != mce_broadcast ||
-             first != firstbank || ext_num != nr_intel_ext_msrs)
+             first != firstbank || ext_num != nr_intel_ext_msrs ||
+             lmce != lmce_support)
     {
         dprintk(XENLOG_WARNING,
-                "CPU %u has different MCA capability (%x,%x,%x,%x,%x)"
+                "CPU %u has different MCA capability (%x,%x,%x,%x,%x,%x)"
                 " than BSP, may cause undetermined result!!!\n",
-                smp_processor_id(), broadcast, ser, cmci, first, ext_num);
+                smp_processor_id(), broadcast, ser, cmci, first, ext_num, lmce);
     }
 }
 
diff --git a/xen/arch/x86/cpu/mcheck/x86_mca.h b/xen/arch/x86/cpu/mcheck/x86_mca.h
index 322b7d4..3b5060e 100644
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h
@@ -36,6 +36,7 @@ 
 #define MCG_TES_P               (1ULL<<11) /* Intel specific */
 #define MCG_EXT_CNT             16         /* Intel specific */
 #define MCG_SER_P               (1ULL<<24) /* Intel specific */
+#define MCG_LMCE_P              (1ULL<<27) /* Intel specific */
 /* Other bits are reserved */
 
 /* Bitfield of the MSR_IA32_MCG_STATUS register */
@@ -46,6 +47,10 @@ 
 /* Bits 3-63 are reserved on CPU not supporting LMCE */
 /* Bits 4-63 are reserved on CPU supporting LMCE */
 
+/* Bitfield of MSR_IA32_MCG_EXT_CTL register (Intel Specific) */
+#define MCG_EXT_CTL_LMCE_EN     (1ULL<<0)
+/* Other bits are reserved */
+
 /* Bitfield of MSR_K8_MCi_STATUS registers */
 /* MCA error code */
 #define MCi_STATUS_MCA          0x000000000000ffffULL
diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
index 98dbff1..f0bc574 100644
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -51,6 +51,7 @@ 
 #define MSR_IA32_MCG_CAP		0x00000179
 #define MSR_IA32_MCG_STATUS		0x0000017a
 #define MSR_IA32_MCG_CTL		0x0000017b
+#define MSR_IA32_MCG_EXT_CTL	0x000004d0
 
 #define MSR_IA32_PEBS_ENABLE		0x000003f1
 #define MSR_IA32_DS_AREA		0x00000600
@@ -294,6 +295,7 @@ 
 #define IA32_FEATURE_CONTROL_SENTER_PARAM_CTL         0x7f00
 #define IA32_FEATURE_CONTROL_ENABLE_SENTER            0x8000
 #define IA32_FEATURE_CONTROL_SGX_ENABLE               0x40000
+#define IA32_FEATURE_CONTROL_LMCE_ON                  0x100000
 
 #define MSR_IA32_TSC_ADJUST		0x0000003b