diff mbox

[v3,5/9] x86/vmce: enable injecting LMCE to guest on Intel host

Message ID 20170330062003.9119-6-haozhong.zhang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Haozhong Zhang March 30, 2017, 6:19 a.m. UTC
Inject LMCE to guest if the host MCE is LMCE and the affected vcpu is
known. Otherwise, broadcast MCE to all vcpus on Intel host.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>

Changes in v3:
 * Adjust a check in mc_memerr_dhandler() and add code comment for it.
---
 xen/arch/x86/cpu/mcheck/mcaction.c | 23 ++++++++++++++++-------
 xen/arch/x86/cpu/mcheck/vmce.c     | 11 ++++++++++-
 xen/arch/x86/cpu/mcheck/vmce.h     |  2 +-
 3 files changed, 27 insertions(+), 9 deletions(-)

Comments

Jan Beulich March 30, 2017, 2:51 p.m. UTC | #1
>>> On 30.03.17 at 08:19, <haozhong.zhang@intel.com> wrote:
> --- a/xen/arch/x86/cpu/mcheck/mcaction.c
> +++ b/xen/arch/x86/cpu/mcheck/mcaction.c
> @@ -44,6 +44,7 @@ mc_memerr_dhandler(struct mca_binfo *binfo,
>      unsigned long mfn, gfn;
>      uint32_t status;
>      int vmce_vcpuid;
> +    uint16_t mc_vcpuid;

Any reason this can't be plain unsigned int? Nowadays we try to
stay away from fixed width types where they aren't really needed.

> --- a/xen/arch/x86/cpu/mcheck/vmce.c
> +++ b/xen/arch/x86/cpu/mcheck/vmce.c
> @@ -464,14 +464,23 @@ static int vcpu_fill_mc_msrs(struct vcpu *v, uint64_t mcg_status,
>  }
>  
>  int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
> -                   uint64_t gstatus, bool broadcast)
> +                   uint64_t gstatus, int vmce_vcpuid)
>  {
>      struct vcpu *v = d->vcpu[0];
> +    bool broadcast = (vmce_vcpuid == VMCE_INJECT_BROADCAST);
>      int ret, err;
>  
>      if ( mc_bank->mc_domid == DOMID_INVALID )
>          return -EINVAL;
>  
> +    if ( broadcast )
> +        gstatus &= ~MCG_STATUS_LMCE;
> +    else if ( gstatus & MCG_STATUS_LMCE )
> +    {
> +        ASSERT(vmce_vcpuid >=0 && vmce_vcpuid < d->max_vcpus);

Missing blank.

With these taken care of
Reviewed-by: Jan Beulich <jbeulich@suse.com>

Jan
diff mbox

Patch

diff --git a/xen/arch/x86/cpu/mcheck/mcaction.c b/xen/arch/x86/cpu/mcheck/mcaction.c
index ca17d22..2556bb0 100644
--- a/xen/arch/x86/cpu/mcheck/mcaction.c
+++ b/xen/arch/x86/cpu/mcheck/mcaction.c
@@ -44,6 +44,7 @@  mc_memerr_dhandler(struct mca_binfo *binfo,
     unsigned long mfn, gfn;
     uint32_t status;
     int vmce_vcpuid;
+    uint16_t mc_vcpuid;
 
     if (!mc_check_addr(bank->mc_status, bank->mc_misc, MC_ADDR_PHYSICAL)) {
         dprintk(XENLOG_WARNING,
@@ -88,18 +89,26 @@  mc_memerr_dhandler(struct mca_binfo *binfo,
                     goto vmce_failed;
                 }
 
-                if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
-                    global->mc_vcpuid == XEN_MC_VCPUID_INVALID)
+                mc_vcpuid = global->mc_vcpuid;
+                if (mc_vcpuid == XEN_MC_VCPUID_INVALID ||
+                    /*
+                     * Because MC# may happen asynchronously with the actual
+                     * operation that triggers the error, the domain ID as
+                     * well as the vCPU ID collected in 'global' at MC# are
+                     * not always precise. In that case, fallback to broadcast.
+                     */
+                    global->mc_domid != bank->mc_domid ||
+                    (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+                     (!(global->mc_gstatus & MCG_STATUS_LMCE) ||
+                      !(d->vcpu[mc_vcpuid]->arch.vmce.mcg_ext_ctl &
+                        MCG_EXT_CTL_LMCE_EN))))
                     vmce_vcpuid = VMCE_INJECT_BROADCAST;
                 else
-                    vmce_vcpuid = global->mc_vcpuid;
+                    vmce_vcpuid = mc_vcpuid;
 
                 bank->mc_addr = gfn << PAGE_SHIFT |
                   (bank->mc_addr & (PAGE_SIZE -1 ));
-                /* TODO: support injecting LMCE */
-                if (fill_vmsr_data(bank, d,
-                                   global->mc_gstatus & ~MCG_STATUS_LMCE,
-                                   vmce_vcpuid == VMCE_INJECT_BROADCAST))
+                if (fill_vmsr_data(bank, d, global->mc_gstatus, vmce_vcpuid))
                 {
                     mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d "
                       "failed\n", bank->mc_domid);
diff --git a/xen/arch/x86/cpu/mcheck/vmce.c b/xen/arch/x86/cpu/mcheck/vmce.c
index 2106706..e19be69 100644
--- a/xen/arch/x86/cpu/mcheck/vmce.c
+++ b/xen/arch/x86/cpu/mcheck/vmce.c
@@ -464,14 +464,23 @@  static int vcpu_fill_mc_msrs(struct vcpu *v, uint64_t mcg_status,
 }
 
 int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
-                   uint64_t gstatus, bool broadcast)
+                   uint64_t gstatus, int vmce_vcpuid)
 {
     struct vcpu *v = d->vcpu[0];
+    bool broadcast = (vmce_vcpuid == VMCE_INJECT_BROADCAST);
     int ret, err;
 
     if ( mc_bank->mc_domid == DOMID_INVALID )
         return -EINVAL;
 
+    if ( broadcast )
+        gstatus &= ~MCG_STATUS_LMCE;
+    else if ( gstatus & MCG_STATUS_LMCE )
+    {
+        ASSERT(vmce_vcpuid >=0 && vmce_vcpuid < d->max_vcpus);
+        v = d->vcpu[vmce_vcpuid];
+    }
+
     /*
      * vMCE with the actual error information is injected to vCPU0,
      * and, if broadcast is required, we choose to inject less severe
diff --git a/xen/arch/x86/cpu/mcheck/vmce.h b/xen/arch/x86/cpu/mcheck/vmce.h
index 74f6381..2797e00 100644
--- a/xen/arch/x86/cpu/mcheck/vmce.h
+++ b/xen/arch/x86/cpu/mcheck/vmce.h
@@ -17,7 +17,7 @@  int vmce_amd_rdmsr(const struct vcpu *, uint32_t msr, uint64_t *val);
 int vmce_amd_wrmsr(struct vcpu *, uint32_t msr, uint64_t val);
 
 int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
-                   uint64_t gstatus, bool broadcast);
+                   uint64_t gstatus, int vmce_vcpuid);
 
 #define VMCE_INJECT_BROADCAST (-1)
 int inject_vmce(struct domain *d, int vcpu);