[2/2] x86/VMX: fix vmentry failure with TSX bits in LBR

Message ID 1485365191-26692-3-git-send-email-sergey.dyasli@citrix.com (mailing list archive)
State New, archived

Commit Message

Sergey Dyasli Jan. 25, 2017, 5:26 p.m. UTC
During VM entry, H/W will automatically load the guest's MSRs from the
MSR-load area in the same way as they would be written by WRMSR.
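
For reference, each entry in the MSR-load area pairs an MSR index with
the value to load; in Xen this is the vmx_msr_entry layout from vmcs.h
(shown here for illustration, comments ours):

    struct vmx_msr_entry {
        u32 index;   /* MSR number, e.g. 0x680 for the first LBR 'from' MSR */
        u32 mbz;     /* must be zero */
        u64 data;    /* loaded as if by WRMSR, including canonicality checks */
    };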

However, under the following conditions:

    1. LBR (Last Branch Record) MSRs were placed in the MSR-load area
    2. Address format of LBR includes TSX bits 61:62
    3. CPU has TSX support disabled

VM entry will fail with a message in the log similar to:

    (XEN) [   97.239514] d1v0 vmentry failure (reason 0x80000022): MSR loading (entry 3)
    (XEN) [   97.239516]   msr 00000680 val 1fff800000102e60 (mbz 0)

This happens because of the following behaviour:

    - When capturing branches, LBR H/W will always clear bits 61:62
      regardless of the sign extension
    - For WRMSR, bits 61:62 are considered part of the sign extension

This bug affects only certain pCPUs (e.g. Haswell) with vCPUs that
use LBR.  Fix it by sign-extending TSX bits in all LBR entries during
VM entry in affected cases.
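
Concretely, applying the fixup from the patch below to the failing value
in the log above (a worked example, not part of the original message):

    uint64_t val = 0x1fff800000102e60ULL;  /* bits 61:62 clear, bits 47:60 set */
    val |= (val & (3ULL << 59)) << 2;      /* copy bits 60:59 into bits 62:61 */
    /* val is now 0x7fff800000102e60 -- canonical again, bit 63 untouched */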

Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
---
 xen/arch/x86/hvm/vmx/vmx.c         | 68 ++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/cpufeature.h   |  3 ++
 xen/include/asm-x86/hvm/vmx/vmcs.h |  2 ++
 3 files changed, 73 insertions(+)

Comments

Jan Beulich Jan. 31, 2017, 11:52 a.m. UTC | #1
>>> On 25.01.17 at 18:26, <sergey.dyasli@citrix.com> wrote:
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2241,6 +2241,25 @@ static void pi_notification_interrupt(struct cpu_user_regs *regs)
>      raise_softirq(VCPU_KICK_SOFTIRQ);
>  }
>  
> +static bool __read_mostly vmx_lbr_tsx_fixup_needed;
> +
> +static void __init vmx_lbr_tsx_fixup_check(void)
> +{
> +    bool tsx_support = cpu_has_hle || cpu_has_rtm;
> +    u64 caps;
> +    u32 lbr_format;
> +
> +    if ( !cpu_has_pdcm )
> +        return;
> +
> +    rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
> +    /* Lower 6 bits define the format of the address in the LBR stack */
> +    lbr_format = caps & 0x3f;

Please add a #define for the constant here and ...

> +    /* 000100B -- TSX info is reported in the upper bits of 'FROM' registers */
> +    vmx_lbr_tsx_fixup_needed = !tsx_support && lbr_format == 0x4;

... an enum for the known values (to be used here).
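
For illustration, the requested constants could take roughly this shape
(names invented here, not taken from the thread; values roughly per the
SDM's LBR-format encoding):

    #define MSR_IA32_PERF_CAP_LBR_FORMAT 0x3f  /* bits 5:0 of IA32_PERF_CAPABILITIES */

    enum {
        LBR_FORMAT_32            = 0x0, /* 32-bit record */
        LBR_FORMAT_LIP           = 0x1, /* 64-bit LIP */
        LBR_FORMAT_EIP           = 0x2, /* 64-bit EIP */
        LBR_FORMAT_EIP_FLAGS     = 0x3, /* 64-bit EIP + flags */
        LBR_FORMAT_EIP_FLAGS_TSX = 0x4, /* 64-bit EIP + flags + TSX info */
    };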

> @@ -2833,7 +2854,11 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
>              for ( ; (rc == 0) && lbr->count; lbr++ )
>                  for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
>                      if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
> +                    {
>                          vmx_disable_intercept_for_msr(v, lbr->base + i, MSR_TYPE_R | MSR_TYPE_W);
> +                        if ( vmx_lbr_tsx_fixup_needed )
> +                            v->arch.hvm_vmx.lbr_tsx_fixup_enabled = true;

Why not simply

                        v->arch.hvm_vmx.lbr_tsx_fixup_enabled = vmx_lbr_tsx_fixup_needed;

?

> @@ -3854,6 +3879,46 @@ out:
>      }
>  }
>  
> +static void vmx_lbr_tsx_fixup(void)
> +{
> +    static const u64 LBR_FROM_SIGNEXT_2MSB = (1ULL << 59) | (1ULL << 60);

3ULL << 59 would likely read a little less strangely to someone who
hasn't yet reached the point of use. I'm not convinced the use of a
static const variable here is warranted anyway, the more so given that
the identifier looks like a #define one (by being all upper case). Yet
if you really want to keep it, uint64_t please (and likewise elsewhere).
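
Put together, the two suggestions would amount to something like:

    #define LBR_FROM_SIGNEXT_2MSB  (3ULL << 59)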

> +    static u32 lbr_from_start = 0, lbr_from_end = 0, last_in_from_ip = 0;
> +

No blank line in the middle of declarations please. And did you
perhaps mean "last_int_from_ip"?

> +    const struct lbr_info *lbr;
> +    const struct vcpu *curr = current;
> +    const unsigned int msr_count = curr->arch.hvm_vmx.msr_count;
> +    const struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
> +    struct vmx_msr_entry *msr;
> +
> +    if ( lbr_from_start == ~0U )
> +        return;
> +
> +    if ( unlikely(lbr_from_start == 0) )
> +    {
> +        lbr = last_branch_msr_get();
> +        if ( lbr == NULL )
> +        {
> +            lbr_from_start = ~0U;
> +            return;
> +        }
> +
> +        /* Warning: this depends on struct lbr_info[] layout! */

Please make suitable #define-s for them then, placed next to the
array definition.
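
For example (index names invented; the positions are the ones the quoted
code relies on):

    /* Fixed positions in the lbr_info[] tables; keep next to their definition. */
    #define LBR_INFO_LASTINT_FROM 0   /* lbr[0]: MSR_IA32_LASTINTFROMIP */
    #define LBR_INFO_FROM_STACK   3   /* lbr[3]: start of the LBR 'from' range */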

> +        last_in_from_ip = lbr[0].base;
> +        lbr_from_start = lbr[3].base;
> +        lbr_from_end = lbr_from_start + lbr[3].count;

There's a race here: lbr_from_start needs to be written strictly
last, for the static variable latching to work in all cases.
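
One way to get that ordering (a sketch only, assuming the plain static
latch is kept; smp_wmb() is Xen's SMP write barrier):

        last_in_from_ip = lbr[0].base;
        lbr_from_end = lbr[3].base + lbr[3].count;
        smp_wmb();                      /* publish the helpers first */
        lbr_from_start = lbr[3].base;   /* the latch: written strictly last */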

> +    }
> +
> +    if ( (msr = vmx_find_guest_msr(lbr_from_start)) != NULL )
> +    {
> +        /* Sign extend into bits 61:62 while preserving bit 63 */
> +        for ( ; msr < msr_area + msr_count && msr->index < lbr_from_end; msr++ )
> +            msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);

Please also clarify in the comment that this depends on the array
being sorted at all times.
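
E.g. something like:

        /*
         * Sign extend into bits 61:62 while preserving bit 63.
         * The loop relies on the MSR area being kept sorted by index.
         */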

> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -78,6 +78,9 @@
>  #define cpu_has_cmp_legacy	boot_cpu_has(X86_FEATURE_CMP_LEGACY)
>  #define cpu_has_tbm		boot_cpu_has(X86_FEATURE_TBM)
>  #define cpu_has_itsc		boot_cpu_has(X86_FEATURE_ITSC)
> +#define cpu_has_hle             boot_cpu_has(X86_FEATURE_HLE)
> +#define cpu_has_rtm             boot_cpu_has(X86_FEATURE_RTM)
> +#define cpu_has_pdcm            boot_cpu_has(X86_FEATURE_PDCM)

Hard tabs are to be used here, as suggested by the neighboring
entries.

> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -231,6 +231,8 @@ struct arch_vmx_struct {
>       * pCPU and wakeup the related vCPU.
>       */
>      struct pi_blocking_vcpu pi_blocking;
> +
> +    bool lbr_tsx_fixup_enabled;

Please fit this into a currently unused byte.

Having reached here - migration doesn't need any similar
adjustment simply because we don't migrate these MSR values
(yet)? This may be worth stating in the commit message.

Jan

Patch

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index a5e5ffd..586aaca 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2241,6 +2241,25 @@ static void pi_notification_interrupt(struct cpu_user_regs *regs)
     raise_softirq(VCPU_KICK_SOFTIRQ);
 }
 
+static bool __read_mostly vmx_lbr_tsx_fixup_needed;
+
+static void __init vmx_lbr_tsx_fixup_check(void)
+{
+    bool tsx_support = cpu_has_hle || cpu_has_rtm;
+    u64 caps;
+    u32 lbr_format;
+
+    if ( !cpu_has_pdcm )
+        return;
+
+    rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
+    /* Lower 6 bits define the format of the address in the LBR stack */
+    lbr_format = caps & 0x3f;
+
+    /* 000100B -- TSX info is reported in the upper bits of 'FROM' registers */
+    vmx_lbr_tsx_fixup_needed = !tsx_support && lbr_format == 0x4;
+}
+
 const struct hvm_function_table * __init start_vmx(void)
 {
     set_in_cr4(X86_CR4_VMXE);
@@ -2310,6 +2329,8 @@ const struct hvm_function_table * __init start_vmx(void)
 
     setup_vmcs_dump();
 
+    vmx_lbr_tsx_fixup_check();
+
     return &vmx_function_table;
 }
 
@@ -2833,7 +2854,11 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
             for ( ; (rc == 0) && lbr->count; lbr++ )
                 for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
                     if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
+                    {
                         vmx_disable_intercept_for_msr(v, lbr->base + i, MSR_TYPE_R | MSR_TYPE_W);
+                        if ( vmx_lbr_tsx_fixup_needed )
+                            v->arch.hvm_vmx.lbr_tsx_fixup_enabled = true;
+                    }
         }
 
         if ( (rc < 0) ||
@@ -3854,6 +3879,46 @@ out:
     }
 }
 
+static void vmx_lbr_tsx_fixup(void)
+{
+    static const u64 LBR_FROM_SIGNEXT_2MSB = (1ULL << 59) | (1ULL << 60);
+    static u32 lbr_from_start = 0, lbr_from_end = 0, last_in_from_ip = 0;
+
+    const struct lbr_info *lbr;
+    const struct vcpu *curr = current;
+    const unsigned int msr_count = curr->arch.hvm_vmx.msr_count;
+    const struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
+    struct vmx_msr_entry *msr;
+
+    if ( lbr_from_start == ~0U )
+        return;
+
+    if ( unlikely(lbr_from_start == 0) )
+    {
+        lbr = last_branch_msr_get();
+        if ( lbr == NULL )
+        {
+            lbr_from_start = ~0U;
+            return;
+        }
+
+        /* Warning: this depends on struct lbr_info[] layout! */
+        last_in_from_ip = lbr[0].base;
+        lbr_from_start = lbr[3].base;
+        lbr_from_end = lbr_from_start + lbr[3].count;
+    }
+
+    if ( (msr = vmx_find_guest_msr(lbr_from_start)) != NULL )
+    {
+        /* Sign extend into bits 61:62 while preserving bit 63 */
+        for ( ; msr < msr_area + msr_count && msr->index < lbr_from_end; msr++ )
+            msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
+    }
+
+    if ( (msr = vmx_find_guest_msr(last_in_from_ip)) != NULL )
+        msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
+}
+
 void vmx_vmenter_helper(const struct cpu_user_regs *regs)
 {
     struct vcpu *curr = current;
@@ -3910,6 +3975,9 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
     }
 
  out:
+    if ( unlikely(curr->arch.hvm_vmx.lbr_tsx_fixup_enabled) )
+        vmx_lbr_tsx_fixup();
+
     HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
 
     __vmwrite(GUEST_RIP,    regs->rip);
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 39ad582..8e14728 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -78,6 +78,9 @@
 #define cpu_has_cmp_legacy	boot_cpu_has(X86_FEATURE_CMP_LEGACY)
 #define cpu_has_tbm		boot_cpu_has(X86_FEATURE_TBM)
 #define cpu_has_itsc		boot_cpu_has(X86_FEATURE_ITSC)
+#define cpu_has_hle             boot_cpu_has(X86_FEATURE_HLE)
+#define cpu_has_rtm             boot_cpu_has(X86_FEATURE_RTM)
+#define cpu_has_pdcm            boot_cpu_has(X86_FEATURE_PDCM)
 
 enum _cache_type {
     CACHE_TYPE_NULL = 0,
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index d01099e..8d9db36 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -231,6 +231,8 @@ struct arch_vmx_struct {
      * pCPU and wakeup the related vCPU.
      */
     struct pi_blocking_vcpu pi_blocking;
+
+    bool lbr_tsx_fixup_enabled;
 };
 
 int vmx_create_vmcs(struct vcpu *v);