diff mbox

[v2,3/3] x86/vmx: fix vmentry failure with TSX bits in LBR

Message ID 20170217154233.28514-4-sergey.dyasli@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Sergey Dyasli Feb. 17, 2017, 3:42 p.m. UTC
During VM entry, H/W will automatically load guest's MSRs from MSR-load
area in the same way as they would be written by WRMSR.

However, under the following conditions:

    1. LBR (Last Branch Record) MSRs were placed in the MSR-load area
    2. Address format of LBR includes TSX bits 61:62
    3. CPU has TSX support disabled

VM entry will fail with a message in the log similar to:

    (XEN) [   97.239514] d1v0 vmentry failure (reason 0x80000022): MSR loading (entry 3)
    (XEN) [   97.239516]   msr 00000680 val 1fff800000102e60 (mbz 0)

This happens because of the following behaviour:

    - When capturing branches, LBR H/W will always clear bits 61:62
      regardless of the sign extension
    - For WRMSR, bits 61:62 are considered the part of the sign extension

This bug affects only certain pCPUs (e.g. Haswell) with vCPUs that
use LBR.  Fix it by sign-extending TSX bits in all LBR entries during
VM entry in affected cases.

LBR MSRs are currently not Live Migrated. In order to implement such
functionality, the MSR levelling work has to be done first because
hosts can have different LBR formats.

Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
---
v1 --> v2:
- LBR_FORMAT_MSR_IA32_PERF_CAP define is added
- enum for LBR_FORMAT_* is added
- LBR_FROM_SIGNEXT_2MSB is now a define instead of static const
- u32/64 replaced with uint32/64_t
- Renamed "last_in_from_ip" --> "lbr_lastint_from"
- Added defines for the layout of struct lbr_info[]
- Added a comment to vmx_lbr_tsx_fixup() about MSR array being sorted
- Fixed tabs for cpu_has_* defines
- arch_vmx_struct::lbr_tsx_fixup_enabled is moved to fill up 1-byte hole
- Made lbr_from_start, lbr_from_end and lbr_lastint_from
  global static __read_mostly.
  Moved their initialization into vmx_lbr_tsx_fixup_check().
 - Information about Live Migration is added to the commit message

 xen/arch/x86/hvm/vmx/vmx.c         | 90 ++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/cpufeature.h   |  3 ++
 xen/include/asm-x86/hvm/vmx/vmcs.h |  1 +
 xen/include/asm-x86/msr-index.h    |  2 +
 4 files changed, 96 insertions(+)

Comments

Tian, Kevin Feb. 21, 2017, 9:13 a.m. UTC | #1
> From: Sergey Dyasli [mailto:sergey.dyasli@citrix.com]
> Sent: Friday, February 17, 2017 11:43 PM
> 
> During VM entry, H/W will automatically load guest's MSRs from MSR-load
> area in the same way as they would be written by WRMSR.
> 
> However, under the following conditions:
> 
>     1. LBR (Last Branch Record) MSRs were placed in the MSR-load area
>     2. Address format of LBR includes TSX bits 61:62
>     3. CPU has TSX support disabled
> 
> VM entry will fail with a message in the log similar to:
> 
>     (XEN) [   97.239514] d1v0 vmentry failure (reason 0x80000022): MSR loading (entry
> 3)
>     (XEN) [   97.239516]   msr 00000680 val 1fff800000102e60 (mbz 0)
> 
> This happens because of the following behaviour:
> 
>     - When capturing branches, LBR H/W will always clear bits 61:62
>       regardless of the sign extension
>     - For WRMSR, bits 61:62 are considered the part of the sign extension
> 
> This bug affects only certain pCPUs (e.g. Haswell) with vCPUs that
> use LBR.  Fix it by sign-extending TSX bits in all LBR entries during
> VM entry in affected cases.
> 
> LBR MSRs are currently not Live Migrated. In order to implement such
> functionality, the MSR levelling work has to be done first because
> hosts can have different LBR formats.
> 
> Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
> ---
> v1 --> v2:
> - LBR_FORMAT_MSR_IA32_PERF_CAP define is added
> - enum for LBR_FORMAT_* is added
> - LBR_FROM_SIGNEXT_2MSB is now a define instead of static const
> - u32/64 replaced with uint32/64_t
> - Renamed "last_in_from_ip" --> "lbr_lastint_from"
> - Added defines for the layout of struct lbr_info[]
> - Added a comment to vmx_lbr_tsx_fixup() about MSR array being sorted
> - Fixed tabs for cpu_has_* defines
> - arch_vmx_struct::lbr_tsx_fixup_enabled is moved to fill up 1-byte hole
> - Made lbr_from_start, lbr_from_end and lbr_lastint_from
>   global static __read_mostly.
>   Moved their initialization into vmx_lbr_tsx_fixup_check().
>  - Information about Live Migration is added to the commit message
> 
>  xen/arch/x86/hvm/vmx/vmx.c         | 90
> ++++++++++++++++++++++++++++++++++++++
>  xen/include/asm-x86/cpufeature.h   |  3 ++
>  xen/include/asm-x86/hvm/vmx/vmcs.h |  1 +
>  xen/include/asm-x86/msr-index.h    |  2 +
>  4 files changed, 96 insertions(+)
> 
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 42f4fbd..3547b78 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2284,6 +2284,8 @@ static void pi_notification_interrupt(struct cpu_user_regs
> *regs)
>      raise_softirq(VCPU_KICK_SOFTIRQ);
>  }
> 
> +static void __init vmx_lbr_tsx_fixup_check(void);
> +
>  const struct hvm_function_table * __init start_vmx(void)
>  {
>      set_in_cr4(X86_CR4_VMXE);
> @@ -2353,6 +2355,8 @@ const struct hvm_function_table * __init start_vmx(void)
> 
>      setup_vmcs_dump();
> 
> +    vmx_lbr_tsx_fixup_check();
> +
>      return &vmx_function_table;
>  }
> 
> @@ -2513,6 +2517,14 @@ static int vmx_cr_access(unsigned long exit_qualification)
>      return X86EMUL_OKAY;
>  }
> 
> +/* This defines the layout of struct lbr_info[] */
> +#define LBR_LASTINT_FROM_IDX    0
> +#define LBR_LASTINT_TO_IDX      1
> +#define LBR_LASTBRANCH_TOS_IDX  2
> +#define LBR_LASTBRANCH_FROM_IDX 3
> +#define LBR_LASTBRANCH_TO_IDX   4
> +#define LBR_LASTBRANCH_INFO     5
> +
>  static const struct lbr_info {
>      u32 base, count;
>  } p4_lbr[] = {
> @@ -2618,6 +2630,56 @@ static const struct lbr_info *last_branch_msr_get(void)
>      return NULL;
>  }
> 
> +enum
> +{
> +    LBR_FORMAT_32                 = 0x0, /* 32-bit record format */
> +    LBR_FORMAT_LIP                = 0x1, /* 64-bit LIP record format */
> +    LBR_FORMAT_EIP                = 0x2, /* 64-bit EIP record format */
> +    LBR_FORMAT_EIP_FLAGS          = 0x3, /* 64-bit EIP, Flags */
> +    LBR_FORMAT_EIP_FLAGS_TSX      = 0x4, /* 64-bit EIP, Flags, TSX */
> +    LBR_FORMAT_EIP_FLAGS_TSX_INFO = 0x5, /* 64-bit EIP, Flags, TSX, LBR_INFO */
> +    LBR_FORMAT_EIP_FLAGS_CYCLES   = 0x6, /* 64-bit EIP, Flags, Cycles */
> +};
> +
> +#define LBR_FROM_SIGNEXT_2MSB  ((1ULL << 59) | (1ULL << 60))
> +
> +static bool __read_mostly vmx_lbr_tsx_fixup_needed;
> +static uint32_t __read_mostly lbr_from_start;
> +static uint32_t __read_mostly lbr_from_end;
> +static uint32_t __read_mostly lbr_lastint_from;
> +
> +static void __init vmx_lbr_tsx_fixup_check(void)
> +{
> +    bool tsx_support = cpu_has_hle || cpu_has_rtm;
> +    uint64_t caps;
> +    uint32_t lbr_format;
> +
> +    /* Fixup is needed only when TSX support is disabled ... */
> +    if ( tsx_support )
> +        return;

!tsx_support

> +
> +    if ( !cpu_has_pdcm )
> +        return;

combine two ifs into one

> +
> +    rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
> +    lbr_format = caps & LBR_FORMAT_MSR_IA32_PERF_CAP;
> +
> +    /* ... and the address format of LBR includes TSX bits 61:62 */

what's "..."?

> +    if ( lbr_format == LBR_FORMAT_EIP_FLAGS_TSX )
> +    {
> +        const struct lbr_info *lbr = last_branch_msr_get();
> +
> +        if ( lbr == NULL )
> +            return;

move above check before rdmsrl. At least avoid one unnecessary
msr read when LBR is not available.

> +
> +        lbr_lastint_from = lbr[LBR_LASTINT_FROM_IDX].base;
> +        lbr_from_start = lbr[LBR_LASTBRANCH_FROM_IDX].base;
> +        lbr_from_end = lbr_from_start + lbr[LBR_LASTBRANCH_FROM_IDX].count;
> +
> +        vmx_lbr_tsx_fixup_needed = true;
> +    }
> +}
> +
>  static int is_last_branch_msr(u32 ecx)
>  {
>      const struct lbr_info *lbr = last_branch_msr_get();
> @@ -2876,7 +2938,11 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t
> msr_content)
>              for ( ; (rc == 0) && lbr->count; lbr++ )
>                  for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
>                      if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
> +                    {
>                          vmx_disable_intercept_for_msr(v, lbr->base + i, MSR_TYPE_R |
> MSR_TYPE_W);
> +                        if ( vmx_lbr_tsx_fixup_needed )
> +                            v->arch.hvm_vmx.lbr_tsx_fixup_enabled = true;
> +                    }
>          }
> 
>          if ( (rc < 0) ||
> @@ -3897,6 +3963,27 @@ out:
>      }
>  }
> 
> +static void vmx_lbr_tsx_fixup(void)
> +{
> +    struct vcpu *curr = current;
> +    unsigned int msr_count = curr->arch.hvm_vmx.msr_count;
> +    struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
> +    struct vmx_msr_entry *msr;
> +
> +    if ( (msr = vmx_find_msr(lbr_from_start, VMX_GUEST_MSR)) != NULL )

Curious whether NULL is a valid situation here. If not we need handle it.
Otherwise please ignore this comment.

> +    {
> +        /*
> +         * Sign extend into bits 61:62 while preserving bit 63
> +         * The loop relies on the fact that MSR array is sorted.
> +         */
> +        for ( ; msr < msr_area + msr_count && msr->index < lbr_from_end; msr++ )
> +            msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
> +    }
> +
> +    if ( (msr = vmx_find_msr(lbr_lastint_from, VMX_GUEST_MSR)) != NULL )
> +        msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
> +}
> +
>  void vmx_vmenter_helper(const struct cpu_user_regs *regs)
>  {
>      struct vcpu *curr = current;
> @@ -3953,6 +4040,9 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
>      }
> 
>   out:
> +    if ( unlikely(curr->arch.hvm_vmx.lbr_tsx_fixup_enabled) )
> +        vmx_lbr_tsx_fixup();
> +
>      HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
> 
>      __vmwrite(GUEST_RIP,    regs->rip);
> diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
> index 39ad582..3b187ac 100644
> --- a/xen/include/asm-x86/cpufeature.h
> +++ b/xen/include/asm-x86/cpufeature.h
> @@ -78,6 +78,9 @@
>  #define cpu_has_cmp_legacy	boot_cpu_has(X86_FEATURE_CMP_LEGACY)
>  #define cpu_has_tbm		boot_cpu_has(X86_FEATURE_TBM)
>  #define cpu_has_itsc		boot_cpu_has(X86_FEATURE_ITSC)
> +#define cpu_has_hle		boot_cpu_has(X86_FEATURE_HLE)
> +#define cpu_has_rtm		boot_cpu_has(X86_FEATURE_RTM)
> +#define cpu_has_pdcm		boot_cpu_has(X86_FEATURE_PDCM)
> 
>  enum _cache_type {
>      CACHE_TYPE_NULL = 0,
> diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h
> b/xen/include/asm-x86/hvm/vmx/vmcs.h
> index 903af51..2c9c3c9 100644
> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -147,6 +147,7 @@ struct arch_vmx_struct {
>      uint8_t              vmx_realmode;
>      /* Are we emulating rather than VMENTERing? */
>      uint8_t              vmx_emulate;
> +    bool                 lbr_tsx_fixup_enabled;
>      /* Bitmask of segments that we can't safely use in virtual 8086 mode */
>      uint16_t             vm86_segment_mask;
>      /* Shadow CS, SS, DS, ES, FS, GS, TR while in virtual 8086 mode */
> diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
> index 98dbff1..8841419 100644
> --- a/xen/include/asm-x86/msr-index.h
> +++ b/xen/include/asm-x86/msr-index.h
> @@ -55,6 +55,8 @@
>  #define MSR_IA32_PEBS_ENABLE		0x000003f1
>  #define MSR_IA32_DS_AREA		0x00000600
>  #define MSR_IA32_PERF_CAPABILITIES	0x00000345
> +/* Lower 6 bits define the format of the address in the LBR stack */
> +#define LBR_FORMAT_MSR_IA32_PERF_CAP	0x3f
> 
>  #define MSR_IA32_BNDCFGS		0x00000d90
>  #define IA32_BNDCFGS_ENABLE		0x00000001
> --
> 2.9.3
Andrew Cooper Feb. 21, 2017, 11:25 a.m. UTC | #2
On 21/02/17 09:13, Tian, Kevin wrote:
>> @@ -2618,6 +2630,56 @@ static const struct lbr_info *last_branch_msr_get(void)
>>      return NULL;
>>  }
>>
>> +enum
>> +{
>> +    LBR_FORMAT_32                 = 0x0, /* 32-bit record format */
>> +    LBR_FORMAT_LIP                = 0x1, /* 64-bit LIP record format */
>> +    LBR_FORMAT_EIP                = 0x2, /* 64-bit EIP record format */
>> +    LBR_FORMAT_EIP_FLAGS          = 0x3, /* 64-bit EIP, Flags */
>> +    LBR_FORMAT_EIP_FLAGS_TSX      = 0x4, /* 64-bit EIP, Flags, TSX */
>> +    LBR_FORMAT_EIP_FLAGS_TSX_INFO = 0x5, /* 64-bit EIP, Flags, TSX, LBR_INFO */
>> +    LBR_FORMAT_EIP_FLAGS_CYCLES   = 0x6, /* 64-bit EIP, Flags, Cycles */
>> +};
>> +
>> +#define LBR_FROM_SIGNEXT_2MSB  ((1ULL << 59) | (1ULL << 60))
>> +
>> +static bool __read_mostly vmx_lbr_tsx_fixup_needed;
>> +static uint32_t __read_mostly lbr_from_start;
>> +static uint32_t __read_mostly lbr_from_end;
>> +static uint32_t __read_mostly lbr_lastint_from;
>> +
>> +static void __init vmx_lbr_tsx_fixup_check(void)
>> +{
>> +    bool tsx_support = cpu_has_hle || cpu_has_rtm;
>> +    uint64_t caps;
>> +    uint32_t lbr_format;
>> +
>> +    /* Fixup is needed only when TSX support is disabled ... */
>> +    if ( tsx_support )
>> +        return;
> !tsx_support

The original code is correct.  This problem only manifests when TSX is
unavailable.

>
>> +
>> +    if ( !cpu_has_pdcm )
>> +        return;
> combine two ifs into one
>
>> +
>> +    rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
>> +    lbr_format = caps & LBR_FORMAT_MSR_IA32_PERF_CAP;
>> +
>> +    /* ... and the address format of LBR includes TSX bits 61:62 */
> what's "..."?

From the previous comment.

>
>> +    if ( lbr_format == LBR_FORMAT_EIP_FLAGS_TSX )
>> +    {
>> +        const struct lbr_info *lbr = last_branch_msr_get();
>> +
>> +        if ( lbr == NULL )
>> +            return;
> move above check before rdmsrl. At least avoid one unnecessary
> msr read when LBR is not available.

Which check and which rdmsr?  In reality, if cpu_has_pdcm is set, we
would always expect to have LBR.

>
>> +
>> +        lbr_lastint_from = lbr[LBR_LASTINT_FROM_IDX].base;
>> +        lbr_from_start = lbr[LBR_LASTBRANCH_FROM_IDX].base;
>> +        lbr_from_end = lbr_from_start + lbr[LBR_LASTBRANCH_FROM_IDX].count;
>> +
>> +        vmx_lbr_tsx_fixup_needed = true;
>> +    }
>> +}
>> +
>>  static int is_last_branch_msr(u32 ecx)
>>  {
>>      const struct lbr_info *lbr = last_branch_msr_get();
>> @@ -2876,7 +2938,11 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t
>> msr_content)
>>              for ( ; (rc == 0) && lbr->count; lbr++ )
>>                  for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
>>                      if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
>> +                    {
>>                          vmx_disable_intercept_for_msr(v, lbr->base + i, MSR_TYPE_R |
>> MSR_TYPE_W);
>> +                        if ( vmx_lbr_tsx_fixup_needed )
>> +                            v->arch.hvm_vmx.lbr_tsx_fixup_enabled = true;
>> +                    }
>>          }
>>
>>          if ( (rc < 0) ||
>> @@ -3897,6 +3963,27 @@ out:
>>      }
>>  }
>>
>> +static void vmx_lbr_tsx_fixup(void)
>> +{
>> +    struct vcpu *curr = current;
>> +    unsigned int msr_count = curr->arch.hvm_vmx.msr_count;
>> +    struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
>> +    struct vmx_msr_entry *msr;
>> +
>> +    if ( (msr = vmx_find_msr(lbr_from_start, VMX_GUEST_MSR)) != NULL )
> Curious whether NULL is a valid situation here. If not we need handle it.
> Otherwise please ignore this comment.

It is safer in the case that this function gets called and there are no
LBR MSRs in the load list.

~Andrew
Tian, Kevin Feb. 22, 2017, 2:56 a.m. UTC | #3
> From: Andrew Cooper [mailto:andrew.cooper3@citrix.com]
> Sent: Tuesday, February 21, 2017 7:25 PM
> 
> On 21/02/17 09:13, Tian, Kevin wrote:
> >> @@ -2618,6 +2630,56 @@ static const struct lbr_info *last_branch_msr_get(void)
> >>      return NULL;
> >>  }
> >>
> >> +enum
> >> +{
> >> +    LBR_FORMAT_32                 = 0x0, /* 32-bit record format */
> >> +    LBR_FORMAT_LIP                = 0x1, /* 64-bit LIP record format */
> >> +    LBR_FORMAT_EIP                = 0x2, /* 64-bit EIP record format */
> >> +    LBR_FORMAT_EIP_FLAGS          = 0x3, /* 64-bit EIP, Flags */
> >> +    LBR_FORMAT_EIP_FLAGS_TSX      = 0x4, /* 64-bit EIP, Flags, TSX */
> >> +    LBR_FORMAT_EIP_FLAGS_TSX_INFO = 0x5, /* 64-bit EIP, Flags, TSX, LBR_INFO
> */
> >> +    LBR_FORMAT_EIP_FLAGS_CYCLES   = 0x6, /* 64-bit EIP, Flags, Cycles */
> >> +};
> >> +
> >> +#define LBR_FROM_SIGNEXT_2MSB  ((1ULL << 59) | (1ULL << 60))
> >> +
> >> +static bool __read_mostly vmx_lbr_tsx_fixup_needed;
> >> +static uint32_t __read_mostly lbr_from_start;
> >> +static uint32_t __read_mostly lbr_from_end;
> >> +static uint32_t __read_mostly lbr_lastint_from;
> >> +
> >> +static void __init vmx_lbr_tsx_fixup_check(void)
> >> +{
> >> +    bool tsx_support = cpu_has_hle || cpu_has_rtm;
> >> +    uint64_t caps;
> >> +    uint32_t lbr_format;
> >> +
> >> +    /* Fixup is needed only when TSX support is disabled ... */
> >> +    if ( tsx_support )
> >> +        return;
> > !tsx_support
> 
> The original code is correct.  This problem only manifests when TSX is
> unavailable.
> 

yes. my bad.

> >
> >> +
> >> +    if ( !cpu_has_pdcm )
> >> +        return;
> > combine two ifs into one
> >
> >> +
> >> +    rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
> >> +    lbr_format = caps & LBR_FORMAT_MSR_IA32_PERF_CAP;
> >> +
> >> +    /* ... and the address format of LBR includes TSX bits 61:62 */
> > what's "..."?
> 
> From the previous comment.
> 
> >
> >> +    if ( lbr_format == LBR_FORMAT_EIP_FLAGS_TSX )
> >> +    {
> >> +        const struct lbr_info *lbr = last_branch_msr_get();
> >> +
> >> +        if ( lbr == NULL )
> >> +            return;
> > move above check before rdmsrl. At least avoid one unnecessary
> > msr read when LBR is not available.
> 
> Which check and which rdmsr?  In reality, if cpu_has_pdcm is set, we
> would always expect to have LBR.

move above 4 lines before reading PERF_CAPABILITIES. but
looks not necessary after another thought.

> 
> >
> >> +
> >> +        lbr_lastint_from = lbr[LBR_LASTINT_FROM_IDX].base;
> >> +        lbr_from_start = lbr[LBR_LASTBRANCH_FROM_IDX].base;
> >> +        lbr_from_end = lbr_from_start + lbr[LBR_LASTBRANCH_FROM_IDX].count;
> >> +
> >> +        vmx_lbr_tsx_fixup_needed = true;
> >> +    }
> >> +}
> >> +
> >>  static int is_last_branch_msr(u32 ecx)
> >>  {
> >>      const struct lbr_info *lbr = last_branch_msr_get();
> >> @@ -2876,7 +2938,11 @@ static int vmx_msr_write_intercept(unsigned int msr,
> uint64_t
> >> msr_content)
> >>              for ( ; (rc == 0) && lbr->count; lbr++ )
> >>                  for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
> >>                      if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
> >> +                    {
> >>                          vmx_disable_intercept_for_msr(v, lbr->base + i,
> MSR_TYPE_R |
> >> MSR_TYPE_W);
> >> +                        if ( vmx_lbr_tsx_fixup_needed )
> >> +                            v->arch.hvm_vmx.lbr_tsx_fixup_enabled = true;
> >> +                    }
> >>          }
> >>
> >>          if ( (rc < 0) ||
> >> @@ -3897,6 +3963,27 @@ out:
> >>      }
> >>  }
> >>
> >> +static void vmx_lbr_tsx_fixup(void)
> >> +{
> >> +    struct vcpu *curr = current;
> >> +    unsigned int msr_count = curr->arch.hvm_vmx.msr_count;
> >> +    struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
> >> +    struct vmx_msr_entry *msr;
> >> +
> >> +    if ( (msr = vmx_find_msr(lbr_from_start, VMX_GUEST_MSR)) != NULL )
> > Curious whether NULL is a valid situation here. If not we need handle it.
> > Otherwise please ignore this comment.
> 
> It is safer in the case that this function gets called and there are no
> LBR MSRs in the load list.
>
Jan Beulich Feb. 22, 2017, 10:26 a.m. UTC | #4
>>> On 17.02.17 at 16:42, <sergey.dyasli@citrix.com> wrote:
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2284,6 +2284,8 @@ static void pi_notification_interrupt(struct cpu_user_regs *regs)
>      raise_softirq(VCPU_KICK_SOFTIRQ);
>  }
>  
> +static void __init vmx_lbr_tsx_fixup_check(void);

vmx_ prefixes are pretty pointless for static functions in this
particular source file (there's at least one more below).

> @@ -2876,7 +2938,11 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
>              for ( ; (rc == 0) && lbr->count; lbr++ )
>                  for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
>                      if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
> +                    {
>                          vmx_disable_intercept_for_msr(v, lbr->base + i, MSR_TYPE_R | MSR_TYPE_W);
> +                        if ( vmx_lbr_tsx_fixup_needed )
> +                            v->arch.hvm_vmx.lbr_tsx_fixup_enabled = true;

Is there anything wrong with

    v->arch.hvm_vmx.lbr_tsx_fixup_enabled = vmx_lbr_tsx_fixup_needed;

?

> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -147,6 +147,7 @@ struct arch_vmx_struct {
>      uint8_t              vmx_realmode;
>      /* Are we emulating rather than VMENTERing? */
>      uint8_t              vmx_emulate;
> +    bool                 lbr_tsx_fixup_enabled;
>      /* Bitmask of segments that we can't safely use in virtual 8086 mode */
>      uint16_t             vm86_segment_mask;
>      /* Shadow CS, SS, DS, ES, FS, GS, TR while in virtual 8086 mode */

Please put blank lines around your addition.

> --- a/xen/include/asm-x86/msr-index.h
> +++ b/xen/include/asm-x86/msr-index.h
> @@ -55,6 +55,8 @@
>  #define MSR_IA32_PEBS_ENABLE		0x000003f1
>  #define MSR_IA32_DS_AREA		0x00000600
>  #define MSR_IA32_PERF_CAPABILITIES	0x00000345
> +/* Lower 6 bits define the format of the address in the LBR stack */
> +#define LBR_FORMAT_MSR_IA32_PERF_CAP	0x3f

Commonly the MSR name precedes the field specific name component.

Jan
Sergey Dyasli Feb. 22, 2017, 1:58 p.m. UTC | #5
On Wed, 2017-02-22 at 03:26 -0700, Jan Beulich wrote:
> > > > On 17.02.17 at 16:42, <sergey.dyasli@citrix.com> wrote:

> > 

> > --- a/xen/arch/x86/hvm/vmx/vmx.c

> > +++ b/xen/arch/x86/hvm/vmx/vmx.c

> > @@ -2284,6 +2284,8 @@ static void pi_notification_interrupt(struct cpu_user_regs *regs)

> >      raise_softirq(VCPU_KICK_SOFTIRQ);

> >  }

> >  

> > +static void __init vmx_lbr_tsx_fixup_check(void);

> 

> vmx_ prefixes are pretty pointless for static functions in this

> particular source file (there's at least one more below).


I agree on that. Will remove in v3.

> 

> > @@ -2876,7 +2938,11 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)

> >              for ( ; (rc == 0) && lbr->count; lbr++ )

> >                  for ( i = 0; (rc == 0) && (i < lbr->count); i++ )

> >                      if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )

> > +                    {

> >                          vmx_disable_intercept_for_msr(v, lbr->base + i, MSR_TYPE_R | MSR_TYPE_W);

> > +                        if ( vmx_lbr_tsx_fixup_needed )

> > +                            v->arch.hvm_vmx.lbr_tsx_fixup_enabled = true;

> 

> Is there anything wrong with

> 

>     v->arch.hvm_vmx.lbr_tsx_fixup_enabled = vmx_lbr_tsx_fixup_needed;

> 

> ?


Only 80 characters limit prevented me from doing it that way.

> 

> > --- a/xen/include/asm-x86/hvm/vmx/vmcs.h

> > +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h

> > @@ -147,6 +147,7 @@ struct arch_vmx_struct {

> >      uint8_t              vmx_realmode;

> >      /* Are we emulating rather than VMENTERing? */

> >      uint8_t              vmx_emulate;

> > +    bool                 lbr_tsx_fixup_enabled;

> >      /* Bitmask of segments that we can't safely use in virtual 8086 mode */

> >      uint16_t             vm86_segment_mask;

> >      /* Shadow CS, SS, DS, ES, FS, GS, TR while in virtual 8086 mode */

> 

> Please put blank lines around your addition.


Will do in v3.

> 

> > --- a/xen/include/asm-x86/msr-index.h

> > +++ b/xen/include/asm-x86/msr-index.h

> > @@ -55,6 +55,8 @@

> >  #define MSR_IA32_PEBS_ENABLE		0x000003f1

> >  #define MSR_IA32_DS_AREA		0x00000600

> >  #define MSR_IA32_PERF_CAPABILITIES	0x00000345

> > +/* Lower 6 bits define the format of the address in the LBR stack */

> > +#define LBR_FORMAT_MSR_IA32_PERF_CAP	0x3f

> 

> Commonly the MSR name precedes the field specific name component.


I was trying to follow the naming style in that file but I might've
gotten it wrong. The new define can be renamed to a more appropriate
name, I'm open to suggestions.

-- 
Thanks,
Sergey
Andrew Cooper Feb. 22, 2017, 2:04 p.m. UTC | #6
On 22/02/17 13:58, Sergey Dyasli wrote:
>
>>> @@ -2876,7 +2938,11 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
>>>              for ( ; (rc == 0) && lbr->count; lbr++ )
>>>                  for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
>>>                      if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
>>> +                    {
>>>                          vmx_disable_intercept_for_msr(v, lbr->base + i, MSR_TYPE_R | MSR_TYPE_W);
>>> +                        if ( vmx_lbr_tsx_fixup_needed )
>>> +                            v->arch.hvm_vmx.lbr_tsx_fixup_enabled = true;
>> Is there anything wrong with
>>
>>     v->arch.hvm_vmx.lbr_tsx_fixup_enabled = vmx_lbr_tsx_fixup_needed;
>>
>> ?
> Only 80 characters limit prevented me from doing it that way.

Splitting after the = is acceptable, e.g.

v->arch.hvm_vmx.lbr_tsx_fixup_enabled =
    vmx_lbr_tsx_fixup_needed;

~Andrew
Jan Beulich Feb. 22, 2017, 2:13 p.m. UTC | #7
>>> On 22.02.17 at 14:58, <sergey.dyasli@citrix.com> wrote:
> On Wed, 2017-02-22 at 03:26 -0700, Jan Beulich wrote:
>> > > > On 17.02.17 at 16:42, <sergey.dyasli@citrix.com> wrote:
>> > --- a/xen/include/asm-x86/msr-index.h
>> > +++ b/xen/include/asm-x86/msr-index.h
>> > @@ -55,6 +55,8 @@
>> >  #define MSR_IA32_PEBS_ENABLE		0x000003f1
>> >  #define MSR_IA32_DS_AREA		0x00000600
>> >  #define MSR_IA32_PERF_CAPABILITIES	0x00000345
>> > +/* Lower 6 bits define the format of the address in the LBR stack */
>> > +#define LBR_FORMAT_MSR_IA32_PERF_CAP	0x3f
>> 
>> Commonly the MSR name precedes the field specific name component.
> 
> I was trying to follow the naming style in that file but I might've
> gotten it wrong. The new define can be renamed to a more appropriate
> name, I'm open to suggestions.

MSR_IA32_PERF_CAP_LBR_FORMAT

Jan
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 42f4fbd..3547b78 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2284,6 +2284,8 @@  static void pi_notification_interrupt(struct cpu_user_regs *regs)
     raise_softirq(VCPU_KICK_SOFTIRQ);
 }
 
+static void __init vmx_lbr_tsx_fixup_check(void);
+
 const struct hvm_function_table * __init start_vmx(void)
 {
     set_in_cr4(X86_CR4_VMXE);
@@ -2353,6 +2355,8 @@  const struct hvm_function_table * __init start_vmx(void)
 
     setup_vmcs_dump();
 
+    vmx_lbr_tsx_fixup_check();
+
     return &vmx_function_table;
 }
 
@@ -2513,6 +2517,14 @@  static int vmx_cr_access(unsigned long exit_qualification)
     return X86EMUL_OKAY;
 }
 
+/* This defines the layout of struct lbr_info[] */
+#define LBR_LASTINT_FROM_IDX    0
+#define LBR_LASTINT_TO_IDX      1
+#define LBR_LASTBRANCH_TOS_IDX  2
+#define LBR_LASTBRANCH_FROM_IDX 3
+#define LBR_LASTBRANCH_TO_IDX   4
+#define LBR_LASTBRANCH_INFO     5
+
 static const struct lbr_info {
     u32 base, count;
 } p4_lbr[] = {
@@ -2618,6 +2630,56 @@  static const struct lbr_info *last_branch_msr_get(void)
     return NULL;
 }
 
+enum
+{
+    LBR_FORMAT_32                 = 0x0, /* 32-bit record format */
+    LBR_FORMAT_LIP                = 0x1, /* 64-bit LIP record format */
+    LBR_FORMAT_EIP                = 0x2, /* 64-bit EIP record format */
+    LBR_FORMAT_EIP_FLAGS          = 0x3, /* 64-bit EIP, Flags */
+    LBR_FORMAT_EIP_FLAGS_TSX      = 0x4, /* 64-bit EIP, Flags, TSX */
+    LBR_FORMAT_EIP_FLAGS_TSX_INFO = 0x5, /* 64-bit EIP, Flags, TSX, LBR_INFO */
+    LBR_FORMAT_EIP_FLAGS_CYCLES   = 0x6, /* 64-bit EIP, Flags, Cycles */
+};
+
+#define LBR_FROM_SIGNEXT_2MSB  ((1ULL << 59) | (1ULL << 60))
+
+static bool __read_mostly vmx_lbr_tsx_fixup_needed;
+static uint32_t __read_mostly lbr_from_start;
+static uint32_t __read_mostly lbr_from_end;
+static uint32_t __read_mostly lbr_lastint_from;
+
+static void __init vmx_lbr_tsx_fixup_check(void)
+{
+    bool tsx_support = cpu_has_hle || cpu_has_rtm;
+    uint64_t caps;
+    uint32_t lbr_format;
+
+    /* Fixup is needed only when TSX support is disabled ... */
+    if ( tsx_support )
+        return;
+
+    if ( !cpu_has_pdcm )
+        return;
+
+    rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
+    lbr_format = caps & LBR_FORMAT_MSR_IA32_PERF_CAP;
+
+    /* ... and the address format of LBR includes TSX bits 61:62 */
+    if ( lbr_format == LBR_FORMAT_EIP_FLAGS_TSX )
+    {
+        const struct lbr_info *lbr = last_branch_msr_get();
+
+        if ( lbr == NULL )
+            return;
+
+        lbr_lastint_from = lbr[LBR_LASTINT_FROM_IDX].base;
+        lbr_from_start = lbr[LBR_LASTBRANCH_FROM_IDX].base;
+        lbr_from_end = lbr_from_start + lbr[LBR_LASTBRANCH_FROM_IDX].count;
+
+        vmx_lbr_tsx_fixup_needed = true;
+    }
+}
+
 static int is_last_branch_msr(u32 ecx)
 {
     const struct lbr_info *lbr = last_branch_msr_get();
@@ -2876,7 +2938,11 @@  static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
             for ( ; (rc == 0) && lbr->count; lbr++ )
                 for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
                     if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
+                    {
                         vmx_disable_intercept_for_msr(v, lbr->base + i, MSR_TYPE_R | MSR_TYPE_W);
+                        if ( vmx_lbr_tsx_fixup_needed )
+                            v->arch.hvm_vmx.lbr_tsx_fixup_enabled = true;
+                    }
         }
 
         if ( (rc < 0) ||
@@ -3897,6 +3963,27 @@  out:
     }
 }
 
+static void vmx_lbr_tsx_fixup(void)
+{
+    struct vcpu *curr = current;
+    unsigned int msr_count = curr->arch.hvm_vmx.msr_count;
+    struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
+    struct vmx_msr_entry *msr;
+
+    if ( (msr = vmx_find_msr(lbr_from_start, VMX_GUEST_MSR)) != NULL )
+    {
+        /*
+         * Sign extend into bits 61:62 while preserving bit 63
+         * The loop relies on the fact that MSR array is sorted.
+         */
+        for ( ; msr < msr_area + msr_count && msr->index < lbr_from_end; msr++ )
+            msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
+    }
+
+    if ( (msr = vmx_find_msr(lbr_lastint_from, VMX_GUEST_MSR)) != NULL )
+        msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
+}
+
 void vmx_vmenter_helper(const struct cpu_user_regs *regs)
 {
     struct vcpu *curr = current;
@@ -3953,6 +4040,9 @@  void vmx_vmenter_helper(const struct cpu_user_regs *regs)
     }
 
  out:
+    if ( unlikely(curr->arch.hvm_vmx.lbr_tsx_fixup_enabled) )
+        vmx_lbr_tsx_fixup();
+
     HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
 
     __vmwrite(GUEST_RIP,    regs->rip);
diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
index 39ad582..3b187ac 100644
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -78,6 +78,9 @@ 
 #define cpu_has_cmp_legacy	boot_cpu_has(X86_FEATURE_CMP_LEGACY)
 #define cpu_has_tbm		boot_cpu_has(X86_FEATURE_TBM)
 #define cpu_has_itsc		boot_cpu_has(X86_FEATURE_ITSC)
+#define cpu_has_hle		boot_cpu_has(X86_FEATURE_HLE)
+#define cpu_has_rtm		boot_cpu_has(X86_FEATURE_RTM)
+#define cpu_has_pdcm		boot_cpu_has(X86_FEATURE_PDCM)
 
 enum _cache_type {
     CACHE_TYPE_NULL = 0,
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 903af51..2c9c3c9 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -147,6 +147,7 @@  struct arch_vmx_struct {
     uint8_t              vmx_realmode;
     /* Are we emulating rather than VMENTERing? */
     uint8_t              vmx_emulate;
+    bool                 lbr_tsx_fixup_enabled;
     /* Bitmask of segments that we can't safely use in virtual 8086 mode */
     uint16_t             vm86_segment_mask;
     /* Shadow CS, SS, DS, ES, FS, GS, TR while in virtual 8086 mode */
diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
index 98dbff1..8841419 100644
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -55,6 +55,8 @@ 
 #define MSR_IA32_PEBS_ENABLE		0x000003f1
 #define MSR_IA32_DS_AREA		0x00000600
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
+/* Lower 6 bits define the format of the address in the LBR stack */
+#define LBR_FORMAT_MSR_IA32_PERF_CAP	0x3f
 
 #define MSR_IA32_BNDCFGS		0x00000d90
 #define IA32_BNDCFGS_ENABLE		0x00000001