
[1/8] x86/msr: Fix migration compatibility issue with MSR_SPEC_CTRL

Message ID 20220126084452.28975-2-andrew.cooper3@citrix.com (mailing list archive)
State Superseded
Series x86: MSR_SPEC_CTRL support for SVM guests

Commit Message

Andrew Cooper Jan. 26, 2022, 8:44 a.m. UTC
This bug existed early in 2018 between MSR_SPEC_CTRL arriving in microcode,
and SSBD arriving a few months later.  It went unnoticed presumably because
everyone was busy rebooting everything.

The same bug will reappear when adding PSFD support.

Clamp the guest MSR_SPEC_CTRL value to that permitted by CPUID on migrate.
The guest is already playing with reserved bits at this point, and clamping
the value will prevent a migration to a less capable host from failing.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
---
 xen/arch/x86/hvm/hvm.c         | 25 +++++++++++++++++++++++--
 xen/arch/x86/include/asm/msr.h |  2 ++
 xen/arch/x86/msr.c             | 33 +++++++++++++++++++++------------
 3 files changed, 46 insertions(+), 14 deletions(-)

Comments

Roger Pau Monné Jan. 26, 2022, 12:17 p.m. UTC | #1
On Wed, Jan 26, 2022 at 08:44:45AM +0000, Andrew Cooper wrote:
> This bug existed early in 2018 between MSR_SPEC_CTRL arriving in microcode,
> and SSBD arriving a few months later.  It went unnoticed presumably because
> everyone was busy rebooting everything.
> 
> The same bug will reappear when adding PSFD support.
> 
> Clamp the guest MSR_SPEC_CTRL value to that permitted by CPUID on migrate.
> The guest is already playing with reserved bits at this point, and clamping
> the value will prevent a migration to a less capable host from failing.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> CC: Wei Liu <wl@xen.org>
> ---
>  xen/arch/x86/hvm/hvm.c         | 25 +++++++++++++++++++++++--
>  xen/arch/x86/include/asm/msr.h |  2 ++
>  xen/arch/x86/msr.c             | 33 +++++++++++++++++++++------------
>  3 files changed, 46 insertions(+), 14 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index d7d3299b431e..c4ddb8607d9c 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -1340,6 +1340,7 @@ static const uint32_t msrs_to_send[] = {
>  
>  static int hvm_save_cpu_msrs(struct vcpu *v, hvm_domain_context_t *h)
>  {
> +    const struct domain *d = v->domain;
>      struct hvm_save_descriptor *desc = _p(&h->data[h->cur]);
>      struct hvm_msr *ctxt;
>      unsigned int i;
> @@ -1355,7 +1356,8 @@ static int hvm_save_cpu_msrs(struct vcpu *v, hvm_domain_context_t *h)
>      for ( i = 0; i < ARRAY_SIZE(msrs_to_send); ++i )
>      {
>          uint64_t val;
> -        int rc = guest_rdmsr(v, msrs_to_send[i], &val);
> +        unsigned int msr = msrs_to_send[i];
> +        int rc = guest_rdmsr(v, msr, &val);
>  
>          /*
>           * It is the programmers responsibility to ensure that
> @@ -1375,7 +1377,26 @@ static int hvm_save_cpu_msrs(struct vcpu *v, hvm_domain_context_t *h)
>          if ( !val )
>              continue; /* Skip empty MSRs. */
>  
> -        ctxt->msr[ctxt->count].index = msrs_to_send[i];
> +        /*
> +         * Guests are given full access to certain MSRs for performance
> +         * reasons.  A consequence is that Xen is unable to enforce that all
> +         * bits disallowed by the CPUID policy yield #GP, and an enterprising
> +         * guest may be able to set and use a bit it ought to leave alone.
> +         *
> +         * When migrating from a more capable host to a less capable one, such
> +         * bits may be rejected by the destination, and the migration failed.
> +         *
> +         * Discard such bits here on the source side.  Such bits have reserved
> +         * behaviour, and the guest has only itself to blame.
> +         */
> +        switch ( msr )
> +        {
> +        case MSR_SPEC_CTRL:
> +            val &= msr_spec_ctrl_valid_bits(d->arch.cpuid);
> +            break;
> +        }

Should you move the check for !val here, in case the clearing done
here zeros the MSR?

> +
> +        ctxt->msr[ctxt->count].index = msr;
>          ctxt->msr[ctxt->count++].val = val;
>      }
>  
> diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h
> index 10039c2d227b..657a3295613d 100644
> --- a/xen/arch/x86/include/asm/msr.h
> +++ b/xen/arch/x86/include/asm/msr.h
> @@ -277,6 +277,8 @@ static inline void wrmsr_tsc_aux(uint32_t val)
>      }
>  }
>  
> +uint64_t msr_spec_ctrl_valid_bits(const struct cpuid_policy *cp);
> +
>  extern struct msr_policy     raw_msr_policy,
>                              host_msr_policy,
>                            pv_max_msr_policy,
> diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
> index 2cc355575d45..5e80c8b47c21 100644
> --- a/xen/arch/x86/msr.c
> +++ b/xen/arch/x86/msr.c
> @@ -435,6 +435,24 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
>      return X86EMUL_EXCEPTION;
>  }
>  
> +/*
> + * Caller to confirm that MSR_SPEC_CTRL is available.  Intel and AMD have
> + * separate CPUID features for this functionality, but only one set will be
> + * active.
> + */
> +uint64_t msr_spec_ctrl_valid_bits(const struct cpuid_policy *cp)
> +{
> +    bool ssbd = cp->feat.ssbd;
> +
> +    /*
> +     * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. ignored)
> +     * when STIBP isn't enumerated in hardware.
> +     */
> +    return (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP |
> +            (ssbd       ? SPEC_CTRL_SSBD       : 0) |
> +            0);

The format here looks weird to me, and I don't get why you unconditionally
OR in a 0 at the end?

I would also be fine with using cp->feat.ssbd directly here.
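
i.e. something along the lines of (just a sketch of what I mean; it drops
the local variable and the trailing 0):

    return (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP |
            (cp->feat.ssbd ? SPEC_CTRL_SSBD : 0));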

Thanks, Roger.
Andrew Cooper Jan. 26, 2022, 12:54 p.m. UTC | #2
On 26/01/2022 12:17, Roger Pau Monne wrote:
> On Wed, Jan 26, 2022 at 08:44:45AM +0000, Andrew Cooper wrote:
>> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
>> index d7d3299b431e..c4ddb8607d9c 100644
>> --- a/xen/arch/x86/hvm/hvm.c
>> +++ b/xen/arch/x86/hvm/hvm.c
>> @@ -1375,7 +1377,26 @@ static int hvm_save_cpu_msrs(struct vcpu *v, hvm_domain_context_t *h)
>>          if ( !val )
>>              continue; /* Skip empty MSRs. */
>>  
>> -        ctxt->msr[ctxt->count].index = msrs_to_send[i];
>> +        /*
>> +         * Guests are given full access to certain MSRs for performance
>> +         * reasons.  A consequence is that Xen is unable to enforce that all
>> +         * bits disallowed by the CPUID policy yield #GP, and an enterprising
>> +         * guest may be able to set and use a bit it ought to leave alone.
>> +         *
>> +         * When migrating from a more capable host to a less capable one, such
>> +         * bits may be rejected by the destination, and the migration failed.
>> +         *
>> +         * Discard such bits here on the source side.  Such bits have reserved
>> +         * behaviour, and the guest has only itself to blame.
>> +         */
>> +        switch ( msr )
>> +        {
>> +        case MSR_SPEC_CTRL:
>> +            val &= msr_spec_ctrl_valid_bits(d->arch.cpuid);
>> +            break;
>> +        }
> Should you move the check for !val here, in case the clearing done
> here zeros the MSR?

Skipping MSRs with a value of 0 is purely an optimisation to avoid
putting a load of empty MSR records in the stream, but nothing will go
wrong if such records are present.
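
For reference, what's being suggested would end up looking something like
this (a sketch only, not what this patch does):

    switch ( msr )
    {
    case MSR_SPEC_CTRL:
        val &= msr_spec_ctrl_valid_bits(d->arch.cpuid);
        break;
    }

    if ( !val )
        continue; /* Skip empty MSRs, including any emptied by the clamp. */

    ctxt->msr[ctxt->count].index = msr;
    ctxt->msr[ctxt->count++].val = val;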

The cost of the extra logic in Xen to spot the 0 corner case is going to be
larger than the data & downtime saving of 16 bytes for an already-buggy VM.

I can't say I care for fixing it...

>> +
>> +        ctxt->msr[ctxt->count].index = msr;
>>          ctxt->msr[ctxt->count++].val = val;
>>      }
>>  
>> diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h
>> index 10039c2d227b..657a3295613d 100644
>> --- a/xen/arch/x86/include/asm/msr.h
>> +++ b/xen/arch/x86/include/asm/msr.h
>> @@ -277,6 +277,8 @@ static inline void wrmsr_tsc_aux(uint32_t val)
>>      }
>>  }
>>  
>> +uint64_t msr_spec_ctrl_valid_bits(const struct cpuid_policy *cp);
>> +
>>  extern struct msr_policy     raw_msr_policy,
>>                              host_msr_policy,
>>                            pv_max_msr_policy,
>> diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
>> index 2cc355575d45..5e80c8b47c21 100644
>> --- a/xen/arch/x86/msr.c
>> +++ b/xen/arch/x86/msr.c
>> @@ -435,6 +435,24 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
>>      return X86EMUL_EXCEPTION;
>>  }
>>  
>> +/*
>> + * Caller to confirm that MSR_SPEC_CTRL is available.  Intel and AMD have
>> + * separate CPUID features for this functionality, but only one set will be
>> + * active.
>> + */
>> +uint64_t msr_spec_ctrl_valid_bits(const struct cpuid_policy *cp)
>> +{
>> +    bool ssbd = cp->feat.ssbd;
>> +
>> +    /*
>> +     * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. ignored)
>> +     * when STIBP isn't enumerated in hardware.
>> +     */
>> +    return (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP |
>> +            (ssbd       ? SPEC_CTRL_SSBD       : 0) |
>> +            0);
> The format here looks weird to me, and I don't get why you unconditionally
> OR in a 0 at the end?
>
> I would also be fine with using cp->feat.ssbd directly here.

See patch 7 to cover both points.

The trailing 0 is like a trailing comma: it's there to reduce the diffstat
of patches adding new lines.
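
e.g. when PSFD support gets added, only one new line needs to appear in the
diff, something like (illustrative only; the psfd field and SPEC_CTRL_PSFD
constant names are assumed here):

    return (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP |
            (ssbd       ? SPEC_CTRL_SSBD       : 0) |
            (psfd       ? SPEC_CTRL_PSFD       : 0) | /* new line; names assumed */
            0);

and none of the neighbouring lines get touched.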

~Andrew
Jan Beulich Jan. 26, 2022, 4:33 p.m. UTC | #3
On 26.01.2022 09:44, Andrew Cooper wrote:
> This bug existed early in 2018 between MSR_SPEC_CTRL arriving in microcode,
> and SSBD arriving a few months later.  It went unnoticed presumably because
> everyone was busy rebooting everything.
> 
> The same bug will reappear when adding PSFD support.
> 
> Clamp the guest MSR_SPEC_CTRL value to that permitted by CPUID on migrate.
> The guest is already playing with reserved bits at this point, and clamping
> the value will prevent a migration to a less capable host from failing.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>

Patch

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index d7d3299b431e..c4ddb8607d9c 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1340,6 +1340,7 @@  static const uint32_t msrs_to_send[] = {
 
 static int hvm_save_cpu_msrs(struct vcpu *v, hvm_domain_context_t *h)
 {
+    const struct domain *d = v->domain;
     struct hvm_save_descriptor *desc = _p(&h->data[h->cur]);
     struct hvm_msr *ctxt;
     unsigned int i;
@@ -1355,7 +1356,8 @@  static int hvm_save_cpu_msrs(struct vcpu *v, hvm_domain_context_t *h)
     for ( i = 0; i < ARRAY_SIZE(msrs_to_send); ++i )
     {
         uint64_t val;
-        int rc = guest_rdmsr(v, msrs_to_send[i], &val);
+        unsigned int msr = msrs_to_send[i];
+        int rc = guest_rdmsr(v, msr, &val);
 
         /*
          * It is the programmers responsibility to ensure that
@@ -1375,7 +1377,26 @@  static int hvm_save_cpu_msrs(struct vcpu *v, hvm_domain_context_t *h)
         if ( !val )
             continue; /* Skip empty MSRs. */
 
-        ctxt->msr[ctxt->count].index = msrs_to_send[i];
+        /*
+         * Guests are given full access to certain MSRs for performance
+         * reasons.  A consequence is that Xen is unable to enforce that all
+         * bits disallowed by the CPUID policy yield #GP, and an enterprising
+         * guest may be able to set and use a bit it ought to leave alone.
+         *
+         * When migrating from a more capable host to a less capable one, such
+         * bits may be rejected by the destination, and the migration failed.
+         *
+         * Discard such bits here on the source side.  Such bits have reserved
+         * behaviour, and the guest has only itself to blame.
+         */
+        switch ( msr )
+        {
+        case MSR_SPEC_CTRL:
+            val &= msr_spec_ctrl_valid_bits(d->arch.cpuid);
+            break;
+        }
+
+        ctxt->msr[ctxt->count].index = msr;
         ctxt->msr[ctxt->count++].val = val;
     }
 
diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h
index 10039c2d227b..657a3295613d 100644
--- a/xen/arch/x86/include/asm/msr.h
+++ b/xen/arch/x86/include/asm/msr.h
@@ -277,6 +277,8 @@  static inline void wrmsr_tsc_aux(uint32_t val)
     }
 }
 
+uint64_t msr_spec_ctrl_valid_bits(const struct cpuid_policy *cp);
+
 extern struct msr_policy     raw_msr_policy,
                             host_msr_policy,
                           pv_max_msr_policy,
diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
index 2cc355575d45..5e80c8b47c21 100644
--- a/xen/arch/x86/msr.c
+++ b/xen/arch/x86/msr.c
@@ -435,6 +435,24 @@  int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
     return X86EMUL_EXCEPTION;
 }
 
+/*
+ * Caller to confirm that MSR_SPEC_CTRL is available.  Intel and AMD have
+ * separate CPUID features for this functionality, but only one set will be
+ * active.
+ */
+uint64_t msr_spec_ctrl_valid_bits(const struct cpuid_policy *cp)
+{
+    bool ssbd = cp->feat.ssbd;
+
+    /*
+     * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. ignored)
+     * when STIBP isn't enumerated in hardware.
+     */
+    return (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP |
+            (ssbd       ? SPEC_CTRL_SSBD       : 0) |
+            0);
+}
+
 int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
 {
     const struct vcpu *curr = current;
@@ -508,18 +526,9 @@  int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
         break;
 
     case MSR_SPEC_CTRL:
-        if ( !cp->feat.ibrsb )
-            goto gp_fault; /* MSR available? */
-
-        /*
-         * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. ignored)
-         * when STIBP isn't enumerated in hardware.
-         */
-        rsvd = ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP |
-                 (cp->feat.ssbd ? SPEC_CTRL_SSBD : 0));
-
-        if ( val & rsvd )
-            goto gp_fault; /* Rsvd bit set? */
+        if ( !cp->feat.ibrsb ||
+             (val & ~msr_spec_ctrl_valid_bits(cp)) )
+            goto gp_fault;
         goto set_reg;
 
     case MSR_PRED_CMD: