diff mbox

[v2,3/6] x86: fill high bits of mtrr mask

Message ID 1467659769-15900-4-git-send-email-dgilbert@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dr. David Alan Gilbert July 4, 2016, 7:16 p.m. UTC
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

Fill the bits between 51..number-of-physical-address-bits in the
MTRR_PHYSMASKn variable range mtrr masks so that they're consistent
in the migration stream irrespective of the physical address space
of the source VM in a migration.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/hw/i386/pc.h |  5 +++++
 target-i386/cpu.c    |  1 +
 target-i386/cpu.h    |  3 +++
 target-i386/kvm.c    | 25 ++++++++++++++++++++++++-
 4 files changed, 33 insertions(+), 1 deletion(-)

Comments

Michael S. Tsirkin July 4, 2016, 8:03 p.m. UTC | #1
On Mon, Jul 04, 2016 at 08:16:06PM +0100, Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> 
> Fill the bits between 51..number-of-physical-address-bits in the
> MTRR_PHYSMASKn variable range mtrr masks so that they're consistent
> in the migration stream irrespective of the physical address space
> of the source VM in a migration.
> 
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>

Could a bit more explanation be added here?
Why don't we migrate exactly what guest programmed?
With previous patch we mask on destination, do we not?

If it's for compat with old PC types, then why not
limit it to that?

> ---
>  include/hw/i386/pc.h |  5 +++++
>  target-i386/cpu.c    |  1 +
>  target-i386/cpu.h    |  3 +++
>  target-i386/kvm.c    | 25 ++++++++++++++++++++++++-
>  4 files changed, 33 insertions(+), 1 deletion(-)
> 
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 7e43b20..d85e924 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -374,6 +374,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
>          .driver   = TYPE_X86_CPU,\
>          .property = "cpuid-0xb",\
>          .value    = "off",\
> +    },\
> +    {\
> +        .driver = TYPE_X86_CPU,\
> +        .property = "fill-mtrr-mask",\
> +        .value = "off",\
>      },
>  
>  #define PC_COMPAT_2_5 \
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index ab13ef5..5737aba 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -3258,6 +3258,7 @@ static Property x86_cpu_properties[] = {
>      DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
>      DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
>      DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
> +    DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true),
>      DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 40),
>      DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, 0),
>      DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, 0),
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 221b1a2..a9bbd91 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -1181,6 +1181,9 @@ struct X86CPU {
>      /* Compatibility bits for old machine types: */
>      bool enable_cpuid_0xb;
>  
> +    /* if true fill the top bits of the MTRR_PHYSMASKn variable range */
> +    bool fill_mtrr_mask;
> +
>      /* Number of physical address bits supported */
>      uint32_t phys_bits;
>  
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index 6429205..cbcc30d 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -1935,6 +1935,7 @@ static int kvm_get_msrs(X86CPU *cpu)
>      CPUX86State *env = &cpu->env;
>      struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries;
>      int ret, i;
> +    uint64_t mtrr_top_bits;
>  
>      kvm_msr_buf_reset(cpu);
>  
> @@ -2084,6 +2085,27 @@ static int kvm_get_msrs(X86CPU *cpu)
>      }
>  
>      assert(ret == cpu->kvm_msr_buf->nmsrs);
> +    /*
> +     * MTRR masks: Each mask consists of 5 parts
> +     * a  10..0: must be zero
> +     * b  11   : valid bit
> +     * c n-1.12: actual mask bits
> +     * d  51..n: reserved must be zero
> +     * e  63.52: reserved must be zero
> +     *
> +     * 'n' is the number of physical bits supported by the CPU and is
> +     * apparently always <= 52.   We know our 'n' but don't know what
> +     * the destinations 'n' is; it might be smaller, in which case
> +     * it masks (c) on loading. It might be larger, in which case
> +     * we fill 'd' so that d..c is consistent irrespetive of the 'n'
> +     * we're migrating to.
> +     */
> +    if (cpu->fill_mtrr_mask && cpu->phys_bits < 52) {
> +        mtrr_top_bits = MAKE_64BIT_MASK(cpu->phys_bits, 52 - cpu->phys_bits);
> +    } else {
> +        mtrr_top_bits = 0;
> +    }
> +
>      for (i = 0; i < ret; i++) {
>          uint32_t index = msrs[i].index;
>          switch (index) {
> @@ -2279,7 +2301,8 @@ static int kvm_get_msrs(X86CPU *cpu)
>              break;
>          case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT - 1):
>              if (index & 1) {
> -                env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data;
> +                env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data |
> +                                                               mtrr_top_bits;
>              } else {
>                  env->mtrr_var[MSR_MTRRphysIndex(index)].base = msrs[i].data;
>              }
> -- 
> 2.7.4
Eduardo Habkost July 4, 2016, 8:14 p.m. UTC | #2
On Mon, Jul 04, 2016 at 11:03:59PM +0300, Michael S. Tsirkin wrote:
> On Mon, Jul 04, 2016 at 08:16:06PM +0100, Dr. David Alan Gilbert (git) wrote:
> > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > 
> > Fill the bits between 51..number-of-physical-address-bits in the
> > MTRR_PHYSMASKn variable range mtrr masks so that they're consistent
> > in the migration stream irrespective of the physical address space
> > of the source VM in a migration.
> > 
> > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> Could a bit more explanation be added here?
> Why don't we migrate exactly what guest programmed?

Because "exactly what the guest programmed" stops making sense if
phys_bits seen by the guest change when migrating (see Paolo's
explanation below).

> With previous patch we mask on destination, do we not?

The previous patch is necessary when phys_bits is smaller on
migration. This part is necessary when phys_bits increase on
migration.

> 
> If it's for compat with old PC types, then why not
> limit it to that?

It's not just for compat with old PC types. It is necessary when
phys_bits increase on migration, which may happen because it is
the default for a machine-type, or in case it was configured this
way by management.


I asked similar questions on v1, see
http://article.gmane.org/gmane.comp.emulators.qemu/419712

Paolo's reply is below.

On Fri, Jun 17, 2016 at 03:01:55PM +0200, Paolo Bonzini wrote:
> On 17/06/2016 14:46, Eduardo Habkost wrote:
> >> > 
> >> > The bits are reserved anyway, so we can do whatever we want with them.
> >> > In fact I think it's weird for the architecture to make them
> >> > must-be-zero, it might even make more sense to make them must-be-one...
> >> > It's a mask after all, and there's no way to access out-of-range
> >> > physical addresses.
> > 
> > If we always fill the bits on the source, the destination can't
> > differentiate between a 40-bit source that set the MTRR to
> > 0xffffffffff from a 36-bit source that set the MTRR to
> > 0xfffffffff.
> 
> That's not a bug, it's a feature.  MTRRs work by comparing (address &
> mtrr_mask) with mtrr_base.
> 
> A 40-bit source that sets the MTRR to 0xffffffffff must set higher bits
> of mtrr_base to zero, but when you migrate to another destination, you
> want the comparison to hold.  There are two ways for it to hold:
> 
> - if the physical address space becomes shorter, the base bits must be
> zero or writing mtrr_base fails, and the address bits must be zero or
> the processor fails to walk EPT page tables.  So it's fine to strip the
> top four bits of the mask.
> 
> - if the physical address space becomes larger, the base bits must be
> zero but the mask bits must become one.  So why not migrate them this
> way directly.
> 
> (For what it's worth, KVM also stores the mask extended to all ones, and
> strips the unnecessary high bits when reading the MSRs).
> 
> > I really want to print a warning if the MTRR value or the
> > phys-bits value is being changed during migration, just in case
> > this has unintended consequences in the future. We can send the
> > current phys_bits value in the migration stream, so the
> > destination can decide how to handle it (and which warnings to
> > print).
> 
> The problem with this is that it must be a warning only, because usually
> things will work (evidence: they have worked on RHEL6 and RHEL7 since
> 2010).  No one will read it until it's too late and they have lost a VM.
> 
> Note that the failure mode is pretty brutal since KVM reports an
> internal error right after restarting on the destination, and who knows
> what used to happen before the assertion was introduced.  All MSRs after
> MTRRs would be ignored by KVM!
> 
> Migrating and checking the configuration complicates the code and,
> because it's only for a warning, doesn't save you from massaging the
> values to make things work.
> 
> Paolo
>
Eduardo Habkost July 4, 2016, 8:21 p.m. UTC | #3
On Mon, Jul 04, 2016 at 08:16:06PM +0100, Dr. David Alan Gilbert (git) wrote:
[...]
> @@ -2084,6 +2085,27 @@ static int kvm_get_msrs(X86CPU *cpu)
>      }
>  
>      assert(ret == cpu->kvm_msr_buf->nmsrs);
> +    /*
> +     * MTRR masks: Each mask consists of 5 parts
> +     * a  10..0: must be zero
> +     * b  11   : valid bit
> +     * c n-1.12: actual mask bits
> +     * d  51..n: reserved must be zero
> +     * e  63.52: reserved must be zero
> +     *
> +     * 'n' is the number of physical bits supported by the CPU and is
> +     * apparently always <= 52.   We know our 'n' but don't know what
> +     * the destinations 'n' is; it might be smaller, in which case
> +     * it masks (c) on loading. It might be larger, in which case
> +     * we fill 'd' so that d..c is consistent irrespetive of the 'n'
> +     * we're migrating to.
> +     */
> +    if (cpu->fill_mtrr_mask && cpu->phys_bits < 52) {
> +        mtrr_top_bits = MAKE_64BIT_MASK(cpu->phys_bits, 52 - cpu->phys_bits);
> +    } else {
> +        mtrr_top_bits = 0;

How/where did you find this 52-bit limit? Is it documented
somewhere?

> +    }
> +
>      for (i = 0; i < ret; i++) {
>          uint32_t index = msrs[i].index;
>          switch (index) {
> @@ -2279,7 +2301,8 @@ static int kvm_get_msrs(X86CPU *cpu)
>              break;
>          case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT - 1):
>              if (index & 1) {
> -                env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data;
> +                env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data |
> +                                                               mtrr_top_bits;
>              } else {
>                  env->mtrr_var[MSR_MTRRphysIndex(index)].base = msrs[i].data;
>              }
> -- 
> 2.7.4
>
Dr. David Alan Gilbert July 5, 2016, 8:39 a.m. UTC | #4
* Eduardo Habkost (ehabkost@redhat.com) wrote:
> On Mon, Jul 04, 2016 at 08:16:06PM +0100, Dr. David Alan Gilbert (git) wrote:
> [...]
> > @@ -2084,6 +2085,27 @@ static int kvm_get_msrs(X86CPU *cpu)
> >      }
> >  
> >      assert(ret == cpu->kvm_msr_buf->nmsrs);
> > +    /*
> > +     * MTRR masks: Each mask consists of 5 parts
> > +     * a  10..0: must be zero
> > +     * b  11   : valid bit
> > +     * c n-1.12: actual mask bits
> > +     * d  51..n: reserved must be zero
> > +     * e  63.52: reserved must be zero
> > +     *
> > +     * 'n' is the number of physical bits supported by the CPU and is
> > +     * apparently always <= 52.   We know our 'n' but don't know what
> > +     * the destinations 'n' is; it might be smaller, in which case
> > +     * it masks (c) on loading. It might be larger, in which case
> > +     * we fill 'd' so that d..c is consistent irrespetive of the 'n'
> > +     * we're migrating to.
> > +     */
> > +    if (cpu->fill_mtrr_mask && cpu->phys_bits < 52) {
> > +        mtrr_top_bits = MAKE_64BIT_MASK(cpu->phys_bits, 52 - cpu->phys_bits);
> > +    } else {
> > +        mtrr_top_bits = 0;
> 
> How/where did you find this 52-bit limit? Is it documented
> somewhere?

It seems to come from AMDs original specification of AMD64; but you're
right we could do with a constant rather than the magical 52 everywhere.

Looking in AMD doc 24593 Rev 3.26 (AMD64 Arch Programmer's manual vol. 2)
p.191 Fig 7.6 MTRRphysBasen Register it shows it as PhysBase running from 51:32
and 63:52 being Reserved/MBZ;
The corresponding Intel doc (Intel 64 & IA-32 Architectures Dev manual 3A 11-25
fig 11-7) doesn't have that limit shown; however it does talk about 52-bit
physical addresses in a few places; e.g. 4.4 PAE paging talks about
'paging translates 32-bit linear addresses to 52-bit physical addresses'
I think the most relevant place in the Intel doc is 5.13.3 'Reserved Bit Checking'
which has a
  Table 5-8 'IA-32e Mode Page Level Protection Matrix with Execute-Disable Bit Capability Enabled'
this is a table of reserved bit fields and for each of the
page tables it shows bits checked as [51:MAXPHYADDR].
Any suggestions for a name for the 52 constant? I guess MaxMaxPhyAddress?

I guess someone decided that 4PB ought to be enough for anyone.

Dave

> 
> > +    }
> > +
> >      for (i = 0; i < ret; i++) {
> >          uint32_t index = msrs[i].index;
> >          switch (index) {
> > @@ -2279,7 +2301,8 @@ static int kvm_get_msrs(X86CPU *cpu)
> >              break;
> >          case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT - 1):
> >              if (index & 1) {
> > -                env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data;
> > +                env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data |
> > +                                                               mtrr_top_bits;
> >              } else {
> >                  env->mtrr_var[MSR_MTRRphysIndex(index)].base = msrs[i].data;
> >              }
> > -- 
> > 2.7.4
> > 
> 
> -- 
> Eduardo
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox

Patch

diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 7e43b20..d85e924 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -374,6 +374,11 @@  bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
         .driver   = TYPE_X86_CPU,\
         .property = "cpuid-0xb",\
         .value    = "off",\
+    },\
+    {\
+        .driver = TYPE_X86_CPU,\
+        .property = "fill-mtrr-mask",\
+        .value = "off",\
     },
 
 #define PC_COMPAT_2_5 \
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index ab13ef5..5737aba 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -3258,6 +3258,7 @@  static Property x86_cpu_properties[] = {
     DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
     DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
     DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
+    DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true),
     DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 40),
     DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, 0),
     DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, 0),
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 221b1a2..a9bbd91 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -1181,6 +1181,9 @@  struct X86CPU {
     /* Compatibility bits for old machine types: */
     bool enable_cpuid_0xb;
 
+    /* if true fill the top bits of the MTRR_PHYSMASKn variable range */
+    bool fill_mtrr_mask;
+
     /* Number of physical address bits supported */
     uint32_t phys_bits;
 
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 6429205..cbcc30d 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1935,6 +1935,7 @@  static int kvm_get_msrs(X86CPU *cpu)
     CPUX86State *env = &cpu->env;
     struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries;
     int ret, i;
+    uint64_t mtrr_top_bits;
 
     kvm_msr_buf_reset(cpu);
 
@@ -2084,6 +2085,27 @@  static int kvm_get_msrs(X86CPU *cpu)
     }
 
     assert(ret == cpu->kvm_msr_buf->nmsrs);
+    /*
+     * MTRR masks: Each mask consists of 5 parts
+     * a  10..0: must be zero
+     * b  11   : valid bit
+     * c n-1.12: actual mask bits
+     * d  51..n: reserved must be zero
+     * e  63.52: reserved must be zero
+     *
+     * 'n' is the number of physical bits supported by the CPU and is
+     * apparently always <= 52.   We know our 'n' but don't know what
+     * the destinations 'n' is; it might be smaller, in which case
+     * it masks (c) on loading. It might be larger, in which case
+     * we fill 'd' so that d..c is consistent irrespetive of the 'n'
+     * we're migrating to.
+     */
+    if (cpu->fill_mtrr_mask && cpu->phys_bits < 52) {
+        mtrr_top_bits = MAKE_64BIT_MASK(cpu->phys_bits, 52 - cpu->phys_bits);
+    } else {
+        mtrr_top_bits = 0;
+    }
+
     for (i = 0; i < ret; i++) {
         uint32_t index = msrs[i].index;
         switch (index) {
@@ -2279,7 +2301,8 @@  static int kvm_get_msrs(X86CPU *cpu)
             break;
         case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT - 1):
             if (index & 1) {
-                env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data;
+                env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data |
+                                                               mtrr_top_bits;
             } else {
                 env->mtrr_var[MSR_MTRRphysIndex(index)].base = msrs[i].data;
             }