diff mbox

[v3] fw/msr_feature_control: add support to set MSR_IA32_FEATURE_CONTROL

Message ID 20160622065324.23812-1-haozhong.zhang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Haozhong Zhang June 22, 2016, 6:53 a.m. UTC
OS usually expects BIOS to set certain bits in MSR_IA32_FEATURE_CONTROL
for some features (e.g. VMX and LMCE). QEMU provides a fw_cfg file
"etc/msr_feature_control" to advise bits that should be set in
MSR_IA32_FEATURE_CONTROL. If this file exists, SeaBIOS will set the
advised bits in that MSR.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
---
Changes in v3:
 * Move msr_feature_control_setup() to paravirt.c.

Changes in v2:
 * Call msr_feature_control_setup() before smp_setup().
 * Use wrmsr_smp() instead of wrmsr() on BSP.
 * Rename smp_mtrr and smp_mtrr_count to smp_msr and smp_msr_count
   as they are not only used for MTRR now.
---
 src/fw/paravirt.c | 12 +++++++++++-
 src/fw/smp.c      | 20 ++++++++++----------
 2 files changed, 21 insertions(+), 11 deletions(-)

Comments

Gerd Hoffmann July 1, 2016, 12:02 p.m. UTC | #1
On Mi, 2016-06-22 at 14:53 +0800, Haozhong Zhang wrote:
> OS usually expects BIOS to set certain bits in MSR_IA32_FEATURE_CONTROL
> for some features (e.g. VMX and LMCE). QEMU provides a fw_cfg file
> "etc/msr_feature_control" to advise bits that should be set in
> MSR_IA32_FEATURE_CONTROL. If this file exists, SeaBIOS will set the
> advised bits in that MSR.

Committed to master, cherry-picked into 1.9-stable.

thanks,
  Gerd
Laszlo Ersek July 5, 2016, 10:19 p.m. UTC | #2
On 06/22/16 08:53, Haozhong Zhang wrote:
> OS usually expects BIOS to set certain bits in MSR_IA32_FEATURE_CONTROL
> for some features (e.g. VMX and LMCE). QEMU provides a fw_cfg file
> "etc/msr_feature_control" to advise bits that should be set in
> MSR_IA32_FEATURE_CONTROL. If this file exists, SeaBIOS will set the
> advised bits in that MSR.
> 
> Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> Changes in v3:
>  * Move msr_feature_control_setup() to paravirt.c.
> 
> Changes in v2:
>  * Call msr_feature_control_setup() before smp_setup().
>  * Use wrmsr_smp() instead of wrmsr() on BSP.
>  * Rename smp_mtrr and smp_mtrr_count to smp_msr and smp_msr_count
>    as they are not only used for MTRR now.
> ---
>  src/fw/paravirt.c | 12 +++++++++++-
>  src/fw/smp.c      | 20 ++++++++++----------
>  2 files changed, 21 insertions(+), 11 deletions(-)
> 
> diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c
> index 8ed4380..73a08f0 100644
> --- a/src/fw/paravirt.c
> +++ b/src/fw/paravirt.c
> @@ -131,6 +131,15 @@ qemu_preinit(void)
>      dprintf(1, "RamSize: 0x%08x [cmos]\n", RamSize);
>  }
>  
> +#define MSR_IA32_FEATURE_CONTROL 0x0000003a
> +
> +static void msr_feature_control_setup(void)
> +{
> +    u64 feature_control_bits = romfile_loadint("etc/msr_feature_control", 0);
> +    if (feature_control_bits)
> +        wrmsr_smp(MSR_IA32_FEATURE_CONTROL, feature_control_bits);
> +}
> +
>  void
>  qemu_platform_setup(void)
>  {
> @@ -149,8 +158,9 @@ qemu_platform_setup(void)
>      smm_device_setup();
>      smm_setup();
>  
> -    // Initialize mtrr and smp
> +    // Initialize mtrr, msr_feature_control and smp
>      mtrr_setup();
> +    msr_feature_control_setup();
>      smp_setup();
>  
>      // Create bios tables
> diff --git a/src/fw/smp.c b/src/fw/smp.c
> index 579acdb..6e706e4 100644
> --- a/src/fw/smp.c
> +++ b/src/fw/smp.c
> @@ -10,7 +10,7 @@
>  #include "output.h" // dprintf
>  #include "romfile.h" // romfile_loadint
>  #include "stacks.h" // yield
> -#include "util.h" // smp_setup
> +#include "util.h" // smp_setup, msr_feature_control_setup
>  #include "x86.h" // wrmsr
>  
>  #define APIC_ICR_LOW ((u8*)BUILD_APIC_ADDR + 0x300)
> @@ -20,20 +20,20 @@
>  
>  #define APIC_ENABLED 0x0100
>  
> -static struct { u32 index; u64 val; } smp_mtrr[32];
> -static u32 smp_mtrr_count;
> +static struct { u32 index; u64 val; } smp_msr[32];
> +static u32 smp_msr_count;
>  
>  void
>  wrmsr_smp(u32 index, u64 val)
>  {
>      wrmsr(index, val);
> -    if (smp_mtrr_count >= ARRAY_SIZE(smp_mtrr)) {
> +    if (smp_msr_count >= ARRAY_SIZE(smp_msr)) {
>          warn_noalloc();
>          return;
>      }
> -    smp_mtrr[smp_mtrr_count].index = index;
> -    smp_mtrr[smp_mtrr_count].val = val;
> -    smp_mtrr_count++;
> +    smp_msr[smp_msr_count].index = index;
> +    smp_msr[smp_msr_count].val = val;
> +    smp_msr_count++;
>  }
>  
>  u32 MaxCountCPUs;
> @@ -58,10 +58,10 @@ handle_smp(void)
>      u8 apic_id = ebx>>24;
>      dprintf(DEBUG_HDL_smp, "handle_smp: apic_id=%d\n", apic_id);
>  
> -    // MTRR setup
> +    // MTRR and MSR_IA32_FEATURE_CONTROL setup
>      int i;
> -    for (i=0; i<smp_mtrr_count; i++)
> -        wrmsr(smp_mtrr[i].index, smp_mtrr[i].val);
> +    for (i=0; i<smp_msr_count; i++)
> +        wrmsr(smp_msr[i].index, smp_msr[i].val);
>  
>      // Set bit on FoundAPICIDs
>      FoundAPICIDs[apic_id/32] |= (1 << (apic_id % 32));
> 

For <https://github.com/tianocore/edk2/issues/97> (i.e., doing the same in OVMF), I've been trying to come up with designs.

The combined requirement that the MSR be written on all CPUs *and* at S3 resume too is a big complication for OVMF. If we can relax either of those (that is, write the MSR on all CPUs but only on cold boot, or else, write the MSR at cold boot and resume too, but only on the BSP), that would be a relief for OVMF.

So I recalled that this patch programmed the MSR on all APs, but I wanted to see if it really did the same on S3 resume. And, I think it doesn't. I tried to construct a "maximal" call tree from the bottom up, and this is all I can come up with:

reset_vector()                              [src/romlayout.S]
  entry_post()                              [src/romlayout.S]
    handle_post()                           [src/post.c]
      dopost()                              [src/post.c]
        reloc_preinit()                     [src/post.c]
          maininit()                        [src/post.c]
            platform_hardware_setup()       [src/post.c]
              qemu_platform_setup()         [src/fw/paravirt.c]
                msr_feature_control_setup() [src/fw/paravirt.c]
                  wrmsr_smp()               [src/fw/smp.c]
                    wrmsr()                 [src/x86.h]       -- on the BSP
                    // + stash in array
                smp_setup()                 [src/fw/smp.c]
                  entry_smp()               [src/romlayout.S] -- on all APs
                    handle_smp()            [src/fw/smp.c]
                      wrmsr() from array    [src/x86.h]

smp_setup() is not reached on the S3 resume path:

reset_vector()                              [src/romlayout.S]
  entry_post()                              [src/romlayout.S]
    entry_resume()                          [src/romlayout.S] <--- on a different branch here
      handle_resume()                       [src/resume.c]
        handle_resume32()                   [src/resume.c]
          s3_resume()                       [src/resume.c]
            ...
            farcall16big()

I also checked "docs/Execution_and_code_flow.md", and my findings seem to be consistent with it.

Now, MSR_IA32_FEATURE_CONTROL not being set on S3 is perfectly fine with me. I just want to avoid implementing something for OVMF that is several times harder than necessary.

So, is the fact (*) that SeaBIOS does not program the MSR on S3 a bug (or missing feature) in SeaBIOS, or is it okay? (For example because the resume code in Linux copies the MSR's value from the BSP to the APs anyway?)

If it is okay, then we should remove the S3 requirement from <https://github.com/tianocore/edk2/issues/97>.

(*) "fact" unless I missed something, of course

Thanks!
Laszlo
Haozhong Zhang July 6, 2016, 1:26 a.m. UTC | #3
On 07/06/16 00:19, Laszlo Ersek wrote:
> On 06/22/16 08:53, Haozhong Zhang wrote:
> > OS usually expects BIOS to set certain bits in MSR_IA32_FEATURE_CONTROL
> > for some features (e.g. VMX and LMCE). QEMU provides a fw_cfg file
> > "etc/msr_feature_control" to advise bits that should be set in
> > MSR_IA32_FEATURE_CONTROL. If this file exists, SeaBIOS will set the
> > advised bits in that MSR.
> > 
> > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> > Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> > ---
> > Changes in v3:
> >  * Move msr_feature_control_setup() to paravirt.c.
> > 
> > Changes in v2:
> >  * Call msr_feature_control_setup() before smp_setup().
> >  * Use wrmsr_smp() instead of wrmsr() on BSP.
> >  * Rename smp_mtrr and smp_mtrr_count to smp_msr and smp_msr_count
> >    as they are not only used for MTRR now.
> > ---
> >  src/fw/paravirt.c | 12 +++++++++++-
> >  src/fw/smp.c      | 20 ++++++++++----------
> >  2 files changed, 21 insertions(+), 11 deletions(-)
> > 
> > diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c
> > index 8ed4380..73a08f0 100644
> > --- a/src/fw/paravirt.c
> > +++ b/src/fw/paravirt.c
> > @@ -131,6 +131,15 @@ qemu_preinit(void)
> >      dprintf(1, "RamSize: 0x%08x [cmos]\n", RamSize);
> >  }
> >  
> > +#define MSR_IA32_FEATURE_CONTROL 0x0000003a
> > +
> > +static void msr_feature_control_setup(void)
> > +{
> > +    u64 feature_control_bits = romfile_loadint("etc/msr_feature_control", 0);
> > +    if (feature_control_bits)
> > +        wrmsr_smp(MSR_IA32_FEATURE_CONTROL, feature_control_bits);
> > +}
> > +
> >  void
> >  qemu_platform_setup(void)
> >  {
> > @@ -149,8 +158,9 @@ qemu_platform_setup(void)
> >      smm_device_setup();
> >      smm_setup();
> >  
> > -    // Initialize mtrr and smp
> > +    // Initialize mtrr, msr_feature_control and smp
> >      mtrr_setup();
> > +    msr_feature_control_setup();
> >      smp_setup();
> >  
> >      // Create bios tables
> > diff --git a/src/fw/smp.c b/src/fw/smp.c
> > index 579acdb..6e706e4 100644
> > --- a/src/fw/smp.c
> > +++ b/src/fw/smp.c
> > @@ -10,7 +10,7 @@
> >  #include "output.h" // dprintf
> >  #include "romfile.h" // romfile_loadint
> >  #include "stacks.h" // yield
> > -#include "util.h" // smp_setup
> > +#include "util.h" // smp_setup, msr_feature_control_setup
> >  #include "x86.h" // wrmsr
> >  
> >  #define APIC_ICR_LOW ((u8*)BUILD_APIC_ADDR + 0x300)
> > @@ -20,20 +20,20 @@
> >  
> >  #define APIC_ENABLED 0x0100
> >  
> > -static struct { u32 index; u64 val; } smp_mtrr[32];
> > -static u32 smp_mtrr_count;
> > +static struct { u32 index; u64 val; } smp_msr[32];
> > +static u32 smp_msr_count;
> >  
> >  void
> >  wrmsr_smp(u32 index, u64 val)
> >  {
> >      wrmsr(index, val);
> > -    if (smp_mtrr_count >= ARRAY_SIZE(smp_mtrr)) {
> > +    if (smp_msr_count >= ARRAY_SIZE(smp_msr)) {
> >          warn_noalloc();
> >          return;
> >      }
> > -    smp_mtrr[smp_mtrr_count].index = index;
> > -    smp_mtrr[smp_mtrr_count].val = val;
> > -    smp_mtrr_count++;
> > +    smp_msr[smp_msr_count].index = index;
> > +    smp_msr[smp_msr_count].val = val;
> > +    smp_msr_count++;
> >  }
> >  
> >  u32 MaxCountCPUs;
> > @@ -58,10 +58,10 @@ handle_smp(void)
> >      u8 apic_id = ebx>>24;
> >      dprintf(DEBUG_HDL_smp, "handle_smp: apic_id=%d\n", apic_id);
> >  
> > -    // MTRR setup
> > +    // MTRR and MSR_IA32_FEATURE_CONTROL setup
> >      int i;
> > -    for (i=0; i<smp_mtrr_count; i++)
> > -        wrmsr(smp_mtrr[i].index, smp_mtrr[i].val);
> > +    for (i=0; i<smp_msr_count; i++)
> > +        wrmsr(smp_msr[i].index, smp_msr[i].val);
> >  
> >      // Set bit on FoundAPICIDs
> >      FoundAPICIDs[apic_id/32] |= (1 << (apic_id % 32));
> > 
> 
> For <https://github.com/tianocore/edk2/issues/97> (i.e., doing the same in OVMF), I've been trying to come up with designs.
> 
> The combined requirement that the MSR be written on all CPUs *and* at S3 resume too is a big complication for OVMF. If we can relax either of those (that is, write the MSR on all CPUs but only on cold boot, or else, write the MSR at cold boot and resume too, but only on the BSP), that would be a relief for OVMF.
> 
> So I recalled that this patch programmed the MSR on all APs, but I wanted to see if it really did the same on S3 resume. And, I think it doesn't. I tried to construct a "maximal" call tree from the bottom up, and this is all I can come up with:
> 
> reset_vector()                              [src/romlayout.S]
>   entry_post()                              [src/romlayout.S]
>     handle_post()                           [src/post.c]
>       dopost()                              [src/post.c]
>         reloc_preinit()                     [src/post.c]
>           maininit()                        [src/post.c]
>             platform_hardware_setup()       [src/post.c]
>               qemu_platform_setup()         [src/fw/paravirt.c]
>                 msr_feature_control_setup() [src/fw/paravirt.c]
>                   wrmsr_smp()               [src/fw/smp.c]
>                     wrmsr()                 [src/x86.h]       -- on the BSP
>                     // + stash in array
>                 smp_setup()                 [src/fw/smp.c]
>                   entry_smp()               [src/romlayout.S] -- on all APs
>                     handle_smp()            [src/fw/smp.c]
>                       wrmsr() from array    [src/x86.h]
> 
> smp_setup() is not reached on the S3 resume path:
> 
> reset_vector()                              [src/romlayout.S]
>   entry_post()                              [src/romlayout.S]
>     entry_resume()                          [src/romlayout.S] <--- on a different branch here
>       handle_resume()                       [src/resume.c]
>         handle_resume32()                   [src/resume.c]
>           s3_resume()                       [src/resume.c]
>             ...
>             farcall16big()
> 
> I also checked "docs/Execution_and_code_flow.md", and my findings seem to be consistent with it.
>

I forgot to restore MSR_IA32_FEATURE_CONTROL in the resume path, and
MSR_IA32_FEATURE_CONTROL is zero after S3 resume.

> Now, MSR_IA32_FEATURE_CONTROL not being set on S3 is perfectly fine with me. I just want to avoid implementing something for OVMF that is several times harder than necessary.
> 
> So, is the fact (*) that SeaBIOS does not program the MSR on S3 a bug (or missing feature) in SeaBIOS, or is it okay? (For example because the resume code in Linux copies the MSR's value from the BSP to the APs anyway?)
>

Not sure if it can be treated as a bug or not.

Not restore MSR_IA32_FEATURE_CONTROL during S3 resume does not affect
at least Linux guest (tested 4.5). Current QEMU may advise the guest
firmware to set bit 20 (for LMCE), bit 2 (for VMX) and bit 0 (lock
bit).
- For LMCE, Linux only checks bit 20 and bit 0 at boot time and then
  keeps using the result even after resume.
- For VMX, if KVM in Linux (guest) finds bit 20 and bit 0 are cleared
  in MSR_IA32_FEATURE_CONTROL while cpuid.1 says VMX is supported, it
  will set those bits. Therefore, KVM just works after resume even
  though MSR_IA32_FEATURE_CONTROL is not properly set by firmware.

I'm not sure whether other operating systems or virtual machine
monitors can work properly in this case. If all of them are fine with
this, I think we can remove the S3 requirement.

Thanks,
Haozhong

> If it is okay, then we should remove the S3 requirement from <https://github.com/tianocore/edk2/issues/97>.
> 
> (*) "fact" unless I missed something, of course
> 
> Thanks!
> Laszlo
Paolo Bonzini July 6, 2016, 6:18 a.m. UTC | #4
> I forgot to restore MSR_IA32_FEATURE_CONTROL in the resume path, and
> MSR_IA32_FEATURE_CONTROL is zero after S3 resume.

This is a bug.  Sorry Laszlo. :)

> Not restore MSR_IA32_FEATURE_CONTROL during S3 resume does not affect
> at least Linux guest (tested 4.5). Current QEMU may advise the guest
> firmware to set bit 20 (for LMCE), bit 2 (for VMX) and bit 0 (lock
> bit).
> - For LMCE, Linux only checks bit 20 and bit 0 at boot time and then
>   keeps using the result even after resume.

On real hardware, LMCE would not be enabled after resume.  I'm not
sure what would happen, but it wouldn't be good.

Paolo
Haozhong Zhang July 6, 2016, 6:28 a.m. UTC | #5
Hi Ashok,

On 07/06/16 02:18, Paolo Bonzini wrote:
> > I forgot to restore MSR_IA32_FEATURE_CONTROL in the resume path, and
> > MSR_IA32_FEATURE_CONTROL is zero after S3 resume.
> 
> This is a bug.  Sorry Laszlo. :)
> 
> > Not restore MSR_IA32_FEATURE_CONTROL during S3 resume does not affect
> > at least Linux guest (tested 4.5). Current QEMU may advise the guest
> > firmware to set bit 20 (for LMCE), bit 2 (for VMX) and bit 0 (lock
> > bit).
> > - For LMCE, Linux only checks bit 20 and bit 0 at boot time and then
> >   keeps using the result even after resume.
> 
> On real hardware, LMCE would not be enabled after resume.  I'm not
> sure what would happen, but it wouldn't be good.

Could you help to check if the LMCE bit in MSR_IA32_FEATURE_CONTROL is
set after S3 resume on the real hardware?

Thanks,
Haozhong
Laszlo Ersek July 6, 2016, 6:42 a.m. UTC | #6
On 07/06/16 08:28, Haozhong Zhang wrote:
> Hi Ashok,
> 
> On 07/06/16 02:18, Paolo Bonzini wrote:
>>> I forgot to restore MSR_IA32_FEATURE_CONTROL in the resume path, and
>>> MSR_IA32_FEATURE_CONTROL is zero after S3 resume.
>>
>> This is a bug.  Sorry Laszlo. :)
>>
>>> Not restore MSR_IA32_FEATURE_CONTROL during S3 resume does not affect
>>> at least Linux guest (tested 4.5). Current QEMU may advise the guest
>>> firmware to set bit 20 (for LMCE), bit 2 (for VMX) and bit 0 (lock
>>> bit).
>>> - For LMCE, Linux only checks bit 20 and bit 0 at boot time and then
>>>   keeps using the result even after resume.
>>
>> On real hardware, LMCE would not be enabled after resume.  I'm not
>> sure what would happen, but it wouldn't be good.
> 
> Could you help to check if the LMCE bit in MSR_IA32_FEATURE_CONTROL is
> set after S3 resume on the real hardware?

The SDM says that IA32_FEATURE_CONTROL is zeroed on logical processor reset.

  23.7 ENABLING AND ENTERING VMX OPERATION

  [...] VMXON is also controlled by the IA32_FEATURE_CONTROL MSR (MSR
  address 3AH). This MSR is cleared to zero when a logical processor is
  reset. [...]

Thanks
Laszlo
Haozhong Zhang July 6, 2016, 6:49 a.m. UTC | #7
On 07/06/16 08:42, Laszlo Ersek wrote:
> On 07/06/16 08:28, Haozhong Zhang wrote:
> > Hi Ashok,
> > 
> > On 07/06/16 02:18, Paolo Bonzini wrote:
> >>> I forgot to restore MSR_IA32_FEATURE_CONTROL in the resume path, and
> >>> MSR_IA32_FEATURE_CONTROL is zero after S3 resume.
> >>
> >> This is a bug.  Sorry Laszlo. :)
> >>
> >>> Not restore MSR_IA32_FEATURE_CONTROL during S3 resume does not affect
> >>> at least Linux guest (tested 4.5). Current QEMU may advise the guest
> >>> firmware to set bit 20 (for LMCE), bit 2 (for VMX) and bit 0 (lock
> >>> bit).
> >>> - For LMCE, Linux only checks bit 20 and bit 0 at boot time and then
> >>>   keeps using the result even after resume.
> >>
> >> On real hardware, LMCE would not be enabled after resume.  I'm not
> >> sure what would happen, but it wouldn't be good.
> > 
> > Could you help to check if the LMCE bit in MSR_IA32_FEATURE_CONTROL is
> > set after S3 resume on the real hardware?
> 
> The SDM says that IA32_FEATURE_CONTROL is zeroed on logical processor reset.
> 
>   23.7 ENABLING AND ENTERING VMX OPERATION
> 
>   [...] VMXON is also controlled by the IA32_FEATURE_CONTROL MSR (MSR
>   address 3AH). This MSR is cleared to zero when a logical processor is
>   reset. [...]
> 

Ah, I missed a bit in my question. I meant to check whether the
firmware on the real machine sets the LMCE bit and other necessary
bits in MSR_IA32_FEATURE_CONTROL after S3 resume.
Laszlo Ersek July 6, 2016, 7:44 a.m. UTC | #8
On 07/06/16 08:49, Haozhong Zhang wrote:
> On 07/06/16 08:42, Laszlo Ersek wrote:
>> On 07/06/16 08:28, Haozhong Zhang wrote:
>>> Hi Ashok,
>>>
>>> On 07/06/16 02:18, Paolo Bonzini wrote:
>>>>> I forgot to restore MSR_IA32_FEATURE_CONTROL in the resume path, and
>>>>> MSR_IA32_FEATURE_CONTROL is zero after S3 resume.
>>>>
>>>> This is a bug.  Sorry Laszlo. :)
>>>>
>>>>> Not restore MSR_IA32_FEATURE_CONTROL during S3 resume does not affect
>>>>> at least Linux guest (tested 4.5). Current QEMU may advise the guest
>>>>> firmware to set bit 20 (for LMCE), bit 2 (for VMX) and bit 0 (lock
>>>>> bit).
>>>>> - For LMCE, Linux only checks bit 20 and bit 0 at boot time and then
>>>>>   keeps using the result even after resume.
>>>>
>>>> On real hardware, LMCE would not be enabled after resume.  I'm not
>>>> sure what would happen, but it wouldn't be good.
>>>
>>> Could you help to check if the LMCE bit in MSR_IA32_FEATURE_CONTROL is
>>> set after S3 resume on the real hardware?
>>
>> The SDM says that IA32_FEATURE_CONTROL is zeroed on logical processor reset.
>>
>>   23.7 ENABLING AND ENTERING VMX OPERATION
>>
>>   [...] VMXON is also controlled by the IA32_FEATURE_CONTROL MSR (MSR
>>   address 3AH). This MSR is cleared to zero when a logical processor is
>>   reset. [...]
>>
> 
> Ah, I missed a bit in my question. I meant to check whether the
> firmware on the real machine sets the LMCE bit and other necessary
> bits in MSR_IA32_FEATURE_CONTROL after S3 resume.
> 

I attached a minimal kernel module (reproducer / tester) to the github
issue here:

https://github.com/tianocore/edk2/issues/97#issuecomment-230697897

We can use it for both testing the feature in guests, and for querying
the MSR on physical machines.

Specifically on my ThinkPad W541, the MSR has value 0x5:

[ 2885.877339] MSR 0x3a on CPU 0: 0x5
[ 2908.151693] MSR 0x3a on CPU 1: 0x5

which, according to

#define FEATURE_CONTROL_LOCKED				(1<<0)
#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX	(1<<1)
#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX	(1<<2)
#define FEATURE_CONTROL_LMCE				(1<<20)

corresponds to

(FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX).

FEATURE_CONTROL_LMCE is not set at all. (I didn't do any S3 cycles in my
current laptopt boot.)

My CPU is i7-4810MQ. It's a pretty modern laptop, so I think it is
capable of LMCE, hardware-wise (if LMCE is hw-dependent, to begin with).

In order to verify if my laptop was indeed capable of LCME, I read the
IA32_MCG_CAP MSR as well:

# insmod ./rdmsr.ko msr=0x00000179
MSR 0x179 on CPU 0: 0xc09

"MCG_LMCE_P" is bit 27 (value 0x8000000). So, apparently, I was wrong;
my laptop does not support LMCE, and it's not surprising the BIOS
doesn't set the LCME bit in the feature control MSR :)

Anyway I think you should be able to use the kernel module for
experimenting with MSRs on other hosts.

Thanks
Laszlo
Haozhong Zhang July 6, 2016, 8:48 a.m. UTC | #9
On 07/06/16 09:44, Laszlo Ersek wrote:
> On 07/06/16 08:49, Haozhong Zhang wrote:
> > On 07/06/16 08:42, Laszlo Ersek wrote:
> >> On 07/06/16 08:28, Haozhong Zhang wrote:
> >>> Hi Ashok,
> >>>
> >>> On 07/06/16 02:18, Paolo Bonzini wrote:
> >>>>> I forgot to restore MSR_IA32_FEATURE_CONTROL in the resume path, and
> >>>>> MSR_IA32_FEATURE_CONTROL is zero after S3 resume.
> >>>>
> >>>> This is a bug.  Sorry Laszlo. :)
> >>>>
> >>>>> Not restore MSR_IA32_FEATURE_CONTROL during S3 resume does not affect
> >>>>> at least Linux guest (tested 4.5). Current QEMU may advise the guest
> >>>>> firmware to set bit 20 (for LMCE), bit 2 (for VMX) and bit 0 (lock
> >>>>> bit).
> >>>>> - For LMCE, Linux only checks bit 20 and bit 0 at boot time and then
> >>>>>   keeps using the result even after resume.
> >>>>
> >>>> On real hardware, LMCE would not be enabled after resume.  I'm not
> >>>> sure what would happen, but it wouldn't be good.
> >>>
> >>> Could you help to check if the LMCE bit in MSR_IA32_FEATURE_CONTROL is
> >>> set after S3 resume on the real hardware?
> >>
> >> The SDM says that IA32_FEATURE_CONTROL is zeroed on logical processor reset.
> >>
> >>   23.7 ENABLING AND ENTERING VMX OPERATION
> >>
> >>   [...] VMXON is also controlled by the IA32_FEATURE_CONTROL MSR (MSR
> >>   address 3AH). This MSR is cleared to zero when a logical processor is
> >>   reset. [...]
> >>
> > 
> > Ah, I missed a bit in my question. I meant to check whether the
> > firmware on the real machine sets the LMCE bit and other necessary
> > bits in MSR_IA32_FEATURE_CONTROL after S3 resume.
> > 
> 
> I attached a minimal kernel module (reproducer / tester) to the github
> issue here:
> 
> https://github.com/tianocore/edk2/issues/97#issuecomment-230697897
> 
> We can use it for both testing the feature in guests, and for querying
> the MSR on physical machines.
>

Thanks for the kernel module! In fact, I'm using the rdmsr command
provided by msr-tools (which is included in some Linux distros).

> Specifically on my ThinkPad W541, the MSR has value 0x5:
> 
> [ 2885.877339] MSR 0x3a on CPU 0: 0x5
> [ 2908.151693] MSR 0x3a on CPU 1: 0x5
> 
> which, according to
> 
> #define FEATURE_CONTROL_LOCKED				(1<<0)
> #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX	(1<<1)
> #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX	(1<<2)
> #define FEATURE_CONTROL_LMCE				(1<<20)
> 
> corresponds to
> 
> (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX).
> 
> FEATURE_CONTROL_LMCE is not set at all. (I didn't do any S3 cycles in my
> current laptopt boot.)
> 
> My CPU is i7-4810MQ. It's a pretty modern laptop, so I think it is
> capable of LMCE, hardware-wise (if LMCE is hw-dependent, to begin with).
>

LMCE is supported only by server CPUs (Skylake-EX or later) and not
available on desktop CPUs.

For test in guest, you could follow what I added in "How to verify?"
https://github.com/tianocore/edk2/issues/97.

Thanks,
Haozhong

> In order to verify if my laptop was indeed capable of LCME, I read the
> IA32_MCG_CAP MSR as well:
> 
> # insmod ./rdmsr.ko msr=0x00000179
> MSR 0x179 on CPU 0: 0xc09
> 
> "MCG_LMCE_P" is bit 27 (value 0x8000000). So, apparently, I was wrong;
> my laptop does not support LMCE, and it's not surprising the BIOS
> doesn't set the LCME bit in the feature control MSR :)
> 
> Anyway I think you should be able to use the kernel module for
> experimenting with MSRs on other hosts.
> 
> Thanks
> Laszlo
>
Laszlo Ersek July 6, 2016, 9 a.m. UTC | #10
On 07/06/16 10:48, Haozhong Zhang wrote:
> On 07/06/16 09:44, Laszlo Ersek wrote:
>> On 07/06/16 08:49, Haozhong Zhang wrote:
>>> On 07/06/16 08:42, Laszlo Ersek wrote:
>>>> On 07/06/16 08:28, Haozhong Zhang wrote:
>>>>> Hi Ashok,
>>>>>
>>>>> On 07/06/16 02:18, Paolo Bonzini wrote:
>>>>>>> I forgot to restore MSR_IA32_FEATURE_CONTROL in the resume path, and
>>>>>>> MSR_IA32_FEATURE_CONTROL is zero after S3 resume.
>>>>>>
>>>>>> This is a bug.  Sorry Laszlo. :)
>>>>>>
>>>>>>> Not restore MSR_IA32_FEATURE_CONTROL during S3 resume does not affect
>>>>>>> at least Linux guest (tested 4.5). Current QEMU may advise the guest
>>>>>>> firmware to set bit 20 (for LMCE), bit 2 (for VMX) and bit 0 (lock
>>>>>>> bit).
>>>>>>> - For LMCE, Linux only checks bit 20 and bit 0 at boot time and then
>>>>>>>   keeps using the result even after resume.
>>>>>>
>>>>>> On real hardware, LMCE would not be enabled after resume.  I'm not
>>>>>> sure what would happen, but it wouldn't be good.
>>>>>
>>>>> Could you help to check if the LMCE bit in MSR_IA32_FEATURE_CONTROL is
>>>>> set after S3 resume on the real hardware?
>>>>
>>>> The SDM says that IA32_FEATURE_CONTROL is zeroed on logical processor reset.
>>>>
>>>>   23.7 ENABLING AND ENTERING VMX OPERATION
>>>>
>>>>   [...] VMXON is also controlled by the IA32_FEATURE_CONTROL MSR (MSR
>>>>   address 3AH). This MSR is cleared to zero when a logical processor is
>>>>   reset. [...]
>>>>
>>>
>>> Ah, I missed a bit in my question. I meant to check whether the
>>> firmware on the real machine sets the LMCE bit and other necessary
>>> bits in MSR_IA32_FEATURE_CONTROL after S3 resume.
>>>
>>
>> I attached a minimal kernel module (reproducer / tester) to the github
>> issue here:
>>
>> https://github.com/tianocore/edk2/issues/97#issuecomment-230697897
>>
>> We can use it for both testing the feature in guests, and for querying
>> the MSR on physical machines.
>>
> 
> Thanks for the kernel module! In fact, I'm using the rdmsr command
> provided by msr-tools (which is included in some Linux distros).

Welp, that package is available on RHEL-7 too (from EPEL), I just didn't
have it installed. (I guessed that the command would be called "rdmsr",
and tried to run it, but when that failed, I didn't look for any packages.)

>> Specifically on my ThinkPad W541, the MSR has value 0x5:
>>
>> [ 2885.877339] MSR 0x3a on CPU 0: 0x5
>> [ 2908.151693] MSR 0x3a on CPU 1: 0x5
>>
>> which, according to
>>
>> #define FEATURE_CONTROL_LOCKED				(1<<0)
>> #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX	(1<<1)
>> #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX	(1<<2)
>> #define FEATURE_CONTROL_LMCE				(1<<20)
>>
>> corresponds to
>>
>> (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX).
>>
>> FEATURE_CONTROL_LMCE is not set at all. (I didn't do any S3 cycles in my
>> current laptopt boot.)
>>
>> My CPU is i7-4810MQ. It's a pretty modern laptop, so I think it is
>> capable of LMCE, hardware-wise (if LMCE is hw-dependent, to begin with).
>>
> 
> LMCE is supported only by server CPUs (Skylake-EX or later) and not
> available on desktop CPUs.
> 
> For test in guest, you could follow what I added in "How to verify?"
> https://github.com/tianocore/edk2/issues/97.

Great, apparently I can't even read. Those steps are in the first
comment. :/

Thanks
Laszlo
Laszlo Ersek July 6, 2016, 9:05 a.m. UTC | #11
On 07/06/16 11:00, Laszlo Ersek wrote:
> On 07/06/16 10:48, Haozhong Zhang wrote:

>> For test in guest, you could follow what I added in "How to verify?"
>> https://github.com/tianocore/edk2/issues/97.
> 
> Great, apparently I can't even read. Those steps are in the first
> comment. :/

Ah wait, you must have edited that comment in-place, just now. I checked
my github issue mail, and the first version of that comment didn't
contain the verification steps.

I absolutely hate that github allows in-place comment edits (without
sending them out in separate emails). Bugzilla doesn't allow this
(unless a user has extra privileges).

So, thank you very much for the verification steps :), and next time
please add them in a separate comment. Otherwise I might not find them :)

Thanks!
Laszlo
Haozhong Zhang July 6, 2016, 9:07 a.m. UTC | #12
On 07/06/16 11:00, Laszlo Ersek wrote:
> On 07/06/16 10:48, Haozhong Zhang wrote:
> > On 07/06/16 09:44, Laszlo Ersek wrote:
> >> On 07/06/16 08:49, Haozhong Zhang wrote:
> >>> On 07/06/16 08:42, Laszlo Ersek wrote:
> >>>> On 07/06/16 08:28, Haozhong Zhang wrote:
> >>>>> Hi Ashok,
> >>>>>
> >>>>> On 07/06/16 02:18, Paolo Bonzini wrote:
> >>>>>>> I forgot to restore MSR_IA32_FEATURE_CONTROL in the resume path, and
> >>>>>>> MSR_IA32_FEATURE_CONTROL is zero after S3 resume.
> >>>>>>
> >>>>>> This is a bug.  Sorry Laszlo. :)
> >>>>>>
> >>>>>>> Not restore MSR_IA32_FEATURE_CONTROL during S3 resume does not affect
> >>>>>>> at least Linux guest (tested 4.5). Current QEMU may advise the guest
> >>>>>>> firmware to set bit 20 (for LMCE), bit 2 (for VMX) and bit 0 (lock
> >>>>>>> bit).
> >>>>>>> - For LMCE, Linux only checks bit 20 and bit 0 at boot time and then
> >>>>>>>   keeps using the result even after resume.
> >>>>>>
> >>>>>> On real hardware, LMCE would not be enabled after resume.  I'm not
> >>>>>> sure what would happen, but it wouldn't be good.
> >>>>>
> >>>>> Could you help to check if the LMCE bit in MSR_IA32_FEATURE_CONTROL is
> >>>>> set after S3 resume on the real hardware?
> >>>>
> >>>> The SDM says that IA32_FEATURE_CONTROL is zeroed on logical processor reset.
> >>>>
> >>>>   23.7 ENABLING AND ENTERING VMX OPERATION
> >>>>
> >>>>   [...] VMXON is also controlled by the IA32_FEATURE_CONTROL MSR (MSR
> >>>>   address 3AH). This MSR is cleared to zero when a logical processor is
> >>>>   reset. [...]
> >>>>
> >>>
> >>> Ah, I missed a bit in my question. I meant to check whether the
> >>> firmware on the real machine sets the LMCE bit and other necessary
> >>> bits in MSR_IA32_FEATURE_CONTROL after S3 resume.
> >>>
> >>
> >> I attached a minimal kernel module (reproducer / tester) to the github
> >> issue here:
> >>
> >> https://github.com/tianocore/edk2/issues/97#issuecomment-230697897
> >>
> >> We can use it for both testing the feature in guests, and for querying
> >> the MSR on physical machines.
> >>
> > 
> > Thanks for the kernel module! In fact, I'm using the rdmsr command
> > provided by msr-tools (which is included in some Linux distros).
> 
> Welp, that package is available on RHEL-7 too (from EPEL), I just didn't
> have it installed. (I guessed that the command would be called "rdmsr",
> and tried to run it, but when that failed, I didn't look for any packages.)
> 
> >> Specifically on my ThinkPad W541, the MSR has value 0x5:
> >>
> >> [ 2885.877339] MSR 0x3a on CPU 0: 0x5
> >> [ 2908.151693] MSR 0x3a on CPU 1: 0x5
> >>
> >> which, according to
> >>
> >> #define FEATURE_CONTROL_LOCKED				(1<<0)
> >> #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX	(1<<1)
> >> #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX	(1<<2)
> >> #define FEATURE_CONTROL_LMCE				(1<<20)
> >>
> >> corresponds to
> >>
> >> (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX).
> >>
> >> FEATURE_CONTROL_LMCE is not set at all. (I didn't do any S3 cycles in my
> >> current laptopt boot.)
> >>
> >> My CPU is i7-4810MQ. It's a pretty modern laptop, so I think it is
> >> capable of LMCE, hardware-wise (if LMCE is hw-dependent, to begin with).
> >>
> > 
> > LMCE is supported only by server CPUs (Skylake-EX or later) and not
> > available on desktop CPUs.
> > 
> > For test in guest, you could follow what I added in "How to verify?"
> > https://github.com/tianocore/edk2/issues/97.
> 
> Great, apparently I can't even read. Those steps are in the first
> comment. :/
>

I supposed every modification in github issues would trigger mail
notifications to others, but apparently it does not. I'll take care in
the future.

Haozhong
Laszlo Ersek July 6, 2016, 9:13 a.m. UTC | #13
On 07/06/16 11:07, Haozhong Zhang wrote:

> I supposed every modification in github issues would trigger mail
> notifications to others, but apparently it does not. I'll take care in
> the future.

That's actually a valid assumption, given that Bugzilla is sane like
that, for example. GitHub perpetrates even worse atrocities: people
don't get emails about their own comments nor metadata changes (labels
etc). github's email integration is practically unusable. A bugzilla
instance for edk2 has been in the making for a while now, but it's not
ready just yet. I can't wait.

Thanks!
Laszlo
Laszlo Ersek July 6, 2016, 11:04 a.m. UTC | #14
On 07/06/16 08:42, Laszlo Ersek wrote:
> On 07/06/16 08:28, Haozhong Zhang wrote:
>> Hi Ashok,
>>
>> On 07/06/16 02:18, Paolo Bonzini wrote:
>>>> I forgot to restore MSR_IA32_FEATURE_CONTROL in the resume path, and
>>>> MSR_IA32_FEATURE_CONTROL is zero after S3 resume.
>>>
>>> This is a bug.  Sorry Laszlo. :)
>>>
>>>> Not restore MSR_IA32_FEATURE_CONTROL during S3 resume does not affect
>>>> at least Linux guest (tested 4.5). Current QEMU may advise the guest
>>>> firmware to set bit 20 (for LMCE), bit 2 (for VMX) and bit 0 (lock
>>>> bit).
>>>> - For LMCE, Linux only checks bit 20 and bit 0 at boot time and then
>>>>   keeps using the result even after resume.
>>>
>>> On real hardware, LMCE would not be enabled after resume.  I'm not
>>> sure what would happen, but it wouldn't be good.
>>
>> Could you help to check if the LMCE bit in MSR_IA32_FEATURE_CONTROL is
>> set after S3 resume on the real hardware?
> 
> The SDM says that IA32_FEATURE_CONTROL is zeroed on logical processor reset.
> 
>   23.7 ENABLING AND ENTERING VMX OPERATION
> 
>   [...] VMXON is also controlled by the IA32_FEATURE_CONTROL MSR (MSR
>   address 3AH). This MSR is cleared to zero when a logical processor is
>   reset. [...]

Actually, I think there is a bug in KVM at the moment. I ran the
following test:

- modified OVMF to set the MSR to value 0x5 on just the BSP
- booted an i440fx and a Q35 (SMM-enabled) OVMF guest
- checked "rdmsr -a 0x3a" in both
- ran "pm-suspend" in both guests, woke them
- repeated the rdmsr command

The result is that the BSP had the 0x5 MSR value both after cold boot
and after S3 resume. So, KVM does not seem to implement clearing of the MSR.

I checked kvm/next (currently at
196f20ca52e8c7281932663c348fa54b82d03914), and vmx_vcpu_reset() does not
seem to zero vmx->msr_ia32_feature_control.

Now, what I absolutely can't tell you is whether this zeroing should
happen regardless of "init_event", or just for a specific value of
"init_event". Whenever I look at "init_event", I have to track it down
to the commit that added it, then locate all the commits that fixed it,
then guess whether the SDM language "logical processor reset" implies a
specific "init_event" value or not. So, I have no idea about "init_event".

Either way, why it is important to me: the edk2 modules that are built
into OVMF use loads of INIT-SIPI-SIPI sequences, for implementing
multiprocessing in the firmware (regardless of the feature at hand). For
example, EFI_MP_SERVICES_PROTOCOL.StartupAllAps() boots all APs with a
programmer-specified routine, after which the APs are HLTed. (Until the
next such call.) INIT-SIPI-SIPI is used by the implementation for
booting the APs.

So I need to know whether those INIT-SIPI-SIPI sequences are supposed to
clear the MSR -- in other words, whether I have to write patches that
explicitly sustain the MSR across these IPIs.

If the answer is "yes", then I would like to ask for a KVM patch that
zeroes vmx->msr_ia32_feature_control "under the right circumstances".

I can only attempt to re-program the MSR in all the necessary places in
edk2 if I can trust that KVM will clear the MSR (visibly to the "rdmsr"
command in the guest) as long as I miss at least one location in edk2.

Thanks!
Laszlo
Haozhong Zhang July 6, 2016, 1:03 p.m. UTC | #15
On 07/06/16 13:04, Laszlo Ersek wrote:
> On 07/06/16 08:42, Laszlo Ersek wrote:
> > On 07/06/16 08:28, Haozhong Zhang wrote:
> >> Hi Ashok,
> >>
> >> On 07/06/16 02:18, Paolo Bonzini wrote:
> >>>> I forgot to restore MSR_IA32_FEATURE_CONTROL in the resume path, and
> >>>> MSR_IA32_FEATURE_CONTROL is zero after S3 resume.
> >>>
> >>> This is a bug.  Sorry Laszlo. :)
> >>>
> >>>> Not restore MSR_IA32_FEATURE_CONTROL during S3 resume does not affect
> >>>> at least Linux guest (tested 4.5). Current QEMU may advise the guest
> >>>> firmware to set bit 20 (for LMCE), bit 2 (for VMX) and bit 0 (lock
> >>>> bit).
> >>>> - For LMCE, Linux only checks bit 20 and bit 0 at boot time and then
> >>>>   keeps using the result even after resume.
> >>>
> >>> On real hardware, LMCE would not be enabled after resume.  I'm not
> >>> sure what would happen, but it wouldn't be good.
> >>
> >> Could you help to check if the LMCE bit in MSR_IA32_FEATURE_CONTROL is
> >> set after S3 resume on the real hardware?
> > 
> > The SDM says that IA32_FEATURE_CONTROL is zeroed on logical processor reset.
> > 
> >   23.7 ENABLING AND ENTERING VMX OPERATION
> > 
> >   [...] VMXON is also controlled by the IA32_FEATURE_CONTROL MSR (MSR
> >   address 3AH). This MSR is cleared to zero when a logical processor is
> >   reset. [...]
> 
> Actually, I think there is a bug in KVM at the moment. I ran the
> following test:
> 
> - modified OVMF to set the MSR to value 0x5 on just the BSP
> - booted an i440fx and a Q35 (SMM-enabled) OVMF guest
> - checked "rdmsr -a 0x3a" in both
> - ran "pm-suspend" in both guests, woke them
> - repeated the rdmsr command
> 
> The result is that the BSP had the 0x5 MSR value both after cold boot
> and after S3 resume. So, KVM does not seem to implement clearing of the MSR.
>

Interesting result, is setting MSR on BSP also called after S3 resume?
I went through your test steps with OVMF replaced by a modified
SeaBIOS which only sets MSR_IA32_FEATURE_CONTROL on BSP at boot time,
the result before S3 resume is the value on BSP is 5 and others are 0,
and the result after S3 resume is values on all CPUs are 0.

> I checked kvm/next (currently at
> 196f20ca52e8c7281932663c348fa54b82d03914), and vmx_vcpu_reset() does not
> seem to zero vmx->msr_ia32_feature_control.
>

The function reset cpu state in QEMU after S3 resume is
x86_cpu_reset(CPUState *s) in target-i386/cpu.c which is called for
all vcpus and does

    memset(env, 0, offsetof(CPUX86State, cpuid_level));

CPUX86State.msr_ia32_feature_control is before .cpuid_level, so guest
MSR_IA32_FEATURE_CONTROL on all vcpus should be zero after S3 resume.

Thanks,
Haozhong

> Now, what I absolutely can't tell you is whether this zeroing should
> happen regardless of "init_event", or just for a specific value of
> "init_event". Whenever I look at "init_event", I have to track it down
> to the commit that added it, then locate all the commits that fixed it,
> then guess whether the SDM language "logical processor reset" implies a
> specific "init_event" value or not. So, I have no idea about "init_event".
> 
> Either way, why it is important to me: the edk2 modules that are built
> into OVMF use loads of INIT-SIPI-SIPI sequences, for implementing
> multiprocessing in the firmware (regardless of the feature at hand). For
> example, EFI_MP_SERVICES_PROTOCOL.StartupAllAps() boots all APs with a
> programmer-specified routine, after which the APs are HLTed. (Until the
> next such call.) INIT-SIPI-SIPI is used by the implementation for
> booting the APs.
> 
> So I need to know whether those INIT-SIPI-SIPI sequences are supposed to
> clear the MSR -- in other words, whether I have to write patches that
> explicitly sustain the MSR across these IPIs.
> 
> If the answer is "yes", then I would like to ask for a KVM patch that
> zeroes vmx->msr_ia32_feature_control "under the right circumstances".
> 
> I can only attempt to re-program the MSR in all the necessary places in
> edk2 if I can trust that KVM will clear the MSR (visibly to the "rdmsr"
> command in the guest) as long as I miss at least one location in edk2.
> 
> Thanks!
> Laszlo
>
Laszlo Ersek July 6, 2016, 3:32 p.m. UTC | #16
On 07/06/16 15:03, Haozhong Zhang wrote:
> On 07/06/16 13:04, Laszlo Ersek wrote:
>> On 07/06/16 08:42, Laszlo Ersek wrote:
>>> On 07/06/16 08:28, Haozhong Zhang wrote:
>>>> Hi Ashok,
>>>>
>>>> On 07/06/16 02:18, Paolo Bonzini wrote:
>>>>>> I forgot to restore MSR_IA32_FEATURE_CONTROL in the resume path, and
>>>>>> MSR_IA32_FEATURE_CONTROL is zero after S3 resume.
>>>>>
>>>>> This is a bug.  Sorry Laszlo. :)
>>>>>
>>>>>> Not restore MSR_IA32_FEATURE_CONTROL during S3 resume does not affect
>>>>>> at least Linux guest (tested 4.5). Current QEMU may advise the guest
>>>>>> firmware to set bit 20 (for LMCE), bit 2 (for VMX) and bit 0 (lock
>>>>>> bit).
>>>>>> - For LMCE, Linux only checks bit 20 and bit 0 at boot time and then
>>>>>>   keeps using the result even after resume.
>>>>>
>>>>> On real hardware, LMCE would not be enabled after resume.  I'm not
>>>>> sure what would happen, but it wouldn't be good.
>>>>
>>>> Could you help to check if the LMCE bit in MSR_IA32_FEATURE_CONTROL is
>>>> set after S3 resume on the real hardware?
>>>
>>> The SDM says that IA32_FEATURE_CONTROL is zeroed on logical processor reset.
>>>
>>>   23.7 ENABLING AND ENTERING VMX OPERATION
>>>
>>>   [...] VMXON is also controlled by the IA32_FEATURE_CONTROL MSR (MSR
>>>   address 3AH). This MSR is cleared to zero when a logical processor is
>>>   reset. [...]
>>
>> Actually, I think there is a bug in KVM at the moment. I ran the
>> following test:
>>
>> - modified OVMF to set the MSR to value 0x5 on just the BSP
>> - booted an i440fx and a Q35 (SMM-enabled) OVMF guest
>> - checked "rdmsr -a 0x3a" in both
>> - ran "pm-suspend" in both guests, woke them
>> - repeated the rdmsr command
>>
>> The result is that the BSP had the 0x5 MSR value both after cold boot
>> and after S3 resume. So, KVM does not seem to implement clearing of the MSR.
>>
> 
> Interesting result, is setting MSR on BSP also called after S3 resume?

Yes. Henceforth my middle name should be "bumblebee", because today I've
been bumbling from error to error.

In short, I messed up my ad-hoc OVMF patch, and added the wrmsr to a
location that is on both the normal boot path and the S3 resume path. I
fixed the patch and now I get the same result as you -- the MSR is
indeed clear after S3 resume.

> I went through your test steps with OVMF replaced by a modified
> SeaBIOS which only sets MSR_IA32_FEATURE_CONTROL on BSP at boot time,
> the result before S3 resume is the value on BSP is 5 and others are 0,
> and the result after S3 resume is values on all CPUs are 0.

Right.

>> I checked kvm/next (currently at
>> 196f20ca52e8c7281932663c348fa54b82d03914), and vmx_vcpu_reset() does not
>> seem to zero vmx->msr_ia32_feature_control.
>>
> 
> The function reset cpu state in QEMU after S3 resume is
> x86_cpu_reset(CPUState *s) in target-i386/cpu.c which is called for
> all vcpus and does
> 
>     memset(env, 0, offsetof(CPUX86State, cpuid_level));
> 
> CPUX86State.msr_ia32_feature_control is before .cpuid_level, so guest
> MSR_IA32_FEATURE_CONTROL on all vcpus should be zero after S3 resume.

Thank you for the analysis and sorry about the noise.

Laszlo
diff mbox

Patch

diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c
index 8ed4380..73a08f0 100644
--- a/src/fw/paravirt.c
+++ b/src/fw/paravirt.c
@@ -131,6 +131,15 @@  qemu_preinit(void)
     dprintf(1, "RamSize: 0x%08x [cmos]\n", RamSize);
 }
 
+#define MSR_IA32_FEATURE_CONTROL 0x0000003a
+
+static void msr_feature_control_setup(void)
+{
+    u64 feature_control_bits = romfile_loadint("etc/msr_feature_control", 0);
+    if (feature_control_bits)
+        wrmsr_smp(MSR_IA32_FEATURE_CONTROL, feature_control_bits);
+}
+
 void
 qemu_platform_setup(void)
 {
@@ -149,8 +158,9 @@  qemu_platform_setup(void)
     smm_device_setup();
     smm_setup();
 
-    // Initialize mtrr and smp
+    // Initialize mtrr, msr_feature_control and smp
     mtrr_setup();
+    msr_feature_control_setup();
     smp_setup();
 
     // Create bios tables
diff --git a/src/fw/smp.c b/src/fw/smp.c
index 579acdb..6e706e4 100644
--- a/src/fw/smp.c
+++ b/src/fw/smp.c
@@ -10,7 +10,7 @@ 
 #include "output.h" // dprintf
 #include "romfile.h" // romfile_loadint
 #include "stacks.h" // yield
-#include "util.h" // smp_setup
+#include "util.h" // smp_setup, msr_feature_control_setup
 #include "x86.h" // wrmsr
 
 #define APIC_ICR_LOW ((u8*)BUILD_APIC_ADDR + 0x300)
@@ -20,20 +20,20 @@ 
 
 #define APIC_ENABLED 0x0100
 
-static struct { u32 index; u64 val; } smp_mtrr[32];
-static u32 smp_mtrr_count;
+static struct { u32 index; u64 val; } smp_msr[32];
+static u32 smp_msr_count;
 
 void
 wrmsr_smp(u32 index, u64 val)
 {
     wrmsr(index, val);
-    if (smp_mtrr_count >= ARRAY_SIZE(smp_mtrr)) {
+    if (smp_msr_count >= ARRAY_SIZE(smp_msr)) {
         warn_noalloc();
         return;
     }
-    smp_mtrr[smp_mtrr_count].index = index;
-    smp_mtrr[smp_mtrr_count].val = val;
-    smp_mtrr_count++;
+    smp_msr[smp_msr_count].index = index;
+    smp_msr[smp_msr_count].val = val;
+    smp_msr_count++;
 }
 
 u32 MaxCountCPUs;
@@ -58,10 +58,10 @@  handle_smp(void)
     u8 apic_id = ebx>>24;
     dprintf(DEBUG_HDL_smp, "handle_smp: apic_id=%d\n", apic_id);
 
-    // MTRR setup
+    // MTRR and MSR_IA32_FEATURE_CONTROL setup
     int i;
-    for (i=0; i<smp_mtrr_count; i++)
-        wrmsr(smp_mtrr[i].index, smp_mtrr[i].val);
+    for (i=0; i<smp_msr_count; i++)
+        wrmsr(smp_msr[i].index, smp_msr[i].val);
 
     // Set bit on FoundAPICIDs
     FoundAPICIDs[apic_id/32] |= (1 << (apic_id % 32));