diff mbox series

[v4,2/7] x86/hyperv: setup hypercall page

Message ID 20200122202343.5703-3-liuwe@microsoft.com (mailing list archive)
State New, archived
Headers show
Series More Hyper-V infrastructure | expand

Commit Message

Wei Liu Jan. 22, 2020, 8:23 p.m. UTC
Use the top-most addressable page for that purpose. Adjust e820 code
accordingly.

We also need to register Xen's guest OS ID to Hyper-V. Use 0x300 as the
OS type.

Signed-off-by: Wei Liu <liuwe@microsoft.com>
---
XXX the decision on Xen's vendor ID is pending.

v4:
1. Use fixmap
2. Follow routines listed in TLFS
---
 xen/arch/x86/e820.c                     | 41 +++++++++++++++----
 xen/arch/x86/guest/hyperv/hyperv.c      | 53 +++++++++++++++++++++++--
 xen/include/asm-x86/guest/hyperv-tlfs.h |  5 ++-
 3 files changed, 86 insertions(+), 13 deletions(-)

Comments

Andrew Cooper Jan. 22, 2020, 9:31 p.m. UTC | #1
On 22/01/2020 20:23, Wei Liu wrote:
> Use the top-most addressable page for that purpose. Adjust e820 code
> accordingly.
>
> We also need to register Xen's guest OS ID to Hyper-V. Use 0x300 as the
> OS type.
>
> Signed-off-by: Wei Liu <liuwe@microsoft.com>
> ---
> XXX the decision on Xen's vendor ID is pending.

Presumably this is pending a published update to the TLFS?  (And I
presume using 0x8088 is out of the question?  That is an X in the bottom
byte, not a reference to an 8 bit microprocessor.)

> diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c
> index 082f9928a1..5a4ef27a0b 100644
> --- a/xen/arch/x86/e820.c
> +++ b/xen/arch/x86/e820.c
> @@ -36,6 +36,22 @@ boolean_param("e820-verbose", e820_verbose);
> @@ -357,6 +373,21 @@ static unsigned long __init find_max_pfn(void)
>              max_pfn = end;
>      }
>  
> +#ifdef CONFIG_HYPERV_GUEST
> +    {
> +	/*
> +	 * We reserve the top-most page for hypercall page. Adjust
> +	 * max_pfn if necessary.

It might be worth leaving a "TODO: Better algorithm/guess?" here.

> +	 */
> +        unsigned int phys_bits = find_phys_addr_bits();
> +        unsigned long hcall_pfn =
> +          ((1ull << phys_bits) - 1) >> PAGE_SHIFT;

(1ull << (phys_bits - PAGE_SHIFT)) - 1 is equivalent, and doesn't
require a right shift.  I don't know if the compiler is smart enough to
make this optimisation automatically.

> +
> +        if ( max_pfn >= hcall_pfn )
> +          max_pfn = hcall_pfn - 1;

Indentation looks weird.

> @@ -446,13 +477,7 @@ static uint64_t __init mtrr_top_of_ram(void)
>           return 0;
>  
>      /* Find the physical address size for this CPU. */
> -    eax = cpuid_eax(0x80000000);
> -    if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 )
> -    {
> -        phys_bits = (uint8_t)cpuid_eax(0x80000008);
> -        if ( phys_bits > PADDR_BITS )
> -            phys_bits = PADDR_BITS;
> -    }
> +    phys_bits = find_phys_addr_bits();
>      addr_mask = ((1ull << phys_bits) - 1) & ~((1ull << 12) - 1);

Note for whomever is next doing cleanup in this area.  This wants to be
& PAGE_MASK.

> diff --git a/xen/arch/x86/guest/hyperv/hyperv.c b/xen/arch/x86/guest/hyperv/hyperv.c
> index 8d38313d7a..f986c1a805 100644
> --- a/xen/arch/x86/guest/hyperv/hyperv.c
> +++ b/xen/arch/x86/guest/hyperv/hyperv.c
> @@ -72,6 +82,43 @@ const struct hypervisor_ops *__init hyperv_probe(void)
>      return &ops;
>  }
>  
> +static void __init setup_hypercall_page(void)
> +{
> +    union hv_x64_msr_hypercall_contents hypercall_msr;
> +    union hv_guest_os_id guest_id;
> +    unsigned long mfn;
> +
> +    rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw);
> +    if ( !guest_id.raw )
> +    {
> +        guest_id.raw = generate_guest_id();
> +        wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw);
> +    }
> +
> +    rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> +    if ( !hypercall_msr.enable )
> +    {
> +        mfn = ((1ull << paddr_bits) - 1) >> HV_HYP_PAGE_SHIFT;
> +        hypercall_msr.enable = 1;
> +        hypercall_msr.guest_physical_address = mfn;
> +        wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);

Is it worth reading back, and BUG() if it is different?  It will be a
more obvious failure than hypercalls disappearing mysteriously.

> +    } else {
> +        mfn = hypercall_msr.guest_physical_address;
> +    }

Style.

Otherwise, LGTM.

~Andrew
Michael Kelley (LINUX) Jan. 23, 2020, 1:35 a.m. UTC | #2
From: Wei Liu <wei.liu.xen@gmail.com> On Behalf Of Wei Liu  Sent: Wednesday, January 22, 2020 12:24 PM
> 
> Use the top-most addressable page for that purpose. Adjust e820 code
> accordingly.
> 
> We also need to register Xen's guest OS ID to Hyper-V. Use 0x300 as the
> OS type.
> 
> Signed-off-by: Wei Liu <liuwe@microsoft.com>
> ---
> XXX the decision on Xen's vendor ID is pending.
> 
> v4:
> 1. Use fixmap
> 2. Follow routines listed in TLFS
> ---
>  xen/arch/x86/e820.c                     | 41 +++++++++++++++----
>  xen/arch/x86/guest/hyperv/hyperv.c      | 53 +++++++++++++++++++++++--
>  xen/include/asm-x86/guest/hyperv-tlfs.h |  5 ++-
>  3 files changed, 86 insertions(+), 13 deletions(-)
> 
> diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c
> index 082f9928a1..5a4ef27a0b 100644
> --- a/xen/arch/x86/e820.c
> +++ b/xen/arch/x86/e820.c
> @@ -36,6 +36,22 @@ boolean_param("e820-verbose", e820_verbose);
>  struct e820map e820;
>  struct e820map __initdata e820_raw;
> 
> +static unsigned int find_phys_addr_bits(void)
> +{
> +    uint32_t eax;
> +    unsigned int phys_bits = 36;
> +
> +    eax = cpuid_eax(0x80000000);
> +    if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 )
> +    {
> +        phys_bits = (uint8_t)cpuid_eax(0x80000008);
> +        if ( phys_bits > PADDR_BITS )
> +            phys_bits = PADDR_BITS;
> +    }
> +
> +    return phys_bits;
> +}
> +
>  /*
>   * This function checks if the entire range <start,end> is mapped with type.
>   *
> @@ -357,6 +373,21 @@ static unsigned long __init find_max_pfn(void)
>              max_pfn = end;
>      }
> 
> +#ifdef CONFIG_HYPERV_GUEST
> +    {
> +	/*
> +	 * We reserve the top-most page for hypercall page. Adjust
> +	 * max_pfn if necessary.
> +	 */
> +        unsigned int phys_bits = find_phys_addr_bits();
> +        unsigned long hcall_pfn =
> +          ((1ull << phys_bits) - 1) >> PAGE_SHIFT;
> +
> +        if ( max_pfn >= hcall_pfn )
> +          max_pfn = hcall_pfn - 1;
> +    }
> +#endif
> +
>      return max_pfn;
>  }
> 
> @@ -420,7 +451,7 @@ static uint64_t __init mtrr_top_of_ram(void)
>  {
>      uint32_t eax, ebx, ecx, edx;
>      uint64_t mtrr_cap, mtrr_def, addr_mask, base, mask, top;
> -    unsigned int i, phys_bits = 36;
> +    unsigned int i, phys_bits;
> 
>      /* By default we check only Intel systems. */
>      if ( e820_mtrr_clip == -1 )
> @@ -446,13 +477,7 @@ static uint64_t __init mtrr_top_of_ram(void)
>           return 0;
> 
>      /* Find the physical address size for this CPU. */
> -    eax = cpuid_eax(0x80000000);
> -    if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 )
> -    {
> -        phys_bits = (uint8_t)cpuid_eax(0x80000008);
> -        if ( phys_bits > PADDR_BITS )
> -            phys_bits = PADDR_BITS;
> -    }
> +    phys_bits = find_phys_addr_bits();
>      addr_mask = ((1ull << phys_bits) - 1) & ~((1ull << 12) - 1);
> 
>      rdmsrl(MSR_MTRRcap, mtrr_cap);
> diff --git a/xen/arch/x86/guest/hyperv/hyperv.c b/xen/arch/x86/guest/hyperv/hyperv.c
> index 8d38313d7a..f986c1a805 100644
> --- a/xen/arch/x86/guest/hyperv/hyperv.c
> +++ b/xen/arch/x86/guest/hyperv/hyperv.c
> @@ -18,17 +18,27 @@
>   *
>   * Copyright (c) 2019 Microsoft.
>   */
> +#include <xen/version.h>
>  #include <xen/init.h>
> 
> +#include <asm/fixmap.h>
>  #include <asm/guest.h>
>  #include <asm/guest/hyperv-tlfs.h>
> +#include <asm/processor.h>
> 
>  struct ms_hyperv_info __read_mostly ms_hyperv;
> 
> -static const struct hypervisor_ops ops = {
> -    .name = "Hyper-V",
> -};
> +static uint64_t generate_guest_id(void)
> +{
> +    uint64_t id = 0;
> +
> +    id = (uint64_t)HV_XEN_VENDOR_ID << 48;
> +    id |= (xen_major_version() << 16) | xen_minor_version();
> +
> +    return id;
> +}
> 
> +static const struct hypervisor_ops ops;
>  const struct hypervisor_ops *__init hyperv_probe(void)
>  {
>      uint32_t eax, ebx, ecx, edx;
> @@ -72,6 +82,43 @@ const struct hypervisor_ops *__init hyperv_probe(void)
>      return &ops;
>  }
> 
> +static void __init setup_hypercall_page(void)
> +{
> +    union hv_x64_msr_hypercall_contents hypercall_msr;
> +    union hv_guest_os_id guest_id;
> +    unsigned long mfn;
> +
> +    rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw);
> +    if ( !guest_id.raw )
> +    {
> +        guest_id.raw = generate_guest_id();
> +        wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw);
> +    }
> +
> +    rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> +    if ( !hypercall_msr.enable )
> +    {
> +        mfn = ((1ull << paddr_bits) - 1) >> HV_HYP_PAGE_SHIFT;
> +        hypercall_msr.enable = 1;
> +        hypercall_msr.guest_physical_address = mfn;
> +        wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> +    } else {
> +        mfn = hypercall_msr.guest_physical_address;
> +    }
> +
> +    set_fixmap_x(FIX_X_HYPERV_HCALL, mfn << PAGE_SHIFT);
> +}
> +
> +static void __init setup(void)
> +{
> +    setup_hypercall_page();
> +}
> +
> +static const struct hypervisor_ops ops = {
> +    .name = "Hyper-V",
> +    .setup = setup,
> +};
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/include/asm-x86/guest/hyperv-tlfs.h b/xen/include/asm-
> x86/guest/hyperv-tlfs.h
> index 05c4044976..5d37efd2d2 100644
> --- a/xen/include/asm-x86/guest/hyperv-tlfs.h
> +++ b/xen/include/asm-x86/guest/hyperv-tlfs.h
> @@ -318,15 +318,16 @@ struct ms_hyperv_tsc_page {
>   *
>   * Bit(s)
>   * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
> - * 62:56 - Os Type; Linux is 0x100
> + * 62:56 - Os Type; Linux 0x100, FreeBSD 0x200, Xen 0x300

This comment isn't quite right -- it reflects the mistake in the
TLFS that is being corrected.  The field 62:56 is only 7 bits wide,
so 0x100, 0x200, etc. won't fit.  The actual values are: Linux 0x1,
FreeBSD 0x2, and Xen 0x3.  Then bits 55:48 are 0x00.

>   * 55:48 - Distro specific identification
> - * 47:16 - Linux kernel version number
> + * 47:16 - Guest OS version number
>   * 15:0  - Distro specific identification
>   *
>   *
>   */
> 
>  #define HV_LINUX_VENDOR_ID              0x8100
> +#define HV_XEN_VENDOR_ID                0x8300
>  union hv_guest_os_id
>  {
>      uint64_t raw;
> --
> 2.20.1
Jan Beulich Jan. 23, 2020, 10:04 a.m. UTC | #3
On 22.01.2020 22:31, Andrew Cooper wrote:
> On 22/01/2020 20:23, Wei Liu wrote:
>> --- a/xen/arch/x86/e820.c
>> +++ b/xen/arch/x86/e820.c
>> +	 */
>> +        unsigned int phys_bits = find_phys_addr_bits();
>> +        unsigned long hcall_pfn =
>> +          ((1ull << phys_bits) - 1) >> PAGE_SHIFT;
> 
> (1ull << (phys_bits - PAGE_SHIFT)) - 1 is equivalent, and doesn't
> require a right shift.  I don't know if the compiler is smart enough to
> make this optimisation automatically.

It's not allowed to, without having a way to know that phys_bits is
no less than PAGE_SHIFT.

Jan
Jan Beulich Jan. 23, 2020, 11:18 a.m. UTC | #4
On 22.01.2020 21:23, Wei Liu wrote:
> --- a/xen/arch/x86/e820.c
> +++ b/xen/arch/x86/e820.c
> @@ -36,6 +36,22 @@ boolean_param("e820-verbose", e820_verbose);
>  struct e820map e820;
>  struct e820map __initdata e820_raw;
>  
> +static unsigned int find_phys_addr_bits(void)
> +{
> +    uint32_t eax;
> +    unsigned int phys_bits = 36;
> +
> +    eax = cpuid_eax(0x80000000);
> +    if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 )
> +    {
> +        phys_bits = (uint8_t)cpuid_eax(0x80000008);
> +        if ( phys_bits > PADDR_BITS )
> +            phys_bits = PADDR_BITS;
> +    }
> +
> +    return phys_bits;
> +}

Instead of this, how about pulling further ahead the call to
early_cpu_init() in setup.c? (Otherwise the function wants to
be __init at least.)

> @@ -357,6 +373,21 @@ static unsigned long __init find_max_pfn(void)
>              max_pfn = end;
>      }
>  
> +#ifdef CONFIG_HYPERV_GUEST
> +    {
> +	/*
> +	 * We reserve the top-most page for hypercall page. Adjust
> +	 * max_pfn if necessary.
> +	 */
> +        unsigned int phys_bits = find_phys_addr_bits();
> +        unsigned long hcall_pfn =
> +          ((1ull << phys_bits) - 1) >> PAGE_SHIFT;
> +
> +        if ( max_pfn >= hcall_pfn )
> +          max_pfn = hcall_pfn - 1;
> +    }
> +#endif

This wants abstracting away: There shouldn't be Hyper-V specific
code in here if at all possible, and the adjustment also shouldn't
be made unless actually running on Hyper-V.

> --- a/xen/arch/x86/guest/hyperv/hyperv.c
> +++ b/xen/arch/x86/guest/hyperv/hyperv.c
> @@ -18,17 +18,27 @@
>   *
>   * Copyright (c) 2019 Microsoft.
>   */
> +#include <xen/version.h>
>  #include <xen/init.h>

Please sort alphabetically.

> +#include <asm/fixmap.h>
>  #include <asm/guest.h>
>  #include <asm/guest/hyperv-tlfs.h>
> +#include <asm/processor.h>
>  
>  struct ms_hyperv_info __read_mostly ms_hyperv;
>  
> -static const struct hypervisor_ops ops = {
> -    .name = "Hyper-V",
> -};
> +static uint64_t generate_guest_id(void)
> +{
> +    uint64_t id = 0;

Pointless initializer.

> +    id = (uint64_t)HV_XEN_VENDOR_ID << 48;
> +    id |= (xen_major_version() << 16) | xen_minor_version();
> +
> +    return id;
> +}
>  
> +static const struct hypervisor_ops ops;
>  const struct hypervisor_ops *__init hyperv_probe(void)

Blank line between these two please (if you can't get away without
this declaration in the first place).

> @@ -72,6 +82,43 @@ const struct hypervisor_ops *__init hyperv_probe(void)
>      return &ops;
>  }
>  
> +static void __init setup_hypercall_page(void)
> +{
> +    union hv_x64_msr_hypercall_contents hypercall_msr;
> +    union hv_guest_os_id guest_id;
> +    unsigned long mfn;
> +
> +    rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw);
> +    if ( !guest_id.raw )
> +    {
> +        guest_id.raw = generate_guest_id();
> +        wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw);
> +    }
> +
> +    rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> +    if ( !hypercall_msr.enable )
> +    {
> +        mfn = ((1ull << paddr_bits) - 1) >> HV_HYP_PAGE_SHIFT;

Along the lines of the abstracting-away request above: How is
anyone to notice what else needs changing if it is decided
that this page gets moved elsewhere?

> +        hypercall_msr.enable = 1;
> +        hypercall_msr.guest_physical_address = mfn;
> +        wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);

So on Hyper-V the hypervisor magically populates this page as
a side effect of the MSR write?

Jan
Michael Kelley (LINUX) Jan. 23, 2020, 3:20 p.m. UTC | #5
From: Jan Beulich <jbeulich@suse.com> Sent: Thursday, January 23, 2020 3:19 AM
> 
> On 22.01.2020 21:23, Wei Liu wrote:
> > --- a/xen/arch/x86/e820.c
> > +++ b/xen/arch/x86/e820.c
> > @@ -36,6 +36,22 @@ boolean_param("e820-verbose", e820_verbose);
> >  struct e820map e820;
> >  struct e820map __initdata e820_raw;
> >
> > +static unsigned int find_phys_addr_bits(void)
> > +{
> > +    uint32_t eax;
> > +    unsigned int phys_bits = 36;
> > +
> > +    eax = cpuid_eax(0x80000000);
> > +    if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 )
> > +    {
> > +        phys_bits = (uint8_t)cpuid_eax(0x80000008);
> > +        if ( phys_bits > PADDR_BITS )
> > +            phys_bits = PADDR_BITS;
> > +    }
> > +
> > +    return phys_bits;
> > +}
> 
> Instead of this, how about pulling further ahead the call to
> early_cpu_init() in setup.c? (Otherwise the function wants to
> be __init at least.)
> 
> > @@ -357,6 +373,21 @@ static unsigned long __init find_max_pfn(void)
> >              max_pfn = end;
> >      }
> >
> > +#ifdef CONFIG_HYPERV_GUEST
> > +    {
> > +	/*
> > +	 * We reserve the top-most page for hypercall page. Adjust
> > +	 * max_pfn if necessary.
> > +	 */
> > +        unsigned int phys_bits = find_phys_addr_bits();
> > +        unsigned long hcall_pfn =
> > +          ((1ull << phys_bits) - 1) >> PAGE_SHIFT;
> > +
> > +        if ( max_pfn >= hcall_pfn )
> > +          max_pfn = hcall_pfn - 1;
> > +    }
> > +#endif
> 
> This wants abstracting away: There shouldn't be Hyper-V specific
> code in here if at all possible, and the adjustment also shouldn't
> be made unless actually running on Hyper-V.
> 
> > --- a/xen/arch/x86/guest/hyperv/hyperv.c
> > +++ b/xen/arch/x86/guest/hyperv/hyperv.c
> > @@ -18,17 +18,27 @@
> >   *
> >   * Copyright (c) 2019 Microsoft.
> >   */
> > +#include <xen/version.h>
> >  #include <xen/init.h>
> 
> Please sort alphabetically.
> 
> > +#include <asm/fixmap.h>
> >  #include <asm/guest.h>
> >  #include <asm/guest/hyperv-tlfs.h>
> > +#include <asm/processor.h>
> >
> >  struct ms_hyperv_info __read_mostly ms_hyperv;
> >
> > -static const struct hypervisor_ops ops = {
> > -    .name = "Hyper-V",
> > -};
> > +static uint64_t generate_guest_id(void)
> > +{
> > +    uint64_t id = 0;
> 
> Pointless initializer.
> 
> > +    id = (uint64_t)HV_XEN_VENDOR_ID << 48;
> > +    id |= (xen_major_version() << 16) | xen_minor_version();
> > +
> > +    return id;
> > +}
> >
> > +static const struct hypervisor_ops ops;
> >  const struct hypervisor_ops *__init hyperv_probe(void)
> 
> Blank line between these two please (if you can't get away without
> this declaration in the first place).
> 
> > @@ -72,6 +82,43 @@ const struct hypervisor_ops *__init hyperv_probe(void)
> >      return &ops;
> >  }
> >
> > +static void __init setup_hypercall_page(void)
> > +{
> > +    union hv_x64_msr_hypercall_contents hypercall_msr;
> > +    union hv_guest_os_id guest_id;
> > +    unsigned long mfn;
> > +
> > +    rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw);
> > +    if ( !guest_id.raw )
> > +    {
> > +        guest_id.raw = generate_guest_id();
> > +        wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw);
> > +    }
> > +
> > +    rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> > +    if ( !hypercall_msr.enable )
> > +    {
> > +        mfn = ((1ull << paddr_bits) - 1) >> HV_HYP_PAGE_SHIFT;
> 
> Along the lines of the abstracting-away request above: How is
> anyone to notice what else needs changing if it is decided
> that this page gets moved elsewhere?
> 
> > +        hypercall_msr.enable = 1;
> > +        hypercall_msr.guest_physical_address = mfn;
> > +        wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> 
> So on Hyper-V the hypervisor magically populates this page as
> a side effect of the MSR write?
> 

Yes, that's exactly what happens. :-)  Hyper-V takes the guest
physical address and remaps it to a different physical page that
Hyper-V has set up with whatever is needed to execute the hypercall
sequence. The original physical page corresponding to the guest physical
address is not modified, but it remains unused and inaccessible to the
guest.  When/if the MSR is written again with the enable flag set to 0,
the mapping is flipped back, and the original physical page, with its
original contents, reappears. There are a few other pages with this
same behavior.  The Hyper-V TLFS refers to these "GPA Overlay
Pages".  See Section 5.2.1 of the TLFS.

Michael
Wei Liu Jan. 28, 2020, 3:19 p.m. UTC | #6
On Wed, Jan 22, 2020 at 09:31:52PM +0000, Andrew Cooper wrote:
> On 22/01/2020 20:23, Wei Liu wrote:
> > Use the top-most addressable page for that purpose. Adjust e820 code
> > accordingly.
> >
> > We also need to register Xen's guest OS ID to Hyper-V. Use 0x300 as the
> > OS type.
> >
> > Signed-off-by: Wei Liu <liuwe@microsoft.com>
> > ---
> > XXX the decision on Xen's vendor ID is pending.
> 
> Presumably this is pending a published update to the TLFS?  (And I
> presume using 0x8088 is out of the question?  That is an X in the bottom
> byte, not a reference to an 8 bit microprocessor.)

We're discussing internally what Xen (and other OSes) should use.

There will be an update to TLFS for this.

At this point I can say Xen can use the ID I wrote in this patch.

0x8088 is out of the question. :-)

> 
> > diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c
> > index 082f9928a1..5a4ef27a0b 100644
> > --- a/xen/arch/x86/e820.c
> > +++ b/xen/arch/x86/e820.c
> > @@ -36,6 +36,22 @@ boolean_param("e820-verbose", e820_verbose);
> > @@ -357,6 +373,21 @@ static unsigned long __init find_max_pfn(void)
> >              max_pfn = end;
> >      }
> >  
> > +#ifdef CONFIG_HYPERV_GUEST
> > +    {
> > +	/*
> > +	 * We reserve the top-most page for hypercall page. Adjust
> > +	 * max_pfn if necessary.
> 
> It might be worth leaving a "TODO: Better algorithm/guess?" here.
> 

Ack.

> > +	 */
> > +        unsigned int phys_bits = find_phys_addr_bits();
> > +        unsigned long hcall_pfn =
> > +          ((1ull << phys_bits) - 1) >> PAGE_SHIFT;
> 
> (1ull << (phys_bits - PAGE_SHIFT)) - 1 is equivalent, and doesn't
> require a right shift.  I don't know if the compiler is smart enough to
> make this optimisation automatically.

OK. I can change it.

> 
> > +
> > +        if ( max_pfn >= hcall_pfn )
> > +          max_pfn = hcall_pfn - 1;
> 
> Indentation looks weird.
> 

I blame Emacs.

> > @@ -446,13 +477,7 @@ static uint64_t __init mtrr_top_of_ram(void)
> >           return 0;
> >  
> >      /* Find the physical address size for this CPU. */
> > -    eax = cpuid_eax(0x80000000);
> > -    if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 )
> > -    {
> > -        phys_bits = (uint8_t)cpuid_eax(0x80000008);
> > -        if ( phys_bits > PADDR_BITS )
> > -            phys_bits = PADDR_BITS;
> > -    }
> > +    phys_bits = find_phys_addr_bits();
> >      addr_mask = ((1ull << phys_bits) - 1) & ~((1ull << 12) - 1);
> 
> Note for whomever is next doing cleanup in this area.  This wants to be
> & PAGE_MASK.
> 

Ack.

> > diff --git a/xen/arch/x86/guest/hyperv/hyperv.c b/xen/arch/x86/guest/hyperv/hyperv.c
> > index 8d38313d7a..f986c1a805 100644
> > --- a/xen/arch/x86/guest/hyperv/hyperv.c
> > +++ b/xen/arch/x86/guest/hyperv/hyperv.c
> > @@ -72,6 +82,43 @@ const struct hypervisor_ops *__init hyperv_probe(void)
> >      return &ops;
> >  }
> >  
> > +static void __init setup_hypercall_page(void)
> > +{
> > +    union hv_x64_msr_hypercall_contents hypercall_msr;
> > +    union hv_guest_os_id guest_id;
> > +    unsigned long mfn;
> > +
> > +    rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw);
> > +    if ( !guest_id.raw )
> > +    {
> > +        guest_id.raw = generate_guest_id();
> > +        wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw);
> > +    }
> > +
> > +    rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> > +    if ( !hypercall_msr.enable )
> > +    {
> > +        mfn = ((1ull << paddr_bits) - 1) >> HV_HYP_PAGE_SHIFT;
> > +        hypercall_msr.enable = 1;
> > +        hypercall_msr.guest_physical_address = mfn;
> > +        wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> 
> Is it worth reading back, and BUG() if it is different?  It will be a
> more obvious failure than hypercalls disappearing mysteriously.
> 

Sure. I don't expect that BUG_ON to trigger in practice though.

> > +    } else {
> > +        mfn = hypercall_msr.guest_physical_address;
> > +    }
> 
> Style.
> 

Will fix.

Wei.

> Otherwise, LGTM.
> 
> ~Andrew
Wei Liu Jan. 28, 2020, 3:20 p.m. UTC | #7
On Thu, Jan 23, 2020 at 01:35:22AM +0000, Michael Kelley wrote:
[...]
> > diff --git a/xen/include/asm-x86/guest/hyperv-tlfs.h b/xen/include/asm-
> > x86/guest/hyperv-tlfs.h
> > index 05c4044976..5d37efd2d2 100644
> > --- a/xen/include/asm-x86/guest/hyperv-tlfs.h
> > +++ b/xen/include/asm-x86/guest/hyperv-tlfs.h
> > @@ -318,15 +318,16 @@ struct ms_hyperv_tsc_page {
> >   *
> >   * Bit(s)
> >   * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
> > - * 62:56 - Os Type; Linux is 0x100
> > + * 62:56 - Os Type; Linux 0x100, FreeBSD 0x200, Xen 0x300
> 
> This comment isn't quite right -- it reflects the mistake in the
> TLFS that is being corrected.  The field 62:56 is only 7 bits wide,
> so 0x100, 0x200, etc. won't fit.  The actual values are: Linux 0x1,
> FreeBSD 0x2, and Xen 0x3.  Then bits 55:48 are 0x00.

Thanks Michael.

I will fix this section (and perhaps submit a patch for Linux as well).

Wei.
Wei Liu Jan. 28, 2020, 3:30 p.m. UTC | #8
On Thu, Jan 23, 2020 at 12:18:41PM +0100, Jan Beulich wrote:
> On 22.01.2020 21:23, Wei Liu wrote:
> > --- a/xen/arch/x86/e820.c
> > +++ b/xen/arch/x86/e820.c
> > @@ -36,6 +36,22 @@ boolean_param("e820-verbose", e820_verbose);
> >  struct e820map e820;
> >  struct e820map __initdata e820_raw;
> >  
> > +static unsigned int find_phys_addr_bits(void)
> > +{
> > +    uint32_t eax;
> > +    unsigned int phys_bits = 36;
> > +
> > +    eax = cpuid_eax(0x80000000);
> > +    if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 )
> > +    {
> > +        phys_bits = (uint8_t)cpuid_eax(0x80000008);
> > +        if ( phys_bits > PADDR_BITS )
> > +            phys_bits = PADDR_BITS;
> > +    }
> > +
> > +    return phys_bits;
> > +}
> 
> Instead of this, how about pulling further ahead the call to
> early_cpu_init() in setup.c? (Otherwise the function wants to
> be __init at least.)

I can certainly try that, but that would require modifying e820.c
nonetheless because we can drop the cpuid invocation here if the move is
successful.

> 
> > @@ -357,6 +373,21 @@ static unsigned long __init find_max_pfn(void)
> >              max_pfn = end;
> >      }
> >  
> > +#ifdef CONFIG_HYPERV_GUEST
> > +    {
> > +	/*
> > +	 * We reserve the top-most page for hypercall page. Adjust
> > +	 * max_pfn if necessary.
> > +	 */
> > +        unsigned int phys_bits = find_phys_addr_bits();
> > +        unsigned long hcall_pfn =
> > +          ((1ull << phys_bits) - 1) >> PAGE_SHIFT;
> > +
> > +        if ( max_pfn >= hcall_pfn )
> > +          max_pfn = hcall_pfn - 1;
> > +    }
> > +#endif
> 
> This wants abstracting away: There shouldn't be Hyper-V specific
> code in here if at all possible, and the adjustment also shouldn't
> be made unless actually running on Hyper-V.
> 

I will introduce a hook called hypervisor_reserve_top_pages.

> > --- a/xen/arch/x86/guest/hyperv/hyperv.c
> > +++ b/xen/arch/x86/guest/hyperv/hyperv.c
> > @@ -18,17 +18,27 @@
[...]
> > @@ -72,6 +82,43 @@ const struct hypervisor_ops *__init hyperv_probe(void)
> >      return &ops;
> >  }
> >  
> > +static void __init setup_hypercall_page(void)
> > +{
> > +    union hv_x64_msr_hypercall_contents hypercall_msr;
> > +    union hv_guest_os_id guest_id;
> > +    unsigned long mfn;
> > +
> > +    rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw);
> > +    if ( !guest_id.raw )
> > +    {
> > +        guest_id.raw = generate_guest_id();
> > +        wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw);
> > +    }
> > +
> > +    rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> > +    if ( !hypercall_msr.enable )
> > +    {
> > +        mfn = ((1ull << paddr_bits) - 1) >> HV_HYP_PAGE_SHIFT;
> 
> Along the lines of the abstracting-away request above: How is
> anyone to notice what else needs changing if it is decided
> that this page gets moved elsewhere?

I don't have a good answer to this other than documenting. It is
probably as fragile as livepatch or pcpu stub.

> 
> > +        hypercall_msr.enable = 1;
> > +        hypercall_msr.guest_physical_address = mfn;
> > +        wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> 
> So on Hyper-V the hypervisor magically populates this page as
> a side effect of the MSR write?

Yes. See Michael's reply.

Wei.

> 
> Jan
Jan Beulich Jan. 28, 2020, 3:41 p.m. UTC | #9
On 28.01.2020 16:30, Wei Liu wrote:
> On Thu, Jan 23, 2020 at 12:18:41PM +0100, Jan Beulich wrote:
>> On 22.01.2020 21:23, Wei Liu wrote:
>>> --- a/xen/arch/x86/e820.c
>>> +++ b/xen/arch/x86/e820.c
>>> @@ -36,6 +36,22 @@ boolean_param("e820-verbose", e820_verbose);
>>>  struct e820map e820;
>>>  struct e820map __initdata e820_raw;
>>>  
>>> +static unsigned int find_phys_addr_bits(void)
>>> +{
>>> +    uint32_t eax;
>>> +    unsigned int phys_bits = 36;
>>> +
>>> +    eax = cpuid_eax(0x80000000);
>>> +    if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 )
>>> +    {
>>> +        phys_bits = (uint8_t)cpuid_eax(0x80000008);
>>> +        if ( phys_bits > PADDR_BITS )
>>> +            phys_bits = PADDR_BITS;
>>> +    }
>>> +
>>> +    return phys_bits;
>>> +}
>>
>> Instead of this, how about pulling further ahead the call to
>> early_cpu_init() in setup.c? (Otherwise the function wants to
>> be __init at least.)
> 
> I can certainly try that, but that would require modifying e820.c
> nonetheless because we can drop the cpuid invocation here if the move is
> successful.

Right, but this could then be a separate, follow-on cleanup
patch aiui.

>>> --- a/xen/arch/x86/guest/hyperv/hyperv.c
>>> +++ b/xen/arch/x86/guest/hyperv/hyperv.c
>>> @@ -18,17 +18,27 @@
> [...]
>>> @@ -72,6 +82,43 @@ const struct hypervisor_ops *__init hyperv_probe(void)
>>>      return &ops;
>>>  }
>>>  
>>> +static void __init setup_hypercall_page(void)
>>> +{
>>> +    union hv_x64_msr_hypercall_contents hypercall_msr;
>>> +    union hv_guest_os_id guest_id;
>>> +    unsigned long mfn;
>>> +
>>> +    rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw);
>>> +    if ( !guest_id.raw )
>>> +    {
>>> +        guest_id.raw = generate_guest_id();
>>> +        wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw);
>>> +    }
>>> +
>>> +    rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
>>> +    if ( !hypercall_msr.enable )
>>> +    {
>>> +        mfn = ((1ull << paddr_bits) - 1) >> HV_HYP_PAGE_SHIFT;
>>
>> Along the lines of the abstracting-away request above: How is
>> anyone to notice what else needs changing if it is decided
>> that this page gets moved elsewhere?
> 
> I don't have a good answer to this other than documenting. It is
> probably as fragile as livepatch or pcpu stub.

At least macro-ize it then, so that all use sites can be easily
identified.

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c
index 082f9928a1..5a4ef27a0b 100644
--- a/xen/arch/x86/e820.c
+++ b/xen/arch/x86/e820.c
@@ -36,6 +36,22 @@  boolean_param("e820-verbose", e820_verbose);
 struct e820map e820;
 struct e820map __initdata e820_raw;
 
+static unsigned int find_phys_addr_bits(void)
+{
+    uint32_t eax;
+    unsigned int phys_bits = 36;
+
+    eax = cpuid_eax(0x80000000);
+    if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 )
+    {
+        phys_bits = (uint8_t)cpuid_eax(0x80000008);
+        if ( phys_bits > PADDR_BITS )
+            phys_bits = PADDR_BITS;
+    }
+
+    return phys_bits;
+}
+
 /*
  * This function checks if the entire range <start,end> is mapped with type.
  *
@@ -357,6 +373,21 @@  static unsigned long __init find_max_pfn(void)
             max_pfn = end;
     }
 
+#ifdef CONFIG_HYPERV_GUEST
+    {
+	/*
+	 * We reserve the top-most page for hypercall page. Adjust
+	 * max_pfn if necessary.
+	 */
+        unsigned int phys_bits = find_phys_addr_bits();
+        unsigned long hcall_pfn =
+          ((1ull << phys_bits) - 1) >> PAGE_SHIFT;
+
+        if ( max_pfn >= hcall_pfn )
+          max_pfn = hcall_pfn - 1;
+    }
+#endif
+
     return max_pfn;
 }
 
@@ -420,7 +451,7 @@  static uint64_t __init mtrr_top_of_ram(void)
 {
     uint32_t eax, ebx, ecx, edx;
     uint64_t mtrr_cap, mtrr_def, addr_mask, base, mask, top;
-    unsigned int i, phys_bits = 36;
+    unsigned int i, phys_bits;
 
     /* By default we check only Intel systems. */
     if ( e820_mtrr_clip == -1 )
@@ -446,13 +477,7 @@  static uint64_t __init mtrr_top_of_ram(void)
          return 0;
 
     /* Find the physical address size for this CPU. */
-    eax = cpuid_eax(0x80000000);
-    if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 )
-    {
-        phys_bits = (uint8_t)cpuid_eax(0x80000008);
-        if ( phys_bits > PADDR_BITS )
-            phys_bits = PADDR_BITS;
-    }
+    phys_bits = find_phys_addr_bits();
     addr_mask = ((1ull << phys_bits) - 1) & ~((1ull << 12) - 1);
 
     rdmsrl(MSR_MTRRcap, mtrr_cap);
diff --git a/xen/arch/x86/guest/hyperv/hyperv.c b/xen/arch/x86/guest/hyperv/hyperv.c
index 8d38313d7a..f986c1a805 100644
--- a/xen/arch/x86/guest/hyperv/hyperv.c
+++ b/xen/arch/x86/guest/hyperv/hyperv.c
@@ -18,17 +18,27 @@ 
  *
  * Copyright (c) 2019 Microsoft.
  */
+#include <xen/version.h>
 #include <xen/init.h>
 
+#include <asm/fixmap.h>
 #include <asm/guest.h>
 #include <asm/guest/hyperv-tlfs.h>
+#include <asm/processor.h>
 
 struct ms_hyperv_info __read_mostly ms_hyperv;
 
-static const struct hypervisor_ops ops = {
-    .name = "Hyper-V",
-};
+static uint64_t generate_guest_id(void)
+{
+    uint64_t id = 0;
+
+    id = (uint64_t)HV_XEN_VENDOR_ID << 48;
+    id |= (xen_major_version() << 16) | xen_minor_version();
+
+    return id;
+}
 
+static const struct hypervisor_ops ops;
 const struct hypervisor_ops *__init hyperv_probe(void)
 {
     uint32_t eax, ebx, ecx, edx;
@@ -72,6 +82,43 @@  const struct hypervisor_ops *__init hyperv_probe(void)
     return &ops;
 }
 
+static void __init setup_hypercall_page(void)
+{
+    union hv_x64_msr_hypercall_contents hypercall_msr;
+    union hv_guest_os_id guest_id;
+    unsigned long mfn;
+
+    rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw);
+    if ( !guest_id.raw )
+    {
+        guest_id.raw = generate_guest_id();
+        wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw);
+    }
+
+    rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+    if ( !hypercall_msr.enable )
+    {
+        mfn = ((1ull << paddr_bits) - 1) >> HV_HYP_PAGE_SHIFT;
+        hypercall_msr.enable = 1;
+        hypercall_msr.guest_physical_address = mfn;
+        wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+    } else {
+        mfn = hypercall_msr.guest_physical_address;
+    }
+
+    set_fixmap_x(FIX_X_HYPERV_HCALL, mfn << PAGE_SHIFT);
+}
+
+static void __init setup(void)
+{
+    setup_hypercall_page();
+}
+
+static const struct hypervisor_ops ops = {
+    .name = "Hyper-V",
+    .setup = setup,
+};
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/include/asm-x86/guest/hyperv-tlfs.h b/xen/include/asm-x86/guest/hyperv-tlfs.h
index 05c4044976..5d37efd2d2 100644
--- a/xen/include/asm-x86/guest/hyperv-tlfs.h
+++ b/xen/include/asm-x86/guest/hyperv-tlfs.h
@@ -318,15 +318,16 @@  struct ms_hyperv_tsc_page {
  *
  * Bit(s)
  * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
- * 62:56 - Os Type; Linux is 0x100
+ * 62:56 - Os Type; Linux 0x100, FreeBSD 0x200, Xen 0x300
  * 55:48 - Distro specific identification
- * 47:16 - Linux kernel version number
+ * 47:16 - Guest OS version number
  * 15:0  - Distro specific identification
  *
  *
  */
 
 #define HV_LINUX_VENDOR_ID              0x8100
+#define HV_XEN_VENDOR_ID                0x8300
 union hv_guest_os_id
 {
     uint64_t raw;