Message ID | 20200122202343.5703-3-liuwe@microsoft.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | More Hyper-V infrastructure | expand |
On 22/01/2020 20:23, Wei Liu wrote: > Use the top-most addressable page for that purpose. Adjust e820 code > accordingly. > > We also need to register Xen's guest OS ID to Hyper-V. Use 0x300 as the > OS type. > > Signed-off-by: Wei Liu <liuwe@microsoft.com> > --- > XXX the decision on Xen's vendor ID is pending. Presumably this is pending a published update to the TLFS? (And I presume using 0x8088 is out of the question? That is an X in the bottom byte, not a reference to an 8 bit microprocessor.) > diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c > index 082f9928a1..5a4ef27a0b 100644 > --- a/xen/arch/x86/e820.c > +++ b/xen/arch/x86/e820.c > @@ -36,6 +36,22 @@ boolean_param("e820-verbose", e820_verbose); > @@ -357,6 +373,21 @@ static unsigned long __init find_max_pfn(void) > max_pfn = end; > } > > +#ifdef CONFIG_HYPERV_GUEST > + { > + /* > + * We reserve the top-most page for hypercall page. Adjust > + * max_pfn if necessary. It might be worth leaving a "TODO: Better algorithm/guess?" here. > + */ > + unsigned int phys_bits = find_phys_addr_bits(); > + unsigned long hcall_pfn = > + ((1ull << phys_bits) - 1) >> PAGE_SHIFT; (1ull << (phys_bits - PAGE_SHIFT)) - 1 is equivalent, and doesn't require a right shift. I don't know if the compiler is smart enough to make this optimisation automatically. > + > + if ( max_pfn >= hcall_pfn ) > + max_pfn = hcall_pfn - 1; Indentation looks weird. > @@ -446,13 +477,7 @@ static uint64_t __init mtrr_top_of_ram(void) > return 0; > > /* Find the physical address size for this CPU. */ > - eax = cpuid_eax(0x80000000); > - if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 ) > - { > - phys_bits = (uint8_t)cpuid_eax(0x80000008); > - if ( phys_bits > PADDR_BITS ) > - phys_bits = PADDR_BITS; > - } > + phys_bits = find_phys_addr_bits(); > addr_mask = ((1ull << phys_bits) - 1) & ~((1ull << 12) - 1); Note for whomever is next doing cleanup in this area. This wants to be & PAGE_MASK. > diff --git a/xen/arch/x86/guest/hyperv/hyperv.c b/xen/arch/x86/guest/hyperv/hyperv.c > index 8d38313d7a..f986c1a805 100644 > --- a/xen/arch/x86/guest/hyperv/hyperv.c > +++ b/xen/arch/x86/guest/hyperv/hyperv.c > @@ -72,6 +82,43 @@ const struct hypervisor_ops *__init hyperv_probe(void) > return &ops; > } > > +static void __init setup_hypercall_page(void) > +{ > + union hv_x64_msr_hypercall_contents hypercall_msr; > + union hv_guest_os_id guest_id; > + unsigned long mfn; > + > + rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw); > + if ( !guest_id.raw ) > + { > + guest_id.raw = generate_guest_id(); > + wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw); > + } > + > + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); > + if ( !hypercall_msr.enable ) > + { > + mfn = ((1ull << paddr_bits) - 1) >> HV_HYP_PAGE_SHIFT; > + hypercall_msr.enable = 1; > + hypercall_msr.guest_physical_address = mfn; > + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); Is it worth reading back, and BUG() if it is different? It will be a more obvious failure than hypercalls disappearing mysteriously. > + } else { > + mfn = hypercall_msr.guest_physical_address; > + } Style. Otherwise, LGTM. ~Andrew
From: Wei Liu <wei.liu.xen@gmail.com> On Behalf Of Wei Liu Sent: Wednesday, January 22, 2020 12:24 PM > > Use the top-most addressable page for that purpose. Adjust e820 code > accordingly. > > We also need to register Xen's guest OS ID to Hyper-V. Use 0x300 as the > OS type. > > Signed-off-by: Wei Liu <liuwe@microsoft.com> > --- > XXX the decision on Xen's vendor ID is pending. > > v4: > 1. Use fixmap > 2. Follow routines listed in TLFS > --- > xen/arch/x86/e820.c | 41 +++++++++++++++---- > xen/arch/x86/guest/hyperv/hyperv.c | 53 +++++++++++++++++++++++-- > xen/include/asm-x86/guest/hyperv-tlfs.h | 5 ++- > 3 files changed, 86 insertions(+), 13 deletions(-) > > diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c > index 082f9928a1..5a4ef27a0b 100644 > --- a/xen/arch/x86/e820.c > +++ b/xen/arch/x86/e820.c > @@ -36,6 +36,22 @@ boolean_param("e820-verbose", e820_verbose); > struct e820map e820; > struct e820map __initdata e820_raw; > > +static unsigned int find_phys_addr_bits(void) > +{ > + uint32_t eax; > + unsigned int phys_bits = 36; > + > + eax = cpuid_eax(0x80000000); > + if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 ) > + { > + phys_bits = (uint8_t)cpuid_eax(0x80000008); > + if ( phys_bits > PADDR_BITS ) > + phys_bits = PADDR_BITS; > + } > + > + return phys_bits; > +} > + > /* > * This function checks if the entire range <start,end> is mapped with type. > * > @@ -357,6 +373,21 @@ static unsigned long __init find_max_pfn(void) > max_pfn = end; > } > > +#ifdef CONFIG_HYPERV_GUEST > + { > + /* > + * We reserve the top-most page for hypercall page. Adjust > + * max_pfn if necessary. > + */ > + unsigned int phys_bits = find_phys_addr_bits(); > + unsigned long hcall_pfn = > + ((1ull << phys_bits) - 1) >> PAGE_SHIFT; > + > + if ( max_pfn >= hcall_pfn ) > + max_pfn = hcall_pfn - 1; > + } > +#endif > + > return max_pfn; > } > > @@ -420,7 +451,7 @@ static uint64_t __init mtrr_top_of_ram(void) > { > uint32_t eax, ebx, ecx, edx; > uint64_t mtrr_cap, mtrr_def, addr_mask, base, mask, top; > - unsigned int i, phys_bits = 36; > + unsigned int i, phys_bits; > > /* By default we check only Intel systems. */ > if ( e820_mtrr_clip == -1 ) > @@ -446,13 +477,7 @@ static uint64_t __init mtrr_top_of_ram(void) > return 0; > > /* Find the physical address size for this CPU. */ > - eax = cpuid_eax(0x80000000); > - if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 ) > - { > - phys_bits = (uint8_t)cpuid_eax(0x80000008); > - if ( phys_bits > PADDR_BITS ) > - phys_bits = PADDR_BITS; > - } > + phys_bits = find_phys_addr_bits(); > addr_mask = ((1ull << phys_bits) - 1) & ~((1ull << 12) - 1); > > rdmsrl(MSR_MTRRcap, mtrr_cap); > diff --git a/xen/arch/x86/guest/hyperv/hyperv.c b/xen/arch/x86/guest/hyperv/hyperv.c > index 8d38313d7a..f986c1a805 100644 > --- a/xen/arch/x86/guest/hyperv/hyperv.c > +++ b/xen/arch/x86/guest/hyperv/hyperv.c > @@ -18,17 +18,27 @@ > * > * Copyright (c) 2019 Microsoft. > */ > +#include <xen/version.h> > #include <xen/init.h> > > +#include <asm/fixmap.h> > #include <asm/guest.h> > #include <asm/guest/hyperv-tlfs.h> > +#include <asm/processor.h> > > struct ms_hyperv_info __read_mostly ms_hyperv; > > -static const struct hypervisor_ops ops = { > - .name = "Hyper-V", > -}; > +static uint64_t generate_guest_id(void) > +{ > + uint64_t id = 0; > + > + id = (uint64_t)HV_XEN_VENDOR_ID << 48; > + id |= (xen_major_version() << 16) | xen_minor_version(); > + > + return id; > +} > > +static const struct hypervisor_ops ops; > const struct hypervisor_ops *__init hyperv_probe(void) > { > uint32_t eax, ebx, ecx, edx; > @@ -72,6 +82,43 @@ const struct hypervisor_ops *__init hyperv_probe(void) > return &ops; > } > > +static void __init setup_hypercall_page(void) > +{ > + union hv_x64_msr_hypercall_contents hypercall_msr; > + union hv_guest_os_id guest_id; > + unsigned long mfn; > + > + rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw); > + if ( !guest_id.raw ) > + { > + guest_id.raw = generate_guest_id(); > + wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw); > + } > + > + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); > + if ( !hypercall_msr.enable ) > + { > + mfn = ((1ull << paddr_bits) - 1) >> HV_HYP_PAGE_SHIFT; > + hypercall_msr.enable = 1; > + hypercall_msr.guest_physical_address = mfn; > + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); > + } else { > + mfn = hypercall_msr.guest_physical_address; > + } > + > + set_fixmap_x(FIX_X_HYPERV_HCALL, mfn << PAGE_SHIFT); > +} > + > +static void __init setup(void) > +{ > + setup_hypercall_page(); > +} > + > +static const struct hypervisor_ops ops = { > + .name = "Hyper-V", > + .setup = setup, > +}; > + > /* > * Local variables: > * mode: C > diff --git a/xen/include/asm-x86/guest/hyperv-tlfs.h b/xen/include/asm- > x86/guest/hyperv-tlfs.h > index 05c4044976..5d37efd2d2 100644 > --- a/xen/include/asm-x86/guest/hyperv-tlfs.h > +++ b/xen/include/asm-x86/guest/hyperv-tlfs.h > @@ -318,15 +318,16 @@ struct ms_hyperv_tsc_page { > * > * Bit(s) > * 63 - Indicates if the OS is Open Source or not; 1 is Open Source > - * 62:56 - Os Type; Linux is 0x100 > + * 62:56 - Os Type; Linux 0x100, FreeBSD 0x200, Xen 0x300 This comment isn't quite right -- it reflects the mistake in the TLFS that is being corrected. The field 62:56 is only 7 bits wide, so 0x100, 0x200, etc. won't fit. The actual values are: Linux 0x1, FreeBSD 0x2, and Xen 0x3. Then bits 55:48 are 0x00. > * 55:48 - Distro specific identification > - * 47:16 - Linux kernel version number > + * 47:16 - Guest OS version number > * 15:0 - Distro specific identification > * > * > */ > > #define HV_LINUX_VENDOR_ID 0x8100 > +#define HV_XEN_VENDOR_ID 0x8300 > union hv_guest_os_id > { > uint64_t raw; > -- > 2.20.1
On 22.01.2020 22:31, Andrew Cooper wrote: > On 22/01/2020 20:23, Wei Liu wrote: >> --- a/xen/arch/x86/e820.c >> +++ b/xen/arch/x86/e820.c >> + */ >> + unsigned int phys_bits = find_phys_addr_bits(); >> + unsigned long hcall_pfn = >> + ((1ull << phys_bits) - 1) >> PAGE_SHIFT; > > (1ull << (phys_bits - PAGE_SHIFT)) - 1 is equivalent, and doesn't > require a right shift. I don't know if the compiler is smart enough to > make this optimisation automatically. It's not allowed to, without having a way to know that phys_bits is no less than PAGE_SHIFT. Jan
On 22.01.2020 21:23, Wei Liu wrote: > --- a/xen/arch/x86/e820.c > +++ b/xen/arch/x86/e820.c > @@ -36,6 +36,22 @@ boolean_param("e820-verbose", e820_verbose); > struct e820map e820; > struct e820map __initdata e820_raw; > > +static unsigned int find_phys_addr_bits(void) > +{ > + uint32_t eax; > + unsigned int phys_bits = 36; > + > + eax = cpuid_eax(0x80000000); > + if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 ) > + { > + phys_bits = (uint8_t)cpuid_eax(0x80000008); > + if ( phys_bits > PADDR_BITS ) > + phys_bits = PADDR_BITS; > + } > + > + return phys_bits; > +} Instead of this, how about pulling further ahead the call to early_cpu_init() in setup.c? (Otherwise the function wants to be __init at least.) > @@ -357,6 +373,21 @@ static unsigned long __init find_max_pfn(void) > max_pfn = end; > } > > +#ifdef CONFIG_HYPERV_GUEST > + { > + /* > + * We reserve the top-most page for hypercall page. Adjust > + * max_pfn if necessary. > + */ > + unsigned int phys_bits = find_phys_addr_bits(); > + unsigned long hcall_pfn = > + ((1ull << phys_bits) - 1) >> PAGE_SHIFT; > + > + if ( max_pfn >= hcall_pfn ) > + max_pfn = hcall_pfn - 1; > + } > +#endif This wants abstracting away: There shouldn't be Hyper-V specific code in here if at all possible, and the adjustment also shouldn't be made unless actually running on Hyper-V. > --- a/xen/arch/x86/guest/hyperv/hyperv.c > +++ b/xen/arch/x86/guest/hyperv/hyperv.c > @@ -18,17 +18,27 @@ > * > * Copyright (c) 2019 Microsoft. > */ > +#include <xen/version.h> > #include <xen/init.h> Please sort alphabetically. > +#include <asm/fixmap.h> > #include <asm/guest.h> > #include <asm/guest/hyperv-tlfs.h> > +#include <asm/processor.h> > > struct ms_hyperv_info __read_mostly ms_hyperv; > > -static const struct hypervisor_ops ops = { > - .name = "Hyper-V", > -}; > +static uint64_t generate_guest_id(void) > +{ > + uint64_t id = 0; Pointless initializer. > + id = (uint64_t)HV_XEN_VENDOR_ID << 48; > + id |= (xen_major_version() << 16) | xen_minor_version(); > + > + return id; > +} > > +static const struct hypervisor_ops ops; > const struct hypervisor_ops *__init hyperv_probe(void) Blank line between these two please (if you can't get away without this declaration in the first place). > @@ -72,6 +82,43 @@ const struct hypervisor_ops *__init hyperv_probe(void) > return &ops; > } > > +static void __init setup_hypercall_page(void) > +{ > + union hv_x64_msr_hypercall_contents hypercall_msr; > + union hv_guest_os_id guest_id; > + unsigned long mfn; > + > + rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw); > + if ( !guest_id.raw ) > + { > + guest_id.raw = generate_guest_id(); > + wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw); > + } > + > + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); > + if ( !hypercall_msr.enable ) > + { > + mfn = ((1ull << paddr_bits) - 1) >> HV_HYP_PAGE_SHIFT; Along the lines of the abstracting-away request above: How is anyone to notice what else needs changing if it is decided that this page gets moved elsewhere? > + hypercall_msr.enable = 1; > + hypercall_msr.guest_physical_address = mfn; > + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); So on Hyper-V the hypervisor magically populates this page as a side effect of the MSR write? Jan
From: Jan Beulich <jbeulich@suse.com> Sent: Thursday, January 23, 2020 3:19 AM > > On 22.01.2020 21:23, Wei Liu wrote: > > --- a/xen/arch/x86/e820.c > > +++ b/xen/arch/x86/e820.c > > @@ -36,6 +36,22 @@ boolean_param("e820-verbose", e820_verbose); > > struct e820map e820; > > struct e820map __initdata e820_raw; > > > > +static unsigned int find_phys_addr_bits(void) > > +{ > > + uint32_t eax; > > + unsigned int phys_bits = 36; > > + > > + eax = cpuid_eax(0x80000000); > > + if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 ) > > + { > > + phys_bits = (uint8_t)cpuid_eax(0x80000008); > > + if ( phys_bits > PADDR_BITS ) > > + phys_bits = PADDR_BITS; > > + } > > + > > + return phys_bits; > > +} > > Instead of this, how about pulling further ahead the call to > early_cpu_init() in setup.c? (Otherwise the function wants to > be __init at least.) > > > @@ -357,6 +373,21 @@ static unsigned long __init find_max_pfn(void) > > max_pfn = end; > > } > > > > +#ifdef CONFIG_HYPERV_GUEST > > + { > > + /* > > + * We reserve the top-most page for hypercall page. Adjust > > + * max_pfn if necessary. > > + */ > > + unsigned int phys_bits = find_phys_addr_bits(); > > + unsigned long hcall_pfn = > > + ((1ull << phys_bits) - 1) >> PAGE_SHIFT; > > + > > + if ( max_pfn >= hcall_pfn ) > > + max_pfn = hcall_pfn - 1; > > + } > > +#endif > > This wants abstracting away: There shouldn't be Hyper-V specific > code in here if at all possible, and the adjustment also shouldn't > be made unless actually running on Hyper-V. > > > --- a/xen/arch/x86/guest/hyperv/hyperv.c > > +++ b/xen/arch/x86/guest/hyperv/hyperv.c > > @@ -18,17 +18,27 @@ > > * > > * Copyright (c) 2019 Microsoft. > > */ > > +#include <xen/version.h> > > #include <xen/init.h> > > Please sort alphabetically. > > > +#include <asm/fixmap.h> > > #include <asm/guest.h> > > #include <asm/guest/hyperv-tlfs.h> > > +#include <asm/processor.h> > > > > struct ms_hyperv_info __read_mostly ms_hyperv; > > > > -static const struct hypervisor_ops ops = { > > - .name = "Hyper-V", > > -}; > > +static uint64_t generate_guest_id(void) > > +{ > > + uint64_t id = 0; > > Pointless initializer. > > > + id = (uint64_t)HV_XEN_VENDOR_ID << 48; > > + id |= (xen_major_version() << 16) | xen_minor_version(); > > + > > + return id; > > +} > > > > +static const struct hypervisor_ops ops; > > const struct hypervisor_ops *__init hyperv_probe(void) > > Blank line between these two please (if you can't get away without > this declaration in the first place). > > > @@ -72,6 +82,43 @@ const struct hypervisor_ops *__init hyperv_probe(void) > > return &ops; > > } > > > > +static void __init setup_hypercall_page(void) > > +{ > > + union hv_x64_msr_hypercall_contents hypercall_msr; > > + union hv_guest_os_id guest_id; > > + unsigned long mfn; > > + > > + rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw); > > + if ( !guest_id.raw ) > > + { > > + guest_id.raw = generate_guest_id(); > > + wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw); > > + } > > + > > + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); > > + if ( !hypercall_msr.enable ) > > + { > > + mfn = ((1ull << paddr_bits) - 1) >> HV_HYP_PAGE_SHIFT; > > Along the lines of the abstracting-away request above: How is > anyone to notice what else needs changing if it is decided > that this page gets moved elsewhere? > > > + hypercall_msr.enable = 1; > > + hypercall_msr.guest_physical_address = mfn; > > + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); > > So on Hyper-V the hypervisor magically populates this page as > a side effect of the MSR write? > Yes, that's exactly what happens. :-) Hyper-V takes the guest physical address and remaps it to a different physical page that Hyper-V has set up with whatever is needed to execute the hypercall sequence. The original physical page corresponding to the guest physical address is not modified, but it remains unused and inaccessible to the guest. When/if the MSR is written again with the enable flag set to 0, the mapping is flipped back, and the original physical page, with its original contents, reappears. There are a few other pages with this same behavior. The Hyper-V TLFS refers to these "GPA Overlay Pages". See Section 5.2.1 of the TLFS. Michael
On Wed, Jan 22, 2020 at 09:31:52PM +0000, Andrew Cooper wrote: > On 22/01/2020 20:23, Wei Liu wrote: > > Use the top-most addressable page for that purpose. Adjust e820 code > > accordingly. > > > > We also need to register Xen's guest OS ID to Hyper-V. Use 0x300 as the > > OS type. > > > > Signed-off-by: Wei Liu <liuwe@microsoft.com> > > --- > > XXX the decision on Xen's vendor ID is pending. > > Presumably this is pending a published update to the TLFS? (And I > presume using 0x8088 is out of the question? That is an X in the bottom > byte, not a reference to an 8 bit microprocessor.) We're discussing internally what Xen (and other OSes) should use. There will be an update to TLFS for this. At this point I can say Xen can use the ID I wrote in this patch. 0x8088 is out of the question. :-) > > > diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c > > index 082f9928a1..5a4ef27a0b 100644 > > --- a/xen/arch/x86/e820.c > > +++ b/xen/arch/x86/e820.c > > @@ -36,6 +36,22 @@ boolean_param("e820-verbose", e820_verbose); > > @@ -357,6 +373,21 @@ static unsigned long __init find_max_pfn(void) > > max_pfn = end; > > } > > > > +#ifdef CONFIG_HYPERV_GUEST > > + { > > + /* > > + * We reserve the top-most page for hypercall page. Adjust > > + * max_pfn if necessary. > > It might be worth leaving a "TODO: Better algorithm/guess?" here. > Ack. > > + */ > > + unsigned int phys_bits = find_phys_addr_bits(); > > + unsigned long hcall_pfn = > > + ((1ull << phys_bits) - 1) >> PAGE_SHIFT; > > (1ull << (phys_bits - PAGE_SHIFT)) - 1 is equivalent, and doesn't > require a right shift. I don't know if the compiler is smart enough to > make this optimisation automatically. OK. I can change it. > > > + > > + if ( max_pfn >= hcall_pfn ) > > + max_pfn = hcall_pfn - 1; > > Indentation looks weird. > I blame Emacs. > > @@ -446,13 +477,7 @@ static uint64_t __init mtrr_top_of_ram(void) > > return 0; > > > > /* Find the physical address size for this CPU. */ > > - eax = cpuid_eax(0x80000000); > > - if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 ) > > - { > > - phys_bits = (uint8_t)cpuid_eax(0x80000008); > > - if ( phys_bits > PADDR_BITS ) > > - phys_bits = PADDR_BITS; > > - } > > + phys_bits = find_phys_addr_bits(); > > addr_mask = ((1ull << phys_bits) - 1) & ~((1ull << 12) - 1); > > Note for whomever is next doing cleanup in this area. This wants to be > & PAGE_MASK. > Ack. > > diff --git a/xen/arch/x86/guest/hyperv/hyperv.c b/xen/arch/x86/guest/hyperv/hyperv.c > > index 8d38313d7a..f986c1a805 100644 > > --- a/xen/arch/x86/guest/hyperv/hyperv.c > > +++ b/xen/arch/x86/guest/hyperv/hyperv.c > > @@ -72,6 +82,43 @@ const struct hypervisor_ops *__init hyperv_probe(void) > > return &ops; > > } > > > > +static void __init setup_hypercall_page(void) > > +{ > > + union hv_x64_msr_hypercall_contents hypercall_msr; > > + union hv_guest_os_id guest_id; > > + unsigned long mfn; > > + > > + rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw); > > + if ( !guest_id.raw ) > > + { > > + guest_id.raw = generate_guest_id(); > > + wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw); > > + } > > + > > + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); > > + if ( !hypercall_msr.enable ) > > + { > > + mfn = ((1ull << paddr_bits) - 1) >> HV_HYP_PAGE_SHIFT; > > + hypercall_msr.enable = 1; > > + hypercall_msr.guest_physical_address = mfn; > > + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); > > Is it worth reading back, and BUG() if it is different? It will be a > more obvious failure than hypercalls disappearing mysteriously. > Sure. I don't expect that BUG_ON to trigger in practice though. > > + } else { > > + mfn = hypercall_msr.guest_physical_address; > > + } > > Style. > Will fix. Wei. > Otherwise, LGTM. > > ~Andrew
On Thu, Jan 23, 2020 at 01:35:22AM +0000, Michael Kelley wrote: [...] > > diff --git a/xen/include/asm-x86/guest/hyperv-tlfs.h b/xen/include/asm- > > x86/guest/hyperv-tlfs.h > > index 05c4044976..5d37efd2d2 100644 > > --- a/xen/include/asm-x86/guest/hyperv-tlfs.h > > +++ b/xen/include/asm-x86/guest/hyperv-tlfs.h > > @@ -318,15 +318,16 @@ struct ms_hyperv_tsc_page { > > * > > * Bit(s) > > * 63 - Indicates if the OS is Open Source or not; 1 is Open Source > > - * 62:56 - Os Type; Linux is 0x100 > > + * 62:56 - Os Type; Linux 0x100, FreeBSD 0x200, Xen 0x300 > > This comment isn't quite right -- it reflects the mistake in the > TLFS that is being corrected. The field 62:56 is only 7 bits wide, > so 0x100, 0x200, etc. won't fit. The actual values are: Linux 0x1, > FreeBSD 0x2, and Xen 0x3. Then bits 55:48 are 0x00. Thanks Michael. I will fix this section (and perhaps submit a patch for Linux as well). Wei.
On Thu, Jan 23, 2020 at 12:18:41PM +0100, Jan Beulich wrote: > On 22.01.2020 21:23, Wei Liu wrote: > > --- a/xen/arch/x86/e820.c > > +++ b/xen/arch/x86/e820.c > > @@ -36,6 +36,22 @@ boolean_param("e820-verbose", e820_verbose); > > struct e820map e820; > > struct e820map __initdata e820_raw; > > > > +static unsigned int find_phys_addr_bits(void) > > +{ > > + uint32_t eax; > > + unsigned int phys_bits = 36; > > + > > + eax = cpuid_eax(0x80000000); > > + if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 ) > > + { > > + phys_bits = (uint8_t)cpuid_eax(0x80000008); > > + if ( phys_bits > PADDR_BITS ) > > + phys_bits = PADDR_BITS; > > + } > > + > > + return phys_bits; > > +} > > Instead of this, how about pulling further ahead the call to > early_cpu_init() in setup.c? (Otherwise the function wants to > be __init at least.) I can certainly try that, but that would require modifying e820.c nonetheless because we can drop the cpuid invocation here if the move is successful. > > > @@ -357,6 +373,21 @@ static unsigned long __init find_max_pfn(void) > > max_pfn = end; > > } > > > > +#ifdef CONFIG_HYPERV_GUEST > > + { > > + /* > > + * We reserve the top-most page for hypercall page. Adjust > > + * max_pfn if necessary. > > + */ > > + unsigned int phys_bits = find_phys_addr_bits(); > > + unsigned long hcall_pfn = > > + ((1ull << phys_bits) - 1) >> PAGE_SHIFT; > > + > > + if ( max_pfn >= hcall_pfn ) > > + max_pfn = hcall_pfn - 1; > > + } > > +#endif > > This wants abstracting away: There shouldn't be Hyper-V specific > code in here if at all possible, and the adjustment also shouldn't > be made unless actually running on Hyper-V. > I will introduce a hook called hypervisor_reserve_top_pages. > > --- a/xen/arch/x86/guest/hyperv/hyperv.c > > +++ b/xen/arch/x86/guest/hyperv/hyperv.c > > @@ -18,17 +18,27 @@ [...] > > @@ -72,6 +82,43 @@ const struct hypervisor_ops *__init hyperv_probe(void) > > return &ops; > > } > > > > +static void __init setup_hypercall_page(void) > > +{ > > + union hv_x64_msr_hypercall_contents hypercall_msr; > > + union hv_guest_os_id guest_id; > > + unsigned long mfn; > > + > > + rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw); > > + if ( !guest_id.raw ) > > + { > > + guest_id.raw = generate_guest_id(); > > + wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw); > > + } > > + > > + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); > > + if ( !hypercall_msr.enable ) > > + { > > + mfn = ((1ull << paddr_bits) - 1) >> HV_HYP_PAGE_SHIFT; > > Along the lines of the abstracting-away request above: How is > anyone to notice what else needs changing if it is decided > that this page gets moved elsewhere? I don't have a good answer to this other than documenting. It is probably as fragile as livepatch or pcpu stub. > > > + hypercall_msr.enable = 1; > > + hypercall_msr.guest_physical_address = mfn; > > + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); > > So on Hyper-V the hypervisor magically populates this page as > a side effect of the MSR write? Yes. See Michael's reply. Wei. > > Jan
On 28.01.2020 16:30, Wei Liu wrote: > On Thu, Jan 23, 2020 at 12:18:41PM +0100, Jan Beulich wrote: >> On 22.01.2020 21:23, Wei Liu wrote: >>> --- a/xen/arch/x86/e820.c >>> +++ b/xen/arch/x86/e820.c >>> @@ -36,6 +36,22 @@ boolean_param("e820-verbose", e820_verbose); >>> struct e820map e820; >>> struct e820map __initdata e820_raw; >>> >>> +static unsigned int find_phys_addr_bits(void) >>> +{ >>> + uint32_t eax; >>> + unsigned int phys_bits = 36; >>> + >>> + eax = cpuid_eax(0x80000000); >>> + if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 ) >>> + { >>> + phys_bits = (uint8_t)cpuid_eax(0x80000008); >>> + if ( phys_bits > PADDR_BITS ) >>> + phys_bits = PADDR_BITS; >>> + } >>> + >>> + return phys_bits; >>> +} >> >> Instead of this, how about pulling further ahead the call to >> early_cpu_init() in setup.c? (Otherwise the function wants to >> be __init at least.) > > I can certainly try that, but that would require modifying e820.c > nonetheless because we can drop the cpuid invocation here if the move is > successful. Right, but this could then be a separate, follow-on cleanup patch aiui. >>> --- a/xen/arch/x86/guest/hyperv/hyperv.c >>> +++ b/xen/arch/x86/guest/hyperv/hyperv.c >>> @@ -18,17 +18,27 @@ > [...] >>> @@ -72,6 +82,43 @@ const struct hypervisor_ops *__init hyperv_probe(void) >>> return &ops; >>> } >>> >>> +static void __init setup_hypercall_page(void) >>> +{ >>> + union hv_x64_msr_hypercall_contents hypercall_msr; >>> + union hv_guest_os_id guest_id; >>> + unsigned long mfn; >>> + >>> + rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw); >>> + if ( !guest_id.raw ) >>> + { >>> + guest_id.raw = generate_guest_id(); >>> + wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw); >>> + } >>> + >>> + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); >>> + if ( !hypercall_msr.enable ) >>> + { >>> + mfn = ((1ull << paddr_bits) - 1) >> HV_HYP_PAGE_SHIFT; >> >> Along the lines of the abstracting-away request above: How is >> anyone to notice what else needs changing if it is decided >> that this page gets moved elsewhere? > > I don't have a good answer to this other than documenting. It is > probably as fragile as livepatch or pcpu stub. At least macro-ize it then, so that all use sites can be easily identified. Jan
diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c index 082f9928a1..5a4ef27a0b 100644 --- a/xen/arch/x86/e820.c +++ b/xen/arch/x86/e820.c @@ -36,6 +36,22 @@ boolean_param("e820-verbose", e820_verbose); struct e820map e820; struct e820map __initdata e820_raw; +static unsigned int find_phys_addr_bits(void) +{ + uint32_t eax; + unsigned int phys_bits = 36; + + eax = cpuid_eax(0x80000000); + if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 ) + { + phys_bits = (uint8_t)cpuid_eax(0x80000008); + if ( phys_bits > PADDR_BITS ) + phys_bits = PADDR_BITS; + } + + return phys_bits; +} + /* * This function checks if the entire range <start,end> is mapped with type. * @@ -357,6 +373,21 @@ static unsigned long __init find_max_pfn(void) max_pfn = end; } +#ifdef CONFIG_HYPERV_GUEST + { + /* + * We reserve the top-most page for hypercall page. Adjust + * max_pfn if necessary. + */ + unsigned int phys_bits = find_phys_addr_bits(); + unsigned long hcall_pfn = + ((1ull << phys_bits) - 1) >> PAGE_SHIFT; + + if ( max_pfn >= hcall_pfn ) + max_pfn = hcall_pfn - 1; + } +#endif + return max_pfn; } @@ -420,7 +451,7 @@ static uint64_t __init mtrr_top_of_ram(void) { uint32_t eax, ebx, ecx, edx; uint64_t mtrr_cap, mtrr_def, addr_mask, base, mask, top; - unsigned int i, phys_bits = 36; + unsigned int i, phys_bits; /* By default we check only Intel systems. */ if ( e820_mtrr_clip == -1 ) @@ -446,13 +477,7 @@ static uint64_t __init mtrr_top_of_ram(void) return 0; /* Find the physical address size for this CPU. */ - eax = cpuid_eax(0x80000000); - if ( (eax >> 16) == 0x8000 && eax >= 0x80000008 ) - { - phys_bits = (uint8_t)cpuid_eax(0x80000008); - if ( phys_bits > PADDR_BITS ) - phys_bits = PADDR_BITS; - } + phys_bits = find_phys_addr_bits(); addr_mask = ((1ull << phys_bits) - 1) & ~((1ull << 12) - 1); rdmsrl(MSR_MTRRcap, mtrr_cap); diff --git a/xen/arch/x86/guest/hyperv/hyperv.c b/xen/arch/x86/guest/hyperv/hyperv.c index 8d38313d7a..f986c1a805 100644 --- a/xen/arch/x86/guest/hyperv/hyperv.c +++ b/xen/arch/x86/guest/hyperv/hyperv.c @@ -18,17 +18,27 @@ * * Copyright (c) 2019 Microsoft. */ +#include <xen/version.h> #include <xen/init.h> +#include <asm/fixmap.h> #include <asm/guest.h> #include <asm/guest/hyperv-tlfs.h> +#include <asm/processor.h> struct ms_hyperv_info __read_mostly ms_hyperv; -static const struct hypervisor_ops ops = { - .name = "Hyper-V", -}; +static uint64_t generate_guest_id(void) +{ + uint64_t id = 0; + + id = (uint64_t)HV_XEN_VENDOR_ID << 48; + id |= (xen_major_version() << 16) | xen_minor_version(); + + return id; +} +static const struct hypervisor_ops ops; const struct hypervisor_ops *__init hyperv_probe(void) { uint32_t eax, ebx, ecx, edx; @@ -72,6 +82,43 @@ const struct hypervisor_ops *__init hyperv_probe(void) return &ops; } +static void __init setup_hypercall_page(void) +{ + union hv_x64_msr_hypercall_contents hypercall_msr; + union hv_guest_os_id guest_id; + unsigned long mfn; + + rdmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw); + if ( !guest_id.raw ) + { + guest_id.raw = generate_guest_id(); + wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id.raw); + } + + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + if ( !hypercall_msr.enable ) + { + mfn = ((1ull << paddr_bits) - 1) >> HV_HYP_PAGE_SHIFT; + hypercall_msr.enable = 1; + hypercall_msr.guest_physical_address = mfn; + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + } else { + mfn = hypercall_msr.guest_physical_address; + } + + set_fixmap_x(FIX_X_HYPERV_HCALL, mfn << PAGE_SHIFT); +} + +static void __init setup(void) +{ + setup_hypercall_page(); +} + +static const struct hypervisor_ops ops = { + .name = "Hyper-V", + .setup = setup, +}; + /* * Local variables: * mode: C diff --git a/xen/include/asm-x86/guest/hyperv-tlfs.h b/xen/include/asm-x86/guest/hyperv-tlfs.h index 05c4044976..5d37efd2d2 100644 --- a/xen/include/asm-x86/guest/hyperv-tlfs.h +++ b/xen/include/asm-x86/guest/hyperv-tlfs.h @@ -318,15 +318,16 @@ struct ms_hyperv_tsc_page { * * Bit(s) * 63 - Indicates if the OS is Open Source or not; 1 is Open Source - * 62:56 - Os Type; Linux is 0x100 + * 62:56 - Os Type; Linux 0x100, FreeBSD 0x200, Xen 0x300 * 55:48 - Distro specific identification - * 47:16 - Linux kernel version number + * 47:16 - Guest OS version number * 15:0 - Distro specific identification * * */ #define HV_LINUX_VENDOR_ID 0x8100 +#define HV_XEN_VENDOR_ID 0x8300 union hv_guest_os_id { uint64_t raw;
Use the top-most addressable page for that purpose. Adjust e820 code accordingly. We also need to register Xen's guest OS ID to Hyper-V. Use 0x300 as the OS type. Signed-off-by: Wei Liu <liuwe@microsoft.com> --- XXX the decision on Xen's vendor ID is pending. v4: 1. Use fixmap 2. Follow routines listed in TLFS --- xen/arch/x86/e820.c | 41 +++++++++++++++---- xen/arch/x86/guest/hyperv/hyperv.c | 53 +++++++++++++++++++++++-- xen/include/asm-x86/guest/hyperv-tlfs.h | 5 ++- 3 files changed, 86 insertions(+), 13 deletions(-)