Message ID | 1632955927-27911-2-git-send-email-olekstysh@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Add handling of extended regions (safe ranges) on Arm (Was "xen/memory: Introduce a hypercall to provide unallocated space") | expand |
On Thu, 30 Sep 2021, Oleksandr Tyshchenko wrote: > From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> > > We need to pass info about maximum supported guest address > space size to the toolstack on Arm in order to properly > calculate the base and size of the extended region (safe range) > for the guest. The extended region is unused address space which > could be safely used by domain for foreign/grant mappings on Arm. > The extended region itself will be handled by the subsequents > patch. > > Use p2m_ipa_bits variable on Arm, the x86 equivalent is > hap_paddr_bits. > > As we change the size of structure bump the interface version. > > Suggested-by: Julien Grall <jgrall@amazon.com> > Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> > Reviewed-by: Michal Orzel <michal.orzel@arm.com> Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> > --- > Please note, that review comments for the RFC version [1] haven't been addressed yet. > It is not forgotten, some clarification is needed. It will be addressed for the next version. > > [1] https://lore.kernel.org/xen-devel/973f5344-aa10-3ad6-ff02-ad5f358ad279@citrix.com/ > > Changes RFC -> V2: > - update patch subject/description > - replace arch-specific sub-struct with common gpaddr_bits > field and update code to reflect that > > Changes V2 -> V3: > - make the field uint8_t and add uint8_t pad[7] after > - remove leading blanks in libxl.h > > Changes V3 -> V4: > - also print gpaddr_bits from output_physinfo() > - add Michal's R-b > --- > tools/include/libxl.h | 7 +++++++ > tools/libs/light/libxl.c | 2 ++ > tools/libs/light/libxl_types.idl | 2 ++ > tools/xl/xl_info.c | 2 ++ > xen/arch/arm/sysctl.c | 2 ++ > xen/arch/x86/sysctl.c | 2 ++ > xen/include/public/sysctl.h | 4 +++- > 7 files changed, 20 insertions(+), 1 deletion(-) > > diff --git a/tools/include/libxl.h b/tools/include/libxl.h > index b9ba16d..63f9550 100644 > --- a/tools/include/libxl.h > +++ b/tools/include/libxl.h > @@ -856,6 +856,13 @@ typedef struct libxl__ctx libxl_ctx; > #define LIBXL_HAVE_PHYSINFO_MAX_POSSIBLE_MFN 1 > > /* > + * LIBXL_HAVE_PHYSINFO_GPADDR_BITS > + * > + * If this is defined, libxl_physinfo has a "gpaddr_bits" field. > + */ > +#define LIBXL_HAVE_PHYSINFO_GPADDR_BITS 1 > + > +/* > * LIBXL_HAVE_DOMINFO_OUTSTANDING_MEMKB 1 > * > * If this is defined, libxl_dominfo will contain a MemKB type field called > diff --git a/tools/libs/light/libxl.c b/tools/libs/light/libxl.c > index 204eb0b..c86624f 100644 > --- a/tools/libs/light/libxl.c > +++ b/tools/libs/light/libxl.c > @@ -405,6 +405,8 @@ int libxl_get_physinfo(libxl_ctx *ctx, libxl_physinfo *physinfo) > physinfo->cap_vmtrace = > !!(xcphysinfo.capabilities & XEN_SYSCTL_PHYSCAP_vmtrace); > > + physinfo->gpaddr_bits = xcphysinfo.gpaddr_bits; > + > GC_FREE; > return 0; > } > diff --git a/tools/libs/light/libxl_types.idl b/tools/libs/light/libxl_types.idl > index 3f9fff6..bf27fe6 100644 > --- a/tools/libs/light/libxl_types.idl > +++ b/tools/libs/light/libxl_types.idl > @@ -1061,6 +1061,8 @@ libxl_physinfo = Struct("physinfo", [ > ("cap_shadow", bool), > ("cap_iommu_hap_pt_share", bool), > ("cap_vmtrace", bool), > + > + ("gpaddr_bits", uint8), > ], dir=DIR_OUT) > > libxl_connectorinfo = Struct("connectorinfo", [ > diff --git a/tools/xl/xl_info.c b/tools/xl/xl_info.c > index 8383e4a..dfbbeaa 100644 > --- a/tools/xl/xl_info.c > +++ b/tools/xl/xl_info.c > @@ -221,6 +221,8 @@ static void output_physinfo(void) > info.cap_vmtrace ? " vmtrace" : "" > ); > > + maybe_printf("gpaddr_bits : %d\n", info.gpaddr_bits); > + > vinfo = libxl_get_version_info(ctx); > if (vinfo) { > i = (1 << 20) / vinfo->pagesize; > diff --git a/xen/arch/arm/sysctl.c b/xen/arch/arm/sysctl.c > index f87944e..91dca4f 100644 > --- a/xen/arch/arm/sysctl.c > +++ b/xen/arch/arm/sysctl.c > @@ -15,6 +15,8 @@ > void arch_do_physinfo(struct xen_sysctl_physinfo *pi) > { > pi->capabilities |= XEN_SYSCTL_PHYSCAP_hvm | XEN_SYSCTL_PHYSCAP_hap; > + > + pi->gpaddr_bits = p2m_ipa_bits; > } > > long arch_do_sysctl(struct xen_sysctl *sysctl, > diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c > index aff52a1..7b14865 100644 > --- a/xen/arch/x86/sysctl.c > +++ b/xen/arch/x86/sysctl.c > @@ -135,6 +135,8 @@ void arch_do_physinfo(struct xen_sysctl_physinfo *pi) > pi->capabilities |= XEN_SYSCTL_PHYSCAP_hap; > if ( IS_ENABLED(CONFIG_SHADOW_PAGING) ) > pi->capabilities |= XEN_SYSCTL_PHYSCAP_shadow; > + > + pi->gpaddr_bits = hap_paddr_bits; > } > > long arch_do_sysctl( > diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h > index 039ccf8..0450a78 100644 > --- a/xen/include/public/sysctl.h > +++ b/xen/include/public/sysctl.h > @@ -35,7 +35,7 @@ > #include "domctl.h" > #include "physdev.h" > > -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000013 > +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000014 > > /* > * Read console content from Xen buffer ring. > @@ -120,6 +120,8 @@ struct xen_sysctl_physinfo { > uint64_aligned_t outstanding_pages; > uint64_aligned_t max_mfn; /* Largest possible MFN on this host */ > uint32_t hw_cap[8]; > + uint8_t gpaddr_bits; > + uint8_t pad[7]; > }; > > /* > -- > 2.7.4 >
On 01.10.2021 01:00, Stefano Stabellini wrote: > On Thu, 30 Sep 2021, Oleksandr Tyshchenko wrote: >> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> >> >> We need to pass info about maximum supported guest address >> space size to the toolstack on Arm in order to properly >> calculate the base and size of the extended region (safe range) >> for the guest. The extended region is unused address space which >> could be safely used by domain for foreign/grant mappings on Arm. >> The extended region itself will be handled by the subsequents >> patch. >> >> Use p2m_ipa_bits variable on Arm, the x86 equivalent is >> hap_paddr_bits. >> >> As we change the size of structure bump the interface version. >> >> Suggested-by: Julien Grall <jgrall@amazon.com> >> Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> >> Reviewed-by: Michal Orzel <michal.orzel@arm.com> > > Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> I have to admit that I'm a little puzzled to see these R-b-s when ... >> Please note, that review comments for the RFC version [1] haven't been addressed yet. >> It is not forgotten, some clarification is needed. It will be addressed for the next version. >> >> [1] https://lore.kernel.org/xen-devel/973f5344-aa10-3ad6-ff02-ad5f358ad279@citrix.com/ ... Oleksandr makes clear this patch isn't really ready yet. The tags could misguide a committer into putting in this series despite the open issue(s). Jan
On 01.10.21 10:50, Jan Beulich wrote: Hi Jan > On 01.10.2021 01:00, Stefano Stabellini wrote: >> On Thu, 30 Sep 2021, Oleksandr Tyshchenko wrote: >>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> >>> >>> We need to pass info about maximum supported guest address >>> space size to the toolstack on Arm in order to properly >>> calculate the base and size of the extended region (safe range) >>> for the guest. The extended region is unused address space which >>> could be safely used by domain for foreign/grant mappings on Arm. >>> The extended region itself will be handled by the subsequents >>> patch. >>> >>> Use p2m_ipa_bits variable on Arm, the x86 equivalent is >>> hap_paddr_bits. >>> >>> As we change the size of structure bump the interface version. >>> >>> Suggested-by: Julien Grall <jgrall@amazon.com> >>> Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> >>> Reviewed-by: Michal Orzel <michal.orzel@arm.com> >> Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> > I have to admit that I'm a little puzzled to see these R-b-s when ... > >>> Please note, that review comments for the RFC version [1] haven't been addressed yet. >>> It is not forgotten, some clarification is needed. It will be addressed for the next version. >>> >>> [1] https://lore.kernel.org/xen-devel/973f5344-aa10-3ad6-ff02-ad5f358ad279@citrix.com/ > ... Oleksandr makes clear this patch isn't really ready yet. Unfortunately, this is true. I am still waiting for the clarification [1] > The tags > could misguide a committer into putting in this series despite the > open issue(s). > > Jan [1] https://lore.kernel.org/xen-devel/6a2a183d-c9d8-df2a-41aa-b25283fab197@gmail.com/
Bertrand, see comment on ID_AA64MMFR0_EL1 below, any ideas? On Fri, 1 Oct 2021, Oleksandr wrote: > On 01.10.21 10:50, Jan Beulich wrote: > > On 01.10.2021 01:00, Stefano Stabellini wrote: > > > On Thu, 30 Sep 2021, Oleksandr Tyshchenko wrote: > > > > From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> > > > > > > > > We need to pass info about maximum supported guest address > > > > space size to the toolstack on Arm in order to properly > > > > calculate the base and size of the extended region (safe range) > > > > for the guest. The extended region is unused address space which > > > > could be safely used by domain for foreign/grant mappings on Arm. > > > > The extended region itself will be handled by the subsequents > > > > patch. > > > > > > > > Use p2m_ipa_bits variable on Arm, the x86 equivalent is > > > > hap_paddr_bits. > > > > > > > > As we change the size of structure bump the interface version. > > > > > > > > Suggested-by: Julien Grall <jgrall@amazon.com> > > > > Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> > > > > Reviewed-by: Michal Orzel <michal.orzel@arm.com> > > > Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> > > I have to admit that I'm a little puzzled to see these R-b-s when ... > > > > > > Please note, that review comments for the RFC version [1] haven't been > > > > addressed yet. > > > > It is not forgotten, some clarification is needed. It will be addressed > > > > for the next version. > > > > > > > > [1] > > > > https://lore.kernel.org/xen-devel/973f5344-aa10-3ad6-ff02-ad5f358ad279@citrix.com/ > > ... Oleksandr makes clear this patch isn't really ready yet. > > Unfortunately, this is true. I am still waiting for the clarification [1] Although I was aware of comments to older versions, this is actually the first version of this patch that I reviewed with any level of details; I didn't read previous comments very closely. I tried to find any bugs or problems with it and I couldn't see any, so I gave my reviewed-by. I should have clarified that was meant for the ARM part as I don't have a full understanding of the implications of using hap_paddr_bits on x86 for VM migration. But let me take this opportunity to say that although I think the hypercall is OK, I wish we didn't need this patch at all: it is problematic because it touches tools, x86 and ARM hypervisor code all together. It needs at least three acks/reviewed-by to get accepted: from an x86 maintainer, an arm maintainer and from a tools maintainer. I don't say this to criticize the patch acceptance process: this patch makes changes to an existing hypercall so it is only fair that it needs to go through extra levels of scrutiny. For the sake of simplicity and decoupling (reducing dependencies between patches and between components), I think it would be best to introduce an #define for the minimum value of gpaddr_bits and then move this patch at the end of the series; that way it becomes optional. Unfortunately the minimum value is 32 (in practice I have never seen less than 40 but the architecture supports 32 as minimum). Actually, the info we are looking for is already exposed via ID_AA64MMFR0_EL1. ID_AA64MMFR0_EL1 can be read from a virtual machine, and Linux let userspace read it [1]. Regardless of this patch series, we should make sure that Xen exposes the right mm64.pa_range value to guest virtual machines. If that is done right, then you can just add support for reading ID_AA64MMFR0_EL1 in libxl/libxc and then we don't need any hypercall modifications changes. So, in theory we already have all the interfaces we need, but in practice they don't work: unfortunaly both Xen and Linux mark ID_AA64MMFR0_EL1 as FTR_HIDDEN in cpufeature.c so neither Linux from Xen, not userspace from Linux can actually read the real value :-/ They always read zero. (Also I think we have an issue today with p2m_restrict_ipa_bits not updating the mm64.pa_range value. I think that it should be fixed.) Bertrand, do you have any ideas in regards to ID_AA64MMFR0_EL1? If not, maybe we could just go with #define MIN_GPADDR_BITS 32 [1] https://01.org/linuxgraphics/gfx-docs/drm/arm64/cpu-feature-registers.html
Hi On Sat, 2 Oct 2021, 01:24 Stefano Stabellini, <sstabellini@kernel.org> wrote: > Bertrand, see comment on ID_AA64MMFR0_EL1 below, any ideas? > > > On Fri, 1 Oct 2021, Oleksandr wrote: > > On 01.10.21 10:50, Jan Beulich wrote: > > > On 01.10.2021 01:00, Stefano Stabellini wrote: > > > > On Thu, 30 Sep 2021, Oleksandr Tyshchenko wrote: > > > > > From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> > > > > > > > > > > We need to pass info about maximum supported guest address > > > > > space size to the toolstack on Arm in order to properly > > > > > calculate the base and size of the extended region (safe range) > > > > > for the guest. The extended region is unused address space which > > > > > could be safely used by domain for foreign/grant mappings on Arm. > > > > > The extended region itself will be handled by the subsequents > > > > > patch. > > > > > > > > > > Use p2m_ipa_bits variable on Arm, the x86 equivalent is > > > > > hap_paddr_bits. > > > > > > > > > > As we change the size of structure bump the interface version. > > > > > > > > > > Suggested-by: Julien Grall <jgrall@amazon.com> > > > > > Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com > > > > > > > Reviewed-by: Michal Orzel <michal.orzel@arm.com> > > > > Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> > > > I have to admit that I'm a little puzzled to see these R-b-s when ... > > > > > > > > Please note, that review comments for the RFC version [1] haven't > been > > > > > addressed yet. > > > > > It is not forgotten, some clarification is needed. It will be > addressed > > > > > for the next version. > > > > > > > > > > [1] > > > > > > https://lore.kernel.org/xen-devel/973f5344-aa10-3ad6-ff02-ad5f358ad279@citrix.com/ > > > ... Oleksandr makes clear this patch isn't really ready yet. > > > > Unfortunately, this is true. I am still waiting for the clarification [1] > > Although I was aware of comments to older versions, this is actually the > first version of this patch that I reviewed with any level of details; I > didn't read previous comments very closely. I tried to find any bugs or > problems with it and I couldn't see any, so I gave my reviewed-by. I > should have clarified that was meant for the ARM part as I don't have a > full understanding of the implications of using hap_paddr_bits on x86 > for VM migration. > > But let me take this opportunity to say that although I think the > hypercall is OK, I wish we didn't need this patch at all: it is > problematic because it touches tools, x86 and ARM hypervisor code all > together. It needs at least three acks/reviewed-by to get accepted: from > an x86 maintainer, an arm maintainer and from a tools maintainer. I > don't say this to criticize the patch acceptance process: this patch > makes changes to an existing hypercall so it is only fair that it needs > to go through extra levels of scrutiny. For the sake of simplicity and > decoupling (reducing dependencies between patches and between > components), I think it would be best to introduce an #define for the > minimum value of gpaddr_bits and then move this patch at the end of the > series; that way it becomes optional. It depends what you mean by optional. Yes we can add hack to avoid the hypercall... But the more scalable solution is the hypercall. I am slightly concerned that if we don't push for the hypercall now, then there will be no incentive to do it afterwards... So I went through Andrew's e-mail to understand what's the request. I understand that there are some problem with migration. But it doesn't look like we need to solve them now. Instead, AFAICT, his main ask for this series is to switch to a domctl. It seems the conversation is simply stuck on waiting for Andrew to provide details on what would look like. Did we ping Andrew on IRC? Unfortunately the minimum value > is 32 (in practice I have never seen less than 40 but the architecture > supports 32 as minimum). > > Actually, the info we are looking for is already exposed via > ID_AA64MMFR0_EL1. ID_AA64MMFR0_EL1 can be read from a virtual machine, > and Linux let userspace read it [1]. Regardless of this patch series, we > should make sure that Xen exposes the right mm64.pa_range value to guest > virtual machines. If that is done right, then you can just add support > for reading ID_AA64MMFR0_EL1 in libxl/libxc and then we don't need any > hypercall modifications changes. From my understanding, from a VM PoV "pa_range" should represent the size of the guest physical address space. Today, it happens that every VM is using the same P2M size. However, I would rather not make such assumption in the userspace. > So, in theory we already have all the interfaces we need, but in > practice they don't work: unfortunaly both Xen and Linux mark > ID_AA64MMFR0_EL1 as FTR_HIDDEN in cpufeature.c so neither Linux from > Xen, not userspace from Linux can actually read the real value :-/ > They always read zero. > > (Also I think we have an issue today with p2m_restrict_ipa_bits not > updating the mm64.pa_range value. I think that it should be fixed.) It looks like it. That should be handled in a separate patch though. > Bertrand, do you have any ideas in regards to ID_AA64MMFR0_EL1? > > If not, maybe we could just go with > #define MIN_GPADDR_BITS 32. The toolstack would have to consider it as the "maximum" because it may not be safe to expose anything above. With 32, we are going to be limited in term of space we can find. We could potentially use 40 bits as a minimum. Although it still feels a bit of a hack to me given that the IOMMU may restrict it further and the architecture can in theory support less. Overall, I still strongly prefer the hypercall approach. If a common one is difficult to achieve, then we can extend the domctl to create a domain to provide the p2m_bits (in the same way as we deal for the GIC version) in an arch specific way. Cheers, > > [1] > https://01.org/linuxgraphics/gfx-docs/drm/arm64/cpu-feature-registers.html >
On 02.10.21 10:35, Julien Grall wrote: Hi Julien, Stefano. Thank you for your comments! > Hi > > On Sat, 2 Oct 2021, 01:24 Stefano Stabellini, <sstabellini@kernel.org > <mailto:sstabellini@kernel.org>> wrote: > > Bertrand, see comment on ID_AA64MMFR0_EL1 below, any ideas? > > > On Fri, 1 Oct 2021, Oleksandr wrote: > > On 01.10.21 10:50, Jan Beulich wrote: > > > On 01.10.2021 01:00, Stefano Stabellini wrote: > > > > On Thu, 30 Sep 2021, Oleksandr Tyshchenko wrote: > > > > > From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com > <mailto:oleksandr_tyshchenko@epam.com>> > > > > > > > > > > We need to pass info about maximum supported guest address > > > > > space size to the toolstack on Arm in order to properly > > > > > calculate the base and size of the extended region (safe > range) > > > > > for the guest. The extended region is unused address space > which > > > > > could be safely used by domain for foreign/grant mappings > on Arm. > > > > > The extended region itself will be handled by the subsequents > > > > > patch. > > > > > > > > > > Use p2m_ipa_bits variable on Arm, the x86 equivalent is > > > > > hap_paddr_bits. > > > > > > > > > > As we change the size of structure bump the interface version. > > > > > > > > > > Suggested-by: Julien Grall <jgrall@amazon.com > <mailto:jgrall@amazon.com>> > > > > > Signed-off-by: Oleksandr Tyshchenko > <oleksandr_tyshchenko@epam.com <mailto:oleksandr_tyshchenko@epam.com>> > > > > > Reviewed-by: Michal Orzel <michal.orzel@arm.com > <mailto:michal.orzel@arm.com>> > > > > Reviewed-by: Stefano Stabellini <sstabellini@kernel.org > <mailto:sstabellini@kernel.org>> > > > I have to admit that I'm a little puzzled to see these R-b-s > when ... > > > > > > > > Please note, that review comments for the RFC version [1] > haven't been > > > > > addressed yet. > > > > > It is not forgotten, some clarification is needed. It will > be addressed > > > > > for the next version. > > > > > > > > > > [1] > > > > > > https://lore.kernel.org/xen-devel/973f5344-aa10-3ad6-ff02-ad5f358ad279@citrix.com/ > > > ... Oleksandr makes clear this patch isn't really ready yet. > > > > Unfortunately, this is true. I am still waiting for the > clarification [1] > > Although I was aware of comments to older versions, this is > actually the > first version of this patch that I reviewed with any level of > details; I > didn't read previous comments very closely. I tried to find any > bugs or > problems with it and I couldn't see any, so I gave my reviewed-by. I > should have clarified that was meant for the ARM part as I don't > have a > full understanding of the implications of using hap_paddr_bits on x86 > for VM migration. > > > > But let me take this opportunity to say that although I think the > hypercall is OK, I wish we didn't need this patch at all: it is > problematic because it touches tools, x86 and ARM hypervisor code all > together. It needs at least three acks/reviewed-by to get > accepted: from > an x86 maintainer, an arm maintainer and from a tools maintainer. I > don't say this to criticize the patch acceptance process: this patch > makes changes to an existing hypercall so it is only fair that it > needs > to go through extra levels of scrutiny. For the sake of simplicity and > decoupling (reducing dependencies between patches and between > components), I think it would be best to introduce an #define for the > minimum value of gpaddr_bits and then move this patch at the end > of the > series; that way it becomes optional. > > > It depends what you mean by optional. Yes we can add hack to avoid the > hypercall... But the more scalable solution is the hypercall. > > I am slightly concerned that if we don't push for the hypercall now, > then there will be no incentive to do it afterwards... > > So I went through Andrew's e-mail to understand what's the request. I > understand that there are some problem with migration. But it doesn't > look like we need to solve them now. Instead, AFAICT, his main ask > for this series is to switch to a domctl. > > It seems the conversation is simply stuck on waiting for Andrew to > provide details on what would look like. Did we ping Andrew on IRC? > > Unfortunately the minimum value > is 32 (in practice I have never seen less than 40 but the architecture > supports 32 as minimum). > > > > Actually, the info we are looking for is already exposed via > ID_AA64MMFR0_EL1. ID_AA64MMFR0_EL1 can be read from a virtual machine, > and Linux let userspace read it [1]. Regardless of this patch > series, we > should make sure that Xen exposes the right mm64.pa_range value to > guest > virtual machines. If that is done right, then you can just add support > for reading ID_AA64MMFR0_EL1 in libxl/libxc and then we don't need any > hypercall modifications changes. > > > From my understanding, from a VM PoV "pa_range" should represent the > size of the guest physical address space. > > Today, it happens that every VM is using the same P2M size. However, I > would rather not make such assumption in the userspace. > > > So, in theory we already have all the interfaces we need, but in > practice they don't work: unfortunaly both Xen and Linux mark > ID_AA64MMFR0_EL1 as FTR_HIDDEN in cpufeature.c so neither Linux from > Xen, not userspace from Linux can actually read the real value :-/ > They always read zero. > > (Also I think we have an issue today with p2m_restrict_ipa_bits not > updating the mm64.pa_range value. I think that it should be fixed.) > > > It looks like it. That should be handled in a separate patch though. > > > Bertrand, do you have any ideas in regards to ID_AA64MMFR0_EL1? > > If not, maybe we could just go with > #define MIN_GPADDR_BITS 32. > > > The toolstack would have to consider it as the "maximum" because it > may not be safe to expose anything above. > > With 32, we are going to be limited in term of space we can find. > > We could potentially use 40 bits as a minimum. Although it still feels > a bit of a hack to me given that the IOMMU may restrict it further and > the architecture can in theory support less. > > Overall, I still strongly prefer the hypercall approach. If a common > one is difficult to achieve, then we can extend the domctl to create a > domain to provide the p2m_bits (in the same way as we deal for the GIC > version) in an arch specific way. To summarize: If we don't query the hypervisor to provide gpaddr_bits we have two options: - The safe option is to use minimum possible value which is 32 bits on Arm64. But, there would be of no practical use. - The unsafe option is to use let's say "default" 40 bits and pray it will work in all cases on Arm64 (it is ok on Arm32). So we definitely need to query the hypervisor. As it turned out the sysctl approach is not welcome, in the long term we want to have this information per domain. I have been absolutely OK with that valid ask since RFC, I just wanted to know what was the preferred way to do this (new domctl, existing, etc)... I analyzed what Julien had suggested regarding pass gpaddr_bits via Arm's struct xen_arch_domainconfig (I assume, this should be an OUT parameter) and I think it makes sense. Taking into the account that the feature freeze date is coming, I will wait a few days, and if there are no objections I will send updated version (patch #3 also needs updating as it expects the gpaddr_bits to be in physinfo). > > Cheers, > > > > [1] > https://01.org/linuxgraphics/gfx-docs/drm/arm64/cpu-feature-registers.html >
On Sat, 2 Oct 2021, Oleksandr wrote: > On 02.10.21 10:35, Julien Grall wrote: > > Thank you for your comments! > > Hi > > On Sat, 2 Oct 2021, 01:24 Stefano Stabellini, <sstabellini@kernel.org> wrote: > Bertrand, see comment on ID_AA64MMFR0_EL1 below, any ideas? > > > On Fri, 1 Oct 2021, Oleksandr wrote: > > On 01.10.21 10:50, Jan Beulich wrote: > > > On 01.10.2021 01:00, Stefano Stabellini wrote: > > > > On Thu, 30 Sep 2021, Oleksandr Tyshchenko wrote: > > > > > From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> > > > > > > > > > > We need to pass info about maximum supported guest address > > > > > space size to the toolstack on Arm in order to properly > > > > > calculate the base and size of the extended region (safe range) > > > > > for the guest. The extended region is unused address space which > > > > > could be safely used by domain for foreign/grant mappings on Arm. > > > > > The extended region itself will be handled by the subsequents > > > > > patch. > > > > > > > > > > Use p2m_ipa_bits variable on Arm, the x86 equivalent is > > > > > hap_paddr_bits. > > > > > > > > > > As we change the size of structure bump the interface version. > > > > > > > > > > Suggested-by: Julien Grall <jgrall@amazon.com> > > > > > Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> > > > > > Reviewed-by: Michal Orzel <michal.orzel@arm.com> > > > > Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> > > > I have to admit that I'm a little puzzled to see these R-b-s when ... > > > > > > > > Please note, that review comments for the RFC version [1] haven't been > > > > > addressed yet. > > > > > It is not forgotten, some clarification is needed. It will be addressed > > > > > for the next version. > > > > > > > > > > [1] > > > > > https://lore.kernel.org/xen-devel/973f5344-aa10-3ad6-ff02-ad5f358ad279@citrix.com/ > > > ... Oleksandr makes clear this patch isn't really ready yet. > > > > Unfortunately, this is true. I am still waiting for the clarification [1] > > Although I was aware of comments to older versions, this is actually the > first version of this patch that I reviewed with any level of details; I > didn't read previous comments very closely. I tried to find any bugs or > problems with it and I couldn't see any, so I gave my reviewed-by. I > should have clarified that was meant for the ARM part as I don't have a > full understanding of the implications of using hap_paddr_bits on x86 > for VM migration. > > > > But let me take this opportunity to say that although I think the > hypercall is OK, I wish we didn't need this patch at all: it is > problematic because it touches tools, x86 and ARM hypervisor code all > together. It needs at least three acks/reviewed-by to get accepted: from > an x86 maintainer, an arm maintainer and from a tools maintainer. I > don't say this to criticize the patch acceptance process: this patch > makes changes to an existing hypercall so it is only fair that it needs > to go through extra levels of scrutiny. For the sake of simplicity and > decoupling (reducing dependencies between patches and between > components), I think it would be best to introduce an #define for the > minimum value of gpaddr_bits and then move this patch at the end of the > series; that way it becomes optional. > > > It depends what you mean by optional. Yes we can add hack to avoid the hypercall... But the more scalable solution is the hypercall. > > I am slightly concerned that if we don't push for the hypercall now, then there will be no incentive to do it afterwards... > > So I went through Andrew's e-mail to understand what's the request. I understand that there are some problem with migration. But it > doesn't look like we need to solve them now. Instead, AFAICT, his main ask for this series is to switch to a domctl. > > It seems the conversation is simply stuck on waiting for Andrew to provide details on what would look like. Did we ping Andrew on > IRC? > > Unfortunately the minimum value > is 32 (in practice I have never seen less than 40 but the architecture > supports 32 as minimum). > > > > Actually, the info we are looking for is already exposed via > ID_AA64MMFR0_EL1. ID_AA64MMFR0_EL1 can be read from a virtual machine, > and Linux let userspace read it [1]. Regardless of this patch series, we > should make sure that Xen exposes the right mm64.pa_range value to guest > virtual machines. If that is done right, then you can just add support > for reading ID_AA64MMFR0_EL1 in libxl/libxc and then we don't need any > hypercall modifications changes. > > > From my understanding, from a VM PoV "pa_range" should represent the size of the guest physical address space. > > Today, it happens that every VM is using the same P2M size. However, I would rather not make such assumption in the userspace. > > > So, in theory we already have all the interfaces we need, but in > practice they don't work: unfortunaly both Xen and Linux mark > ID_AA64MMFR0_EL1 as FTR_HIDDEN in cpufeature.c so neither Linux from > Xen, not userspace from Linux can actually read the real value :-/ > They always read zero. > > (Also I think we have an issue today with p2m_restrict_ipa_bits not > updating the mm64.pa_range value. I think that it should be fixed.) > > > It looks like it. That should be handled in a separate patch though. > > > Bertrand, do you have any ideas in regards to ID_AA64MMFR0_EL1? > > If not, maybe we could just go with > #define MIN_GPADDR_BITS 32. > > > The toolstack would have to consider it as the "maximum" because it may not be safe to expose anything above. > > With 32, we are going to be limited in term of space we can find. > > We could potentially use 40 bits as a minimum. Although it still feels a bit of a hack to me given that the IOMMU may restrict it > further and the architecture can in theory support less. > > Overall, I still strongly prefer the hypercall approach. If a common one is difficult to achieve, then we can extend the domctl to > create a domain to provide the p2m_bits (in the same way as we deal for the GIC version) in an arch specific way. > > > To summarize: > If we don't query the hypervisor to provide gpaddr_bits we have two options: > - The safe option is to use minimum possible value which is 32 bits on Arm64. But, there would be of no practical use. > - The unsafe option is to use let's say "default" 40 bits and pray it will work in all cases on Arm64 (it is ok on Arm32). > > So we definitely need to query the hypervisor. As it turned out the sysctl approach is not welcome, in the long term we want to have this > information per domain. I have been absolutely OK with that valid ask since RFC, I just wanted to know what was the preferred way to do > this (new domctl, existing, etc)... > > I analyzed what Julien had suggested regarding pass gpaddr_bits via Arm's struct xen_arch_domainconfig (I assume, this should be an OUT > parameter) and I think it makes sense. Taking into the account that the feature freeze date is coming, I will wait a few days, and if there > are no objections I will send updated version (patch #3 also needs updating as it expects the gpaddr_bits to be in physinfo). No objections from me, I think Julien's suggestion is a good one.
On 05.10.21 00:11, Stefano Stabellini wrote: Hi Stefano > On Sat, 2 Oct 2021, Oleksandr wrote: >> On 02.10.21 10:35, Julien Grall wrote: >> >> Thank you for your comments! >> >> Hi >> >> On Sat, 2 Oct 2021, 01:24 Stefano Stabellini, <sstabellini@kernel.org> wrote: >> Bertrand, see comment on ID_AA64MMFR0_EL1 below, any ideas? >> >> >> On Fri, 1 Oct 2021, Oleksandr wrote: >> > On 01.10.21 10:50, Jan Beulich wrote: >> > > On 01.10.2021 01:00, Stefano Stabellini wrote: >> > > > On Thu, 30 Sep 2021, Oleksandr Tyshchenko wrote: >> > > > > From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> >> > > > > >> > > > > We need to pass info about maximum supported guest address >> > > > > space size to the toolstack on Arm in order to properly >> > > > > calculate the base and size of the extended region (safe range) >> > > > > for the guest. The extended region is unused address space which >> > > > > could be safely used by domain for foreign/grant mappings on Arm. >> > > > > The extended region itself will be handled by the subsequents >> > > > > patch. >> > > > > >> > > > > Use p2m_ipa_bits variable on Arm, the x86 equivalent is >> > > > > hap_paddr_bits. >> > > > > >> > > > > As we change the size of structure bump the interface version. >> > > > > >> > > > > Suggested-by: Julien Grall <jgrall@amazon.com> >> > > > > Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com> >> > > > > Reviewed-by: Michal Orzel <michal.orzel@arm.com> >> > > > Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> >> > > I have to admit that I'm a little puzzled to see these R-b-s when ... >> > > >> > > > > Please note, that review comments for the RFC version [1] haven't been >> > > > > addressed yet. >> > > > > It is not forgotten, some clarification is needed. It will be addressed >> > > > > for the next version. >> > > > > >> > > > > [1] >> > > > > https://lore.kernel.org/xen-devel/973f5344-aa10-3ad6-ff02-ad5f358ad279@citrix.com/ >> > > ... Oleksandr makes clear this patch isn't really ready yet. >> > >> > Unfortunately, this is true. I am still waiting for the clarification [1] >> >> Although I was aware of comments to older versions, this is actually the >> first version of this patch that I reviewed with any level of details; I >> didn't read previous comments very closely. I tried to find any bugs or >> problems with it and I couldn't see any, so I gave my reviewed-by. I >> should have clarified that was meant for the ARM part as I don't have a >> full understanding of the implications of using hap_paddr_bits on x86 >> for VM migration. >> >> >> >> But let me take this opportunity to say that although I think the >> hypercall is OK, I wish we didn't need this patch at all: it is >> problematic because it touches tools, x86 and ARM hypervisor code all >> together. It needs at least three acks/reviewed-by to get accepted: from >> an x86 maintainer, an arm maintainer and from a tools maintainer. I >> don't say this to criticize the patch acceptance process: this patch >> makes changes to an existing hypercall so it is only fair that it needs >> to go through extra levels of scrutiny. For the sake of simplicity and >> decoupling (reducing dependencies between patches and between >> components), I think it would be best to introduce an #define for the >> minimum value of gpaddr_bits and then move this patch at the end of the >> series; that way it becomes optional. >> >> >> It depends what you mean by optional. Yes we can add hack to avoid the hypercall... But the more scalable solution is the hypercall. >> >> I am slightly concerned that if we don't push for the hypercall now, then there will be no incentive to do it afterwards... >> >> So I went through Andrew's e-mail to understand what's the request. I understand that there are some problem with migration. But it >> doesn't look like we need to solve them now. Instead, AFAICT, his main ask for this series is to switch to a domctl. >> >> It seems the conversation is simply stuck on waiting for Andrew to provide details on what would look like. Did we ping Andrew on >> IRC? >> >> Unfortunately the minimum value >> is 32 (in practice I have never seen less than 40 but the architecture >> supports 32 as minimum). >> >> >> >> Actually, the info we are looking for is already exposed via >> ID_AA64MMFR0_EL1. ID_AA64MMFR0_EL1 can be read from a virtual machine, >> and Linux let userspace read it [1]. Regardless of this patch series, we >> should make sure that Xen exposes the right mm64.pa_range value to guest >> virtual machines. If that is done right, then you can just add support >> for reading ID_AA64MMFR0_EL1 in libxl/libxc and then we don't need any >> hypercall modifications changes. >> >> >> From my understanding, from a VM PoV "pa_range" should represent the size of the guest physical address space. >> >> Today, it happens that every VM is using the same P2M size. However, I would rather not make such assumption in the userspace. >> >> >> So, in theory we already have all the interfaces we need, but in >> practice they don't work: unfortunaly both Xen and Linux mark >> ID_AA64MMFR0_EL1 as FTR_HIDDEN in cpufeature.c so neither Linux from >> Xen, not userspace from Linux can actually read the real value :-/ >> They always read zero. >> >> (Also I think we have an issue today with p2m_restrict_ipa_bits not >> updating the mm64.pa_range value. I think that it should be fixed.) >> >> >> It looks like it. That should be handled in a separate patch though. >> >> >> Bertrand, do you have any ideas in regards to ID_AA64MMFR0_EL1? >> >> If not, maybe we could just go with >> #define MIN_GPADDR_BITS 32. >> >> >> The toolstack would have to consider it as the "maximum" because it may not be safe to expose anything above. >> >> With 32, we are going to be limited in term of space we can find. >> >> We could potentially use 40 bits as a minimum. Although it still feels a bit of a hack to me given that the IOMMU may restrict it >> further and the architecture can in theory support less. >> >> Overall, I still strongly prefer the hypercall approach. If a common one is difficult to achieve, then we can extend the domctl to >> create a domain to provide the p2m_bits (in the same way as we deal for the GIC version) in an arch specific way. >> >> >> To summarize: >> If we don't query the hypervisor to provide gpaddr_bits we have two options: >> - The safe option is to use minimum possible value which is 32 bits on Arm64. But, there would be of no practical use. >> - The unsafe option is to use let's say "default" 40 bits and pray it will work in all cases on Arm64 (it is ok on Arm32). >> >> So we definitely need to query the hypervisor. As it turned out the sysctl approach is not welcome, in the long term we want to have this >> information per domain. I have been absolutely OK with that valid ask since RFC, I just wanted to know what was the preferred way to do >> this (new domctl, existing, etc)... >> >> I analyzed what Julien had suggested regarding pass gpaddr_bits via Arm's struct xen_arch_domainconfig (I assume, this should be an OUT >> parameter) and I think it makes sense. Taking into the account that the feature freeze date is coming, I will wait a few days, and if there >> are no objections I will send updated version (patch #3 also needs updating as it expects the gpaddr_bits to be in physinfo). > > No objections from me, I think Julien's suggestion is a good one. ok, great. I have already implemented/tested that. Thank you.
diff --git a/tools/include/libxl.h b/tools/include/libxl.h index b9ba16d..63f9550 100644 --- a/tools/include/libxl.h +++ b/tools/include/libxl.h @@ -856,6 +856,13 @@ typedef struct libxl__ctx libxl_ctx; #define LIBXL_HAVE_PHYSINFO_MAX_POSSIBLE_MFN 1 /* + * LIBXL_HAVE_PHYSINFO_GPADDR_BITS + * + * If this is defined, libxl_physinfo has a "gpaddr_bits" field. + */ +#define LIBXL_HAVE_PHYSINFO_GPADDR_BITS 1 + +/* * LIBXL_HAVE_DOMINFO_OUTSTANDING_MEMKB 1 * * If this is defined, libxl_dominfo will contain a MemKB type field called diff --git a/tools/libs/light/libxl.c b/tools/libs/light/libxl.c index 204eb0b..c86624f 100644 --- a/tools/libs/light/libxl.c +++ b/tools/libs/light/libxl.c @@ -405,6 +405,8 @@ int libxl_get_physinfo(libxl_ctx *ctx, libxl_physinfo *physinfo) physinfo->cap_vmtrace = !!(xcphysinfo.capabilities & XEN_SYSCTL_PHYSCAP_vmtrace); + physinfo->gpaddr_bits = xcphysinfo.gpaddr_bits; + GC_FREE; return 0; } diff --git a/tools/libs/light/libxl_types.idl b/tools/libs/light/libxl_types.idl index 3f9fff6..bf27fe6 100644 --- a/tools/libs/light/libxl_types.idl +++ b/tools/libs/light/libxl_types.idl @@ -1061,6 +1061,8 @@ libxl_physinfo = Struct("physinfo", [ ("cap_shadow", bool), ("cap_iommu_hap_pt_share", bool), ("cap_vmtrace", bool), + + ("gpaddr_bits", uint8), ], dir=DIR_OUT) libxl_connectorinfo = Struct("connectorinfo", [ diff --git a/tools/xl/xl_info.c b/tools/xl/xl_info.c index 8383e4a..dfbbeaa 100644 --- a/tools/xl/xl_info.c +++ b/tools/xl/xl_info.c @@ -221,6 +221,8 @@ static void output_physinfo(void) info.cap_vmtrace ? " vmtrace" : "" ); + maybe_printf("gpaddr_bits : %d\n", info.gpaddr_bits); + vinfo = libxl_get_version_info(ctx); if (vinfo) { i = (1 << 20) / vinfo->pagesize; diff --git a/xen/arch/arm/sysctl.c b/xen/arch/arm/sysctl.c index f87944e..91dca4f 100644 --- a/xen/arch/arm/sysctl.c +++ b/xen/arch/arm/sysctl.c @@ -15,6 +15,8 @@ void arch_do_physinfo(struct xen_sysctl_physinfo *pi) { pi->capabilities |= XEN_SYSCTL_PHYSCAP_hvm | XEN_SYSCTL_PHYSCAP_hap; + + pi->gpaddr_bits = p2m_ipa_bits; } long arch_do_sysctl(struct xen_sysctl *sysctl, diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c index aff52a1..7b14865 100644 --- a/xen/arch/x86/sysctl.c +++ b/xen/arch/x86/sysctl.c @@ -135,6 +135,8 @@ void arch_do_physinfo(struct xen_sysctl_physinfo *pi) pi->capabilities |= XEN_SYSCTL_PHYSCAP_hap; if ( IS_ENABLED(CONFIG_SHADOW_PAGING) ) pi->capabilities |= XEN_SYSCTL_PHYSCAP_shadow; + + pi->gpaddr_bits = hap_paddr_bits; } long arch_do_sysctl( diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h index 039ccf8..0450a78 100644 --- a/xen/include/public/sysctl.h +++ b/xen/include/public/sysctl.h @@ -35,7 +35,7 @@ #include "domctl.h" #include "physdev.h" -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000013 +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000014 /* * Read console content from Xen buffer ring. @@ -120,6 +120,8 @@ struct xen_sysctl_physinfo { uint64_aligned_t outstanding_pages; uint64_aligned_t max_mfn; /* Largest possible MFN on this host */ uint32_t hw_cap[8]; + uint8_t gpaddr_bits; + uint8_t pad[7]; }; /*