Message ID | 20240524225522.2878481-2-stefano.stabellini@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Guest XenStore page allocation for 1:1 Dom0less domUs | expand |
Hi Stefano, You have sent a new version. But you didn't reply to Juergen's comment. While he is not the maintainer of the Arm code, he is the Xenstore maintainer. Even if I agree with this approach I don't feel this is right to even ack this patch without pending questions addressed. In this case, we are changing yet another time the sequence for initializing Xenstore dom0less domain. I would rather not want to have to change a 4th time... I don't think it is a big problem if this is not merged for the code freeze as this is technically a bug fix. Cheers, On 24/05/2024 23:55, Stefano Stabellini wrote: > From: Henry Wang <xin.wang2@amd.com> > > There are use cases (for example using the PV driver) in Dom0less > setup that require Dom0less DomUs start immediately with Dom0, but > initialize XenStore later after Dom0's successful boot and call to > the init-dom0less application. > > An error message can seen from the init-dom0less application on > 1:1 direct-mapped domains: > ``` > Allocating magic pages > memory.c:238:d0v0 mfn 0x39000 doesn't belong to d1 > Error on alloc magic pages > ``` > > The "magic page" is a terminology used in the toolstack as reserved > pages for the VM to have access to virtual platform capabilities. > Currently the magic pages for Dom0less DomUs are populated by the > init-dom0less app through populate_physmap(), and populate_physmap() > automatically assumes gfn == mfn for 1:1 direct mapped domains. This > cannot be true for the magic pages that are allocated later from the > init-dom0less application executed in Dom0. For domain using statically > allocated memory but not 1:1 direct-mapped, similar error "failed to > retrieve a reserved page" can be seen as the reserved memory list is > empty at that time. > > Since for init-dom0less, the magic page region is only for XenStore. > To solve above issue, this commit allocates the XenStore page for > Dom0less DomUs at the domain construction time. 
The PFN will be > noted and communicated to the init-dom0less application executed > from Dom0. To keep the XenStore late init protocol, set the connection > status to XENSTORE_RECONNECT. > > Reported-by: Alec Kwapis <alec.kwapis@medtronic.com> > Suggested-by: Daniel P. Smith <dpsmith@apertussolutions.com> > Signed-off-by: Henry Wang <xin.wang2@amd.com> > Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com> > --- > xen/arch/arm/dom0less-build.c | 55 ++++++++++++++++++++++++++++++++++- > 1 file changed, 54 insertions(+), 1 deletion(-) > > diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c > index 74f053c242..2963ecc0b3 100644 > --- a/xen/arch/arm/dom0less-build.c > +++ b/xen/arch/arm/dom0less-build.c > @@ -1,5 +1,6 @@ > /* SPDX-License-Identifier: GPL-2.0-only */ > #include <xen/device_tree.h> > +#include <xen/domain_page.h> > #include <xen/err.h> > #include <xen/event.h> > #include <xen/grant_table.h> > @@ -10,6 +11,8 @@ > #include <xen/sizes.h> > #include <xen/vmap.h> > > +#include <public/io/xs_wire.h> > + > #include <asm/arm64/sve.h> > #include <asm/dom0less-build.h> > #include <asm/domain_build.h> > @@ -739,6 +742,53 @@ static int __init alloc_xenstore_evtchn(struct domain *d) > return 0; > } > > +#define XENSTORE_PFN_OFFSET 1 > +static int __init alloc_xenstore_page(struct domain *d) > +{ > + struct page_info *xenstore_pg; > + struct xenstore_domain_interface *interface; > + mfn_t mfn; > + gfn_t gfn; > + int rc; > + > + if ( (UINT_MAX - d->max_pages) < 1 ) > + { > + printk(XENLOG_ERR "%pd: Over-allocation for d->max_pages by 1 page.\n", > + d); > + return -EINVAL; > + } > + d->max_pages += 1; > + xenstore_pg = alloc_domheap_page(d, MEMF_bits(32)); > + if ( xenstore_pg == NULL && is_64bit_domain(d) ) > + xenstore_pg = alloc_domheap_page(d, 0); > + if ( xenstore_pg == NULL ) > + return -ENOMEM; > + > + mfn = page_to_mfn(xenstore_pg); > + if ( !mfn_x(mfn) ) > + return -ENOMEM; > + > + if ( !is_domain_direct_mapped(d) ) > + gfn = 
gaddr_to_gfn(GUEST_MAGIC_BASE + > + (XENSTORE_PFN_OFFSET << PAGE_SHIFT)); > + else > + gfn = gaddr_to_gfn(mfn_to_maddr(mfn)); > + > + rc = guest_physmap_add_page(d, gfn, mfn, 0); > + if ( rc ) > + { > + free_domheap_page(xenstore_pg); > + return rc; > + } > + > + d->arch.hvm.params[HVM_PARAM_STORE_PFN] = gfn_x(gfn); > + interface = map_domain_page(mfn); > + interface->connection = XENSTORE_RECONNECT; > + unmap_domain_page(interface); > + > + return 0; > +} > + > static int __init construct_domU(struct domain *d, > const struct dt_device_node *node) > { > @@ -839,7 +889,10 @@ static int __init construct_domU(struct domain *d, > rc = alloc_xenstore_evtchn(d); > if ( rc < 0 ) > return rc; > - d->arch.hvm.params[HVM_PARAM_STORE_PFN] = ~0ULL; > + > + rc = alloc_xenstore_page(d); > + if ( rc < 0 ) > + return rc; > } > > return rc;
On Sat, 2024-05-25 at 00:06 +0100, Julien Grall wrote: > Hi Stefano, > > You have sent a new version. But you didn't reply to Juergen's > comment. > > While he is not the maintainer of the Arm code, he is the Xenstore > maintainer. Even if I agree with this approach I don't feel this is > right to even ack this patch without pending questions addressed. > > In this case, we are changing yet another time the sequence for > initializing Xenstore dom0less domain. I would rather not want to > have > to change a 4th time... > > I don't think it is a big problem if this is not merged for the code > freeze as this is technically a bug fix. Agree, this is not a problem as it is still looks to me as a bug fix. ~ Oleksii > > Cheers, > > On 24/05/2024 23:55, Stefano Stabellini wrote: > > From: Henry Wang <xin.wang2@amd.com> > > > > There are use cases (for example using the PV driver) in Dom0less > > setup that require Dom0less DomUs start immediately with Dom0, but > > initialize XenStore later after Dom0's successful boot and call to > > the init-dom0less application. > > > > An error message can seen from the init-dom0less application on > > 1:1 direct-mapped domains: > > ``` > > Allocating magic pages > > memory.c:238:d0v0 mfn 0x39000 doesn't belong to d1 > > Error on alloc magic pages > > ``` > > > > The "magic page" is a terminology used in the toolstack as reserved > > pages for the VM to have access to virtual platform capabilities. > > Currently the magic pages for Dom0less DomUs are populated by the > > init-dom0less app through populate_physmap(), and > > populate_physmap() > > automatically assumes gfn == mfn for 1:1 direct mapped domains. > > This > > cannot be true for the magic pages that are allocated later from > > the > > init-dom0less application executed in Dom0. 
For domain using > > statically > > allocated memory but not 1:1 direct-mapped, similar error "failed > > to > > retrieve a reserved page" can be seen as the reserved memory list > > is > > empty at that time. > > > > Since for init-dom0less, the magic page region is only for > > XenStore. > > To solve above issue, this commit allocates the XenStore page for > > Dom0less DomUs at the domain construction time. The PFN will be > > noted and communicated to the init-dom0less application executed > > from Dom0. To keep the XenStore late init protocol, set the > > connection > > status to XENSTORE_RECONNECT. > > > > Reported-by: Alec Kwapis <alec.kwapis@medtronic.com> > > Suggested-by: Daniel P. Smith <dpsmith@apertussolutions.com> > > Signed-off-by: Henry Wang <xin.wang2@amd.com> > > Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com> > > --- > > xen/arch/arm/dom0less-build.c | 55 > > ++++++++++++++++++++++++++++++++++- > > 1 file changed, 54 insertions(+), 1 deletion(-) > > > > diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less- > > build.c > > index 74f053c242..2963ecc0b3 100644 > > --- a/xen/arch/arm/dom0less-build.c > > +++ b/xen/arch/arm/dom0less-build.c > > @@ -1,5 +1,6 @@ > > /* SPDX-License-Identifier: GPL-2.0-only */ > > #include <xen/device_tree.h> > > +#include <xen/domain_page.h> > > #include <xen/err.h> > > #include <xen/event.h> > > #include <xen/grant_table.h> > > @@ -10,6 +11,8 @@ > > #include <xen/sizes.h> > > #include <xen/vmap.h> > > > > +#include <public/io/xs_wire.h> > > + > > #include <asm/arm64/sve.h> > > #include <asm/dom0less-build.h> > > #include <asm/domain_build.h> > > @@ -739,6 +742,53 @@ static int __init alloc_xenstore_evtchn(struct > > domain *d) > > return 0; > > } > > > > +#define XENSTORE_PFN_OFFSET 1 > > +static int __init alloc_xenstore_page(struct domain *d) > > +{ > > + struct page_info *xenstore_pg; > > + struct xenstore_domain_interface *interface; > > + mfn_t mfn; > > + gfn_t gfn; > > + int rc; > > + 
> > + if ( (UINT_MAX - d->max_pages) < 1 ) > > + { > > + printk(XENLOG_ERR "%pd: Over-allocation for d->max_pages > > by 1 page.\n", > > + d); > > + return -EINVAL; > > + } > > + d->max_pages += 1; > > + xenstore_pg = alloc_domheap_page(d, MEMF_bits(32)); > > + if ( xenstore_pg == NULL && is_64bit_domain(d) ) > > + xenstore_pg = alloc_domheap_page(d, 0); > > + if ( xenstore_pg == NULL ) > > + return -ENOMEM; > > + > > + mfn = page_to_mfn(xenstore_pg); > > + if ( !mfn_x(mfn) ) > > + return -ENOMEM; > > + > > + if ( !is_domain_direct_mapped(d) ) > > + gfn = gaddr_to_gfn(GUEST_MAGIC_BASE + > > + (XENSTORE_PFN_OFFSET << PAGE_SHIFT)); > > + else > > + gfn = gaddr_to_gfn(mfn_to_maddr(mfn)); > > + > > + rc = guest_physmap_add_page(d, gfn, mfn, 0); > > + if ( rc ) > > + { > > + free_domheap_page(xenstore_pg); > > + return rc; > > + } > > + > > + d->arch.hvm.params[HVM_PARAM_STORE_PFN] = gfn_x(gfn); > > + interface = map_domain_page(mfn); > > + interface->connection = XENSTORE_RECONNECT; > > + unmap_domain_page(interface); > > + > > + return 0; > > +} > > + > > static int __init construct_domU(struct domain *d, > > const struct dt_device_node > > *node) > > { > > @@ -839,7 +889,10 @@ static int __init construct_domU(struct domain > > *d, > > rc = alloc_xenstore_evtchn(d); > > if ( rc < 0 ) > > return rc; > > - d->arch.hvm.params[HVM_PARAM_STORE_PFN] = ~0ULL; > > + > > + rc = alloc_xenstore_page(d); > > + if ( rc < 0 ) > > + return rc; > > } > > > > return rc; >
Hi Stefano, Wasn't this patch supposed to be folded with patch no.3 to avoid breaking the bisection? On 25/05/2024 00:55, Stefano Stabellini wrote: > From: Henry Wang <xin.wang2@amd.com> > > There are use cases (for example using the PV driver) in Dom0less > setup that require Dom0less DomUs start immediately with Dom0, but > initialize XenStore later after Dom0's successful boot and call to > the init-dom0less application. > > An error message can seen from the init-dom0less application on > 1:1 direct-mapped domains: > ``` > Allocating magic pages > memory.c:238:d0v0 mfn 0x39000 doesn't belong to d1 > Error on alloc magic pages > ``` > > The "magic page" is a terminology used in the toolstack as reserved > pages for the VM to have access to virtual platform capabilities. > Currently the magic pages for Dom0less DomUs are populated by the > init-dom0less app through populate_physmap(), and populate_physmap() > automatically assumes gfn == mfn for 1:1 direct mapped domains. This > cannot be true for the magic pages that are allocated later from the > init-dom0less application executed in Dom0. For domain using statically > allocated memory but not 1:1 direct-mapped, similar error "failed to > retrieve a reserved page" can be seen as the reserved memory list is > empty at that time. > > Since for init-dom0less, the magic page region is only for XenStore. > To solve above issue, this commit allocates the XenStore page for > Dom0less DomUs at the domain construction time. The PFN will be > noted and communicated to the init-dom0less application executed > from Dom0. To keep the XenStore late init protocol, set the connection > status to XENSTORE_RECONNECT. > > Reported-by: Alec Kwapis <alec.kwapis@medtronic.com> > Suggested-by: Daniel P. 
Smith <dpsmith@apertussolutions.com> > Signed-off-by: Henry Wang <xin.wang2@amd.com> > Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com> > --- > xen/arch/arm/dom0less-build.c | 55 ++++++++++++++++++++++++++++++++++- > 1 file changed, 54 insertions(+), 1 deletion(-) > > diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c > index 74f053c242..2963ecc0b3 100644 > --- a/xen/arch/arm/dom0less-build.c > +++ b/xen/arch/arm/dom0less-build.c > @@ -1,5 +1,6 @@ > /* SPDX-License-Identifier: GPL-2.0-only */ > #include <xen/device_tree.h> > +#include <xen/domain_page.h> > #include <xen/err.h> > #include <xen/event.h> > #include <xen/grant_table.h> > @@ -10,6 +11,8 @@ > #include <xen/sizes.h> > #include <xen/vmap.h> > > +#include <public/io/xs_wire.h> > + > #include <asm/arm64/sve.h> > #include <asm/dom0less-build.h> > #include <asm/domain_build.h> > @@ -739,6 +742,53 @@ static int __init alloc_xenstore_evtchn(struct domain *d) > return 0; > } > > +#define XENSTORE_PFN_OFFSET 1 > +static int __init alloc_xenstore_page(struct domain *d) > +{ > + struct page_info *xenstore_pg; > + struct xenstore_domain_interface *interface; > + mfn_t mfn; > + gfn_t gfn; > + int rc; > + > + if ( (UINT_MAX - d->max_pages) < 1 ) > + { > + printk(XENLOG_ERR "%pd: Over-allocation for d->max_pages by 1 page.\n", > + d); > + return -EINVAL; > + } > + d->max_pages += 1; NIT: empty line here for readability > + xenstore_pg = alloc_domheap_page(d, MEMF_bits(32)); > + if ( xenstore_pg == NULL && is_64bit_domain(d) ) > + xenstore_pg = alloc_domheap_page(d, 0); > + if ( xenstore_pg == NULL ) > + return -ENOMEM; > + > + mfn = page_to_mfn(xenstore_pg); > + if ( !mfn_x(mfn) ) > + return -ENOMEM; I think you should free the allocated page here Other than that: Reviewed-by: Michal Orzel <michal.orzel@amd.com> ~Michal
On Mon, 27 May 2024, Oleksii K. wrote: > > I don't think it is a big problem if this is not merged for the code > > freeze as this is technically a bug fix. > > Agree, this is not a problem as it is still looks to me as a bug fix. > > ~ Oleksii Hi Oleksii, this version of the series was already all acked with minor NITs and you gave the go-ahead for this release as it is a bug fix. Due to 2 weeks of travels I only managed to commit the series now, sorry for the delay.
Hi Stefano, On 19/06/2024 01:37, Stefano Stabellini wrote: > On Mon, 27 May 2024, Oleksii K. wrote: >>> I don't think it is a big problem if this is not merged for the code >>> freeze as this is technically a bug fix. >> >> Agree, this is not a problem as it is still looks to me as a bug fix. >> >> ~ Oleksii > > Hi Oleksii, this version of the series was already all acked with minor > NITs and you gave the go-ahead for this release as it is a bug fix. Due > to 2 weeks of travels I only managed to commit the series now, sorry for > the delay. Unfortunately this series is breaking gitlab CI [1]. I understand the go ahead was given two weeks ago, but as we are now past the code freeze, I feel we should have had a pros/cons e-mail to assess whether it was worth the risk to merge it. Now to the issues, I vaguely recall this series didn't require any change in Linux. Did I miss anything? If so, why are we breaking Linux? For now, I will revert this series. Once we root cause the issue, we can re-assess whether the fix should be apply for 4.19. Cheers, [1] https://gitlab.com/xen-project/xen/-/pipelines/1338067978
On Wed, 18 Jun 2024, Julien Grall wrote: > Hi Stefano, > > On 19/06/2024 01:37, Stefano Stabellini wrote: > > On Mon, 27 May 2024, Oleksii K. wrote: > > > > I don't think it is a big problem if this is not merged for the code > > > > freeze as this is technically a bug fix. > > > > > > Agree, this is not a problem as it is still looks to me as a bug fix. > > > > > > ~ Oleksii > > > > Hi Oleksii, this version of the series was already all acked with minor > > NITs and you gave the go-ahead for this release as it is a bug fix. Due > > to 2 weeks of travels I only managed to commit the series now, sorry for > > the delay. > > Unfortunately this series is breaking gitlab CI [1]. I understand the go ahead > was given two weeks ago, but as we are now past the code freeze, I feel we > should have had a pros/cons e-mail to assess whether it was worth the risk to > merge it. > > Now to the issues, I vaguely recall this series didn't require any change in > Linux. Did I miss anything? If so, why are we breaking Linux? > > For now, I will revert this series. Once we root cause the issue, we can > re-assess whether the fix should be apply for 4.19. Hi Julien, Thanks for acting quickly on this. Reverting the series was the right thing to do, and I apologize for not waiting for the gitlab-ci results before committing it. Like you, my understanding was also that there were no Linux changes required. I think this will need a deeper investigation that we can do after the 4.19 release. Thanks again for unblocking the tree quickly. - Stefano
diff --git a/xen/arch/arm/dom0less-build.c b/xen/arch/arm/dom0less-build.c index 74f053c242..2963ecc0b3 100644 --- a/xen/arch/arm/dom0less-build.c +++ b/xen/arch/arm/dom0less-build.c @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include <xen/device_tree.h> +#include <xen/domain_page.h> #include <xen/err.h> #include <xen/event.h> #include <xen/grant_table.h> @@ -10,6 +11,8 @@ #include <xen/sizes.h> #include <xen/vmap.h> +#include <public/io/xs_wire.h> + #include <asm/arm64/sve.h> #include <asm/dom0less-build.h> #include <asm/domain_build.h> @@ -739,6 +742,53 @@ static int __init alloc_xenstore_evtchn(struct domain *d) return 0; } +#define XENSTORE_PFN_OFFSET 1 +static int __init alloc_xenstore_page(struct domain *d) +{ + struct page_info *xenstore_pg; + struct xenstore_domain_interface *interface; + mfn_t mfn; + gfn_t gfn; + int rc; + + if ( (UINT_MAX - d->max_pages) < 1 ) + { + printk(XENLOG_ERR "%pd: Over-allocation for d->max_pages by 1 page.\n", + d); + return -EINVAL; + } + d->max_pages += 1; + xenstore_pg = alloc_domheap_page(d, MEMF_bits(32)); + if ( xenstore_pg == NULL && is_64bit_domain(d) ) + xenstore_pg = alloc_domheap_page(d, 0); + if ( xenstore_pg == NULL ) + return -ENOMEM; + + mfn = page_to_mfn(xenstore_pg); + if ( !mfn_x(mfn) ) + return -ENOMEM; + + if ( !is_domain_direct_mapped(d) ) + gfn = gaddr_to_gfn(GUEST_MAGIC_BASE + + (XENSTORE_PFN_OFFSET << PAGE_SHIFT)); + else + gfn = gaddr_to_gfn(mfn_to_maddr(mfn)); + + rc = guest_physmap_add_page(d, gfn, mfn, 0); + if ( rc ) + { + free_domheap_page(xenstore_pg); + return rc; + } + + d->arch.hvm.params[HVM_PARAM_STORE_PFN] = gfn_x(gfn); + interface = map_domain_page(mfn); + interface->connection = XENSTORE_RECONNECT; + unmap_domain_page(interface); + + return 0; +} + static int __init construct_domU(struct domain *d, const struct dt_device_node *node) { @@ -839,7 +889,10 @@ static int __init construct_domU(struct domain *d, rc = alloc_xenstore_evtchn(d); if ( rc < 0 ) return rc; - 
d->arch.hvm.params[HVM_PARAM_STORE_PFN] = ~0ULL; + + rc = alloc_xenstore_page(d); + if ( rc < 0 ) + return rc; } return rc;