diff mbox series

[v3,3/5] xen/arm: configure dom0less domain for enabling xenstore after boot

Message ID 20220128213307.2822078-3-sstabellini@kernel.org (mailing list archive)
State Superseded
Headers show
Series dom0less PV drivers | expand

Commit Message

Stefano Stabellini Jan. 28, 2022, 9:33 p.m. UTC
From: Luca Miccio <lucmiccio@gmail.com>

If "xen,enhanced" is enabled, then add to dom0less domains:

- the hypervisor node in device tree
- the xenstore event channel

The xenstore event channel is also used for the first notification to
let the guest know that xenstore has become available.

Signed-off-by: Luca Miccio <lucmiccio@gmail.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@xilinx.com>
Reviewed-by: Bertrand Marquis <bertrand.marquis@arm.com>
CC: Julien Grall <julien@xen.org>
CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
CC: Bertrand Marquis <bertrand.marquis@arm.com>

---
Changes in v3:
- use evtchn_alloc_unbound

Changes in v2:
- set HVM_PARAM_STORE_PFN to ~0ULL at domain creation
- in alloc_xenstore_evtchn do not call _evtchn_alloc_unbound
---
 xen/arch/arm/domain_build.c | 41 +++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

Comments

Julien Grall Jan. 29, 2022, 6:13 p.m. UTC | #1
Hi Stefano,

On 28/01/2022 21:33, Stefano Stabellini wrote:
> From: Luca Miccio <lucmiccio@gmail.com>
> 
> If "xen,enhanced" is enabled, then add to dom0less domains:
> 
> - the hypervisor node in device tree
> - the xenstore event channel
> 
> The xenstore event channel is also used for the first notification to
> let the guest know that xenstore has become available.
> 
> Signed-off-by: Luca Miccio <lucmiccio@gmail.com>
> Signed-off-by: Stefano Stabellini <stefano.stabellini@xilinx.com>
> Reviewed-by: Bertrand Marquis <bertrand.marquis@arm.com>
> CC: Julien Grall <julien@xen.org>
> CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
> CC: Bertrand Marquis <bertrand.marquis@arm.com>
> 
> ---
> Changes in v3:
> - use evtchn_alloc_unbound
> 
> Changes in v2:
> - set HVM_PARAM_STORE_PFN to ~0ULL at domain creation
> - in alloc_xenstore_evtchn do not call _evtchn_alloc_unbound
> ---
>   xen/arch/arm/domain_build.c | 41 +++++++++++++++++++++++++++++++++++++
>   1 file changed, 41 insertions(+)
> 
> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> index 9144d6c0b6..8e030a7f05 100644
> --- a/xen/arch/arm/domain_build.c
> +++ b/xen/arch/arm/domain_build.c
> @@ -27,6 +27,7 @@
>   #include <asm/setup.h>
>   #include <asm/cpufeature.h>
>   #include <asm/domain_build.h>
> +#include <xen/event.h>
>   
>   #include <xen/irq.h>
>   #include <xen/grant_table.h>
> @@ -2619,6 +2620,8 @@ static int __init prepare_dtb_domU(struct domain *d, struct kernel_info *kinfo)
>       int ret;
>   
>       kinfo->phandle_gic = GUEST_PHANDLE_GIC;
> +    kinfo->gnttab_start = GUEST_GNTTAB_BASE;
> +    kinfo->gnttab_size = GUEST_GNTTAB_SIZE;
>   
>       addrcells = GUEST_ROOT_ADDRESS_CELLS;
>       sizecells = GUEST_ROOT_SIZE_CELLS;
> @@ -2693,6 +2696,13 @@ static int __init prepare_dtb_domU(struct domain *d, struct kernel_info *kinfo)
>               goto err;
>       }
>   
> +    if ( kinfo->dom0less_enhanced )
> +    {
> +        ret = make_hypervisor_node(d, kinfo, addrcells, sizecells);

Looking at the code, I think the extended regions will not work properly 
because we are looking at the host memory layout. In the case of domU, 
we want to use the guest layout. Please have a look how it was done in 
libxl.

> +        if ( ret )
> +            goto err;
> +    }
> +
>       ret = fdt_end_node(kinfo->fdt);
>       if ( ret < 0 )
>           goto err;
> @@ -2959,6 +2969,25 @@ static int __init construct_domain(struct domain *d, struct kernel_info *kinfo)
>       return 0;
>   }
>   
> +static int __init alloc_xenstore_evtchn(struct domain *d)
> +{
> +    evtchn_alloc_unbound_t alloc;
> +    int rc;
> +
> +    alloc.dom = d->domain_id;
> +    alloc.remote_dom = hardware_domain->domain_id;

The first thing evtchn_alloc_unbound() will do is looking up the domain. 
This seems a bit pointless given that we have the domain in hand. 
Shouldn't we extend evtchn_alloc_unbound() to pass the domain?

> +    rc = evtchn_alloc_unbound(&alloc, true);
> +    if ( rc )
> +    {
> +        printk("Failed allocating event channel for domain\n");
> +        return rc;
> +    }
> +
> +    d->arch.hvm.params[HVM_PARAM_STORE_EVTCHN] = alloc.port;
> +
> +    return 0;
> +}
> +
>   static int __init construct_domU(struct domain *d,
>                                    const struct dt_device_node *node)
>   {
> @@ -3014,7 +3043,19 @@ static int __init construct_domU(struct domain *d,
>           return rc;
>   
>       if ( kinfo.vpl011 )
> +    {
>           rc = domain_vpl011_init(d, NULL);
> +        if ( rc < 0 )
> +            return rc;
> +    }
> +
> +    if ( kinfo.dom0less_enhanced )
> +    {
> +        rc = alloc_xenstore_evtchn(d);
> +        if ( rc < 0 )
> +            return rc;
> +        d->arch.hvm.params[HVM_PARAM_STORE_PFN] = ~0ULL;

I think it would be easy to allocate the page right now. So what prevent 
us to do it right now?

Cheers,
Stefano Stabellini March 23, 2022, 1:18 a.m. UTC | #2
On Sat, 29 Jan 2022, Julien Grall wrote:
> On 28/01/2022 21:33, Stefano Stabellini wrote:
> > From: Luca Miccio <lucmiccio@gmail.com>
> > 
> > If "xen,enhanced" is enabled, then add to dom0less domains:
> > 
> > - the hypervisor node in device tree
> > - the xenstore event channel
> > 
> > The xenstore event channel is also used for the first notification to
> > let the guest know that xenstore has become available.
> > 
> > Signed-off-by: Luca Miccio <lucmiccio@gmail.com>
> > Signed-off-by: Stefano Stabellini <stefano.stabellini@xilinx.com>
> > Reviewed-by: Bertrand Marquis <bertrand.marquis@arm.com>
> > CC: Julien Grall <julien@xen.org>
> > CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
> > CC: Bertrand Marquis <bertrand.marquis@arm.com>
> > 
> > ---
> > Changes in v3:
> > - use evtchn_alloc_unbound
> > 
> > Changes in v2:
> > - set HVM_PARAM_STORE_PFN to ~0ULL at domain creation
> > - in alloc_xenstore_evtchn do not call _evtchn_alloc_unbound
> > ---
> >   xen/arch/arm/domain_build.c | 41 +++++++++++++++++++++++++++++++++++++
> >   1 file changed, 41 insertions(+)
> > 
> > diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> > index 9144d6c0b6..8e030a7f05 100644
> > --- a/xen/arch/arm/domain_build.c
> > +++ b/xen/arch/arm/domain_build.c
> > @@ -27,6 +27,7 @@
> >   #include <asm/setup.h>
> >   #include <asm/cpufeature.h>
> >   #include <asm/domain_build.h>
> > +#include <xen/event.h>
> >     #include <xen/irq.h>
> >   #include <xen/grant_table.h>
> > @@ -2619,6 +2620,8 @@ static int __init prepare_dtb_domU(struct domain *d,
> > struct kernel_info *kinfo)
> >       int ret;
> >         kinfo->phandle_gic = GUEST_PHANDLE_GIC;
> > +    kinfo->gnttab_start = GUEST_GNTTAB_BASE;
> > +    kinfo->gnttab_size = GUEST_GNTTAB_SIZE;
> >         addrcells = GUEST_ROOT_ADDRESS_CELLS;
> >       sizecells = GUEST_ROOT_SIZE_CELLS;
> > @@ -2693,6 +2696,13 @@ static int __init prepare_dtb_domU(struct domain *d,
> > struct kernel_info *kinfo)
> >               goto err;
> >       }
> >   +    if ( kinfo->dom0less_enhanced )
> > +    {
> > +        ret = make_hypervisor_node(d, kinfo, addrcells, sizecells);
> 
> Looking at the code, I think the extended regions will not work properly
> because we are looking at the host memory layout. In the case of domU, we want
> to use the guest layout. Please have a look how it was done in libxl.

Yeah you are right, I'll do that.


> > +        if ( ret )
> > +            goto err;
> > +    }
> > +
> >       ret = fdt_end_node(kinfo->fdt);
> >       if ( ret < 0 )
> >           goto err;
> > @@ -2959,6 +2969,25 @@ static int __init construct_domain(struct domain *d,
> > struct kernel_info *kinfo)
> >       return 0;
> >   }
> >   +static int __init alloc_xenstore_evtchn(struct domain *d)
> > +{
> > +    evtchn_alloc_unbound_t alloc;
> > +    int rc;
> > +
> > +    alloc.dom = d->domain_id;
> > +    alloc.remote_dom = hardware_domain->domain_id;
> 
> The first thing evtchn_alloc_unbound() will do is looking up the domain. This
> seems a bit pointless given that we have the domain in hand. Shouldn't we
> extend evtchn_alloc_unbound() to pass the domain?

That's a good point, but I actually think it is better to go back to
[2]. The evtchn_alloc_unbound discussion is still ongoing but I'll keep
this suggestion in mind.

[2] https://marc.info/?l=xen-devel&m=164203543615114


> > +    rc = evtchn_alloc_unbound(&alloc, true);
> > +    if ( rc )
> > +    {
> > +        printk("Failed allocating event channel for domain\n");
> > +        return rc;
> > +    }
> > +
> > +    d->arch.hvm.params[HVM_PARAM_STORE_EVTCHN] = alloc.port;
> > +
> > +    return 0;
> > +}
> > +
> >   static int __init construct_domU(struct domain *d,
> >                                    const struct dt_device_node *node)
> >   {
> > @@ -3014,7 +3043,19 @@ static int __init construct_domU(struct domain *d,
> >           return rc;
> >         if ( kinfo.vpl011 )
> > +    {
> >           rc = domain_vpl011_init(d, NULL);
> > +        if ( rc < 0 )
> > +            return rc;
> > +    }
> > +
> > +    if ( kinfo.dom0less_enhanced )
> > +    {
> > +        rc = alloc_xenstore_evtchn(d);
> > +        if ( rc < 0 )
> > +            return rc;
> > +        d->arch.hvm.params[HVM_PARAM_STORE_PFN] = ~0ULL;
> 
> I think it would be easy to allocate the page right now. So what prevent us to
> do it right now?

Because (as you noted as a comment to the following patch) as soon as
d->arch.hvm.params[HVM_PARAM_STORE_PFN] is set the guest can continue
with the initialization and will expect the right data to be set on the
page. In other words: it is not enough to have the pfn allocated, we
also need xenstore to initialize it. At that point, it is better to do
both later from init-dom0less.c.
Julien Grall March 25, 2022, 6:46 p.m. UTC | #3
Hi Stefano,

On 23/03/2022 01:18, Stefano Stabellini wrote:
> On Sat, 29 Jan 2022, Julien Grall wrote:
>> On 28/01/2022 21:33, Stefano Stabellini wrote:
>>> +    rc = evtchn_alloc_unbound(&alloc, true);
>>> +    if ( rc )
>>> +    {
>>> +        printk("Failed allocating event channel for domain\n");
>>> +        return rc;
>>> +    }
>>> +
>>> +    d->arch.hvm.params[HVM_PARAM_STORE_EVTCHN] = alloc.port;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>    static int __init construct_domU(struct domain *d,
>>>                                     const struct dt_device_node *node)
>>>    {
>>> @@ -3014,7 +3043,19 @@ static int __init construct_domU(struct domain *d,
>>>            return rc;
>>>          if ( kinfo.vpl011 )
>>> +    {
>>>            rc = domain_vpl011_init(d, NULL);
>>> +        if ( rc < 0 )
>>> +            return rc;
>>> +    }
>>> +
>>> +    if ( kinfo.dom0less_enhanced )
>>> +    {
>>> +        rc = alloc_xenstore_evtchn(d);
>>> +        if ( rc < 0 )
>>> +            return rc;
>>> +        d->arch.hvm.params[HVM_PARAM_STORE_PFN] = ~0ULL;
>>
>> I think it would be easy to allocate the page right now. So what prevent us to
>> do it right now?
> 
> Because (as you noted as a comment to the following patch) as soon as
> d->arch.hvm.params[HVM_PARAM_STORE_PFN] is set the guest can continue
> with the initialization and will expect the right data to be set on the
> page.

I think you misunderstood my question. From my understanding, at the 
moment, Linux would break with your proposal. So you need to modify the 
kernel in order to support what you are doing.

IOW, we have room to decide the approach here.

Xenstore protocol has a way to allow (re)connection (see 
docs/mics/xenstore-ring.txt). This feature looks quite suited to what 
you are trying to do here (we want to delay the connection).

The main advantage with this approach is the resources allocation for 
Xenstore will be done in the place and the work in Linux could be 
re-used for non-dom0less domain.

Have you explored it?

> In other words: it is not enough to have the pfn allocated, we
> also need xenstore to initialize it. At that point, it is better to do
> both later from init-dom0less.c.
See above. My main concern with your proposal is the allocation is split 
and this making more difficult to understand the initialization. Could 
you write some documentation how everything is meant to work?

Cheers,
Stefano Stabellini April 1, 2022, 12:34 a.m. UTC | #4
On Fri, 25 Mar 2022, Julien Grall wrote:
> On 23/03/2022 01:18, Stefano Stabellini wrote:
> > On Sat, 29 Jan 2022, Julien Grall wrote:
> > > On 28/01/2022 21:33, Stefano Stabellini wrote:
> > > > +    rc = evtchn_alloc_unbound(&alloc, true);
> > > > +    if ( rc )
> > > > +    {
> > > > +        printk("Failed allocating event channel for domain\n");
> > > > +        return rc;
> > > > +    }
> > > > +
> > > > +    d->arch.hvm.params[HVM_PARAM_STORE_EVTCHN] = alloc.port;
> > > > +
> > > > +    return 0;
> > > > +}
> > > > +
> > > >    static int __init construct_domU(struct domain *d,
> > > >                                     const struct dt_device_node *node)
> > > >    {
> > > > @@ -3014,7 +3043,19 @@ static int __init construct_domU(struct domain
> > > > *d,
> > > >            return rc;
> > > >          if ( kinfo.vpl011 )
> > > > +    {
> > > >            rc = domain_vpl011_init(d, NULL);
> > > > +        if ( rc < 0 )
> > > > +            return rc;
> > > > +    }
> > > > +
> > > > +    if ( kinfo.dom0less_enhanced )
> > > > +    {
> > > > +        rc = alloc_xenstore_evtchn(d);
> > > > +        if ( rc < 0 )
> > > > +            return rc;
> > > > +        d->arch.hvm.params[HVM_PARAM_STORE_PFN] = ~0ULL;
> > > 
> > > I think it would be easy to allocate the page right now. So what prevent
> > > us to
> > > do it right now?
> > 
> > Because (as you noted as a comment to the following patch) as soon as
> > d->arch.hvm.params[HVM_PARAM_STORE_PFN] is set the guest can continue
> > with the initialization and will expect the right data to be set on the
> > page.
> 
> I think you misunderstood my question. From my understanding, at the moment,
> Linux would break with your proposal. So you need to modify the kernel in
> order to support what you are doing.

Linux does not break with this proposal. I wrote a longer explanation
[1] some time ago.

In short: the master branch and any supported versions of Linux boots
fine with this proposal without changes, however it wouldn't be able to
use PV drivers when started as dom0less kernel.

To be able to use the new feature, this patch is required [2].

Old unsupported and not updated Linux is the only one to break. You gave
an excellent suggestion on thread [1], which resulted in me writing
patch #1 "xen: introduce xen,enhanced dom0less property" to retain
compatibility with older unpatched and unsupported kernels.

[1] https://marc.info/?l=xen-devel&m=164142956915469
[2] https://marc.info/?l=xen-devel&m=164203595315414


> IOW, we have room to decide the approach here.
> 
> Xenstore protocol has a way to allow (re)connection (see
> docs/mics/xenstore-ring.txt). This feature looks quite suited to what you are
> trying to do here (we want to delay the connection).
> 
> The main advantage with this approach is the resources allocation for Xenstore
> will be done in the place and the work in Linux could be re-used for
> non-dom0less domain.
> 
> Have you explored it?

Luca (CCed) is the original author. I don't know if he explored that
approach. I have not looked into it in any details but I think there
might be challenges: in this case there is nothing on the shared page.
There are no feature bits as it has not been initialized (xenstored is
the one initializating it).

Keep in mind that Luca and I have done many tests on this approach, both
the Xen side, the Linux side (very many different kernel versions) and
complex configurations (both network and block PV drivers, DMA mastering
devices, etc.) If we changed approach now we would lose some of the
value of the past efforts. But if required, I'll try to schedule time to
do a proper research of your suggestion.


> > In other words: it is not enough to have the pfn allocated, we
> > also need xenstore to initialize it. At that point, it is better to do
> > both later from init-dom0less.c.
> See above. My main concern with your proposal is the allocation is split and
> this making more difficult to understand the initialization. Could you write
> some documentation how everything is meant to work?

I can document it a lot better for sure. I'll do that.
Julien Grall April 1, 2022, 8:36 a.m. UTC | #5
Hi Stefano,

On 01/04/2022 01:34, Stefano Stabellini wrote:
>>> Because (as you noted as a comment to the following patch) as soon as
>>> d->arch.hvm.params[HVM_PARAM_STORE_PFN] is set the guest can continue
>>> with the initialization and will expect the right data to be set on the
>>> page.
>>
>> I think you misunderstood my question. From my understanding, at the moment,
>> Linux would break with your proposal. So you need to modify the kernel in
>> order to support what you are doing.
> 
> Linux does not break with this proposal. I wrote a longer explanation
> [1] some time ago.

I guess I should not have written "broken" here. But instead point out 
that...

> 
> In short: the master branch and any supported versions of Linux boots
> fine with this proposal without changes, however it wouldn't be able to
> use PV drivers when started as dom0less kernel.
> 
> To be able to use the new feature, this patch is required [2].

... without the extra patch is Linux, you would not be able to take 
advantage of this feature.

>> IOW, we have room to decide the approach here.
>>
>> Xenstore protocol has a way to allow (re)connection (see
>> docs/mics/xenstore-ring.txt). This feature looks quite suited to what you are
>> trying to do here (we want to delay the connection).
>>
>> The main advantage with this approach is the resources allocation for Xenstore
>> will be done in the place and the work in Linux could be re-used for
>> non-dom0less domain.
>>
>> Have you explored it?
> 
> Luca (CCed) is the original author. I don't know if he explored that
> approach. I have not looked into it in any details but I think there
> might be challenges: in this case there is nothing on the shared page.
> There are no feature bits as it has not been initialized (xenstored is
> the one initializating it).
I agree there is nothing today. But here, we have the liberty to 
initialize the feature bits in Xen.

> 
> Keep in mind that Luca and I have done many tests on this approach, both
> the Xen side, the Linux side (very many different kernel versions) and
> complex configurations (both network and block PV drivers, DMA mastering
> devices, etc.) If we changed approach now we would lose some of the
> value of the past efforts.

I appreciate the effort you put into testing this approach. However, 
this is an external interface that we will consider stable as soon as 
the two sides (Xen and Linux) are committed. So I want to make sure we 
are not putting ourself in a corner.

One major issue I can forsee with your approach is the xenstore 
initialization seem to only works for HVM. In theory, we may have the 
same need for PV (e.g. in the case of Hyperlaunch).

How would your proposal work for PV guest?

Note that I now that PV guest may not work without Xenstore. But I don't 
see any reason why we could not start them before Xenstored.

Cheers,
diff mbox series

Patch

diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index 9144d6c0b6..8e030a7f05 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -27,6 +27,7 @@ 
 #include <asm/setup.h>
 #include <asm/cpufeature.h>
 #include <asm/domain_build.h>
+#include <xen/event.h>
 
 #include <xen/irq.h>
 #include <xen/grant_table.h>
@@ -2619,6 +2620,8 @@  static int __init prepare_dtb_domU(struct domain *d, struct kernel_info *kinfo)
     int ret;
 
     kinfo->phandle_gic = GUEST_PHANDLE_GIC;
+    kinfo->gnttab_start = GUEST_GNTTAB_BASE;
+    kinfo->gnttab_size = GUEST_GNTTAB_SIZE;
 
     addrcells = GUEST_ROOT_ADDRESS_CELLS;
     sizecells = GUEST_ROOT_SIZE_CELLS;
@@ -2693,6 +2696,13 @@  static int __init prepare_dtb_domU(struct domain *d, struct kernel_info *kinfo)
             goto err;
     }
 
+    if ( kinfo->dom0less_enhanced )
+    {
+        ret = make_hypervisor_node(d, kinfo, addrcells, sizecells);
+        if ( ret )
+            goto err;
+    }
+
     ret = fdt_end_node(kinfo->fdt);
     if ( ret < 0 )
         goto err;
@@ -2959,6 +2969,25 @@  static int __init construct_domain(struct domain *d, struct kernel_info *kinfo)
     return 0;
 }
 
+static int __init alloc_xenstore_evtchn(struct domain *d)
+{
+    evtchn_alloc_unbound_t alloc;
+    int rc;
+
+    alloc.dom = d->domain_id;
+    alloc.remote_dom = hardware_domain->domain_id;
+    rc = evtchn_alloc_unbound(&alloc, true);
+    if ( rc )
+    {
+        printk("Failed allocating event channel for domain\n");
+        return rc;
+    }
+
+    d->arch.hvm.params[HVM_PARAM_STORE_EVTCHN] = alloc.port;
+
+    return 0;
+}
+
 static int __init construct_domU(struct domain *d,
                                  const struct dt_device_node *node)
 {
@@ -3014,7 +3043,19 @@  static int __init construct_domU(struct domain *d,
         return rc;
 
     if ( kinfo.vpl011 )
+    {
         rc = domain_vpl011_init(d, NULL);
+        if ( rc < 0 )
+            return rc;
+    }
+
+    if ( kinfo.dom0less_enhanced )
+    {
+        rc = alloc_xenstore_evtchn(d);
+        if ( rc < 0 )
+            return rc;
+        d->arch.hvm.params[HVM_PARAM_STORE_PFN] = ~0ULL;
+    }
 
     return rc;
 }