diff mbox series

[v3,16/16] x86/hyperlaunch: add capabilities to boot domain

Message ID 20250408160802.49870-17-agarciav@amd.com (mailing list archive)
State Superseded
Headers show
Series Hyperlaunch device tree for dom0 | expand

Commit Message

Alejandro Vallejo April 8, 2025, 4:07 p.m. UTC
From: "Daniel P. Smith" <dpsmith@apertussolutions.com>

Introduce the ability to assign capabilities to a domain via its definition in
the device tree. The first capability that can be selected is the control domain
capability. The capability property is a bitfield in both the device tree and
`struct boot_domain`.

Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
Reviewed-by: Jason Andryuk <jason.andryuk@amd.com>
Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
---
 xen/arch/x86/domain-builder/core.c     |  1 +
 xen/arch/x86/domain-builder/fdt.c      | 12 ++++++++++++
 xen/arch/x86/include/asm/boot-domain.h |  4 ++++
 xen/arch/x86/setup.c                   |  6 +++++-
 4 files changed, 22 insertions(+), 1 deletion(-)

Comments

Denis Mukhin April 9, 2025, 10:39 p.m. UTC | #1
On Tuesday, April 8th, 2025 at 9:07 AM, Alejandro Vallejo <agarciav@amd.com> wrote:

> 
> 
> From: "Daniel P. Smith" dpsmith@apertussolutions.com
> 
> 
> Introduce the ability to assign capabilities to a domain via its definition in
> device tree. The first capability enabled to select is the control domain
> capability. The capability property is a bitfield in both the device tree and
> `struct boot_domain`.
> 
> Signed-off-by: Daniel P. Smith dpsmith@apertussolutions.com
> 
> Reviewed-by: Jason Andryuk jason.andryuk@amd.com
> 
> Signed-off-by: Jason Andryuk jason.andryuk@amd.com
> 
> ---
> xen/arch/x86/domain-builder/core.c | 1 +
> xen/arch/x86/domain-builder/fdt.c | 12 ++++++++++++
> xen/arch/x86/include/asm/boot-domain.h | 4 ++++
> xen/arch/x86/setup.c | 6 +++++-
> 4 files changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/arch/x86/domain-builder/core.c b/xen/arch/x86/domain-builder/core.c
> index 510a74a675..6ab4e6fe53 100644
> --- a/xen/arch/x86/domain-builder/core.c
> +++ b/xen/arch/x86/domain-builder/core.c
> @@ -96,6 +96,7 @@ void __init builder_init(struct boot_info *bi)
> i = first_boot_module_index(bi, BOOTMOD_UNKNOWN);
> bi->mods[i].type = BOOTMOD_KERNEL;
> 
> bi->domains[0].kernel = &bi->mods[i];
> 
> + bi->domains[0].capabilities |= BUILD_CAPS_CONTROL;
> 
> bi->nr_domains = 1;
> 
> }
> }
> diff --git a/xen/arch/x86/domain-builder/fdt.c b/xen/arch/x86/domain-builder/fdt.c
> index 5fcb767bdd..dbfbcffb0a 100644
> --- a/xen/arch/x86/domain-builder/fdt.c
> +++ b/xen/arch/x86/domain-builder/fdt.c
> @@ -257,6 +257,18 @@ static int __init process_domain_node(
> bd->max_vcpus = val;
> 
> printk(" max vcpus: %d\n", bd->max_vcpus);
> 
> }
> + else if ( strncmp(prop_name, "capabilities", name_len) == 0 )
> + {
> + if ( fdt_prop_as_u32(prop, &bd->capabilities) != 0 )
> 
> + {
> + printk(" failed processing domain id for domain %s\n", name);

Suggest adding XENLOG_ERR to the error message.

> + return -EINVAL;
> + }
> + printk(" caps: ");
> + if ( bd->capabilities & BUILD_CAPS_CONTROL )
> 
> + printk("c");

Perhaps wrap string generation into a separate function?

That will help if the number of capabilities grows over time
and if the string representation is ever needed somewhere else
in the code.

Thoughts?

> + printk("\n");
> + }
> }
> 
> fdt_for_each_subnode(node, fdt, dom_node)
> diff --git a/xen/arch/x86/include/asm/boot-domain.h b/xen/arch/x86/include/asm/boot-domain.h
> index 969c02a6ea..29a7d806de 100644
> --- a/xen/arch/x86/include/asm/boot-domain.h
> +++ b/xen/arch/x86/include/asm/boot-domain.h
> @@ -13,6 +13,10 @@
> struct boot_domain {
> domid_t domid;
> 
> +#define BUILD_CAPS_NONE (0)
> +#define BUILD_CAPS_CONTROL (1 << 0)
> + uint32_t capabilities;
> +
> /* On | Off */
> #define BUILD_MODE_PARAVIRT (1 << 0) /* PV | PVH/HVM */
> #define BUILD_MODE_ENABLE_DM (1 << 1) /* HVM | PVH */
> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
> index 4127a0105d..7e1a26b4d2 100644
> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -1006,6 +1006,7 @@ static struct domain *__init create_dom0(struct boot_info *bi)
> {
> char *cmdline = NULL;
> size_t cmdline_size;
> + unsigned int create_flags = 0;
> struct xen_domctl_createdomain dom0_cfg = {
> .flags = IS_ENABLED(CONFIG_TBOOT) ? XEN_DOMCTL_CDF_s3_integrity : 0,
> .max_evtchn_port = -1,
> @@ -1037,7 +1038,10 @@ static struct domain *__init create_dom0(struct boot_info *bi)
> if ( bd->domid == DOMID_INVALID )
> 
> /* Create initial domain. Not d0 for pvshim. */
> bd->domid = get_initial_domain_id();
> 
> - d = domain_create(bd->domid, &dom0_cfg, pv_shim ? 0 : CDF_privileged);
> 
> + if ( bd->capabilities & BUILD_CAPS_CONTROL )
> 
> + create_flags |= CDF_privileged;
> + d = domain_create(bd->domid, &dom0_cfg,
> 
> + pv_shim ? 0 : create_flags);
> if ( IS_ERR(d) )
> panic("Error creating d%u: %ld\n", bd->domid, PTR_ERR(d));
> 
> 
> --
> 2.43.0
Jan Beulich April 10, 2025, 12:18 p.m. UTC | #2
On 08.04.2025 18:07, Alejandro Vallejo wrote:
> From: "Daniel P. Smith" <dpsmith@apertussolutions.com>
> 
> Introduce the ability to assign capabilities to a domain via its definition in
> device tree. The first capability enabled to select is the control domain
> capability. The capability property is a bitfield in both the device tree and
> `struct boot_domain`.
> 
> Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
> Reviewed-by: Jason Andryuk <jason.andryuk@amd.com>
> Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>

The R-b feels kind of redundant with the subsequent S-o-b.

> --- a/xen/arch/x86/domain-builder/fdt.c
> +++ b/xen/arch/x86/domain-builder/fdt.c
> @@ -257,6 +257,18 @@ static int __init process_domain_node(
>              bd->max_vcpus = val;
>              printk("  max vcpus: %d\n", bd->max_vcpus);
>          }
> +        else if ( strncmp(prop_name, "capabilities", name_len) == 0 )
> +        {
> +            if ( fdt_prop_as_u32(prop, &bd->capabilities) != 0 )
> +            {
> +                printk("  failed processing domain id for domain %s\n", name);
> +                return -EINVAL;
> +            }
> +            printk("  caps: ");
> +            if ( bd->capabilities & BUILD_CAPS_CONTROL )
> +                printk("c");
> +            printk("\n");
> +        }

Like for the other patch: What about other bits being set in the value read?

> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -1006,6 +1006,7 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>  {
>      char *cmdline = NULL;
>      size_t cmdline_size;
> +    unsigned int create_flags = 0;
>      struct xen_domctl_createdomain dom0_cfg = {
>          .flags = IS_ENABLED(CONFIG_TBOOT) ? XEN_DOMCTL_CDF_s3_integrity : 0,
>          .max_evtchn_port = -1,
> @@ -1037,7 +1038,10 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>      if ( bd->domid == DOMID_INVALID )
>          /* Create initial domain.  Not d0 for pvshim. */
>          bd->domid = get_initial_domain_id();
> -    d = domain_create(bd->domid, &dom0_cfg, pv_shim ? 0 : CDF_privileged);
> +    if ( bd->capabilities & BUILD_CAPS_CONTROL )
> +        create_flags |= CDF_privileged;

Seeing that builder_init() in the non-DT case sets the new bit unconditionally,
isn't the shim's only domain suddenly getting CDF_privileged set this way? Oh,
no, you then ...

> +    d = domain_create(bd->domid, &dom0_cfg,
> +                      pv_shim ? 0 : create_flags);

... hide the flag here. Any reason to have the intermediate variable in the
first place (can't resist: when there's already a wall of local variables here)?

Jan
Jan Beulich April 10, 2025, 12:18 p.m. UTC | #3
On 08.04.2025 18:07, Alejandro Vallejo wrote:
> From: "Daniel P. Smith" <dpsmith@apertussolutions.com>
> 
> Introduce the ability to assign capabilities to a domain via its definition in
> device tree. The first capability enabled to select is the control domain
> capability. The capability property is a bitfield in both the device tree and
> `struct boot_domain`.
> 
> Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
> Reviewed-by: Jason Andryuk <jason.andryuk@amd.com>
> Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>

The R-b feels kind of redundant with the subsequent S-o-b.

> --- a/xen/arch/x86/domain-builder/fdt.c
> +++ b/xen/arch/x86/domain-builder/fdt.c
> @@ -257,6 +257,18 @@ static int __init process_domain_node(
>              bd->max_vcpus = val;
>              printk("  max vcpus: %d\n", bd->max_vcpus);
>          }
> +        else if ( strncmp(prop_name, "capabilities", name_len) == 0 )
> +        {
> +            if ( fdt_prop_as_u32(prop, &bd->capabilities) != 0 )
> +            {
> +                printk("  failed processing domain id for domain %s\n", name);
> +                return -EINVAL;
> +            }
> +            printk("  caps: ");
> +            if ( bd->capabilities & BUILD_CAPS_CONTROL )
> +                printk("c");
> +            printk("\n");
> +        }

Like for the other patch: What about other bits being set in the value read?

> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -1006,6 +1006,7 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>  {
>      char *cmdline = NULL;
>      size_t cmdline_size;
> +    unsigned int create_flags = 0;
>      struct xen_domctl_createdomain dom0_cfg = {
>          .flags = IS_ENABLED(CONFIG_TBOOT) ? XEN_DOMCTL_CDF_s3_integrity : 0,
>          .max_evtchn_port = -1,
> @@ -1037,7 +1038,10 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>      if ( bd->domid == DOMID_INVALID )
>          /* Create initial domain.  Not d0 for pvshim. */
>          bd->domid = get_initial_domain_id();
> -    d = domain_create(bd->domid, &dom0_cfg, pv_shim ? 0 : CDF_privileged);
> +    if ( bd->capabilities & BUILD_CAPS_CONTROL )
> +        create_flags |= CDF_privileged;

Seeing that builder_init() in the non-DT case sets the new bit unconditionally,
isn't the shim's only domain suddenly getting CDF_privileged set this way? Oh,
no, you then ...

> +    d = domain_create(bd->domid, &dom0_cfg,
> +                      pv_shim ? 0 : create_flags);

... hide the flag here. Any reason to have the intermediate variable in the
first place (can't resist: when there's already a wall of local variables here)?

Jan
Alejandro Vallejo April 14, 2025, 7:17 p.m. UTC | #4
On Wed Apr 9, 2025 at 11:39 PM BST, Denis Mukhin wrote:
> On Tuesday, April 8th, 2025 at 9:07 AM, Alejandro Vallejo <agarciav@amd.com> wrote:
>
>> 
>> 
>> From: "Daniel P. Smith" dpsmith@apertussolutions.com
>> 
>> 
>> Introduce the ability to assign capabilities to a domain via its definition in
>> device tree. The first capability enabled to select is the control domain
>> capability. The capability property is a bitfield in both the device tree and
>> `struct boot_domain`.
>> 
>> Signed-off-by: Daniel P. Smith dpsmith@apertussolutions.com
>> 
>> Reviewed-by: Jason Andryuk jason.andryuk@amd.com
>> 
>> Signed-off-by: Jason Andryuk jason.andryuk@amd.com
>> 
>> ---
>> xen/arch/x86/domain-builder/core.c | 1 +
>> xen/arch/x86/domain-builder/fdt.c | 12 ++++++++++++
>> xen/arch/x86/include/asm/boot-domain.h | 4 ++++
>> xen/arch/x86/setup.c | 6 +++++-
>> 4 files changed, 22 insertions(+), 1 deletion(-)
>> 
>> diff --git a/xen/arch/x86/domain-builder/core.c b/xen/arch/x86/domain-builder/core.c
>> index 510a74a675..6ab4e6fe53 100644
>> --- a/xen/arch/x86/domain-builder/core.c
>> +++ b/xen/arch/x86/domain-builder/core.c
>> @@ -96,6 +96,7 @@ void __init builder_init(struct boot_info *bi)
>> i = first_boot_module_index(bi, BOOTMOD_UNKNOWN);
>> bi->mods[i].type = BOOTMOD_KERNEL;
>> 
>> bi->domains[0].kernel = &bi->mods[i];
>> 
>> + bi->domains[0].capabilities |= BUILD_CAPS_CONTROL;
>> 
>> bi->nr_domains = 1;
>> 
>> }
>> }
>> diff --git a/xen/arch/x86/domain-builder/fdt.c b/xen/arch/x86/domain-builder/fdt.c
>> index 5fcb767bdd..dbfbcffb0a 100644
>> --- a/xen/arch/x86/domain-builder/fdt.c
>> +++ b/xen/arch/x86/domain-builder/fdt.c
>> @@ -257,6 +257,18 @@ static int __init process_domain_node(
>> bd->max_vcpus = val;
>> 
>> printk(" max vcpus: %d\n", bd->max_vcpus);
>> 
>> }
>> + else if ( strncmp(prop_name, "capabilities", name_len) == 0 )
>> + {
>> + if ( fdt_prop_as_u32(prop, &bd->capabilities) != 0 )
>> 
>> + {
>> + printk(" failed processing domain id for domain %s\n", name);
>
> Suggest adding XENLOG_ERR to the error message.

Yes, and the message itself seems bogus. The dangers of copy-paste...

Will fix both.

>
>> + return -EINVAL;
>> + }
>> + printk(" caps: ");
>> + if ( bd->capabilities & BUILD_CAPS_CONTROL )
>> 
>> + printk("c");
>
> Perhaps wrap string generation into a separate function?
>
> That will help if the number of capabilities will grow over time
> and if there will be a need to use string representation somewhere else
> in the code.
>
> Thoughts?

If/when such other code appears I'm happy to unify them, but until then
I'd rather reduce indirection if possible and keep it inlined.

Cheers,
Alejandro
Alejandro Vallejo April 14, 2025, 7:31 p.m. UTC | #5
On Thu Apr 10, 2025 at 1:18 PM BST, Jan Beulich wrote:
> On 08.04.2025 18:07, Alejandro Vallejo wrote:
>> From: "Daniel P. Smith" <dpsmith@apertussolutions.com>
>> 
>> Introduce the ability to assign capabilities to a domain via its definition in
>> device tree. The first capability enabled to select is the control domain
>> capability. The capability property is a bitfield in both the device tree and
>> `struct boot_domain`.
>> 
>> Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
>> Reviewed-by: Jason Andryuk <jason.andryuk@amd.com>
>> Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
>
> The R-b feels kind of redundant with the subsequent S-o-b.

I'll drop it.

>
>> --- a/xen/arch/x86/domain-builder/fdt.c
>> +++ b/xen/arch/x86/domain-builder/fdt.c
>> @@ -257,6 +257,18 @@ static int __init process_domain_node(
>>              bd->max_vcpus = val;
>>              printk("  max vcpus: %d\n", bd->max_vcpus);
>>          }
>> +        else if ( strncmp(prop_name, "capabilities", name_len) == 0 )
>> +        {
>> +            if ( fdt_prop_as_u32(prop, &bd->capabilities) != 0 )
>> +            {
>> +                printk("  failed processing domain id for domain %s\n", name);
>> +                return -EINVAL;
>> +            }
>> +            printk("  caps: ");
>> +            if ( bd->capabilities & BUILD_CAPS_CONTROL )
>> +                printk("c");
>> +            printk("\n");
>> +        }
>
> Like for the other patch: What about other bits being set in the value read?

I take it that the non-worded suggestion is to have a mask of reserved
bits for each case and check they are not set (giving a warning if they are)?

>
>> --- a/xen/arch/x86/setup.c
>> +++ b/xen/arch/x86/setup.c
>> @@ -1006,6 +1006,7 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>>  {
>>      char *cmdline = NULL;
>>      size_t cmdline_size;
>> +    unsigned int create_flags = 0;
>>      struct xen_domctl_createdomain dom0_cfg = {
>>          .flags = IS_ENABLED(CONFIG_TBOOT) ? XEN_DOMCTL_CDF_s3_integrity : 0,
>>          .max_evtchn_port = -1,
>> @@ -1037,7 +1038,10 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>>      if ( bd->domid == DOMID_INVALID )
>>          /* Create initial domain.  Not d0 for pvshim. */
>>          bd->domid = get_initial_domain_id();
>> -    d = domain_create(bd->domid, &dom0_cfg, pv_shim ? 0 : CDF_privileged);
>> +    if ( bd->capabilities & BUILD_CAPS_CONTROL )
>> +        create_flags |= CDF_privileged;
>
> Seeing that builder_init() in the non-DT case sets the new bit unconditionally,
> isn't the shim's only domain suddenly getting CDF_privileged set this way? Oh,
> no, you then ...
>
>> +    d = domain_create(bd->domid, &dom0_cfg,
>> +                      pv_shim ? 0 : create_flags);
>
> ... hide the flag here. Any reason to have the intermediate variable in the
> first place

Well, the logic would end up fairly convoluted otherwise. As things
stand this can be encoded in an if-else fashion with 2 calls, but
there are 2 capability flags coming that need integrating together.

This is just avoiding further code motion down the line.

> (can't resist: when there's already a wall of local variables here)?

Heh :). Point taken.

Cheers,
Alejandro
Jan Beulich April 15, 2025, 6:38 a.m. UTC | #6
On 14.04.2025 21:31, Alejandro Vallejo wrote:
> On Thu Apr 10, 2025 at 1:18 PM BST, Jan Beulich wrote:
>> On 08.04.2025 18:07, Alejandro Vallejo wrote:
>>> --- a/xen/arch/x86/domain-builder/fdt.c
>>> +++ b/xen/arch/x86/domain-builder/fdt.c
>>> @@ -257,6 +257,18 @@ static int __init process_domain_node(
>>>              bd->max_vcpus = val;
>>>              printk("  max vcpus: %d\n", bd->max_vcpus);
>>>          }
>>> +        else if ( strncmp(prop_name, "capabilities", name_len) == 0 )
>>> +        {
>>> +            if ( fdt_prop_as_u32(prop, &bd->capabilities) != 0 )
>>> +            {
>>> +                printk("  failed processing domain id for domain %s\n", name);
>>> +                return -EINVAL;
>>> +            }
>>> +            printk("  caps: ");
>>> +            if ( bd->capabilities & BUILD_CAPS_CONTROL )
>>> +                printk("c");
>>> +            printk("\n");
>>> +        }
>>
>> Like for the other patch: What about other bits being set in the value read?
> 
> I take it that the non-worded suggestion is to have a mask of reserved
> bits for each case and check they are not set (giving a warning if they are)?

Whether a warning is sufficient I can't tell. I would have expected such to be
outright rejected.

>>> --- a/xen/arch/x86/setup.c
>>> +++ b/xen/arch/x86/setup.c
>>> @@ -1006,6 +1006,7 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>>>  {
>>>      char *cmdline = NULL;
>>>      size_t cmdline_size;
>>> +    unsigned int create_flags = 0;
>>>      struct xen_domctl_createdomain dom0_cfg = {
>>>          .flags = IS_ENABLED(CONFIG_TBOOT) ? XEN_DOMCTL_CDF_s3_integrity : 0,
>>>          .max_evtchn_port = -1,
>>> @@ -1037,7 +1038,10 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>>>      if ( bd->domid == DOMID_INVALID )
>>>          /* Create initial domain.  Not d0 for pvshim. */
>>>          bd->domid = get_initial_domain_id();
>>> -    d = domain_create(bd->domid, &dom0_cfg, pv_shim ? 0 : CDF_privileged);
>>> +    if ( bd->capabilities & BUILD_CAPS_CONTROL )
>>> +        create_flags |= CDF_privileged;
>>
>> Seeing that builder_init() in the non-DT case sets the new bit unconditionally,
>> isn't the shim's only domain suddenly getting CDF_privileged set this way? Oh,
>> no, you then ...
>>
>>> +    d = domain_create(bd->domid, &dom0_cfg,
>>> +                      pv_shim ? 0 : create_flags);
>>
>> ... hide the flag here. Any reason to have the intermediate variable in the
>> first place
> 
> Well, the logic would end up fairly convoluted otherwise. As things
> stand this can be encoded in an if-else fashion with 2 calls, but
> there's 2 capability flags coming that need integrating together.
> 
> This is just avoiding further code motion down the line.

Is it?

-    d = domain_create(bd->domid, &dom0_cfg, pv_shim ? 0 : CDF_privileged);
+    d = domain_create(bd->domid, &dom0_cfg,
+                      ((bd->capabilities & BUILD_CAPS_CONTROL) && !pv_shim
+                       ? CDF_privileged : 0));

isn't really worse (imo), but is highlighting the problem more clearly: Why
would the shim have BUILD_CAPS_CONTROL set in the first place? Without that
the statement would remain pretty similar to what it was before.

Jan
Alejandro Vallejo April 15, 2025, 12:22 p.m. UTC | #7
On Tue Apr 15, 2025 at 7:38 AM BST, Jan Beulich wrote:
> On 14.04.2025 21:31, Alejandro Vallejo wrote:
>> On Thu Apr 10, 2025 at 1:18 PM BST, Jan Beulich wrote:
>>> On 08.04.2025 18:07, Alejandro Vallejo wrote:
>>>> --- a/xen/arch/x86/domain-builder/fdt.c
>>>> +++ b/xen/arch/x86/domain-builder/fdt.c
>>>> @@ -257,6 +257,18 @@ static int __init process_domain_node(
>>>>              bd->max_vcpus = val;
>>>>              printk("  max vcpus: %d\n", bd->max_vcpus);
>>>>          }
>>>> +        else if ( strncmp(prop_name, "capabilities", name_len) == 0 )
>>>> +        {
>>>> +            if ( fdt_prop_as_u32(prop, &bd->capabilities) != 0 )
>>>> +            {
>>>> +                printk("  failed processing domain id for domain %s\n", name);
>>>> +                return -EINVAL;
>>>> +            }
>>>> +            printk("  caps: ");
>>>> +            if ( bd->capabilities & BUILD_CAPS_CONTROL )
>>>> +                printk("c");
>>>> +            printk("\n");
>>>> +        }
>>>
>>> Like for the other patch: What about other bits being set in the value read?
>> 
>> I take it that the non-worded suggestion is to have a mask of reserved
>> bits for each case and check they are not set (giving a warning if they are)?
>
> Whether a warning is sufficient I can't tell. I would have expected such to be
> outright rejected.
>
>>>> --- a/xen/arch/x86/setup.c
>>>> +++ b/xen/arch/x86/setup.c
>>>> @@ -1006,6 +1006,7 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>>>>  {
>>>>      char *cmdline = NULL;
>>>>      size_t cmdline_size;
>>>> +    unsigned int create_flags = 0;
>>>>      struct xen_domctl_createdomain dom0_cfg = {
>>>>          .flags = IS_ENABLED(CONFIG_TBOOT) ? XEN_DOMCTL_CDF_s3_integrity : 0,
>>>>          .max_evtchn_port = -1,
>>>> @@ -1037,7 +1038,10 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>>>>      if ( bd->domid == DOMID_INVALID )
>>>>          /* Create initial domain.  Not d0 for pvshim. */
>>>>          bd->domid = get_initial_domain_id();
>>>> -    d = domain_create(bd->domid, &dom0_cfg, pv_shim ? 0 : CDF_privileged);
>>>> +    if ( bd->capabilities & BUILD_CAPS_CONTROL )
>>>> +        create_flags |= CDF_privileged;
>>>
>>> Seeing that builder_init() in the non-DT case sets the new bit unconditionally,
>>> isn't the shim's only domain suddenly getting CDF_privileged set this way? Oh,
>>> no, you then ...
>>>
>>>> +    d = domain_create(bd->domid, &dom0_cfg,
>>>> +                      pv_shim ? 0 : create_flags);
>>>
>>> ... hide the flag here. Any reason to have the intermediate variable in the
>>> first place
>> 
>> Well, the logic would end up fairly convoluted otherwise. As things
>> stand this can be encoded in an if-else fashion with 2 calls, but
>> there's 2 capability flags coming that need integrating together.
>> 
>> This is just avoiding further code motion down the line.
>
> Is it?
>
> -    d = domain_create(bd->domid, &dom0_cfg, pv_shim ? 0 : CDF_privileged);
> +    d = domain_create(bd->domid, &dom0_cfg,
> +                      ((bd->capabilities & BUILD_CAPS_CONTROL) && !pv_shim
> +                       ? CDF_privileged : 0));
>
> isn't really worse (imo),

Not sure I agree. Long conditions on ternary operators make the
control flow harder to follow.

A nicer alternative that also removes the auxiliary variable is to have
a helper to convert from bootcaps to whatever createdomainflags are
required. That'd extend naturally for more bits.

> but is highlighting the problem more clearly: Why
> would the shim have BUILD_CAPS_CONTROL set in the first place? Without that
> the statement would remain pretty similar to what it was before.

If the commandline is parsed early enough (I see the early parse path in
head.S?) it would be better to add this logic to builder_init() and
prevent the capability from reaching the boot_domain in the first place.

Then there's no exception for the pv shim.

Cheers,
Alejandro
Jan Beulich April 15, 2025, 2:20 p.m. UTC | #8
On 15.04.2025 14:22, Alejandro Vallejo wrote:
> On Tue Apr 15, 2025 at 7:38 AM BST, Jan Beulich wrote:
>> On 14.04.2025 21:31, Alejandro Vallejo wrote:
>>> On Thu Apr 10, 2025 at 1:18 PM BST, Jan Beulich wrote:
>>>> On 08.04.2025 18:07, Alejandro Vallejo wrote:
>>>>> --- a/xen/arch/x86/setup.c
>>>>> +++ b/xen/arch/x86/setup.c
>>>>> @@ -1006,6 +1006,7 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>>>>>  {
>>>>>      char *cmdline = NULL;
>>>>>      size_t cmdline_size;
>>>>> +    unsigned int create_flags = 0;
>>>>>      struct xen_domctl_createdomain dom0_cfg = {
>>>>>          .flags = IS_ENABLED(CONFIG_TBOOT) ? XEN_DOMCTL_CDF_s3_integrity : 0,
>>>>>          .max_evtchn_port = -1,
>>>>> @@ -1037,7 +1038,10 @@ static struct domain *__init create_dom0(struct boot_info *bi)
>>>>>      if ( bd->domid == DOMID_INVALID )
>>>>>          /* Create initial domain.  Not d0 for pvshim. */
>>>>>          bd->domid = get_initial_domain_id();
>>>>> -    d = domain_create(bd->domid, &dom0_cfg, pv_shim ? 0 : CDF_privileged);
>>>>> +    if ( bd->capabilities & BUILD_CAPS_CONTROL )
>>>>> +        create_flags |= CDF_privileged;
>>>>
>>>> Seeing that builder_init() in the non-DT case sets the new bit unconditionally,
>>>> isn't the shim's only domain suddenly getting CDF_privileged set this way? Oh,
>>>> no, you then ...
>>>>
>>>>> +    d = domain_create(bd->domid, &dom0_cfg,
>>>>> +                      pv_shim ? 0 : create_flags);
>>>>
>>>> ... hide the flag here. Any reason to have the intermediate variable in the
>>>> first place
>>>
>>> Well, the logic would end up fairly convoluted otherwise. As things
>>> stand this can be encoded in an if-else fashion with 2 calls, but
>>> there's 2 capability flags coming that need integrating together.
>>>
>>> This is just avoiding further code motion down the line.
>>
>> Is it?
>>
>> -    d = domain_create(bd->domid, &dom0_cfg, pv_shim ? 0 : CDF_privileged);
>> +    d = domain_create(bd->domid, &dom0_cfg,
>> +                      ((bd->capabilities & BUILD_CAPS_CONTROL) && !pv_shim
>> +                       ? CDF_privileged : 0));
>>
>> isn't really worse (imo),
> 
> Not sure I agree. Long conditions on ternary operators makes the
> control flow harder to follow.
> 
> A nicer alternative that also removes the auxiliary variable is to have
> a helper to convert from bootcaps to whatever createdomainflags are
> required. That'd extend naturally for more bits.
> 
>> but is highlighting the problem more clearly: Why
>> would the shim have BUILD_CAPS_CONTROL set in the first place? Without that
>> the statement would remain pretty similar to what it was before.
> 
> If the commandline is parsed early enough (I see the early parse path in
> head.S?) it would be better to add this logic to builder_init() and
> prevent the capability from reaching the boot_domain in the first place.

The parsing from head.S is only partial. But surely DT is being looked at
far later than when the full parsing (cmdline_parse()) is done?

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/domain-builder/core.c b/xen/arch/x86/domain-builder/core.c
index 510a74a675..6ab4e6fe53 100644
--- a/xen/arch/x86/domain-builder/core.c
+++ b/xen/arch/x86/domain-builder/core.c
@@ -96,6 +96,7 @@  void __init builder_init(struct boot_info *bi)
         i = first_boot_module_index(bi, BOOTMOD_UNKNOWN);
         bi->mods[i].type = BOOTMOD_KERNEL;
         bi->domains[0].kernel = &bi->mods[i];
+        bi->domains[0].capabilities |= BUILD_CAPS_CONTROL;
         bi->nr_domains = 1;
     }
 }
diff --git a/xen/arch/x86/domain-builder/fdt.c b/xen/arch/x86/domain-builder/fdt.c
index 5fcb767bdd..dbfbcffb0a 100644
--- a/xen/arch/x86/domain-builder/fdt.c
+++ b/xen/arch/x86/domain-builder/fdt.c
@@ -257,6 +257,18 @@  static int __init process_domain_node(
             bd->max_vcpus = val;
             printk("  max vcpus: %d\n", bd->max_vcpus);
         }
+        else if ( strncmp(prop_name, "capabilities", name_len) == 0 )
+        {
+            if ( fdt_prop_as_u32(prop, &bd->capabilities) != 0 )
+            {
+                printk("  failed processing domain id for domain %s\n", name);
+                return -EINVAL;
+            }
+            printk("  caps: ");
+            if ( bd->capabilities & BUILD_CAPS_CONTROL )
+                printk("c");
+            printk("\n");
+        }
     }
 
     fdt_for_each_subnode(node, fdt, dom_node)
diff --git a/xen/arch/x86/include/asm/boot-domain.h b/xen/arch/x86/include/asm/boot-domain.h
index 969c02a6ea..29a7d806de 100644
--- a/xen/arch/x86/include/asm/boot-domain.h
+++ b/xen/arch/x86/include/asm/boot-domain.h
@@ -13,6 +13,10 @@ 
 struct boot_domain {
     domid_t domid;
 
+#define BUILD_CAPS_NONE          (0)
+#define BUILD_CAPS_CONTROL       (1 << 0)
+    uint32_t capabilities;
+
                                           /* On     | Off    */
 #define BUILD_MODE_PARAVIRT      (1 << 0) /* PV     | PVH/HVM */
 #define BUILD_MODE_ENABLE_DM     (1 << 1) /* HVM    | PVH     */
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 4127a0105d..7e1a26b4d2 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1006,6 +1006,7 @@  static struct domain *__init create_dom0(struct boot_info *bi)
 {
     char *cmdline = NULL;
     size_t cmdline_size;
+    unsigned int create_flags = 0;
     struct xen_domctl_createdomain dom0_cfg = {
         .flags = IS_ENABLED(CONFIG_TBOOT) ? XEN_DOMCTL_CDF_s3_integrity : 0,
         .max_evtchn_port = -1,
@@ -1037,7 +1038,10 @@  static struct domain *__init create_dom0(struct boot_info *bi)
     if ( bd->domid == DOMID_INVALID )
         /* Create initial domain.  Not d0 for pvshim. */
         bd->domid = get_initial_domain_id();
-    d = domain_create(bd->domid, &dom0_cfg, pv_shim ? 0 : CDF_privileged);
+    if ( bd->capabilities & BUILD_CAPS_CONTROL )
+        create_flags |= CDF_privileged;
+    d = domain_create(bd->domid, &dom0_cfg,
+                      pv_shim ? 0 : create_flags);
     if ( IS_ERR(d) )
         panic("Error creating d%u: %ld\n", bd->domid, PTR_ERR(d));