diff mbox series

[RFC,v1,5/5] xen/arm: add SCI mediator support for DomUs

Message ID 4469cdf05051bd691a8adff2657d27f6a5f0cefb.1639472078.git.oleksii_moisieiev@epam.com (mailing list archive)
State Superseded
Headers show
Series Introduce SCI-mediator feature | expand

Commit Message

Oleksii Moisieiev Dec. 14, 2021, 9:34 a.m. UTC
Integration of the SCMI mediator with xen libs:
- add hypercalls to aquire SCI channel and set device permissions
for DomUs;
- add SCMI_SMC nodes to DomUs device-tree based on partial device-tree;
- SCI requests redirection from DomUs to Firmware.

Signed-off-by: Oleksii Moisieiev <oleksii_moisieiev@epam.com>
---
 tools/include/xenctrl.h           |   3 +
 tools/include/xenguest.h          |   2 +
 tools/libs/ctrl/xc_domain.c       |  23 ++++++
 tools/libs/guest/xg_dom_arm.c     |   5 +-
 tools/libs/light/libxl_arm.c      | 122 +++++++++++++++++++++++++++---
 tools/libs/light/libxl_create.c   |  54 ++++++++++++-
 tools/libs/light/libxl_dom.c      |   1 +
 tools/libs/light/libxl_internal.h |   4 +
 xen/arch/arm/domctl.c             |  15 ++++
 xen/include/public/domctl.h       |   9 +++
 10 files changed, 223 insertions(+), 15 deletions(-)

Comments

Jan Beulich Dec. 14, 2021, 9:41 a.m. UTC | #1
On 14.12.2021 10:34, Oleksii Moisieiev wrote:
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -1177,6 +1177,13 @@ struct xen_domctl_vmtrace_op {
>  #define XEN_DOMCTL_vmtrace_get_option         5
>  #define XEN_DOMCTL_vmtrace_set_option         6
>  };
> +
> +/* XEN_DOMCTL_add_sci_device: set sci device permissions */
> +struct xen_domctl_sci_device_op {
> +    uint32_t size; /* Length of the path */
> +    XEN_GUEST_HANDLE_64(char) path; /* path to the device tree node */
> +};

This being - aiui - Arm-only, please enclose it by respective #if,
just like we do for certain x86-only ops.

I'm further afraid the term "SCI" is ambiguous with ACPI's System
Control Interrupt, so there's some further tag needed in the names
used here.

Finally please make padding explicit and check that it's zero on
input.

Jan
Oleksandr Tyshchenko Dec. 16, 2021, 12:04 a.m. UTC | #2
On 14.12.21 11:34, Oleksii Moisieiev wrote:

Hi Oleksii

> Integration of the SCMI mediator with xen libs:
> - add hypercalls to aquire SCI channel and set device permissions
> for DomUs;
> - add SCMI_SMC nodes to DomUs device-tree based on partial device-tree;
> - SCI requests redirection from DomUs to Firmware.
>
> Signed-off-by: Oleksii Moisieiev <oleksii_moisieiev@epam.com>
> ---
>   tools/include/xenctrl.h           |   3 +
>   tools/include/xenguest.h          |   2 +
>   tools/libs/ctrl/xc_domain.c       |  23 ++++++
>   tools/libs/guest/xg_dom_arm.c     |   5 +-
>   tools/libs/light/libxl_arm.c      | 122 +++++++++++++++++++++++++++---
>   tools/libs/light/libxl_create.c   |  54 ++++++++++++-
>   tools/libs/light/libxl_dom.c      |   1 +
>   tools/libs/light/libxl_internal.h |   4 +
>   xen/arch/arm/domctl.c             |  15 ++++
>   xen/include/public/domctl.h       |   9 +++
>   10 files changed, 223 insertions(+), 15 deletions(-)
>
> diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
> index 07b96e6671..cdd14f465f 100644
> --- a/tools/include/xenctrl.h
> +++ b/tools/include/xenctrl.h
> @@ -1238,6 +1238,9 @@ int xc_domain_getvnuma(xc_interface *xch,
>   int xc_domain_soft_reset(xc_interface *xch,
>                            uint32_t domid);
>   
> +int xc_domain_add_sci_device(xc_interface *xch,
> +                              uint32_t domid, char *path);
> +
>   #if defined(__i386__) || defined(__x86_64__)
>   /*
>    * PC BIOS standard E820 types and structure.
> diff --git a/tools/include/xenguest.h b/tools/include/xenguest.h
> index 61d0a82f48..35c611ac73 100644
> --- a/tools/include/xenguest.h
> +++ b/tools/include/xenguest.h
> @@ -242,6 +242,8 @@ struct xc_dom_image {
>   
>       /* Number of vCPUs */
>       unsigned int max_vcpus;
> +
> +    xen_pfn_t sci_shmem_gfn;
>   };
>   
>   /* --- arch specific hooks ----------------------------------------- */
> diff --git a/tools/libs/ctrl/xc_domain.c b/tools/libs/ctrl/xc_domain.c
> index b155d6afd2..07bb390e17 100644
> --- a/tools/libs/ctrl/xc_domain.c
> +++ b/tools/libs/ctrl/xc_domain.c
> @@ -2206,6 +2206,29 @@ int xc_domain_soft_reset(xc_interface *xch,
>       domctl.domain = domid;
>       return do_domctl(xch, &domctl);
>   }
> +
> +int xc_domain_add_sci_device(xc_interface *xch,
> +                              uint32_t domid, char *path)
> +{
> +    size_t size = strlen(path);
> +    int rc;
> +    DECLARE_DOMCTL;
> +    DECLARE_HYPERCALL_BOUNCE(path, size, XC_HYPERCALL_BUFFER_BOUNCE_IN);
> +
> +    if ( xc_hypercall_bounce_pre(xch, path) )
> +        return -1;
> +
> +    domctl.cmd = XEN_DOMCTL_add_sci_device;
> +    domctl.domain = domid;
> +    domctl.u.sci_device_op.size = size;
> +    set_xen_guest_handle(domctl.u.sci_device_op.path, path);
> +    rc = do_domctl(xch, &domctl);
> +
> +    xc_hypercall_bounce_post(xch, path);
> +
> +    return rc;
> +}
> +
>   /*
>    * Local variables:
>    * mode: C
> diff --git a/tools/libs/guest/xg_dom_arm.c b/tools/libs/guest/xg_dom_arm.c
> index 5e3b76355e..368a670c46 100644
> --- a/tools/libs/guest/xg_dom_arm.c
> +++ b/tools/libs/guest/xg_dom_arm.c
> @@ -25,11 +25,12 @@
>   
>   #include "xg_private.h"
>   
> -#define NR_MAGIC_PAGES 4
> +#define NR_MAGIC_PAGES 5
>   #define CONSOLE_PFN_OFFSET 0
>   #define XENSTORE_PFN_OFFSET 1
>   #define MEMACCESS_PFN_OFFSET 2
>   #define VUART_PFN_OFFSET 3
> +#define SCI_SHMEM_OFFSET 4
>   
>   #define LPAE_SHIFT 9
>   
> @@ -69,11 +70,13 @@ static int alloc_magic_pages(struct xc_dom_image *dom)
>       dom->console_pfn = base + CONSOLE_PFN_OFFSET;
>       dom->xenstore_pfn = base + XENSTORE_PFN_OFFSET;
>       dom->vuart_gfn = base + VUART_PFN_OFFSET;
> +    dom->sci_shmem_gfn = base + SCI_SHMEM_OFFSET;
>   
>       xc_clear_domain_page(dom->xch, dom->guest_domid, dom->console_pfn);
>       xc_clear_domain_page(dom->xch, dom->guest_domid, dom->xenstore_pfn);
>       xc_clear_domain_page(dom->xch, dom->guest_domid, base + MEMACCESS_PFN_OFFSET);
>       xc_clear_domain_page(dom->xch, dom->guest_domid, dom->vuart_gfn);
> +    xc_clear_domain_page(dom->xch, dom->guest_domid, dom->sci_shmem_gfn);
>   
>       xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_CONSOLE_PFN,
>               dom->console_pfn);
> diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
> index eef1de0939..73ef591822 100644
> --- a/tools/libs/light/libxl_arm.c
> +++ b/tools/libs/light/libxl_arm.c
> @@ -8,6 +8,8 @@
>   #include <assert.h>
>   #include <xen/device_tree_defs.h>
>   
> +#define SCMI_NODE_PATH      "/firmware/scmi"
> +
>   static const char *gicv_to_string(libxl_gic_version gic_version)
>   {
>       switch (gic_version) {
> @@ -101,6 +103,19 @@ int libxl__arch_domain_prepare_config(libxl__gc *gc,
>           return ERROR_FAIL;
>       }
>   
> +    switch (d_config->b_info.sci) {
> +    case LIBXL_SCI_TYPE_NONE:
> +        config->arch.sci_type = XEN_DOMCTL_CONFIG_SCI_NONE;
> +        break;
> +    case LIBXL_SCI_TYPE_SCMI_SMC:
> +        config->arch.sci_type = XEN_DOMCTL_CONFIG_SCI_SCMI_SMC;
> +        break;
> +    default:
> +        LOG(ERROR, "Unknown SCI type %d",
> +            d_config->b_info.sci);
> +        return ERROR_FAIL;
> +    }
> +
>       return 0;
>   }
>   
> @@ -122,6 +137,7 @@ int libxl__arch_domain_save_config(libxl__gc *gc,
>       }
>   
>       state->clock_frequency = config->arch.clock_frequency;
> +    state->sci_agent_paddr = config->arch.sci_agent_paddr;
>   
>       return 0;
>   }
> @@ -502,9 +518,6 @@ static int make_optee_node(libxl__gc *gc, void *fdt)
>       int res;
>       LOG(DEBUG, "Creating OP-TEE node in dtb");
>   
> -    res = fdt_begin_node(fdt, "firmware");
> -    if (res) return res;
> -
>       res = fdt_begin_node(fdt, "optee");
>       if (res) return res;
>   
> @@ -517,9 +530,6 @@ static int make_optee_node(libxl__gc *gc, void *fdt)
>       res = fdt_end_node(fdt);
>       if (res) return res;
>   
> -    res = fdt_end_node(fdt);
> -    if (res) return res;
> -
>       return 0;
>   }
>   
> @@ -902,10 +912,9 @@ static int copy_node(libxl__gc *gc, void *fdt, void *pfdt,
>       return 0;
>   }
>   
> -static int copy_node_by_path(libxl__gc *gc, const char *path,
> -                             void *fdt, void *pfdt)
> +static int get_path_nodeoff(const char *path, void *pfdt)
>   {
> -    int nodeoff, r;
> +    int nodeoff;
>       const char *name = strrchr(path, '/');
>   
>       if (!name)
> @@ -925,12 +934,101 @@ static int copy_node_by_path(libxl__gc *gc, const char *path,
>       if (strcmp(fdt_get_name(pfdt, nodeoff, NULL), name))
>           return -FDT_ERR_NOTFOUND;
>   
> +    return nodeoff;
> +}
> +
> +static int copy_node_by_path(libxl__gc *gc, const char *path,
> +                             void *fdt, void *pfdt)
> +{
> +    int nodeoff, r;
> +
> +    nodeoff = get_path_nodeoff(path, pfdt);
> +    if (nodeoff < 0)
> +        return nodeoff;
> +
>       r = copy_node(gc, fdt, pfdt, nodeoff, 0);
>       if (r) return r;
>   
>       return 0;
>   }
>   
> +static int get_node_phandle(const char *path, void *pfdt, uint32_t *phandle)
> +{
> +    int nodeoff;
> +    const char *name = strrchr(path, '/');
> +
> +    if (!name)
> +        return -FDT_ERR_INTERNAL;
> +
> +    name++;
> +    nodeoff = fdt_path_offset(pfdt, path);
> +    if (nodeoff < 0)
> +        return nodeoff;
> +
> +    *phandle = fdt_get_phandle(pfdt, nodeoff);
> +    return 0;
> +}
> +
> +static int make_scmi_shmem_node(libxl__gc *gc, void *fdt, void *pfdt,
> +                           struct xc_dom_image *dom)
> +{
> +    int res;
> +    char buf[64];
> +    uint32_t phandle = 0;
> +
> +    res = get_node_phandle("/scp-shmem", pfdt, &phandle);
> +    if (res) return res;
> +
> +    snprintf(buf, sizeof(buf), "scp-shmem@%lx",
> +             dom->sci_shmem_gfn << XC_PAGE_SHIFT);
> +    res = fdt_begin_node(fdt, buf);
> +    if (res) return res;
> +
> +    res = fdt_property_compat(gc, fdt, 1, "arm,scmi-shmem");
> +    if (res) return res;
> +
> +    res = fdt_property_regs(gc, fdt, GUEST_ROOT_ADDRESS_CELLS,
> +                    GUEST_ROOT_SIZE_CELLS, 1,
> +                    dom->sci_shmem_gfn << XC_PAGE_SHIFT, XC_PAGE_SHIFT);
> +    if (res) return res;
> +
> +    LOG(DEBUG, "scmi: setting phandle = %u\n", phandle);
> +    res = fdt_property_cell(fdt, "phandle", phandle);
> +    if (res) return res;
> +
> +    res = fdt_end_node(fdt);
> +    if (res) return res;
> +
> +    return 0;
> +}
> +
> +static int make_firmware_node(libxl__gc *gc, void *fdt, void *pfdt, int tee,
> +                              int sci)
> +{
> +    int res;
> +
> +    if ((tee != LIBXL_TEE_TYPE_OPTEE) && (sci != LIBXL_SCI_TYPE_NONE))
> +        return 0;
> +
> +    res = fdt_begin_node(fdt, "firmware");
> +    if (res) return res;
> +
> +    if (tee == LIBXL_TEE_TYPE_OPTEE) {
> +       res = make_optee_node(gc, fdt);
> +       if (res) return res;
> +    }
> +
> +    if (sci == LIBXL_SCI_TYPE_SCMI_SMC) {
> +        res = copy_node_by_path(gc, SCMI_NODE_PATH, fdt, pfdt);
> +        if (res) return res;
> +    }
> +
> +    res = fdt_end_node(fdt);
> +    if (res) return res;
> +
> +    return 0;
> +}
> +
>   /*
>    * The partial device tree is not copied entirely. Only the relevant bits are
>    * copied to the guest device tree:
> @@ -1088,8 +1186,10 @@ next_resize:
>           if (info->arch_arm.vuart == LIBXL_VUART_TYPE_SBSA_UART)
>               FDT( make_vpl011_uart_node(gc, fdt, ainfo, dom) );
>   
> -        if (info->tee == LIBXL_TEE_TYPE_OPTEE)
> -            FDT( make_optee_node(gc, fdt) );
> +        FDT( make_firmware_node(gc, fdt, pfdt, info->tee, info->sci) );
> +
> +        if (info->sci == LIBXL_SCI_TYPE_SCMI_SMC)
> +            FDT( make_scmi_shmem_node(gc, fdt, pfdt, dom) );
>   
>           if (d_config->num_pcidevs)
>               FDT( make_vpci_node(gc, fdt, ainfo, dom) );
> diff --git a/tools/libs/light/libxl_create.c b/tools/libs/light/libxl_create.c
> index dcd09d32ba..c7372fd344 100644
> --- a/tools/libs/light/libxl_create.c
> +++ b/tools/libs/light/libxl_create.c
> @@ -596,6 +596,37 @@ out:
>       return ret;
>   }
>   
> +static int map_sci_page(libxl__gc *gc, uint32_t domid, uint64_t paddr,
> +                         uint64_t guest_addr)
> +{
> +    int ret;
> +    uint64_t _paddr_pfn = paddr >> XC_PAGE_SHIFT;
> +    uint64_t _guest_pfn = guest_addr >> XC_PAGE_SHIFT;
> +
> +    LOG(DEBUG, "iomem %"PRIx64, _paddr_pfn);
> +
> +    ret = xc_domain_iomem_permission(CTX->xch, domid, _paddr_pfn, 1, 1);
> +    if (ret < 0) {
> +        LOG(ERROR,
> +              "failed give domain access to iomem page %"PRIx64,
> +             _paddr_pfn);
> +        return ret;
> +    }
> +
> +    ret = xc_domain_memory_mapping(CTX->xch, domid,
> +                                   _guest_pfn, _paddr_pfn,
> +                                   1, 1);
> +    if (ret < 0) {
> +        LOG(ERROR,
> +              "failed to map to domain iomem page %"PRIx64
> +              " to guest address %"PRIx64,
> +              _paddr_pfn, _guest_pfn);
> +        return ret;
> +    }
> +
> +    return 0;
> +}
> +
>   int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
>                          libxl__domain_build_state *state,
>                          uint32_t *domid, bool soft_reset)
> @@ -762,6 +793,16 @@ int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
>           goto out;
>       }
>   
> +    if (state->sci_agent_paddr != 0) {
> +        ret = map_sci_page(gc, *domid, state->sci_agent_paddr,
> +                            state->sci_shmem_gfn << XC_PAGE_SHIFT);
> +        if (ret < 0) {
> +            LOGED(ERROR, *domid, "map SCI page fail");
> +            rc = ERROR_FAIL;
> +            goto out;
> +        }
> +    }
> +
>       dom_path = libxl__xs_get_dompath(gc, *domid);
>       if (!dom_path) {
>           rc = ERROR_FAIL;
> @@ -1817,17 +1858,24 @@ static void libxl__add_dtdevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
>   {
>       AO_GC;
>       libxl__ao_device *aodev = libxl__multidev_prepare(multidev);
> -    int i, rc = 0;
> +    int i, rc = 0, rc_sci = 0;
>   
>       for (i = 0; i < d_config->num_dtdevs; i++) {
>           const libxl_device_dtdev *dtdev = &d_config->dtdevs[i];
>   
>           LOGD(DEBUG, domid, "Assign device \"%s\" to domain", dtdev->path);
>           rc = xc_assign_dt_device(CTX->xch, domid, dtdev->path);
> -        if (rc < 0) {
> -            LOGD(ERROR, domid, "xc_assign_dtdevice failed: %d", rc);
> +        rc_sci = xc_domain_add_sci_device(CTX->xch, domid, dtdev->path);
> +
> +        if ((rc < 0) && (rc_sci < 0)) {
> +            LOGD(ERROR, domid, "xc_assign_dt_device failed: %d; "
> +                 "xc_domain_add_sci_device failed: %d",
> +                 rc, rc_sci);
>               goto out;
>           }
> +
> +        if (rc)
> +            rc = rc_sci;


If I get this code right, it sounds like the dom.cfg's dtdev property is 
reused to describe sci devices as well, but the libxl__add_dtdevs() does 
not (and can not) differentiate them. So it has no option but to send 
two domctls for each device in dtdevs[] hoping to at least one domctl to 
succeeded. Or I really missed something?

It feels to me that:
  - either new dom.cfg's property could be introduced (scidev?) to 
describe sci devices together with new parsing logic/management code, so 
you will end up having new libxl__add_scidevs() to invoke 
XEN_DOMCTL_add_sci_device,
so no mixing things.
  - or indeed dtdev logic could be *completely* reused including 
extending XEN_DOMCTL_assign_device to cover your use-case, although 
sounds generic, it is used to describe devices for the passthrough (to 
configure an IOMMU for the device), in that case you wouldn't need an 
extra XEN_DOMCTL_add_sci_device introduced by current patch.

Personally I would use the first option as I am not sure that second 
option is conceptually correct && possible. I would leave this for the 
maintainers to clarify.




>       }
>   
>   out:
> diff --git a/tools/libs/light/libxl_dom.c b/tools/libs/light/libxl_dom.c
> index fe9f760f71..b1d288a8b9 100644
> --- a/tools/libs/light/libxl_dom.c
> +++ b/tools/libs/light/libxl_dom.c
> @@ -710,6 +710,7 @@ int libxl__build_pv(libxl__gc *gc, uint32_t domid,
>           state->console_mfn = dom->console_pfn;
>           state->store_mfn = dom->xenstore_pfn;
>           state->vuart_gfn = dom->vuart_gfn;
> +        state->sci_shmem_gfn = dom->sci_shmem_gfn;
>       } else {
>           state->console_mfn = xc_dom_p2m(dom, dom->console_pfn);
>           state->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
> diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h
> index 0b4671318c..f9f9cc633a 100644
> --- a/tools/libs/light/libxl_internal.h
> +++ b/tools/libs/light/libxl_internal.h
> @@ -1407,6 +1407,10 @@ typedef struct {
>       /* Whether this domain is being migrated/restored, or booting fresh.  Only
>        * applicable to the primary domain, not support domains (e.g. stub QEMU). */
>       bool restore;
> +
> +    /* sci channel paddr to be set to device-tree node */
> +    uint64_t sci_agent_paddr;
> +    xen_pfn_t sci_shmem_gfn;
>   } libxl__domain_build_state;
>   
>   _hidden void libxl__domain_build_state_init(libxl__domain_build_state *s);
> diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
> index 6245af6d0b..ba200407da 100644
> --- a/xen/arch/arm/domctl.c
> +++ b/xen/arch/arm/domctl.c
> @@ -4,6 +4,7 @@
>    * Copyright (c) 2012, Citrix Systems
>    */
>   
> +#include <asm/sci/sci.h>
>   #include <xen/errno.h>
>   #include <xen/guest_access.h>
>   #include <xen/hypercall.h>
> @@ -175,6 +176,20 @@ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
>   
>           return rc;
>       }
> +    case XEN_DOMCTL_add_sci_device:
> +    {
> +        int rc;
> +        struct dt_device_node *dev;
> +
> +        rc = dt_find_node_by_gpath(domctl->u.sci_device_op.path,
> +                                   domctl->u.sci_device_op.size,
> +                                   &dev);
> +        if ( rc )
> +            return rc;
> +
> +        return sci_add_dt_device(d, dev);
> +    }
> +
>       default:
>       {
>           int rc;
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index b85e6170b0..671c72c3e8 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -1177,6 +1177,13 @@ struct xen_domctl_vmtrace_op {
>   #define XEN_DOMCTL_vmtrace_get_option         5
>   #define XEN_DOMCTL_vmtrace_set_option         6
>   };
> +
> +/* XEN_DOMCTL_add_sci_device: set sci device permissions */
> +struct xen_domctl_sci_device_op {
> +    uint32_t size; /* Length of the path */
> +    XEN_GUEST_HANDLE_64(char) path; /* path to the device tree node */
> +};
> +
>   typedef struct xen_domctl_vmtrace_op xen_domctl_vmtrace_op_t;
>   DEFINE_XEN_GUEST_HANDLE(xen_domctl_vmtrace_op_t);
>   
> @@ -1265,6 +1272,7 @@ struct xen_domctl {
>   #define XEN_DOMCTL_get_cpu_policy                82
>   #define XEN_DOMCTL_set_cpu_policy                83
>   #define XEN_DOMCTL_vmtrace_op                    84
> +#define XEN_DOMCTL_add_sci_device                85
>   #define XEN_DOMCTL_gdbsx_guestmemio            1000
>   #define XEN_DOMCTL_gdbsx_pausevcpu             1001
>   #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
> @@ -1326,6 +1334,7 @@ struct xen_domctl {
>           struct xen_domctl_psr_alloc         psr_alloc;
>           struct xen_domctl_vuart_op          vuart_op;
>           struct xen_domctl_vmtrace_op        vmtrace_op;
> +        struct xen_domctl_sci_device_op     sci_device_op;
>           uint8_t                             pad[128];
>       } u;
>   };

I assume the XSM needs updating (adding new hook, etc).
Oleksii Moisieiev Dec. 16, 2021, 5:36 p.m. UTC | #3
Hi Jan,

On Tue, Dec 14, 2021 at 10:41:30AM +0100, Jan Beulich wrote:
> On 14.12.2021 10:34, Oleksii Moisieiev wrote:
> > --- a/xen/include/public/domctl.h
> > +++ b/xen/include/public/domctl.h
> > @@ -1177,6 +1177,13 @@ struct xen_domctl_vmtrace_op {
> >  #define XEN_DOMCTL_vmtrace_get_option         5
> >  #define XEN_DOMCTL_vmtrace_set_option         6
> >  };
> > +
> > +/* XEN_DOMCTL_add_sci_device: set sci device permissions */
> > +struct xen_domctl_sci_device_op {
> > +    uint32_t size; /* Length of the path */
> > +    XEN_GUEST_HANDLE_64(char) path; /* path to the device tree node */
> > +};
> 
> This being - aiui - Arm-only, please enclose it by respective #if,
> just like we do for certain x86-only ops.
> 

I agree. I will add #ifdefs in v2.

> I'm further afraid the term "SCI" is ambiguous with ACPI's System
> Control Interrupt, so there's some further tag needed in the names
> used here.
> 

Thank you for remark. I'm thinking about SC as System Control.
What do you think?

> Finally please make padding explicit and check that it's zero on
> input.
> 

I will align the comments in functions and structures in v2.

Best regards,
Oleksii.
Jan Beulich Dec. 17, 2021, 7:12 a.m. UTC | #4
On 16.12.2021 18:36, Oleksii Moisieiev wrote:
> On Tue, Dec 14, 2021 at 10:41:30AM +0100, Jan Beulich wrote:
>> On 14.12.2021 10:34, Oleksii Moisieiev wrote:
>>> --- a/xen/include/public/domctl.h
>>> +++ b/xen/include/public/domctl.h
>>> @@ -1177,6 +1177,13 @@ struct xen_domctl_vmtrace_op {
>>>  #define XEN_DOMCTL_vmtrace_get_option         5
>>>  #define XEN_DOMCTL_vmtrace_set_option         6
>>>  };
>>> +
>>> +/* XEN_DOMCTL_add_sci_device: set sci device permissions */
>>> +struct xen_domctl_sci_device_op {
>>> +    uint32_t size; /* Length of the path */
>>> +    XEN_GUEST_HANDLE_64(char) path; /* path to the device tree node */
>>> +};
>>
>> This being - aiui - Arm-only, please enclose it by respective #if,
>> just like we do for certain x86-only ops.
>>
> 
> I agree. I will add #ifdefs in v2.
> 
>> I'm further afraid the term "SCI" is ambiguous with ACPI's System
>> Control Interrupt, so there's some further tag needed in the names
>> used here.
>>
> 
> Thank you for remark. I'm thinking about SC as System Control.
> What do you think?

I guess "SC" could even more so stand for various things. Even the
spelled out "System Control" looks overly generic. If this isn't
Arm-specific (in which case adding "arm" into the name might at least
help the situation a little), then I guess some further disambiguation
is going to be wanted. Since I don't know any of the context of this,
I'm afraid you're in a far better position than me to come up with a
non-ambiguous name.

Jan
Jan Beulich Dec. 17, 2021, 7:16 a.m. UTC | #5
On 17.12.2021 08:12, Jan Beulich wrote:
> On 16.12.2021 18:36, Oleksii Moisieiev wrote:
>> On Tue, Dec 14, 2021 at 10:41:30AM +0100, Jan Beulich wrote:
>>> On 14.12.2021 10:34, Oleksii Moisieiev wrote:
>>>> --- a/xen/include/public/domctl.h
>>>> +++ b/xen/include/public/domctl.h
>>>> @@ -1177,6 +1177,13 @@ struct xen_domctl_vmtrace_op {
>>>>  #define XEN_DOMCTL_vmtrace_get_option         5
>>>>  #define XEN_DOMCTL_vmtrace_set_option         6
>>>>  };
>>>> +
>>>> +/* XEN_DOMCTL_add_sci_device: set sci device permissions */
>>>> +struct xen_domctl_sci_device_op {
>>>> +    uint32_t size; /* Length of the path */
>>>> +    XEN_GUEST_HANDLE_64(char) path; /* path to the device tree node */
>>>> +};
>>>
>>> This being - aiui - Arm-only, please enclose it by respective #if,
>>> just like we do for certain x86-only ops.
>>>
>>
>> I agree. I will add #ifdefs in v2.
>>
>>> I'm further afraid the term "SCI" is ambiguous with ACPI's System
>>> Control Interrupt, so there's some further tag needed in the names
>>> used here.
>>>
>>
>> Thank you for remark. I'm thinking about SC as System Control.
>> What do you think?
> 
> I guess "SC" could even more so stand for various things. Even the
> spelled out "System Control" looks overly generic. If this isn't
> Arm-specific (in which case adding "arm" into the name might at least
> help the situation a little), then I guess some further disambiguation
> is going to be wanted. Since I don't know any of the context of this,
> I'm afraid you're in a far better position than me to come up with a
> non-ambiguous name.

Actually, looking at the title again - any reason not to add "mediator"
into the name? While I have no idea whether there could be other
mediators with an ambiguous acronym, this would at least address the
ACPI related concern (I don't expect anything mediator-like to appear
there, but then again I might be wrong).

Jan
Oleksii Moisieiev Dec. 17, 2021, 12:15 p.m. UTC | #6
Hi Oleksandr,

On Thu, Dec 16, 2021 at 02:04:35AM +0200, Oleksandr wrote:
> 
> On 14.12.21 11:34, Oleksii Moisieiev wrote:
> 
> Hi Oleksii
> 
> > Integration of the SCMI mediator with xen libs:
> > - add hypercalls to aquire SCI channel and set device permissions
> > for DomUs;
> > - add SCMI_SMC nodes to DomUs device-tree based on partial device-tree;
> > - SCI requests redirection from DomUs to Firmware.
> > 
> > Signed-off-by: Oleksii Moisieiev <oleksii_moisieiev@epam.com>
> > ---
> >   tools/include/xenctrl.h           |   3 +
> >   tools/include/xenguest.h          |   2 +
> >   tools/libs/ctrl/xc_domain.c       |  23 ++++++
> >   tools/libs/guest/xg_dom_arm.c     |   5 +-
> >   tools/libs/light/libxl_arm.c      | 122 +++++++++++++++++++++++++++---
> >   tools/libs/light/libxl_create.c   |  54 ++++++++++++-
> >   tools/libs/light/libxl_dom.c      |   1 +
> >   tools/libs/light/libxl_internal.h |   4 +
> >   xen/arch/arm/domctl.c             |  15 ++++
> >   xen/include/public/domctl.h       |   9 +++
> >   10 files changed, 223 insertions(+), 15 deletions(-)
> > 
> > diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
> > index 07b96e6671..cdd14f465f 100644
> > --- a/tools/include/xenctrl.h
> > +++ b/tools/include/xenctrl.h
> > @@ -1238,6 +1238,9 @@ int xc_domain_getvnuma(xc_interface *xch,
> >   int xc_domain_soft_reset(xc_interface *xch,
> >                            uint32_t domid);
> > +int xc_domain_add_sci_device(xc_interface *xch,
> > +                              uint32_t domid, char *path);
> > +
> >   #if defined(__i386__) || defined(__x86_64__)
> >   /*
> >    * PC BIOS standard E820 types and structure.
> > diff --git a/tools/include/xenguest.h b/tools/include/xenguest.h
> > index 61d0a82f48..35c611ac73 100644
> > --- a/tools/include/xenguest.h
> > +++ b/tools/include/xenguest.h
> > @@ -242,6 +242,8 @@ struct xc_dom_image {
> >       /* Number of vCPUs */
> >       unsigned int max_vcpus;
> > +
> > +    xen_pfn_t sci_shmem_gfn;
> >   };
> >   /* --- arch specific hooks ----------------------------------------- */
> > diff --git a/tools/libs/ctrl/xc_domain.c b/tools/libs/ctrl/xc_domain.c
> > index b155d6afd2..07bb390e17 100644
> > --- a/tools/libs/ctrl/xc_domain.c
> > +++ b/tools/libs/ctrl/xc_domain.c
> > @@ -2206,6 +2206,29 @@ int xc_domain_soft_reset(xc_interface *xch,
> >       domctl.domain = domid;
> >       return do_domctl(xch, &domctl);
> >   }
> > +
> > +int xc_domain_add_sci_device(xc_interface *xch,
> > +                              uint32_t domid, char *path)
> > +{
> > +    size_t size = strlen(path);
> > +    int rc;
> > +    DECLARE_DOMCTL;
> > +    DECLARE_HYPERCALL_BOUNCE(path, size, XC_HYPERCALL_BUFFER_BOUNCE_IN);
> > +
> > +    if ( xc_hypercall_bounce_pre(xch, path) )
> > +        return -1;
> > +
> > +    domctl.cmd = XEN_DOMCTL_add_sci_device;
> > +    domctl.domain = domid;
> > +    domctl.u.sci_device_op.size = size;
> > +    set_xen_guest_handle(domctl.u.sci_device_op.path, path);
> > +    rc = do_domctl(xch, &domctl);
> > +
> > +    xc_hypercall_bounce_post(xch, path);
> > +
> > +    return rc;
> > +}
> > +
> >   /*
> >    * Local variables:
> >    * mode: C
> > diff --git a/tools/libs/guest/xg_dom_arm.c b/tools/libs/guest/xg_dom_arm.c
> > index 5e3b76355e..368a670c46 100644
> > --- a/tools/libs/guest/xg_dom_arm.c
> > +++ b/tools/libs/guest/xg_dom_arm.c
> > @@ -25,11 +25,12 @@
> >   #include "xg_private.h"
> > -#define NR_MAGIC_PAGES 4
> > +#define NR_MAGIC_PAGES 5
> >   #define CONSOLE_PFN_OFFSET 0
> >   #define XENSTORE_PFN_OFFSET 1
> >   #define MEMACCESS_PFN_OFFSET 2
> >   #define VUART_PFN_OFFSET 3
> > +#define SCI_SHMEM_OFFSET 4
> >   #define LPAE_SHIFT 9
> > @@ -69,11 +70,13 @@ static int alloc_magic_pages(struct xc_dom_image *dom)
> >       dom->console_pfn = base + CONSOLE_PFN_OFFSET;
> >       dom->xenstore_pfn = base + XENSTORE_PFN_OFFSET;
> >       dom->vuart_gfn = base + VUART_PFN_OFFSET;
> > +    dom->sci_shmem_gfn = base + SCI_SHMEM_OFFSET;
> >       xc_clear_domain_page(dom->xch, dom->guest_domid, dom->console_pfn);
> >       xc_clear_domain_page(dom->xch, dom->guest_domid, dom->xenstore_pfn);
> >       xc_clear_domain_page(dom->xch, dom->guest_domid, base + MEMACCESS_PFN_OFFSET);
> >       xc_clear_domain_page(dom->xch, dom->guest_domid, dom->vuart_gfn);
> > +    xc_clear_domain_page(dom->xch, dom->guest_domid, dom->sci_shmem_gfn);
> >       xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_CONSOLE_PFN,
> >               dom->console_pfn);
> > diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
> > index eef1de0939..73ef591822 100644
> > --- a/tools/libs/light/libxl_arm.c
> > +++ b/tools/libs/light/libxl_arm.c
> > @@ -8,6 +8,8 @@
> >   #include <assert.h>
> >   #include <xen/device_tree_defs.h>
> > +#define SCMI_NODE_PATH      "/firmware/scmi"
> > +
> >   static const char *gicv_to_string(libxl_gic_version gic_version)
> >   {
> >       switch (gic_version) {
> > @@ -101,6 +103,19 @@ int libxl__arch_domain_prepare_config(libxl__gc *gc,
> >           return ERROR_FAIL;
> >       }
> > +    switch (d_config->b_info.sci) {
> > +    case LIBXL_SCI_TYPE_NONE:
> > +        config->arch.sci_type = XEN_DOMCTL_CONFIG_SCI_NONE;
> > +        break;
> > +    case LIBXL_SCI_TYPE_SCMI_SMC:
> > +        config->arch.sci_type = XEN_DOMCTL_CONFIG_SCI_SCMI_SMC;
> > +        break;
> > +    default:
> > +        LOG(ERROR, "Unknown SCI type %d",
> > +            d_config->b_info.sci);
> > +        return ERROR_FAIL;
> > +    }
> > +
> >       return 0;
> >   }
> > @@ -122,6 +137,7 @@ int libxl__arch_domain_save_config(libxl__gc *gc,
> >       }
> >       state->clock_frequency = config->arch.clock_frequency;
> > +    state->sci_agent_paddr = config->arch.sci_agent_paddr;
> >       return 0;
> >   }
> > @@ -502,9 +518,6 @@ static int make_optee_node(libxl__gc *gc, void *fdt)
> >       int res;
> >       LOG(DEBUG, "Creating OP-TEE node in dtb");
> > -    res = fdt_begin_node(fdt, "firmware");
> > -    if (res) return res;
> > -
> >       res = fdt_begin_node(fdt, "optee");
> >       if (res) return res;
> > @@ -517,9 +530,6 @@ static int make_optee_node(libxl__gc *gc, void *fdt)
> >       res = fdt_end_node(fdt);
> >       if (res) return res;
> > -    res = fdt_end_node(fdt);
> > -    if (res) return res;
> > -
> >       return 0;
> >   }
> > @@ -902,10 +912,9 @@ static int copy_node(libxl__gc *gc, void *fdt, void *pfdt,
> >       return 0;
> >   }
> > -static int copy_node_by_path(libxl__gc *gc, const char *path,
> > -                             void *fdt, void *pfdt)
> > +static int get_path_nodeoff(const char *path, void *pfdt)
> >   {
> > -    int nodeoff, r;
> > +    int nodeoff;
> >       const char *name = strrchr(path, '/');
> >       if (!name)
> > @@ -925,12 +934,101 @@ static int copy_node_by_path(libxl__gc *gc, const char *path,
> >       if (strcmp(fdt_get_name(pfdt, nodeoff, NULL), name))
> >           return -FDT_ERR_NOTFOUND;
> > +    return nodeoff;
> > +}
> > +
> > +static int copy_node_by_path(libxl__gc *gc, const char *path,
> > +                             void *fdt, void *pfdt)
> > +{
> > +    int nodeoff, r;
> > +
> > +    nodeoff = get_path_nodeoff(path, pfdt);
> > +    if (nodeoff < 0)
> > +        return nodeoff;
> > +
> >       r = copy_node(gc, fdt, pfdt, nodeoff, 0);
> >       if (r) return r;
> >       return 0;
> >   }
> > +static int get_node_phandle(const char *path, void *pfdt, uint32_t *phandle)
> > +{
> > +    int nodeoff;
> > +    const char *name = strrchr(path, '/');
> > +
> > +    if (!name)
> > +        return -FDT_ERR_INTERNAL;
> > +
> > +    name++;
> > +    nodeoff = fdt_path_offset(pfdt, path);
> > +    if (nodeoff < 0)
> > +        return nodeoff;
> > +
> > +    *phandle = fdt_get_phandle(pfdt, nodeoff);
> > +    return 0;
> > +}
> > +
> > +static int make_scmi_shmem_node(libxl__gc *gc, void *fdt, void *pfdt,
> > +                           struct xc_dom_image *dom)
> > +{
> > +    int res;
> > +    char buf[64];
> > +    uint32_t phandle = 0;
> > +
> > +    res = get_node_phandle("/scp-shmem", pfdt, &phandle);
> > +    if (res) return res;
> > +
> > +    snprintf(buf, sizeof(buf), "scp-shmem@%lx",
> > +             dom->sci_shmem_gfn << XC_PAGE_SHIFT);
> > +    res = fdt_begin_node(fdt, buf);
> > +    if (res) return res;
> > +
> > +    res = fdt_property_compat(gc, fdt, 1, "arm,scmi-shmem");
> > +    if (res) return res;
> > +
> > +    res = fdt_property_regs(gc, fdt, GUEST_ROOT_ADDRESS_CELLS,
> > +                    GUEST_ROOT_SIZE_CELLS, 1,
> > +                    dom->sci_shmem_gfn << XC_PAGE_SHIFT, XC_PAGE_SHIFT);
> > +    if (res) return res;
> > +
> > +    LOG(DEBUG, "scmi: setting phandle = %u\n", phandle);
> > +    res = fdt_property_cell(fdt, "phandle", phandle);
> > +    if (res) return res;
> > +
> > +    res = fdt_end_node(fdt);
> > +    if (res) return res;
> > +
> > +    return 0;
> > +}
> > +
> > +static int make_firmware_node(libxl__gc *gc, void *fdt, void *pfdt, int tee,
> > +                              int sci)
> > +{
> > +    int res;
> > +
> > +    if ((tee != LIBXL_TEE_TYPE_OPTEE) && (sci != LIBXL_SCI_TYPE_NONE))
> > +        return 0;
> > +
> > +    res = fdt_begin_node(fdt, "firmware");
> > +    if (res) return res;
> > +
> > +    if (tee == LIBXL_TEE_TYPE_OPTEE) {
> > +       res = make_optee_node(gc, fdt);
> > +       if (res) return res;
> > +    }
> > +
> > +    if (sci == LIBXL_SCI_TYPE_SCMI_SMC) {
> > +        res = copy_node_by_path(gc, SCMI_NODE_PATH, fdt, pfdt);
> > +        if (res) return res;
> > +    }
> > +
> > +    res = fdt_end_node(fdt);
> > +    if (res) return res;
> > +
> > +    return 0;
> > +}
> > +
> >   /*
> >    * The partial device tree is not copied entirely. Only the relevant bits are
> >    * copied to the guest device tree:
> > @@ -1088,8 +1186,10 @@ next_resize:
> >           if (info->arch_arm.vuart == LIBXL_VUART_TYPE_SBSA_UART)
> >               FDT( make_vpl011_uart_node(gc, fdt, ainfo, dom) );
> > -        if (info->tee == LIBXL_TEE_TYPE_OPTEE)
> > -            FDT( make_optee_node(gc, fdt) );
> > +        FDT( make_firmware_node(gc, fdt, pfdt, info->tee, info->sci) );
> > +
> > +        if (info->sci == LIBXL_SCI_TYPE_SCMI_SMC)
> > +            FDT( make_scmi_shmem_node(gc, fdt, pfdt, dom) );
> >           if (d_config->num_pcidevs)
> >               FDT( make_vpci_node(gc, fdt, ainfo, dom) );
> > diff --git a/tools/libs/light/libxl_create.c b/tools/libs/light/libxl_create.c
> > index dcd09d32ba..c7372fd344 100644
> > --- a/tools/libs/light/libxl_create.c
> > +++ b/tools/libs/light/libxl_create.c
> > @@ -596,6 +596,37 @@ out:
> >       return ret;
> >   }
> > +static int map_sci_page(libxl__gc *gc, uint32_t domid, uint64_t paddr,
> > +                         uint64_t guest_addr)
> > +{
> > +    int ret;
> > +    uint64_t _paddr_pfn = paddr >> XC_PAGE_SHIFT;
> > +    uint64_t _guest_pfn = guest_addr >> XC_PAGE_SHIFT;
> > +
> > +    LOG(DEBUG, "iomem %"PRIx64, _paddr_pfn);
> > +
> > +    ret = xc_domain_iomem_permission(CTX->xch, domid, _paddr_pfn, 1, 1);
> > +    if (ret < 0) {
> > +        LOG(ERROR,
> > +              "failed give domain access to iomem page %"PRIx64,
> > +             _paddr_pfn);
> > +        return ret;
> > +    }
> > +
> > +    ret = xc_domain_memory_mapping(CTX->xch, domid,
> > +                                   _guest_pfn, _paddr_pfn,
> > +                                   1, 1);
> > +    if (ret < 0) {
> > +        LOG(ERROR,
> > +              "failed to map to domain iomem page %"PRIx64
> > +              " to guest address %"PRIx64,
> > +              _paddr_pfn, _guest_pfn);
> > +        return ret;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >   int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
> >                          libxl__domain_build_state *state,
> >                          uint32_t *domid, bool soft_reset)
> > @@ -762,6 +793,16 @@ int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
> >           goto out;
> >       }
> > +    if (state->sci_agent_paddr != 0) {
> > +        ret = map_sci_page(gc, *domid, state->sci_agent_paddr,
> > +                            state->sci_shmem_gfn << XC_PAGE_SHIFT);
> > +        if (ret < 0) {
> > +            LOGED(ERROR, *domid, "map SCI page fail");
> > +            rc = ERROR_FAIL;
> > +            goto out;
> > +        }
> > +    }
> > +
> >       dom_path = libxl__xs_get_dompath(gc, *domid);
> >       if (!dom_path) {
> >           rc = ERROR_FAIL;
> > @@ -1817,17 +1858,24 @@ static void libxl__add_dtdevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
> >   {
> >       AO_GC;
> >       libxl__ao_device *aodev = libxl__multidev_prepare(multidev);
> > -    int i, rc = 0;
> > +    int i, rc = 0, rc_sci = 0;
> >       for (i = 0; i < d_config->num_dtdevs; i++) {
> >           const libxl_device_dtdev *dtdev = &d_config->dtdevs[i];
> >           LOGD(DEBUG, domid, "Assign device \"%s\" to domain", dtdev->path);
> >           rc = xc_assign_dt_device(CTX->xch, domid, dtdev->path);
> > -        if (rc < 0) {
> > -            LOGD(ERROR, domid, "xc_assign_dtdevice failed: %d", rc);
> > +        rc_sci = xc_domain_add_sci_device(CTX->xch, domid, dtdev->path);
> > +
> > +        if ((rc < 0) && (rc_sci < 0)) {
> > +            LOGD(ERROR, domid, "xc_assign_dt_device failed: %d; "
> > +                 "xc_domain_add_sci_device failed: %d",
> > +                 rc, rc_sci);
> >               goto out;
> >           }
> > +
> > +        if (rc)
> > +            rc = rc_sci;
> 
> 
> If I get this code right, it sounds like the dom.cfg's dtdev property is
> reused to describe sci devices as well, but the libxl__add_dtdevs() does not
> (and can not) differentiate them. So it has no option but to send two
> domctls for each device in dtdevs[] hoping to at least one domctl to
> succeeded. Or I really missed something?
> 
> It feels to me that:
>  - either new dom.cfg's property could be introduced (scidev?) to describe
> sci devices together with new parsing logic/management code, so you will end
> up having new libxl__add_scidevs() to invoke XEN_DOMCTL_add_sci_device,
> so no mixing things.
>  - or indeed dtdev logic could be *completely* reused including extending
> XEN_DOMCTL_assign_device to cover your use-case, although sounds generic, it
> is used to describe devices for the passthrough (to configure an IOMMU for
> the device), in that case you wouldn't need an extra
> XEN_DOMCTL_add_sci_device introduced by current patch.
> 
> Personally I would use the first option as I am not sure that second option
> is conceptually correct && possible. I would leave this for the maintainers
> to clarify.
> 

Thank you for the point. I agree that reusing XEN_DOMCTL_assign_device
seems not to be conceptually correct. Introducing new dom.cfg property
seems to be the only way to avoid extra Domctl calls.
I will handle this in v2 if there will be no complains from Maintainers.

---
Oleksii.

> 
> 
> >       }
> >   out:
> > diff --git a/tools/libs/light/libxl_dom.c b/tools/libs/light/libxl_dom.c
> > index fe9f760f71..b1d288a8b9 100644
> > --- a/tools/libs/light/libxl_dom.c
> > +++ b/tools/libs/light/libxl_dom.c
> > @@ -710,6 +710,7 @@ int libxl__build_pv(libxl__gc *gc, uint32_t domid,
> >           state->console_mfn = dom->console_pfn;
> >           state->store_mfn = dom->xenstore_pfn;
> >           state->vuart_gfn = dom->vuart_gfn;
> > +        state->sci_shmem_gfn = dom->sci_shmem_gfn;
> >       } else {
> >           state->console_mfn = xc_dom_p2m(dom, dom->console_pfn);
> >           state->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
> > diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h
> > index 0b4671318c..f9f9cc633a 100644
> > --- a/tools/libs/light/libxl_internal.h
> > +++ b/tools/libs/light/libxl_internal.h
> > @@ -1407,6 +1407,10 @@ typedef struct {
> >       /* Whether this domain is being migrated/restored, or booting fresh.  Only
> >        * applicable to the primary domain, not support domains (e.g. stub QEMU). */
> >       bool restore;
> > +
> > +    /* sci channel paddr to be set to device-tree node */
> > +    uint64_t sci_agent_paddr;
> > +    xen_pfn_t sci_shmem_gfn;
> >   } libxl__domain_build_state;
> >   _hidden void libxl__domain_build_state_init(libxl__domain_build_state *s);
> > diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
> > index 6245af6d0b..ba200407da 100644
> > --- a/xen/arch/arm/domctl.c
> > +++ b/xen/arch/arm/domctl.c
> > @@ -4,6 +4,7 @@
> >    * Copyright (c) 2012, Citrix Systems
> >    */
> > +#include <asm/sci/sci.h>
> >   #include <xen/errno.h>
> >   #include <xen/guest_access.h>
> >   #include <xen/hypercall.h>
> > @@ -175,6 +176,20 @@ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
> >           return rc;
> >       }
> > +    case XEN_DOMCTL_add_sci_device:
> > +    {
> > +        int rc;
> > +        struct dt_device_node *dev;
> > +
> > +        rc = dt_find_node_by_gpath(domctl->u.sci_device_op.path,
> > +                                   domctl->u.sci_device_op.size,
> > +                                   &dev);
> > +        if ( rc )
> > +            return rc;
> > +
> > +        return sci_add_dt_device(d, dev);
> > +    }
> > +
> >       default:
> >       {
> >           int rc;
> > diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> > index b85e6170b0..671c72c3e8 100644
> > --- a/xen/include/public/domctl.h
> > +++ b/xen/include/public/domctl.h
> > @@ -1177,6 +1177,13 @@ struct xen_domctl_vmtrace_op {
> >   #define XEN_DOMCTL_vmtrace_get_option         5
> >   #define XEN_DOMCTL_vmtrace_set_option         6
> >   };
> > +
> > +/* XEN_DOMCTL_add_sci_device: set sci device permissions */
> > +struct xen_domctl_sci_device_op {
> > +    uint32_t size; /* Length of the path */
> > +    XEN_GUEST_HANDLE_64(char) path; /* path to the device tree node */
> > +};
> > +
> >   typedef struct xen_domctl_vmtrace_op xen_domctl_vmtrace_op_t;
> >   DEFINE_XEN_GUEST_HANDLE(xen_domctl_vmtrace_op_t);
> > @@ -1265,6 +1272,7 @@ struct xen_domctl {
> >   #define XEN_DOMCTL_get_cpu_policy                82
> >   #define XEN_DOMCTL_set_cpu_policy                83
> >   #define XEN_DOMCTL_vmtrace_op                    84
> > +#define XEN_DOMCTL_add_sci_device                85
> >   #define XEN_DOMCTL_gdbsx_guestmemio            1000
> >   #define XEN_DOMCTL_gdbsx_pausevcpu             1001
> >   #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
> > @@ -1326,6 +1334,7 @@ struct xen_domctl {
> >           struct xen_domctl_psr_alloc         psr_alloc;
> >           struct xen_domctl_vuart_op          vuart_op;
> >           struct xen_domctl_vmtrace_op        vmtrace_op;
> > +        struct xen_domctl_sci_device_op     sci_device_op;
> >           uint8_t                             pad[128];
> >       } u;
> >   };
> 
> I assume the XSM needs updating (adding new hook, etc).
> 
> 
> -- 
> Regards,
> 
> Oleksandr Tyshchenko
>
Oleksii Moisieiev Dec. 17, 2021, 1:40 p.m. UTC | #7
Hi Jan,

On Fri, Dec 17, 2021 at 08:16:05AM +0100, Jan Beulich wrote:
> On 17.12.2021 08:12, Jan Beulich wrote:
> > On 16.12.2021 18:36, Oleksii Moisieiev wrote:
> >> On Tue, Dec 14, 2021 at 10:41:30AM +0100, Jan Beulich wrote:
> >>> On 14.12.2021 10:34, Oleksii Moisieiev wrote:
> >>>> --- a/xen/include/public/domctl.h
> >>>> +++ b/xen/include/public/domctl.h
> >>>> @@ -1177,6 +1177,13 @@ struct xen_domctl_vmtrace_op {
> >>>>  #define XEN_DOMCTL_vmtrace_get_option         5
> >>>>  #define XEN_DOMCTL_vmtrace_set_option         6
> >>>>  };
> >>>> +
> >>>> +/* XEN_DOMCTL_add_sci_device: set sci device permissions */
> >>>> +struct xen_domctl_sci_device_op {
> >>>> +    uint32_t size; /* Length of the path */
> >>>> +    XEN_GUEST_HANDLE_64(char) path; /* path to the device tree node */
> >>>> +};
> >>>
> >>> This being - aiui - Arm-only, please enclose it by respective #if,
> >>> just like we do for certain x86-only ops.
> >>>
> >>
> >> I agree. I will add #ifdefs in v2.
> >>
> >>> I'm further afraid the term "SCI" is ambiguous with ACPI's System
> >>> Control Interrupt, so there's some further tag needed in the names
> >>> used here.
> >>>
> >>
> >> Thank you for remark. I'm thinking about SC as System Control.
> >> What do you think?
> > 
> > I guess "SC" could even more so stand for various things. Even the
> > spelled out "System Control" looks overly generic. If this isn't
> > Arm-specific (in which case adding "arm" into the name might at least
> > help the situation a little), then I guess some further disambiguation
> > is going to be wanted. Since I don't know any of the context of this,
> > I'm afraid you're in a far better position than me to come up with a
> > non-ambiguous name.
> 
> Actually, looking at the title again - any reason not to add "mediator"
> into the name? While I have no idea whether there could be other
> mediators with an ambiguous acronym, this would at least address the
> ACPI related concern (I don't expect anything mediator-like to appear
> there, but then again I might be wrong).
> 
> Jan
> 

I wanted the name to be abbriveation. Also tee option in xen doesn't
have "mediator" in it's name either. As for the SC - the only 2 uses I
found are - Spreadsheet Calculator and some script name for
traffic-shaper.
But I think name still need to be discussed.

--
Oleksii.
Stefano Stabellini Dec. 21, 2021, 1:37 a.m. UTC | #8
On Tue, 14 Dec 2021, Oleksii Moisieiev wrote:
> Integration of the SCMI mediator with xen libs:
> - add hypercalls to aquire SCI channel and set device permissions
> for DomUs;
> - add SCMI_SMC nodes to DomUs device-tree based on partial device-tree;
> - SCI requests redirection from DomUs to Firmware.
> 
> Signed-off-by: Oleksii Moisieiev <oleksii_moisieiev@epam.com>
> ---
>  tools/include/xenctrl.h           |   3 +
>  tools/include/xenguest.h          |   2 +
>  tools/libs/ctrl/xc_domain.c       |  23 ++++++
>  tools/libs/guest/xg_dom_arm.c     |   5 +-
>  tools/libs/light/libxl_arm.c      | 122 +++++++++++++++++++++++++++---
>  tools/libs/light/libxl_create.c   |  54 ++++++++++++-
>  tools/libs/light/libxl_dom.c      |   1 +
>  tools/libs/light/libxl_internal.h |   4 +
>  xen/arch/arm/domctl.c             |  15 ++++
>  xen/include/public/domctl.h       |   9 +++
>  10 files changed, 223 insertions(+), 15 deletions(-)
> 
> diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
> index 07b96e6671..cdd14f465f 100644
> --- a/tools/include/xenctrl.h
> +++ b/tools/include/xenctrl.h
> @@ -1238,6 +1238,9 @@ int xc_domain_getvnuma(xc_interface *xch,
>  int xc_domain_soft_reset(xc_interface *xch,
>                           uint32_t domid);
>  
> +int xc_domain_add_sci_device(xc_interface *xch,
> +                              uint32_t domid, char *path);
> +
>  #if defined(__i386__) || defined(__x86_64__)
>  /*
>   * PC BIOS standard E820 types and structure.
> diff --git a/tools/include/xenguest.h b/tools/include/xenguest.h
> index 61d0a82f48..35c611ac73 100644
> --- a/tools/include/xenguest.h
> +++ b/tools/include/xenguest.h
> @@ -242,6 +242,8 @@ struct xc_dom_image {
>  
>      /* Number of vCPUs */
>      unsigned int max_vcpus;
> +
> +    xen_pfn_t sci_shmem_gfn;

We should be able to avoid introducing sci_shmem_gfn (more below)


>  };
>  
>  /* --- arch specific hooks ----------------------------------------- */
> diff --git a/tools/libs/ctrl/xc_domain.c b/tools/libs/ctrl/xc_domain.c
> index b155d6afd2..07bb390e17 100644
> --- a/tools/libs/ctrl/xc_domain.c
> +++ b/tools/libs/ctrl/xc_domain.c
> @@ -2206,6 +2206,29 @@ int xc_domain_soft_reset(xc_interface *xch,
>      domctl.domain = domid;
>      return do_domctl(xch, &domctl);
>  }
> +
> +int xc_domain_add_sci_device(xc_interface *xch,
> +                              uint32_t domid, char *path)
> +{
> +    size_t size = strlen(path);
> +    int rc;
> +    DECLARE_DOMCTL;
> +    DECLARE_HYPERCALL_BOUNCE(path, size, XC_HYPERCALL_BUFFER_BOUNCE_IN);
> +
> +    if ( xc_hypercall_bounce_pre(xch, path) )
> +        return -1;
> +
> +    domctl.cmd = XEN_DOMCTL_add_sci_device;
> +    domctl.domain = domid;
> +    domctl.u.sci_device_op.size = size;
> +    set_xen_guest_handle(domctl.u.sci_device_op.path, path);
> +    rc = do_domctl(xch, &domctl);
> +
> +    xc_hypercall_bounce_post(xch, path);
> +
> +    return rc;
> +}

I'd skip this xc_ function and hypercall (more below)


>  /*
>   * Local variables:
>   * mode: C
> diff --git a/tools/libs/guest/xg_dom_arm.c b/tools/libs/guest/xg_dom_arm.c
> index 5e3b76355e..368a670c46 100644
> --- a/tools/libs/guest/xg_dom_arm.c
> +++ b/tools/libs/guest/xg_dom_arm.c
> @@ -25,11 +25,12 @@
>  
>  #include "xg_private.h"
>  
> -#define NR_MAGIC_PAGES 4
> +#define NR_MAGIC_PAGES 5
>  #define CONSOLE_PFN_OFFSET 0
>  #define XENSTORE_PFN_OFFSET 1
>  #define MEMACCESS_PFN_OFFSET 2
>  #define VUART_PFN_OFFSET 3
> +#define SCI_SHMEM_OFFSET 4
>  
>  #define LPAE_SHIFT 9
>  
> @@ -69,11 +70,13 @@ static int alloc_magic_pages(struct xc_dom_image *dom)
>      dom->console_pfn = base + CONSOLE_PFN_OFFSET;
>      dom->xenstore_pfn = base + XENSTORE_PFN_OFFSET;
>      dom->vuart_gfn = base + VUART_PFN_OFFSET;
> +    dom->sci_shmem_gfn = base + SCI_SHMEM_OFFSET;
>  
>      xc_clear_domain_page(dom->xch, dom->guest_domid, dom->console_pfn);
>      xc_clear_domain_page(dom->xch, dom->guest_domid, dom->xenstore_pfn);
>      xc_clear_domain_page(dom->xch, dom->guest_domid, base + MEMACCESS_PFN_OFFSET);
>      xc_clear_domain_page(dom->xch, dom->guest_domid, dom->vuart_gfn);
> +    xc_clear_domain_page(dom->xch, dom->guest_domid, dom->sci_shmem_gfn);

Given that sci_shmem_gfn doesn't actually require any allocations, just
a remapping of an existing and already specified physical page, then I
don't think we need to add a new page to alloc_magic_pages for it.

We can just #define a static address for the SCMI page in the domU
addres space and use it for the mapping. No need to allocate a new
page.


>      xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_CONSOLE_PFN,
>              dom->console_pfn);
> diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
> index eef1de0939..73ef591822 100644
> --- a/tools/libs/light/libxl_arm.c
> +++ b/tools/libs/light/libxl_arm.c
> @@ -8,6 +8,8 @@
>  #include <assert.h>
>  #include <xen/device_tree_defs.h>
>  
> +#define SCMI_NODE_PATH      "/firmware/scmi"
> +
>  static const char *gicv_to_string(libxl_gic_version gic_version)
>  {
>      switch (gic_version) {
> @@ -101,6 +103,19 @@ int libxl__arch_domain_prepare_config(libxl__gc *gc,
>          return ERROR_FAIL;
>      }
>  
> +    switch (d_config->b_info.sci) {
> +    case LIBXL_SCI_TYPE_NONE:
> +        config->arch.sci_type = XEN_DOMCTL_CONFIG_SCI_NONE;
> +        break;
> +    case LIBXL_SCI_TYPE_SCMI_SMC:
> +        config->arch.sci_type = XEN_DOMCTL_CONFIG_SCI_SCMI_SMC;
> +        break;
> +    default:
> +        LOG(ERROR, "Unknown SCI type %d",
> +            d_config->b_info.sci);
> +        return ERROR_FAIL;
> +    }
> +
>      return 0;
>  }
>  
> @@ -122,6 +137,7 @@ int libxl__arch_domain_save_config(libxl__gc *gc,
>      }
>  
>      state->clock_frequency = config->arch.clock_frequency;
> +    state->sci_agent_paddr = config->arch.sci_agent_paddr;
>  
>      return 0;
>  }
> @@ -502,9 +518,6 @@ static int make_optee_node(libxl__gc *gc, void *fdt)
>      int res;
>      LOG(DEBUG, "Creating OP-TEE node in dtb");
>  
> -    res = fdt_begin_node(fdt, "firmware");
> -    if (res) return res;
> -
>      res = fdt_begin_node(fdt, "optee");
>      if (res) return res;
>  
> @@ -517,9 +530,6 @@ static int make_optee_node(libxl__gc *gc, void *fdt)
>      res = fdt_end_node(fdt);
>      if (res) return res;
>  
> -    res = fdt_end_node(fdt);
> -    if (res) return res;
> -
>      return 0;
>  }
>  
> @@ -902,10 +912,9 @@ static int copy_node(libxl__gc *gc, void *fdt, void *pfdt,
>      return 0;
>  }
>  
> -static int copy_node_by_path(libxl__gc *gc, const char *path,
> -                             void *fdt, void *pfdt)
> +static int get_path_nodeoff(const char *path, void *pfdt)
>  {
> -    int nodeoff, r;
> +    int nodeoff;
>      const char *name = strrchr(path, '/');
>  
>      if (!name)
> @@ -925,12 +934,101 @@ static int copy_node_by_path(libxl__gc *gc, const char *path,
>      if (strcmp(fdt_get_name(pfdt, nodeoff, NULL), name))
>          return -FDT_ERR_NOTFOUND;
>  
> +    return nodeoff;
> +}
> +
> +static int copy_node_by_path(libxl__gc *gc, const char *path,
> +                             void *fdt, void *pfdt)
> +{
> +    int nodeoff, r;
> +
> +    nodeoff = get_path_nodeoff(path, pfdt);
> +    if (nodeoff < 0)
> +        return nodeoff;
> +
>      r = copy_node(gc, fdt, pfdt, nodeoff, 0);
>      if (r) return r;
>  
>      return 0;
>  }
>  
> +static int get_node_phandle(const char *path, void *pfdt, uint32_t *phandle)
> +{
> +    int nodeoff;
> +    const char *name = strrchr(path, '/');
> +
> +    if (!name)
> +        return -FDT_ERR_INTERNAL;
> +
> +    name++;
> +    nodeoff = fdt_path_offset(pfdt, path);
> +    if (nodeoff < 0)
> +        return nodeoff;
> +
> +    *phandle = fdt_get_phandle(pfdt, nodeoff);
> +    return 0;
> +}
> +
> +static int make_scmi_shmem_node(libxl__gc *gc, void *fdt, void *pfdt,
> +                           struct xc_dom_image *dom)
> +{
> +    int res;
> +    char buf[64];
> +    uint32_t phandle = 0;
> +
> +    res = get_node_phandle("/scp-shmem", pfdt, &phandle);
> +    if (res) return res;

I hope we'll be able to avoid requiring the user to write a partial
device tree with SCMI info in order to use it.

But if we have to go down this route, then we need to add a description
of scp-shmem under docs/misc/arm/

Also, in general, we cannot mandate or require specific paths in device
tree and instead we should find nodes based on the compatible string.
(There are exceptions like /reserved-memory and /firmware but they are
only a couple.)


> +    snprintf(buf, sizeof(buf), "scp-shmem@%lx",
> +             dom->sci_shmem_gfn << XC_PAGE_SHIFT);
> +    res = fdt_begin_node(fdt, buf);
> +    if (res) return res;
> +
> +    res = fdt_property_compat(gc, fdt, 1, "arm,scmi-shmem");
> +    if (res) return res;
> +
> +    res = fdt_property_regs(gc, fdt, GUEST_ROOT_ADDRESS_CELLS,
> +                    GUEST_ROOT_SIZE_CELLS, 1,
> +                    dom->sci_shmem_gfn << XC_PAGE_SHIFT, XC_PAGE_SHIFT);
> +    if (res) return res;
> +
> +    LOG(DEBUG, "scmi: setting phandle = %u\n", phandle);
> +    res = fdt_property_cell(fdt, "phandle", phandle);
> +    if (res) return res;
> +
> +    res = fdt_end_node(fdt);
> +    if (res) return res;
> +
> +    return 0;
> +}
> +
> +static int make_firmware_node(libxl__gc *gc, void *fdt, void *pfdt, int tee,
> +                              int sci)
> +{
> +    int res;
> +
> +    if ((tee != LIBXL_TEE_TYPE_OPTEE) && (sci != LIBXL_SCI_TYPE_NONE))
> +        return 0;

Shouldn't this be:

    if ((tee != LIBXL_TEE_TYPE_OPTEE) && (sci == LIBXL_SCI_TYPE_NONE))


> +    res = fdt_begin_node(fdt, "firmware");
> +    if (res) return res;
> +
> +    if (tee == LIBXL_TEE_TYPE_OPTEE) {
> +       res = make_optee_node(gc, fdt);
> +       if (res) return res;
> +    }
> +
> +    if (sci == LIBXL_SCI_TYPE_SCMI_SMC) {
> +        res = copy_node_by_path(gc, SCMI_NODE_PATH, fdt, pfdt);
> +        if (res) return res;
> +    }

Do we really need to copy the node from the partial device tree? That
makes it a lot harder to use for the user. Ideally a user would only
need to specify sci = "scmi_smc" and everything else should be done
automatically.

Can we automatically generate the SCMI device tree node instead? It
looks like we should have all the information to be able to do it. If
not, what is missing?


> +    res = fdt_end_node(fdt);
> +    if (res) return res;
> +
> +    return 0;
> +}
> +
>  /*
>   * The partial device tree is not copied entirely. Only the relevant bits are
>   * copied to the guest device tree:
> @@ -1088,8 +1186,10 @@ next_resize:
>          if (info->arch_arm.vuart == LIBXL_VUART_TYPE_SBSA_UART)
>              FDT( make_vpl011_uart_node(gc, fdt, ainfo, dom) );
>  
> -        if (info->tee == LIBXL_TEE_TYPE_OPTEE)
> -            FDT( make_optee_node(gc, fdt) );
> +        FDT( make_firmware_node(gc, fdt, pfdt, info->tee, info->sci) );
> +
> +        if (info->sci == LIBXL_SCI_TYPE_SCMI_SMC)
> +            FDT( make_scmi_shmem_node(gc, fdt, pfdt, dom) );
>  
>          if (d_config->num_pcidevs)
>              FDT( make_vpci_node(gc, fdt, ainfo, dom) );
> diff --git a/tools/libs/light/libxl_create.c b/tools/libs/light/libxl_create.c
> index dcd09d32ba..c7372fd344 100644
> --- a/tools/libs/light/libxl_create.c
> +++ b/tools/libs/light/libxl_create.c
> @@ -596,6 +596,37 @@ out:
>      return ret;
>  }
>  
> +static int map_sci_page(libxl__gc *gc, uint32_t domid, uint64_t paddr,
> +                         uint64_t guest_addr)
> +{
> +    int ret;
> +    uint64_t _paddr_pfn = paddr >> XC_PAGE_SHIFT;
> +    uint64_t _guest_pfn = guest_addr >> XC_PAGE_SHIFT;
> +
> +    LOG(DEBUG, "iomem %"PRIx64, _paddr_pfn);
> +
> +    ret = xc_domain_iomem_permission(CTX->xch, domid, _paddr_pfn, 1, 1);
> +    if (ret < 0) {
> +        LOG(ERROR,
> +              "failed give domain access to iomem page %"PRIx64,
> +             _paddr_pfn);
> +        return ret;
> +    }
> +
> +    ret = xc_domain_memory_mapping(CTX->xch, domid,
> +                                   _guest_pfn, _paddr_pfn,
> +                                   1, 1);
> +    if (ret < 0) {
> +        LOG(ERROR,
> +              "failed to map to domain iomem page %"PRIx64
> +              " to guest address %"PRIx64,
> +              _paddr_pfn, _guest_pfn);
> +        return ret;
> +    }
> +
> +    return 0;
> +}
> +
>  int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
>                         libxl__domain_build_state *state,
>                         uint32_t *domid, bool soft_reset)
> @@ -762,6 +793,16 @@ int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
>          goto out;
>      }
>  
> +    if (state->sci_agent_paddr != 0) {

Shouldn't we also check for sci_type == XEN_DOMCTL_CONFIG_SCI_NONE ?

If the user specifies sci_type == XEN_DOMCTL_CONFIG_SCI_SCMI_SMC, we
shouldn't automatically map any SCMI channels to the guest, right?


> +        ret = map_sci_page(gc, *domid, state->sci_agent_paddr,
> +                            state->sci_shmem_gfn << XC_PAGE_SHIFT);
> +        if (ret < 0) {
> +            LOGED(ERROR, *domid, "map SCI page fail");
> +            rc = ERROR_FAIL;
> +            goto out;
> +        }
> +    }
> +
>      dom_path = libxl__xs_get_dompath(gc, *domid);
>      if (!dom_path) {
>          rc = ERROR_FAIL;
> @@ -1817,17 +1858,24 @@ static void libxl__add_dtdevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
>  {
>      AO_GC;
>      libxl__ao_device *aodev = libxl__multidev_prepare(multidev);
> -    int i, rc = 0;
> +    int i, rc = 0, rc_sci = 0;
>  
>      for (i = 0; i < d_config->num_dtdevs; i++) {
>          const libxl_device_dtdev *dtdev = &d_config->dtdevs[i];
>  
>          LOGD(DEBUG, domid, "Assign device \"%s\" to domain", dtdev->path);
>          rc = xc_assign_dt_device(CTX->xch, domid, dtdev->path);
> -        if (rc < 0) {
> -            LOGD(ERROR, domid, "xc_assign_dtdevice failed: %d", rc);
> +        rc_sci = xc_domain_add_sci_device(CTX->xch, domid, dtdev->path);
> +
> +        if ((rc < 0) && (rc_sci < 0)) {
> +            LOGD(ERROR, domid, "xc_assign_dt_device failed: %d; "
> +                 "xc_domain_add_sci_device failed: %d",
> +                 rc, rc_sci);
>              goto out;
>          }
> +
> +        if (rc)
> +            rc = rc_sci;

I would make this simpler actually. Do we need to pass dtdev->path
twice, once for xc_assign_dt_device and a second time for
xc_domain_add_sci_device?

I would skip adding xc_domain_add_sci_device altogether. A general SCMI
enable/disable for the domain is necessary, but then we can just get the
directly assigned devices from xc_assign_dt_device. There is no need to
specify the list twice. If a device is not manageable via SCMI we can
detect it automatically because it is going to be missing scmi_devid on
device tree.


>      }
>  
>  out:
> diff --git a/tools/libs/light/libxl_dom.c b/tools/libs/light/libxl_dom.c
> index fe9f760f71..b1d288a8b9 100644
> --- a/tools/libs/light/libxl_dom.c
> +++ b/tools/libs/light/libxl_dom.c
> @@ -710,6 +710,7 @@ int libxl__build_pv(libxl__gc *gc, uint32_t domid,
>          state->console_mfn = dom->console_pfn;
>          state->store_mfn = dom->xenstore_pfn;
>          state->vuart_gfn = dom->vuart_gfn;
> +        state->sci_shmem_gfn = dom->sci_shmem_gfn;
>      } else {
>          state->console_mfn = xc_dom_p2m(dom, dom->console_pfn);
>          state->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
> diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h
> index 0b4671318c..f9f9cc633a 100644
> --- a/tools/libs/light/libxl_internal.h
> +++ b/tools/libs/light/libxl_internal.h
> @@ -1407,6 +1407,10 @@ typedef struct {
>      /* Whether this domain is being migrated/restored, or booting fresh.  Only
>       * applicable to the primary domain, not support domains (e.g. stub QEMU). */
>      bool restore;
> +
> +    /* sci channel paddr to be set to device-tree node */
> +    uint64_t sci_agent_paddr;
> +    xen_pfn_t sci_shmem_gfn;
>  } libxl__domain_build_state;
>  
>  _hidden void libxl__domain_build_state_init(libxl__domain_build_state *s);
> diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
> index 6245af6d0b..ba200407da 100644
> --- a/xen/arch/arm/domctl.c
> +++ b/xen/arch/arm/domctl.c
> @@ -4,6 +4,7 @@
>   * Copyright (c) 2012, Citrix Systems
>   */
>  
> +#include <asm/sci/sci.h>
>  #include <xen/errno.h>
>  #include <xen/guest_access.h>
>  #include <xen/hypercall.h>
> @@ -175,6 +176,20 @@ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
>  
>          return rc;
>      }
> +    case XEN_DOMCTL_add_sci_device:
> +    {
> +        int rc;
> +        struct dt_device_node *dev;
> +
> +        rc = dt_find_node_by_gpath(domctl->u.sci_device_op.path,
> +                                   domctl->u.sci_device_op.size,
> +                                   &dev);
> +        if ( rc )
> +            return rc;
> +
> +        return sci_add_dt_device(d, dev);
> +    }

I would skip it and instead I would add a call to sci_add_dt_device in
the implementation of XEN_DOMCTL_assign_device.


>      default:
>      {
>          int rc;
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index b85e6170b0..671c72c3e8 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -1177,6 +1177,13 @@ struct xen_domctl_vmtrace_op {
>  #define XEN_DOMCTL_vmtrace_get_option         5
>  #define XEN_DOMCTL_vmtrace_set_option         6
>  };
> +
> +/* XEN_DOMCTL_add_sci_device: set sci device permissions */
> +struct xen_domctl_sci_device_op {
> +    uint32_t size; /* Length of the path */
> +    XEN_GUEST_HANDLE_64(char) path; /* path to the device tree node */
> +};
> +
>  typedef struct xen_domctl_vmtrace_op xen_domctl_vmtrace_op_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_vmtrace_op_t);
>  
> @@ -1265,6 +1272,7 @@ struct xen_domctl {
>  #define XEN_DOMCTL_get_cpu_policy                82
>  #define XEN_DOMCTL_set_cpu_policy                83
>  #define XEN_DOMCTL_vmtrace_op                    84
> +#define XEN_DOMCTL_add_sci_device                85
>  #define XEN_DOMCTL_gdbsx_guestmemio            1000
>  #define XEN_DOMCTL_gdbsx_pausevcpu             1001
>  #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
> @@ -1326,6 +1334,7 @@ struct xen_domctl {
>          struct xen_domctl_psr_alloc         psr_alloc;
>          struct xen_domctl_vuart_op          vuart_op;
>          struct xen_domctl_vmtrace_op        vmtrace_op;
> +        struct xen_domctl_sci_device_op     sci_device_op;
>          uint8_t                             pad[128];
>      } u;
>  };
> -- 
> 2.27.0
>
Anthony PERARD Dec. 21, 2021, 2:45 p.m. UTC | #9
On Fri, Dec 17, 2021 at 12:15:25PM +0000, Oleksii Moisieiev wrote:
> > On 14.12.21 11:34, Oleksii Moisieiev wrote:
> > > @@ -1817,17 +1858,24 @@ static void libxl__add_dtdevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
> > >   {
> > >       AO_GC;
> > >       libxl__ao_device *aodev = libxl__multidev_prepare(multidev);
> > > -    int i, rc = 0;
> > > +    int i, rc = 0, rc_sci = 0;
> > >       for (i = 0; i < d_config->num_dtdevs; i++) {
> > >           const libxl_device_dtdev *dtdev = &d_config->dtdevs[i];
> > >           LOGD(DEBUG, domid, "Assign device \"%s\" to domain", dtdev->path);
> > >           rc = xc_assign_dt_device(CTX->xch, domid, dtdev->path);
> > > -        if (rc < 0) {
> > > -            LOGD(ERROR, domid, "xc_assign_dtdevice failed: %d", rc);
> > > +        rc_sci = xc_domain_add_sci_device(CTX->xch, domid, dtdev->path);
> > > +
> > > +        if ((rc < 0) && (rc_sci < 0)) {
> > > +            LOGD(ERROR, domid, "xc_assign_dt_device failed: %d; "
> > > +                 "xc_domain_add_sci_device failed: %d",
> > > +                 rc, rc_sci);
> > >               goto out;
> > >           }
> > > +
> > > +        if (rc)
> > > +            rc = rc_sci;
> > 
> > 
> > If I get this code right, it sounds like the dom.cfg's dtdev property is
> > reused to describe sci devices as well, but the libxl__add_dtdevs() does not
> > (and can not) differentiate them. So it has no option but to send two
> > domctls for each device in dtdevs[] hoping to at least one domctl to
> > succeeded. Or I really missed something?
> > 
> > It feels to me that:
> >  - either new dom.cfg's property could be introduced (scidev?) to describe
> > sci devices together with new parsing logic/management code, so you will end
> > up having new libxl__add_scidevs() to invoke XEN_DOMCTL_add_sci_device,
> > so no mixing things.
> >  - or indeed dtdev logic could be *completely* reused including extending
> > XEN_DOMCTL_assign_device to cover your use-case, although sounds generic, it
> > is used to describe devices for the passthrough (to configure an IOMMU for
> > the device), in that case you wouldn't need an extra
> > XEN_DOMCTL_add_sci_device introduced by current patch.
> > 
> > Personally I would use the first option as I am not sure that second option
> > is conceptually correct && possible. I would leave this for the maintainers
> > to clarify.
> > 
> 
> Thank you for the point. I agree that reusing XEN_DOMCTL_assign_device
> seems not to be conceptually correct. Introducing new dom.cfg property
> seems to be the only way to avoid extra Domctl calls.
> I will handle this in v2 if there will be no complains from Maintainers.

I don't know if there will be a complains in v2 or not :-), but using
something different from dtdev sound good.

If I understand correctly, dtdev seems to be about passing through an
existing device to a guest, and this new sci device seems to be about having Xen
"emulating" an sci device which the guest will use. So introducing
something new (scidev or other name) sounds good.

Thanks,
Stefano Stabellini Dec. 21, 2021, 9:39 p.m. UTC | #10
On Tue, 21 Dec 2021, Anthony PERARD wrote:
> On Fri, Dec 17, 2021 at 12:15:25PM +0000, Oleksii Moisieiev wrote:
> > > On 14.12.21 11:34, Oleksii Moisieiev wrote:
> > > > @@ -1817,17 +1858,24 @@ static void libxl__add_dtdevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
> > > >   {
> > > >       AO_GC;
> > > >       libxl__ao_device *aodev = libxl__multidev_prepare(multidev);
> > > > -    int i, rc = 0;
> > > > +    int i, rc = 0, rc_sci = 0;
> > > >       for (i = 0; i < d_config->num_dtdevs; i++) {
> > > >           const libxl_device_dtdev *dtdev = &d_config->dtdevs[i];
> > > >           LOGD(DEBUG, domid, "Assign device \"%s\" to domain", dtdev->path);
> > > >           rc = xc_assign_dt_device(CTX->xch, domid, dtdev->path);
> > > > -        if (rc < 0) {
> > > > -            LOGD(ERROR, domid, "xc_assign_dtdevice failed: %d", rc);
> > > > +        rc_sci = xc_domain_add_sci_device(CTX->xch, domid, dtdev->path);
> > > > +
> > > > +        if ((rc < 0) && (rc_sci < 0)) {
> > > > +            LOGD(ERROR, domid, "xc_assign_dt_device failed: %d; "
> > > > +                 "xc_domain_add_sci_device failed: %d",
> > > > +                 rc, rc_sci);
> > > >               goto out;
> > > >           }
> > > > +
> > > > +        if (rc)
> > > > +            rc = rc_sci;
> > > 
> > > 
> > > If I get this code right, it sounds like the dom.cfg's dtdev property is
> > > reused to describe sci devices as well, but the libxl__add_dtdevs() does not
> > > (and can not) differentiate them. So it has no option but to send two
> > > domctls for each device in dtdevs[] hoping to at least one domctl to
> > > succeeded. Or I really missed something?
> > > 
> > > It feels to me that:
> > >  - either new dom.cfg's property could be introduced (scidev?) to describe
> > > sci devices together with new parsing logic/management code, so you will end
> > > up having new libxl__add_scidevs() to invoke XEN_DOMCTL_add_sci_device,
> > > so no mixing things.
> > >  - or indeed dtdev logic could be *completely* reused including extending
> > > XEN_DOMCTL_assign_device to cover your use-case, although sounds generic, it
> > > is used to describe devices for the passthrough (to configure an IOMMU for
> > > the device), in that case you wouldn't need an extra
> > > XEN_DOMCTL_add_sci_device introduced by current patch.

I realize I did my review before reading Oleksandr's comments. I fully
agree with his feedback. Having seen how difficult is for users to setup
a domU configuration correctly today, I would advise to try to reuse the
existing dtdev property instead of adding yet one new property to make
the life of our users easier.



> > > Personally I would use the first option as I am not sure that second option
> > > is conceptually correct && possible. I would leave this for the maintainers
> > > to clarify.
> > > 
> > 
> > Thank you for the point. I agree that reusing XEN_DOMCTL_assign_device
> > seems not to be conceptually correct. Introducing new dom.cfg property
> > seems to be the only way to avoid extra Domctl calls.
> > I will handle this in v2 if there will be no complains from Maintainers.
> 
> I don't know if there will be a complains in v2 or not :-), but using
> something different from dtdev sound good.
> 
> If I understand correctly, dtdev seems to be about passing through an
> existing device to a guest, and this new sci device seems to be about having Xen
> "emulating" an sci device which the guest will use. So introducing
> something new (scidev or other name) sounds good.

Users already have to provide 4 properties (dtdev, iomem, irqs,
device_tree) to setup device assignment. I think that making it 5
properties would not be a step forward :-)

To me dtdev and XEN_DOMCTL_assign_device are appropriate because they
signify device assignment of one or more devices. We are not adding any
additional "meaning" to them. It is just that we'll automatically detect
and generate any SCMI configurations based on the list of assigned
devices. Because if SCMI is enabled and a device is assigned to the
guest, then I think we want to add the device to the SCMI list of
devices automatically.

If we really want to introduce a new list of devices, please make it
optional so that most times the user can skip it unless really required.
Julien Grall Dec. 22, 2021, 9:24 a.m. UTC | #11
Hi Stefano,

On 21/12/2021 22:39, Stefano Stabellini wrote:
> On Tue, 21 Dec 2021, Anthony PERARD wrote:
>> On Fri, Dec 17, 2021 at 12:15:25PM +0000, Oleksii Moisieiev wrote:
>>>> On 14.12.21 11:34, Oleksii Moisieiev wrote:
>>>>> @@ -1817,17 +1858,24 @@ static void libxl__add_dtdevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
>>>>>    {
>>>>>        AO_GC;
>>>>>        libxl__ao_device *aodev = libxl__multidev_prepare(multidev);
>>>>> -    int i, rc = 0;
>>>>> +    int i, rc = 0, rc_sci = 0;
>>>>>        for (i = 0; i < d_config->num_dtdevs; i++) {
>>>>>            const libxl_device_dtdev *dtdev = &d_config->dtdevs[i];
>>>>>            LOGD(DEBUG, domid, "Assign device \"%s\" to domain", dtdev->path);
>>>>>            rc = xc_assign_dt_device(CTX->xch, domid, dtdev->path);
>>>>> -        if (rc < 0) {
>>>>> -            LOGD(ERROR, domid, "xc_assign_dtdevice failed: %d", rc);
>>>>> +        rc_sci = xc_domain_add_sci_device(CTX->xch, domid, dtdev->path);
>>>>> +
>>>>> +        if ((rc < 0) && (rc_sci < 0)) {
>>>>> +            LOGD(ERROR, domid, "xc_assign_dt_device failed: %d; "
>>>>> +                 "xc_domain_add_sci_device failed: %d",
>>>>> +                 rc, rc_sci);
>>>>>                goto out;
>>>>>            }
>>>>> +
>>>>> +        if (rc)
>>>>> +            rc = rc_sci;
>>>>
>>>>
>>>> If I get this code right, it sounds like the dom.cfg's dtdev property is
>>>> reused to describe sci devices as well, but the libxl__add_dtdevs() does not
>>>> (and can not) differentiate them. So it has no option but to send two
>>>> domctls for each device in dtdevs[] hoping to at least one domctl to
>>>> succeeded. Or I really missed something?
>>>>
>>>> It feels to me that:
>>>>   - either new dom.cfg's property could be introduced (scidev?) to describe
>>>> sci devices together with new parsing logic/management code, so you will end
>>>> up having new libxl__add_scidevs() to invoke XEN_DOMCTL_add_sci_device,
>>>> so no mixing things.
>>>>   - or indeed dtdev logic could be *completely* reused including extending
>>>> XEN_DOMCTL_assign_device to cover your use-case, although sounds generic, it
>>>> is used to describe devices for the passthrough (to configure an IOMMU for
>>>> the device), in that case you wouldn't need an extra
>>>> XEN_DOMCTL_add_sci_device introduced by current patch.
> 
> I realize I did my review before reading Oleksandr's comments. I fully
> agree with his feedback. Having seen how difficult is for users to setup
> a domU configuration correctly today, I would advise to try to reuse the
> existing dtdev property instead of adding yet one new property to make
> the life of our users easier.
> 
> 
> 
>>>> Personally I would use the first option as I am not sure that second option
>>>> is conceptually correct && possible. I would leave this for the maintainers
>>>> to clarify.
>>>>
>>>
>>> Thank you for the point. I agree that reusing XEN_DOMCTL_assign_device
>>> seems not to be conceptually correct. Introducing new dom.cfg property
>>> seems to be the only way to avoid extra Domctl calls.
>>> I will handle this in v2 if there will be no complains from Maintainers.
>>
>> I don't know if there will be a complains in v2 or not :-), but using
>> something different from dtdev sound good.
>>
>> If I understand correctly, dtdev seems to be about passing through an
>> existing device to a guest, and this new sci device seems to be about having Xen
>> "emulating" an sci device which the guest will use. So introducing
>> something new (scidev or other name) sounds good.
> 
> Users already have to provide 4 properties (dtdev, iomem, irqs,
> device_tree) to setup device assignment. I think that making it 5
> properties would not be a step forward :-)

IIRC, in the past, we discussed to fetch the information directly from 
the partial device-tree. Maybe this discussion needs to be revived?

Although, this is a separate topic from this series.

> 
> To me dtdev and XEN_DOMCTL_assign_device are appropriate because they
> signify device assignment of one or more devices. We are not adding any
> additional "meaning" to them. It is just that we'll automatically detect
> and generate any SCMI configurations based on the list of assigned
> devices. Because if SCMI is enabled and a device is assigned to the
> guest, then I think we want to add the device to the SCMI list of
> devices automatically.

I am OK with re-using dtdev/XEN_DOMCTL_assign_device however there is a 
pitfall with that approach.

At the moment, they are only used for device protected by the IOMMU. If 
the device is not protected by the IOMMU then it will return an error.

Now, with your approach we may have a device that is not protected by 
the IOMMU but require to a SCMI configuration.

I don't think it would be sensible to just return "succeed" here because 
technically you are asking to assign a non-protected device. But at the 
same time, it would prevent a user to assign a non-DMA capable device.

So how do you suggest to approach this?

Cheers,
Volodymyr Babchuk Dec. 22, 2021, 11:17 a.m. UTC | #12
Hi Julien,

Julien Grall <julien@xen.org> writes:

> Hi Stefano,
>
> On 21/12/2021 22:39, Stefano Stabellini wrote:
>> On Tue, 21 Dec 2021, Anthony PERARD wrote:
>>> On Fri, Dec 17, 2021 at 12:15:25PM +0000, Oleksii Moisieiev wrote:
>>>>> On 14.12.21 11:34, Oleksii Moisieiev wrote:
>>>>>> @@ -1817,17 +1858,24 @@ static void libxl__add_dtdevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
>>>>>>    {
>>>>>>        AO_GC;
>>>>>>        libxl__ao_device *aodev = libxl__multidev_prepare(multidev);
>>>>>> -    int i, rc = 0;
>>>>>> +    int i, rc = 0, rc_sci = 0;
>>>>>>        for (i = 0; i < d_config->num_dtdevs; i++) {
>>>>>>            const libxl_device_dtdev *dtdev = &d_config->dtdevs[i];
>>>>>>            LOGD(DEBUG, domid, "Assign device \"%s\" to domain", dtdev->path);
>>>>>>            rc = xc_assign_dt_device(CTX->xch, domid, dtdev->path);
>>>>>> -        if (rc < 0) {
>>>>>> -            LOGD(ERROR, domid, "xc_assign_dtdevice failed: %d", rc);
>>>>>> +        rc_sci = xc_domain_add_sci_device(CTX->xch, domid, dtdev->path);
>>>>>> +
>>>>>> +        if ((rc < 0) && (rc_sci < 0)) {
>>>>>> +            LOGD(ERROR, domid, "xc_assign_dt_device failed: %d; "
>>>>>> +                 "xc_domain_add_sci_device failed: %d",
>>>>>> +                 rc, rc_sci);
>>>>>>                goto out;
>>>>>>            }
>>>>>> +
>>>>>> +        if (rc)
>>>>>> +            rc = rc_sci;
>>>>>
>>>>>
>>>>> If I get this code right, it sounds like the dom.cfg's dtdev property is
>>>>> reused to describe sci devices as well, but the libxl__add_dtdevs() does not
>>>>> (and can not) differentiate them. So it has no option but to send two
>>>>> domctls for each device in dtdevs[] hoping to at least one domctl to
>>>>> succeeded. Or I really missed something?
>>>>>
>>>>> It feels to me that:
>>>>>   - either new dom.cfg's property could be introduced (scidev?) to describe
>>>>> sci devices together with new parsing logic/management code, so you will end
>>>>> up having new libxl__add_scidevs() to invoke XEN_DOMCTL_add_sci_device,
>>>>> so no mixing things.
>>>>>   - or indeed dtdev logic could be *completely* reused including extending
>>>>> XEN_DOMCTL_assign_device to cover your use-case, although sounds generic, it
>>>>> is used to describe devices for the passthrough (to configure an IOMMU for
>>>>> the device), in that case you wouldn't need an extra
>>>>> XEN_DOMCTL_add_sci_device introduced by current patch.
>> I realize I did my review before reading Oleksandr's comments. I
>> fully
>> agree with his feedback. Having seen how difficult is for users to setup
>> a domU configuration correctly today, I would advise to try to reuse the
>> existing dtdev property instead of adding yet one new property to make
>> the life of our users easier.
>> 
>> 
>>>>> Personally I would use the first option as I am not sure that second option
>>>>> is conceptually correct && possible. I would leave this for the maintainers
>>>>> to clarify.
>>>>>
>>>>
>>>> Thank you for the point. I agree that reusing XEN_DOMCTL_assign_device
>>>> seems not to be conceptually correct. Introducing new dom.cfg property
>>>> seems to be the only way to avoid extra Domctl calls.
>>>> I will handle this in v2 if there will be no complains from Maintainers.
>>>
>>> I don't know if there will be a complains in v2 or not :-), but using
>>> something different from dtdev sound good.
>>>
>>> If I understand correctly, dtdev seems to be about passing through an
>>> existing device to a guest, and this new sci device seems to be about having Xen
>>> "emulating" an sci device which the guest will use. So introducing
>>> something new (scidev or other name) sounds good.
>> Users already have to provide 4 properties (dtdev, iomem, irqs,
>> device_tree) to setup device assignment. I think that making it 5
>> properties would not be a step forward :-)
>
> IIRC, in the past, we discussed to fetch the information directly from
> the partial device-tree. Maybe this discussion needs to be revived?
>
> Although, this is a separate topic from this series.
>
>> To me dtdev and XEN_DOMCTL_assign_device are appropriate because
>> they
>> signify device assignment of one or more devices. We are not adding any
>> additional "meaning" to them. It is just that we'll automatically detect
>> and generate any SCMI configurations based on the list of assigned
>> devices. Because if SCMI is enabled and a device is assigned to the
>> guest, then I think we want to add the device to the SCMI list of
>> devices automatically.
>
> I am OK with re-using dtdev/XEN_DOMCTL_assign_device however there is
> a pitfall with that approach.
>
> At the moment, they are only used for device protected by the
> IOMMU. If the device is not protected by the IOMMU then it will return
> an error.

IIRC there was a change, that allowed to assign device without a IOMMU. At
least we discussed this internally.

>
> Now, with your approach we may have a device that is not protected by
> the IOMMU but require to a SCMI configuration.

You need to protect only DMA-capable devices.

> I don't think it would be sensible to just return "succeed" here
> because technically you are asking to assign a non-protected
> device. But at the same time, it would prevent a user to assign a
> non-DMA capable device.
>
> So how do you suggest to approach this?

Well, in my head assign_device ideally should do the following:

1. Assign IOMMU if it is configured for the device
2. Assign SCMI access rights
(Not related to this patch series, but...)
3. Assign IRQs
4. Assign IO memory ranges.

Points 3. and 4. would allow us to not provide additional irq=[] and
iomem=[] entries in a guest config.
Julien Grall Dec. 22, 2021, 11:30 a.m. UTC | #13
Hi,

On 22/12/2021 12:17, Volodymyr Babchuk wrote:
> Julien Grall <julien@xen.org> writes:
>> On 21/12/2021 22:39, Stefano Stabellini wrote:
>>> On Tue, 21 Dec 2021, Anthony PERARD wrote:
>>>> On Fri, Dec 17, 2021 at 12:15:25PM +0000, Oleksii Moisieiev wrote:
>>>>>> On 14.12.21 11:34, Oleksii Moisieiev wrote:
>>>>>>> @@ -1817,17 +1858,24 @@ static void libxl__add_dtdevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
>>>>>>>     {
>>>>>>>         AO_GC;
>>>>>>>         libxl__ao_device *aodev = libxl__multidev_prepare(multidev);
>>>>>>> -    int i, rc = 0;
>>>>>>> +    int i, rc = 0, rc_sci = 0;
>>>>>>>         for (i = 0; i < d_config->num_dtdevs; i++) {
>>>>>>>             const libxl_device_dtdev *dtdev = &d_config->dtdevs[i];
>>>>>>>             LOGD(DEBUG, domid, "Assign device \"%s\" to domain", dtdev->path);
>>>>>>>             rc = xc_assign_dt_device(CTX->xch, domid, dtdev->path);
>>>>>>> -        if (rc < 0) {
>>>>>>> -            LOGD(ERROR, domid, "xc_assign_dtdevice failed: %d", rc);
>>>>>>> +        rc_sci = xc_domain_add_sci_device(CTX->xch, domid, dtdev->path);
>>>>>>> +
>>>>>>> +        if ((rc < 0) && (rc_sci < 0)) {
>>>>>>> +            LOGD(ERROR, domid, "xc_assign_dt_device failed: %d; "
>>>>>>> +                 "xc_domain_add_sci_device failed: %d",
>>>>>>> +                 rc, rc_sci);
>>>>>>>                 goto out;
>>>>>>>             }
>>>>>>> +
>>>>>>> +        if (rc)
>>>>>>> +            rc = rc_sci;
>>>>>>
>>>>>>
>>>>>> If I get this code right, it sounds like the dom.cfg's dtdev property is
>>>>>> reused to describe sci devices as well, but the libxl__add_dtdevs() does not
>>>>>> (and can not) differentiate them. So it has no option but to send two
>>>>>> domctls for each device in dtdevs[] hoping to at least one domctl to
>>>>>> succeeded. Or I really missed something?
>>>>>>
>>>>>> It feels to me that:
>>>>>>    - either new dom.cfg's property could be introduced (scidev?) to describe
>>>>>> sci devices together with new parsing logic/management code, so you will end
>>>>>> up having new libxl__add_scidevs() to invoke XEN_DOMCTL_add_sci_device,
>>>>>> so no mixing things.
>>>>>>    - or indeed dtdev logic could be *completely* reused including extending
>>>>>> XEN_DOMCTL_assign_device to cover your use-case, although sounds generic, it
>>>>>> is used to describe devices for the passthrough (to configure an IOMMU for
>>>>>> the device), in that case you wouldn't need an extra
>>>>>> XEN_DOMCTL_add_sci_device introduced by current patch.
>>> I realize I did my review before reading Oleksandr's comments. I
>>> fully
>>> agree with his feedback. Having seen how difficult is for users to setup
>>> a domU configuration correctly today, I would advise to try to reuse the
>>> existing dtdev property instead of adding yet one new property to make
>>> the life of our users easier.
>>>
>>>
>>>>>> Personally I would use the first option as I am not sure that second option
>>>>>> is conceptually correct && possible. I would leave this for the maintainers
>>>>>> to clarify.
>>>>>>
>>>>>
>>>>> Thank you for the point. I agree that reusing XEN_DOMCTL_assign_device
>>>>> seems not to be conceptually correct. Introducing new dom.cfg property
>>>>> seems to be the only way to avoid extra Domctl calls.
>>>>> I will handle this in v2 if there will be no complains from Maintainers.
>>>>
>>>> I don't know if there will be a complains in v2 or not :-), but using
>>>> something different from dtdev sound good.
>>>>
>>>> If I understand correctly, dtdev seems to be about passing through an
>>>> existing device to a guest, and this new sci device seems to be about having Xen
>>>> "emulating" an sci device which the guest will use. So introducing
>>>> something new (scidev or other name) sounds good.
>>> Users already have to provide 4 properties (dtdev, iomem, irqs,
>>> device_tree) to setup device assignment. I think that making it 5
>>> properties would not be a step forward :-)
>>
>> IIRC, in the past, we discussed to fetch the information directly from
>> the partial device-tree. Maybe this discussion needs to be revived?
>>
>> Although, this is a separate topic from this series.
>>
>>> To me dtdev and XEN_DOMCTL_assign_device are appropriate because
>>> they
>>> signify device assignment of one or more devices. We are not adding any
>>> additional "meaning" to them. It is just that we'll automatically detect
>>> and generate any SCMI configurations based on the list of assigned
>>> devices. Because if SCMI is enabled and a device is assigned to the
>>> guest, then I think we want to add the device to the SCMI list of
>>> devices automatically.
>>
>> I am OK with re-using dtdev/XEN_DOMCTL_assign_device however there is
>> a pitfall with that approach.
>>
>> At the moment, they are only used for device protected by the
>> IOMMU. If the device is not protected by the IOMMU then it will return
>> an error.
> 
> IIRC there was a change, that allowed to assign device without a IOMMU. At
> least we discussed this internally.

I am not aware of any upstream. Do you have a pointer if there is any 
public discussion?

>>
>> Now, with your approach we may have a device that is not protected by
>> the IOMMU but require to a SCMI configuration.
> 
> You need to protect only DMA-capable devices.

Xen doesn't know if the device is DMA-capable or not. So...

> 
>> I don't think it would be sensible to just return "succeed" here
>> because technically you are asking to assign a non-protected
>> device. But at the same time, it would prevent a user to assign a
>> non-DMA capable device.
>>
>> So how do you suggest to approach this?
> 
> Well, in my head assign_device ideally should do the following:
> 
> 1. Assign IOMMU if it is configured for the device

... with this approach you are at the risk to let the user passthrough a 
device that cannot be protected.

> 2. Assign SCMI access rights
> (Not related to this patch series, but...)
> 3. Assign IRQs
> 4. Assign IO memory ranges.
> 
> Points 3. and 4. would allow us to not provide additional irq=[] and
> iomem=[] entries in a guest config.

That could only work if your guest is using the same layout as the host. 
Otherwise, there is a risk it will clash with other parts of the memory 
layout.

Today, guests started via the toolstack is only using a virtual layout, 
so you would first need to add support to use the host memory layout.

Cheers,
Volodymyr Babchuk Dec. 22, 2021, 12:34 p.m. UTC | #14
Julien Grall <julien@xen.org> writes:

> Hi,
>
> On 22/12/2021 12:17, Volodymyr Babchuk wrote:
>> Julien Grall <julien@xen.org> writes:
>>> On 21/12/2021 22:39, Stefano Stabellini wrote:
>>>> On Tue, 21 Dec 2021, Anthony PERARD wrote:
>>>>> On Fri, Dec 17, 2021 at 12:15:25PM +0000, Oleksii Moisieiev wrote:
>>>>>>> On 14.12.21 11:34, Oleksii Moisieiev wrote:
>>>>>>>> @@ -1817,17 +1858,24 @@ static void libxl__add_dtdevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
>>>>>>>>     {
>>>>>>>>         AO_GC;
>>>>>>>>         libxl__ao_device *aodev = libxl__multidev_prepare(multidev);
>>>>>>>> -    int i, rc = 0;
>>>>>>>> +    int i, rc = 0, rc_sci = 0;
>>>>>>>>         for (i = 0; i < d_config->num_dtdevs; i++) {
>>>>>>>>             const libxl_device_dtdev *dtdev = &d_config->dtdevs[i];
>>>>>>>>             LOGD(DEBUG, domid, "Assign device \"%s\" to domain", dtdev->path);
>>>>>>>>             rc = xc_assign_dt_device(CTX->xch, domid, dtdev->path);
>>>>>>>> -        if (rc < 0) {
>>>>>>>> -            LOGD(ERROR, domid, "xc_assign_dtdevice failed: %d", rc);
>>>>>>>> +        rc_sci = xc_domain_add_sci_device(CTX->xch, domid, dtdev->path);
>>>>>>>> +
>>>>>>>> +        if ((rc < 0) && (rc_sci < 0)) {
>>>>>>>> +            LOGD(ERROR, domid, "xc_assign_dt_device failed: %d; "
>>>>>>>> +                 "xc_domain_add_sci_device failed: %d",
>>>>>>>> +                 rc, rc_sci);
>>>>>>>>                 goto out;
>>>>>>>>             }
>>>>>>>> +
>>>>>>>> +        if (rc)
>>>>>>>> +            rc = rc_sci;
>>>>>>>
>>>>>>>
>>>>>>> If I get this code right, it sounds like the dom.cfg's dtdev property is
>>>>>>> reused to describe sci devices as well, but the libxl__add_dtdevs() does not
>>>>>>> (and can not) differentiate them. So it has no option but to send two
>>>>>>> domctls for each device in dtdevs[] hoping to at least one domctl to
>>>>>>> succeeded. Or I really missed something?
>>>>>>>
>>>>>>> It feels to me that:
>>>>>>>    - either new dom.cfg's property could be introduced (scidev?) to describe
>>>>>>> sci devices together with new parsing logic/management code, so you will end
>>>>>>> up having new libxl__add_scidevs() to invoke XEN_DOMCTL_add_sci_device,
>>>>>>> so no mixing things.
>>>>>>>    - or indeed dtdev logic could be *completely* reused including extending
>>>>>>> XEN_DOMCTL_assign_device to cover your use-case, although sounds generic, it
>>>>>>> is used to describe devices for the passthrough (to configure an IOMMU for
>>>>>>> the device), in that case you wouldn't need an extra
>>>>>>> XEN_DOMCTL_add_sci_device introduced by current patch.
>>>> I realize I did my review before reading Oleksandr's comments. I
>>>> fully
>>>> agree with his feedback. Having seen how difficult is for users to setup
>>>> a domU configuration correctly today, I would advise to try to reuse the
>>>> existing dtdev property instead of adding yet one new property to make
>>>> the life of our users easier.
>>>>
>>>>
>>>>>>> Personally I would use the first option as I am not sure that second option
>>>>>>> is conceptually correct && possible. I would leave this for the maintainers
>>>>>>> to clarify.
>>>>>>>
>>>>>>
>>>>>> Thank you for the point. I agree that reusing XEN_DOMCTL_assign_device
>>>>>> seems not to be conceptually correct. Introducing new dom.cfg property
>>>>>> seems to be the only way to avoid extra Domctl calls.
>>>>>> I will handle this in v2 if there will be no complains from Maintainers.
>>>>>
>>>>> I don't know if there will be a complains in v2 or not :-), but using
>>>>> something different from dtdev sound good.
>>>>>
>>>>> If I understand correctly, dtdev seems to be about passing through an
>>>>> existing device to a guest, and this new sci device seems to be about having Xen
>>>>> "emulating" an sci device which the guest will use. So introducing
>>>>> something new (scidev or other name) sounds good.
>>>> Users already have to provide 4 properties (dtdev, iomem, irqs,
>>>> device_tree) to setup device assignment. I think that making it 5
>>>> properties would not be a step forward :-)
>>>
>>> IIRC, in the past, we discussed to fetch the information directly from
>>> the partial device-tree. Maybe this discussion needs to be revived?
>>>
>>> Although, this is a separate topic from this series.
>>>
>>>> To me dtdev and XEN_DOMCTL_assign_device are appropriate because
>>>> they
>>>> signify device assignment of one or more devices. We are not adding any
>>>> additional "meaning" to them. It is just that we'll automatically detect
>>>> and generate any SCMI configurations based on the list of assigned
>>>> devices. Because if SCMI is enabled and a device is assigned to the
>>>> guest, then I think we want to add the device to the SCMI list of
>>>> devices automatically.
>>>
>>> I am OK with re-using dtdev/XEN_DOMCTL_assign_device however there is
>>> a pitfall with that approach.
>>>
>>> At the moment, they are only used for device protected by the
>>> IOMMU. If the device is not protected by the IOMMU then it will return
>>> an error.
>> IIRC there was a change, that allowed to assign device without a
>> IOMMU. At
>> least we discussed this internally.
>
> I am not aware of any upstream. Do you have a pointer if there is any
> public discussion?

No, this is the first public discussion on this topic.

>
>>>
>>> Now, with your approach we may have a device that is not protected by
>>> the IOMMU but require to a SCMI configuration.
>> You need to protect only DMA-capable devices.
>
> Xen doesn't know if the device is DMA-capable or not. So...
>

But it knows if there is a iommus=<> node present for the device.

>> 
>>> I don't think it would be sensible to just return "succeed" here
>>> because technically you are asking to assign a non-protected
>>> device. But at the same time, it would prevent a user to assign a
>>> non-DMA capable device.
>>>
>>> So how do you suggest to approach this?
>> Well, in my head assign_device ideally should do the following:
>> 1. Assign IOMMU if it is configured for the device
>
> ... with this approach you are at the risk to let the user passthrough
> a device that cannot be protected.
>
>> 2. Assign SCMI access rights
>> (Not related to this patch series, but...)
>> 3. Assign IRQs
>> 4. Assign IO memory ranges.
>> Points 3. and 4. would allow us to not provide additional irq=[] and
>> iomem=[] entries in a guest config.
>
> That could only work if your guest is using the same layout as the
> host.

Agreed. But in my experience, in most cases this is the true. We worked
with Renesas and IMX hardware and in both cases iomems were
mapped 1:1.

> Otherwise, there is a risk it will clash with other parts of the
> memory layout.
>

> Today, guests started via the toolstack is only using a virtual
> layout, so you would first need to add support to use the host memory
> layout.

I can't say for all the possible configurations in the wild, but I'm
assuming that in most cases it will be fine to use 1:1 mapping. For
those more exotic cases it would be possible to implement some
configuration option like iomem_map=[mfn:gfn].

As Stefano pointed, right now user needs to provide 3 configuration
options to pass a device to a guest: dt_dev, irq, iomem. But in fact,
Xen is already knowing about irq and iomem from device tree.
Oleksii Moisieiev Dec. 22, 2021, 1:41 p.m. UTC | #15
Hi Stefano, 

On Mon, Dec 20, 2021 at 05:37:50PM -0800, Stefano Stabellini wrote:
> On Tue, 14 Dec 2021, Oleksii Moisieiev wrote:
> > Integration of the SCMI mediator with xen libs:
> > - add hypercalls to aquire SCI channel and set device permissions
> > for DomUs;
> > - add SCMI_SMC nodes to DomUs device-tree based on partial device-tree;
> > - SCI requests redirection from DomUs to Firmware.
> > 
> > Signed-off-by: Oleksii Moisieiev <oleksii_moisieiev@epam.com>
> > ---
> >  tools/include/xenctrl.h           |   3 +
> >  tools/include/xenguest.h          |   2 +
> >  tools/libs/ctrl/xc_domain.c       |  23 ++++++
> >  tools/libs/guest/xg_dom_arm.c     |   5 +-
> >  tools/libs/light/libxl_arm.c      | 122 +++++++++++++++++++++++++++---
> >  tools/libs/light/libxl_create.c   |  54 ++++++++++++-
> >  tools/libs/light/libxl_dom.c      |   1 +
> >  tools/libs/light/libxl_internal.h |   4 +
> >  xen/arch/arm/domctl.c             |  15 ++++
> >  xen/include/public/domctl.h       |   9 +++
> >  10 files changed, 223 insertions(+), 15 deletions(-)
> > 
> > diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
> > index 07b96e6671..cdd14f465f 100644
> > --- a/tools/include/xenctrl.h
> > +++ b/tools/include/xenctrl.h
> > @@ -1238,6 +1238,9 @@ int xc_domain_getvnuma(xc_interface *xch,
> >  int xc_domain_soft_reset(xc_interface *xch,
> >                           uint32_t domid);
> >  
> > +int xc_domain_add_sci_device(xc_interface *xch,
> > +                              uint32_t domid, char *path);
> > +
> >  #if defined(__i386__) || defined(__x86_64__)
> >  /*
> >   * PC BIOS standard E820 types and structure.
> > diff --git a/tools/include/xenguest.h b/tools/include/xenguest.h
> > index 61d0a82f48..35c611ac73 100644
> > --- a/tools/include/xenguest.h
> > +++ b/tools/include/xenguest.h
> > @@ -242,6 +242,8 @@ struct xc_dom_image {
> >  
> >      /* Number of vCPUs */
> >      unsigned int max_vcpus;
> > +
> > +    xen_pfn_t sci_shmem_gfn;
> 
> We should be able to avoid introducing sci_shmem_gfn (more below)
> 
> 
> >  };
> >  
> >  /* --- arch specific hooks ----------------------------------------- */
> > diff --git a/tools/libs/ctrl/xc_domain.c b/tools/libs/ctrl/xc_domain.c
> > index b155d6afd2..07bb390e17 100644
> > --- a/tools/libs/ctrl/xc_domain.c
> > +++ b/tools/libs/ctrl/xc_domain.c
> > @@ -2206,6 +2206,29 @@ int xc_domain_soft_reset(xc_interface *xch,
> >      domctl.domain = domid;
> >      return do_domctl(xch, &domctl);
> >  }
> > +
> > +int xc_domain_add_sci_device(xc_interface *xch,
> > +                              uint32_t domid, char *path)
> > +{
> > +    size_t size = strlen(path);
> > +    int rc;
> > +    DECLARE_DOMCTL;
> > +    DECLARE_HYPERCALL_BOUNCE(path, size, XC_HYPERCALL_BUFFER_BOUNCE_IN);
> > +
> > +    if ( xc_hypercall_bounce_pre(xch, path) )
> > +        return -1;
> > +
> > +    domctl.cmd = XEN_DOMCTL_add_sci_device;
> > +    domctl.domain = domid;
> > +    domctl.u.sci_device_op.size = size;
> > +    set_xen_guest_handle(domctl.u.sci_device_op.path, path);
> > +    rc = do_domctl(xch, &domctl);
> > +
> > +    xc_hypercall_bounce_post(xch, path);
> > +
> > +    return rc;
> > +}
> 
> I'd skip this xc_ function and hypercall (more below)
> 
> 
> >  /*
> >   * Local variables:
> >   * mode: C
> > diff --git a/tools/libs/guest/xg_dom_arm.c b/tools/libs/guest/xg_dom_arm.c
> > index 5e3b76355e..368a670c46 100644
> > --- a/tools/libs/guest/xg_dom_arm.c
> > +++ b/tools/libs/guest/xg_dom_arm.c
> > @@ -25,11 +25,12 @@
> >  
> >  #include "xg_private.h"
> >  
> > -#define NR_MAGIC_PAGES 4
> > +#define NR_MAGIC_PAGES 5
> >  #define CONSOLE_PFN_OFFSET 0
> >  #define XENSTORE_PFN_OFFSET 1
> >  #define MEMACCESS_PFN_OFFSET 2
> >  #define VUART_PFN_OFFSET 3
> > +#define SCI_SHMEM_OFFSET 4
> >  
> >  #define LPAE_SHIFT 9
> >  
> > @@ -69,11 +70,13 @@ static int alloc_magic_pages(struct xc_dom_image *dom)
> >      dom->console_pfn = base + CONSOLE_PFN_OFFSET;
> >      dom->xenstore_pfn = base + XENSTORE_PFN_OFFSET;
> >      dom->vuart_gfn = base + VUART_PFN_OFFSET;
> > +    dom->sci_shmem_gfn = base + SCI_SHMEM_OFFSET;
> >  
> >      xc_clear_domain_page(dom->xch, dom->guest_domid, dom->console_pfn);
> >      xc_clear_domain_page(dom->xch, dom->guest_domid, dom->xenstore_pfn);
> >      xc_clear_domain_page(dom->xch, dom->guest_domid, base + MEMACCESS_PFN_OFFSET);
> >      xc_clear_domain_page(dom->xch, dom->guest_domid, dom->vuart_gfn);
> > +    xc_clear_domain_page(dom->xch, dom->guest_domid, dom->sci_shmem_gfn);
> 
> Given that sci_shmem_gfn doesn't actually require any allocations, just
> a remapping of an existing and already specified physical page, then I
> don't think we need to add a new page to alloc_magic_pages for it.
> 
> We can just #define a static address for the SCMI page in the domU
> addres space and use it for the mapping. No need to allocate a new
> page.
> 

I think this could be implemented. I will refactor this part in v2.

> 
> >      xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_CONSOLE_PFN,
> >              dom->console_pfn);
> > diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
> > index eef1de0939..73ef591822 100644
> > --- a/tools/libs/light/libxl_arm.c
> > +++ b/tools/libs/light/libxl_arm.c
> > @@ -8,6 +8,8 @@
> >  #include <assert.h>
> >  #include <xen/device_tree_defs.h>
> >  
> > +#define SCMI_NODE_PATH      "/firmware/scmi"
> > +
> >  static const char *gicv_to_string(libxl_gic_version gic_version)
> >  {
> >      switch (gic_version) {
> > @@ -101,6 +103,19 @@ int libxl__arch_domain_prepare_config(libxl__gc *gc,
> >          return ERROR_FAIL;
> >      }
> >  
> > +    switch (d_config->b_info.sci) {
> > +    case LIBXL_SCI_TYPE_NONE:
> > +        config->arch.sci_type = XEN_DOMCTL_CONFIG_SCI_NONE;
> > +        break;
> > +    case LIBXL_SCI_TYPE_SCMI_SMC:
> > +        config->arch.sci_type = XEN_DOMCTL_CONFIG_SCI_SCMI_SMC;
> > +        break;
> > +    default:
> > +        LOG(ERROR, "Unknown SCI type %d",
> > +            d_config->b_info.sci);
> > +        return ERROR_FAIL;
> > +    }
> > +
> >      return 0;
> >  }
> >  
> > @@ -122,6 +137,7 @@ int libxl__arch_domain_save_config(libxl__gc *gc,
> >      }
> >  
> >      state->clock_frequency = config->arch.clock_frequency;
> > +    state->sci_agent_paddr = config->arch.sci_agent_paddr;
> >  
> >      return 0;
> >  }
> > @@ -502,9 +518,6 @@ static int make_optee_node(libxl__gc *gc, void *fdt)
> >      int res;
> >      LOG(DEBUG, "Creating OP-TEE node in dtb");
> >  
> > -    res = fdt_begin_node(fdt, "firmware");
> > -    if (res) return res;
> > -
> >      res = fdt_begin_node(fdt, "optee");
> >      if (res) return res;
> >  
> > @@ -517,9 +530,6 @@ static int make_optee_node(libxl__gc *gc, void *fdt)
> >      res = fdt_end_node(fdt);
> >      if (res) return res;
> >  
> > -    res = fdt_end_node(fdt);
> > -    if (res) return res;
> > -
> >      return 0;
> >  }
> >  
> > @@ -902,10 +912,9 @@ static int copy_node(libxl__gc *gc, void *fdt, void *pfdt,
> >      return 0;
> >  }
> >  
> > -static int copy_node_by_path(libxl__gc *gc, const char *path,
> > -                             void *fdt, void *pfdt)
> > +static int get_path_nodeoff(const char *path, void *pfdt)
> >  {
> > -    int nodeoff, r;
> > +    int nodeoff;
> >      const char *name = strrchr(path, '/');
> >  
> >      if (!name)
> > @@ -925,12 +934,101 @@ static int copy_node_by_path(libxl__gc *gc, const char *path,
> >      if (strcmp(fdt_get_name(pfdt, nodeoff, NULL), name))
> >          return -FDT_ERR_NOTFOUND;
> >  
> > +    return nodeoff;
> > +}
> > +
> > +static int copy_node_by_path(libxl__gc *gc, const char *path,
> > +                             void *fdt, void *pfdt)
> > +{
> > +    int nodeoff, r;
> > +
> > +    nodeoff = get_path_nodeoff(path, pfdt);
> > +    if (nodeoff < 0)
> > +        return nodeoff;
> > +
> >      r = copy_node(gc, fdt, pfdt, nodeoff, 0);
> >      if (r) return r;
> >  
> >      return 0;
> >  }
> >  
> > +static int get_node_phandle(const char *path, void *pfdt, uint32_t *phandle)
> > +{
> > +    int nodeoff;
> > +    const char *name = strrchr(path, '/');
> > +
> > +    if (!name)
> > +        return -FDT_ERR_INTERNAL;
> > +
> > +    name++;
> > +    nodeoff = fdt_path_offset(pfdt, path);
> > +    if (nodeoff < 0)
> > +        return nodeoff;
> > +
> > +    *phandle = fdt_get_phandle(pfdt, nodeoff);
> > +    return 0;
> > +}
> > +
> > +static int make_scmi_shmem_node(libxl__gc *gc, void *fdt, void *pfdt,
> > +                           struct xc_dom_image *dom)
> > +{
> > +    int res;
> > +    char buf[64];
> > +    uint32_t phandle = 0;
> > +
> > +    res = get_node_phandle("/scp-shmem", pfdt, &phandle);
> > +    if (res) return res;
> 
> I hope we'll be able to avoid requiring the user to write a partial
> device tree with SCMI info in order to use it.

I assuming that all SCMI related data is already present in partial
device-tree. Because all scmi nodes should be added to the original
platform device tree, which is used to build domu and dom0 dtbs.
So the structure should look the following way, taken h3ulcb as an
example;

r8a77951-scmi.dtsi - describe all scmi nodes and update
clock/power/reset etc in the device nodes;
 |
\ /
included in r8a77951-ulcb.dts - device tree for rcar h3ulcb board
 |
\ /
included in r8a77951-ulcb-xen.dts - xen dts files;
included in r8a77951-ulcb-domu.dts - domu dts file.

So the idea is that scmi configuration should be applied for both
virtualized and non-virtualized systems.

That's why I copy nodes from partial device-tree to domain device-tree.
Another advantage of this approach is that user can configure scmi node
for each domain by add/remove protocols. For example only clock and
resets could work through scmi for DomX. This can easily be configured
when using partial device-tree.

> 
> But if we have to go down this route, then we need to add a description
> of scp-shmem under docs/misc/arm/
> 
> Also, in general, we cannot mandate or require specific paths in device
> tree and instead we should find nodes based on the compatible string.
> (There are exceptions like /reserved-memory and /firmware but they are
> only a couple.)
> 

I agree about the specific path. I will make it to use compatible string
in v2.

> 
> > +    snprintf(buf, sizeof(buf), "scp-shmem@%lx",
> > +             dom->sci_shmem_gfn << XC_PAGE_SHIFT);
> > +    res = fdt_begin_node(fdt, buf);
> > +    if (res) return res;
> > +
> > +    res = fdt_property_compat(gc, fdt, 1, "arm,scmi-shmem");
> > +    if (res) return res;
> > +
> > +    res = fdt_property_regs(gc, fdt, GUEST_ROOT_ADDRESS_CELLS,
> > +                    GUEST_ROOT_SIZE_CELLS, 1,
> > +                    dom->sci_shmem_gfn << XC_PAGE_SHIFT, XC_PAGE_SHIFT);
> > +    if (res) return res;
> > +
> > +    LOG(DEBUG, "scmi: setting phandle = %u\n", phandle);
> > +    res = fdt_property_cell(fdt, "phandle", phandle);
> > +    if (res) return res;
> > +
> > +    res = fdt_end_node(fdt);
> > +    if (res) return res;
> > +
> > +    return 0;
> > +}
> > +
> > +static int make_firmware_node(libxl__gc *gc, void *fdt, void *pfdt, int tee,
> > +                              int sci)
> > +{
> > +    int res;
> > +
> > +    if ((tee != LIBXL_TEE_TYPE_OPTEE) && (sci != LIBXL_SCI_TYPE_NONE))
> > +        return 0;
> 
> Shouldn't this be:
> 
>     if ((tee != LIBXL_TEE_TYPE_OPTEE) && (sci == LIBXL_SCI_TYPE_NONE))
> 
Yeah, I should fix this in v2. Thanks.

> 
> > +    res = fdt_begin_node(fdt, "firmware");
> > +    if (res) return res;
> > +
> > +    if (tee == LIBXL_TEE_TYPE_OPTEE) {
> > +       res = make_optee_node(gc, fdt);
> > +       if (res) return res;
> > +    }
> > +
> > +    if (sci == LIBXL_SCI_TYPE_SCMI_SMC) {
> > +        res = copy_node_by_path(gc, SCMI_NODE_PATH, fdt, pfdt);
> > +        if (res) return res;
> > +    }
> 
> Do we really need to copy the node from the partial device tree? That
> makes it a lot harder to use for the user. Ideally a user would only
> need to specify sci = "scmi_smc" and everything else should be done
> automatically.
> 
> Can we automatically generate the SCMI device tree node instead? It
> looks like we should have all the information to be able to do it. If
> not, what is missing?
> 
Please see the answer above.
> 
> > +    res = fdt_end_node(fdt);
> > +    if (res) return res;
> > +
> > +    return 0;
> > +}
> > +
> >  /*
> >   * The partial device tree is not copied entirely. Only the relevant bits are
> >   * copied to the guest device tree:
> > @@ -1088,8 +1186,10 @@ next_resize:
> >          if (info->arch_arm.vuart == LIBXL_VUART_TYPE_SBSA_UART)
> >              FDT( make_vpl011_uart_node(gc, fdt, ainfo, dom) );
> >  
> > -        if (info->tee == LIBXL_TEE_TYPE_OPTEE)
> > -            FDT( make_optee_node(gc, fdt) );
> > +        FDT( make_firmware_node(gc, fdt, pfdt, info->tee, info->sci) );
> > +
> > +        if (info->sci == LIBXL_SCI_TYPE_SCMI_SMC)
> > +            FDT( make_scmi_shmem_node(gc, fdt, pfdt, dom) );
> >  
> >          if (d_config->num_pcidevs)
> >              FDT( make_vpci_node(gc, fdt, ainfo, dom) );
> > diff --git a/tools/libs/light/libxl_create.c b/tools/libs/light/libxl_create.c
> > index dcd09d32ba..c7372fd344 100644
> > --- a/tools/libs/light/libxl_create.c
> > +++ b/tools/libs/light/libxl_create.c
> > @@ -596,6 +596,37 @@ out:
> >      return ret;
> >  }
> >  
> > +static int map_sci_page(libxl__gc *gc, uint32_t domid, uint64_t paddr,
> > +                         uint64_t guest_addr)
> > +{
> > +    int ret;
> > +    uint64_t _paddr_pfn = paddr >> XC_PAGE_SHIFT;
> > +    uint64_t _guest_pfn = guest_addr >> XC_PAGE_SHIFT;
> > +
> > +    LOG(DEBUG, "iomem %"PRIx64, _paddr_pfn);
> > +
> > +    ret = xc_domain_iomem_permission(CTX->xch, domid, _paddr_pfn, 1, 1);
> > +    if (ret < 0) {
> > +        LOG(ERROR,
> > +              "failed give domain access to iomem page %"PRIx64,
> > +             _paddr_pfn);
> > +        return ret;
> > +    }
> > +
> > +    ret = xc_domain_memory_mapping(CTX->xch, domid,
> > +                                   _guest_pfn, _paddr_pfn,
> > +                                   1, 1);
> > +    if (ret < 0) {
> > +        LOG(ERROR,
> > +              "failed to map to domain iomem page %"PRIx64
> > +              " to guest address %"PRIx64,
> > +              _paddr_pfn, _guest_pfn);
> > +        return ret;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >  int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
> >                         libxl__domain_build_state *state,
> >                         uint32_t *domid, bool soft_reset)
> > @@ -762,6 +793,16 @@ int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
> >          goto out;
> >      }
> >  
> > +    if (state->sci_agent_paddr != 0) {
> 
> Shouldn't we also check for sci_type == XEN_DOMCTL_CONFIG_SCI_NONE ?
> 
> If the user specifies sci_type == XEN_DOMCTL_CONFIG_SCI_SCMI_SMC, we
> shouldn't automatically map any SCMI channels to the guest, right?
> 
Sounds good. I will fix this in v2` 
> 
> > +        ret = map_sci_page(gc, *domid, state->sci_agent_paddr,
> > +                            state->sci_shmem_gfn << XC_PAGE_SHIFT);
> > +        if (ret < 0) {
> > +            LOGED(ERROR, *domid, "map SCI page fail");
> > +            rc = ERROR_FAIL;
> > +            goto out;
> > +        }
> > +    }
> > +
> >      dom_path = libxl__xs_get_dompath(gc, *domid);
> >      if (!dom_path) {
> >          rc = ERROR_FAIL;
> > @@ -1817,17 +1858,24 @@ static void libxl__add_dtdevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
> >  {
> >      AO_GC;
> >      libxl__ao_device *aodev = libxl__multidev_prepare(multidev);
> > -    int i, rc = 0;
> > +    int i, rc = 0, rc_sci = 0;
> >  
> >      for (i = 0; i < d_config->num_dtdevs; i++) {
> >          const libxl_device_dtdev *dtdev = &d_config->dtdevs[i];
> >  
> >          LOGD(DEBUG, domid, "Assign device \"%s\" to domain", dtdev->path);
> >          rc = xc_assign_dt_device(CTX->xch, domid, dtdev->path);
> > -        if (rc < 0) {
> > -            LOGD(ERROR, domid, "xc_assign_dtdevice failed: %d", rc);
> > +        rc_sci = xc_domain_add_sci_device(CTX->xch, domid, dtdev->path);
> > +
> > +        if ((rc < 0) && (rc_sci < 0)) {
> > +            LOGD(ERROR, domid, "xc_assign_dt_device failed: %d; "
> > +                 "xc_domain_add_sci_device failed: %d",
> > +                 rc, rc_sci);
> >              goto out;
> >          }
> > +
> > +        if (rc)
> > +            rc = rc_sci;
> 
> I would make this simpler actually. Do we need to pass dtdev->path
> twice, once for xc_assign_dt_device and a second time for
> xc_domain_add_sci_device?
> 
> I would skip adding xc_domain_add_sci_device altogether. A general SCMI
> enable/disable for the domain is necessary, but then we can just get the
> directly assigned devices from xc_assign_dt_device. There is no need to
> specify the list twice. If a device is not manageable via SCMI we can
> detect it automatically because it is going to be missing scmi_devid on
> device tree.
> 
In one of my previous email I described the idea to make a list of
sci_devids in dom.cfg. Such as: 
sci_devs = [0, 1, 35]
and pass this list to Xen via hypercall, then send permission requests
via scmi for each devid.
The advantages of this approach are the following:
- we don't need scmi_devid in device-tree anymore;
- we do only 1 hypercall to set permissions for devices;
- we don't need to pass device-tree node to Hypervisor, just devid.

What do you think about this approach?

> 
> >      }
> >  
> >  out:
> > diff --git a/tools/libs/light/libxl_dom.c b/tools/libs/light/libxl_dom.c
> > index fe9f760f71..b1d288a8b9 100644
> > --- a/tools/libs/light/libxl_dom.c
> > +++ b/tools/libs/light/libxl_dom.c
> > @@ -710,6 +710,7 @@ int libxl__build_pv(libxl__gc *gc, uint32_t domid,
> >          state->console_mfn = dom->console_pfn;
> >          state->store_mfn = dom->xenstore_pfn;
> >          state->vuart_gfn = dom->vuart_gfn;
> > +        state->sci_shmem_gfn = dom->sci_shmem_gfn;
> >      } else {
> >          state->console_mfn = xc_dom_p2m(dom, dom->console_pfn);
> >          state->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
> > diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h
> > index 0b4671318c..f9f9cc633a 100644
> > --- a/tools/libs/light/libxl_internal.h
> > +++ b/tools/libs/light/libxl_internal.h
> > @@ -1407,6 +1407,10 @@ typedef struct {
> >      /* Whether this domain is being migrated/restored, or booting fresh.  Only
> >       * applicable to the primary domain, not support domains (e.g. stub QEMU). */
> >      bool restore;
> > +
> > +    /* sci channel paddr to be set to device-tree node */
> > +    uint64_t sci_agent_paddr;
> > +    xen_pfn_t sci_shmem_gfn;
> >  } libxl__domain_build_state;
> >  
> >  _hidden void libxl__domain_build_state_init(libxl__domain_build_state *s);
> > diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
> > index 6245af6d0b..ba200407da 100644
> > --- a/xen/arch/arm/domctl.c
> > +++ b/xen/arch/arm/domctl.c
> > @@ -4,6 +4,7 @@
> >   * Copyright (c) 2012, Citrix Systems
> >   */
> >  
> > +#include <asm/sci/sci.h>
> >  #include <xen/errno.h>
> >  #include <xen/guest_access.h>
> >  #include <xen/hypercall.h>
> > @@ -175,6 +176,20 @@ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
> >  
> >          return rc;
> >      }
> > +    case XEN_DOMCTL_add_sci_device:
> > +    {
> > +        int rc;
> > +        struct dt_device_node *dev;
> > +
> > +        rc = dt_find_node_by_gpath(domctl->u.sci_device_op.path,
> > +                                   domctl->u.sci_device_op.size,
> > +                                   &dev);
> > +        if ( rc )
> > +            return rc;
> > +
> > +        return sci_add_dt_device(d, dev);
> > +    }
> 
> I would skip it and instead I would add a call to sci_add_dt_device in
> the implementation of XEN_DOMCTL_assign_device.
> 
please see my comment above.
> 
> >      default:
> >      {
> >          int rc;
> > diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> > index b85e6170b0..671c72c3e8 100644
> > --- a/xen/include/public/domctl.h
> > +++ b/xen/include/public/domctl.h
> > @@ -1177,6 +1177,13 @@ struct xen_domctl_vmtrace_op {
> >  #define XEN_DOMCTL_vmtrace_get_option         5
> >  #define XEN_DOMCTL_vmtrace_set_option         6
> >  };
> > +
> > +/* XEN_DOMCTL_add_sci_device: set sci device permissions */
> > +struct xen_domctl_sci_device_op {
> > +    uint32_t size; /* Length of the path */
> > +    XEN_GUEST_HANDLE_64(char) path; /* path to the device tree node */
> > +};
> > +
> >  typedef struct xen_domctl_vmtrace_op xen_domctl_vmtrace_op_t;
> >  DEFINE_XEN_GUEST_HANDLE(xen_domctl_vmtrace_op_t);
> >  
> > @@ -1265,6 +1272,7 @@ struct xen_domctl {
> >  #define XEN_DOMCTL_get_cpu_policy                82
> >  #define XEN_DOMCTL_set_cpu_policy                83
> >  #define XEN_DOMCTL_vmtrace_op                    84
> > +#define XEN_DOMCTL_add_sci_device                85
> >  #define XEN_DOMCTL_gdbsx_guestmemio            1000
> >  #define XEN_DOMCTL_gdbsx_pausevcpu             1001
> >  #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
> > @@ -1326,6 +1334,7 @@ struct xen_domctl {
> >          struct xen_domctl_psr_alloc         psr_alloc;
> >          struct xen_domctl_vuart_op          vuart_op;
> >          struct xen_domctl_vmtrace_op        vmtrace_op;
> > +        struct xen_domctl_sci_device_op     sci_device_op;
> >          uint8_t                             pad[128];
> >      } u;
> >  };
> > -- 
> > 2.27.0
> >
Julien Grall Dec. 22, 2021, 1:49 p.m. UTC | #16
Hi Volodymyr,

On 22/12/2021 13:34, Volodymyr Babchuk wrote:
> 
> 
> Julien Grall <julien@xen.org> writes:
> 
>> Hi,
>>
>> On 22/12/2021 12:17, Volodymyr Babchuk wrote:
>>> Julien Grall <julien@xen.org> writes:
>>>> On 21/12/2021 22:39, Stefano Stabellini wrote:
>>>>> On Tue, 21 Dec 2021, Anthony PERARD wrote:
>>>>>> On Fri, Dec 17, 2021 at 12:15:25PM +0000, Oleksii Moisieiev wrote:
>>>>>>>> On 14.12.21 11:34, Oleksii Moisieiev wrote:
>>>>>>>>> @@ -1817,17 +1858,24 @@ static void libxl__add_dtdevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
>>>>>>>>>      {
>>>>>>>>>          AO_GC;
>>>>>>>>>          libxl__ao_device *aodev = libxl__multidev_prepare(multidev);
>>>>>>>>> -    int i, rc = 0;
>>>>>>>>> +    int i, rc = 0, rc_sci = 0;
>>>>>>>>>          for (i = 0; i < d_config->num_dtdevs; i++) {
>>>>>>>>>              const libxl_device_dtdev *dtdev = &d_config->dtdevs[i];
>>>>>>>>>              LOGD(DEBUG, domid, "Assign device \"%s\" to domain", dtdev->path);
>>>>>>>>>              rc = xc_assign_dt_device(CTX->xch, domid, dtdev->path);
>>>>>>>>> -        if (rc < 0) {
>>>>>>>>> -            LOGD(ERROR, domid, "xc_assign_dtdevice failed: %d", rc);
>>>>>>>>> +        rc_sci = xc_domain_add_sci_device(CTX->xch, domid, dtdev->path);
>>>>>>>>> +
>>>>>>>>> +        if ((rc < 0) && (rc_sci < 0)) {
>>>>>>>>> +            LOGD(ERROR, domid, "xc_assign_dt_device failed: %d; "
>>>>>>>>> +                 "xc_domain_add_sci_device failed: %d",
>>>>>>>>> +                 rc, rc_sci);
>>>>>>>>>                  goto out;
>>>>>>>>>              }
>>>>>>>>> +
>>>>>>>>> +        if (rc)
>>>>>>>>> +            rc = rc_sci;
>>>>>>>>
>>>>>>>>
>>>>>>>> If I get this code right, it sounds like the dom.cfg's dtdev property is
>>>>>>>> reused to describe sci devices as well, but the libxl__add_dtdevs() does not
>>>>>>>> (and can not) differentiate them. So it has no option but to send two
>>>>>>>> domctls for each device in dtdevs[] hoping to at least one domctl to
>>>>>>>> succeeded. Or I really missed something?
>>>>>>>>
>>>>>>>> It feels to me that:
>>>>>>>>     - either new dom.cfg's property could be introduced (scidev?) to describe
>>>>>>>> sci devices together with new parsing logic/management code, so you will end
>>>>>>>> up having new libxl__add_scidevs() to invoke XEN_DOMCTL_add_sci_device,
>>>>>>>> so no mixing things.
>>>>>>>>     - or indeed dtdev logic could be *completely* reused including extending
>>>>>>>> XEN_DOMCTL_assign_device to cover your use-case, although sounds generic, it
>>>>>>>> is used to describe devices for the passthrough (to configure an IOMMU for
>>>>>>>> the device), in that case you wouldn't need an extra
>>>>>>>> XEN_DOMCTL_add_sci_device introduced by current patch.
>>>>> I realize I did my review before reading Oleksandr's comments. I
>>>>> fully
>>>>> agree with his feedback. Having seen how difficult is for users to setup
>>>>> a domU configuration correctly today, I would advise to try to reuse the
>>>>> existing dtdev property instead of adding yet one new property to make
>>>>> the life of our users easier.
>>>>>
>>>>>
>>>>>>>> Personally I would use the first option as I am not sure that second option
>>>>>>>> is conceptually correct && possible. I would leave this for the maintainers
>>>>>>>> to clarify.
>>>>>>>>
>>>>>>>
>>>>>>> Thank you for the point. I agree that reusing XEN_DOMCTL_assign_device
>>>>>>> seems not to be conceptually correct. Introducing new dom.cfg property
>>>>>>> seems to be the only way to avoid extra Domctl calls.
>>>>>>> I will handle this in v2 if there will be no complains from Maintainers.
>>>>>>
>>>>>> I don't know if there will be a complains in v2 or not :-), but using
>>>>>> something different from dtdev sound good.
>>>>>>
>>>>>> If I understand correctly, dtdev seems to be about passing through an
>>>>>> existing device to a guest, and this new sci device seems to be about having Xen
>>>>>> "emulating" an sci device which the guest will use. So introducing
>>>>>> something new (scidev or other name) sounds good.
>>>>> Users already have to provide 4 properties (dtdev, iomem, irqs,
>>>>> device_tree) to setup device assignment. I think that making it 5
>>>>> properties would not be a step forward :-)
>>>>
>>>> IIRC, in the past, we discussed to fetch the information directly from
>>>> the partial device-tree. Maybe this discussion needs to be revived?
>>>>
>>>> Although, this is a separate topic from this series.
>>>>
>>>>> To me dtdev and XEN_DOMCTL_assign_device are appropriate because
>>>>> they
>>>>> signify device assignment of one or more devices. We are not adding any
>>>>> additional "meaning" to them. It is just that we'll automatically detect
>>>>> and generate any SCMI configurations based on the list of assigned
>>>>> devices. Because if SCMI is enabled and a device is assigned to the
>>>>> guest, then I think we want to add the device to the SCMI list of
>>>>> devices automatically.
>>>>
>>>> I am OK with re-using dtdev/XEN_DOMCTL_assign_device however there is
>>>> a pitfall with that approach.
>>>>
>>>> At the moment, they are only used for device protected by the
>>>> IOMMU. If the device is not protected by the IOMMU then it will return
>>>> an error.
>>> IIRC there was a change, that allowed to assign device without a
>>> IOMMU. At
>>> least we discussed this internally.
>>
>> I am not aware of any upstream. Do you have a pointer if there is any
>> public discussion?
> 
> No, this is the first public discussion on this topic.
> 
>>
>>>>
>>>> Now, with your approach we may have a device that is not protected by
>>>> the IOMMU but require to a SCMI configuration.
>>> You need to protect only DMA-capable devices.
>>
>> Xen doesn't know if the device is DMA-capable or not. So...
>>
> 
> But it knows if there is a iommus=<> node present for the device.

Not really. Not all the platforms have IOMMUs and not all the platforms 
with IOMMU have DMA-capable device protected by an IOMMU.

> 
>>>
>>>> I don't think it would be sensible to just return "succeed" here
>>>> because technically you are asking to assign a non-protected
>>>> device. But at the same time, it would prevent a user to assign a
>>>> non-DMA capable device.
>>>>
>>>> So how do you suggest to approach this?
>>> Well, in my head assign_device ideally should do the following:
>>> 1. Assign IOMMU if it is configured for the device
>>
>> ... with this approach you are at the risk to let the user passthrough
>> a device that cannot be protected.
>>
>>> 2. Assign SCMI access rights
>>> (Not related to this patch series, but...)
>>> 3. Assign IRQs
>>> 4. Assign IO memory ranges.
>>> Points 3. and 4. would allow us to not provide additional irq=[] and
>>> iomem=[] entries in a guest config.
>>
>> That could only work if your guest is using the same layout as the
>> host.
> 
> Agreed. But in my experience, in most cases this is the true. We worked
> with Renesas and IMX hardware and in both cases iomems were
> mapped 1:1.
> 
>> Otherwise, there is a risk it will clash with other parts of the
>> memory layout.
>>
> 
>> Today, guests started via the toolstack is only using a virtual
>> layout, so you would first need to add support to use the host memory
>> layout.
> 
> I can't say for all the possible configurations in the wild, but I'm
> assuming that in most cases it will be fine to use 1:1 mapping. For
> those more exotic cases it would be possible to implement some
> configuration option like iomem_map=[mfn:gfn].
Well, the Xen memory layout is not something that is stable nor AFAIK 
based on any memory layout. In fact, there is no such generic layout on Arm.

It is quite possible that it will work well with 1:1 MMIO on some 
platform (e.g. Renesas, IMX) but you can't expect to work for every Xen 
release or all the platforms.

> 
> As Stefano pointed, right now user needs to provide 3 configuration
> options to pass a device to a guest: dt_dev, irq, iomem. But in fact,
> Xen is already knowing about irq and iomem from device tree.

I understand and this is not ideal. I would be happy to consider your 
approach. However, this will have to enabled only when the guest is 
re-using the host layout.

Cheers,
Stefano Stabellini Dec. 23, 2021, 2:23 a.m. UTC | #17
On Wed, 22 Dec 2021, Julien Grall wrote:
> > > > > > To me dtdev and XEN_DOMCTL_assign_device are appropriate because
> > > > > > they
> > > > > > signify device assignment of one or more devices. We are not adding
> > > > > > any
> > > > > > additional "meaning" to them. It is just that we'll automatically
> > > > > > detect
> > > > > > and generate any SCMI configurations based on the list of assigned
> > > > > > devices. Because if SCMI is enabled and a device is assigned to the
> > > > > > guest, then I think we want to add the device to the SCMI list of
> > > > > > devices automatically.
> > > > > 
> > > > > I am OK with re-using dtdev/XEN_DOMCTL_assign_device however there is
> > > > > a pitfall with that approach.
> > > > > 
> > > > > At the moment, they are only used for device protected by the
> > > > > IOMMU. If the device is not protected by the IOMMU then it will return
> > > > > an error.
> > > > IIRC there was a change, that allowed to assign device without a
> > > > IOMMU. At
> > > > least we discussed this internally.
> > > 
> > > I am not aware of any upstream. Do you have a pointer if there is any
> > > public discussion?
> > 
> > No, this is the first public discussion on this topic.
> > 
> > > 
> > > > > 
> > > > > Now, with your approach we may have a device that is not protected by
> > > > > the IOMMU but require to a SCMI configuration.
> > > > You need to protect only DMA-capable devices.
> > > 
> > > Xen doesn't know if the device is DMA-capable or not. So...
> > > 
> > 
> > But it knows if there is a iommus=<> node present for the device.
> 
> Not really. Not all the platforms have IOMMUs and not all the platforms with
> IOMMU have DMA-capable device protected by an IOMMU.
> 
> > 
> > > > 
> > > > > I don't think it would be sensible to just return "succeed" here
> > > > > because technically you are asking to assign a non-protected
> > > > > device. But at the same time, it would prevent a user to assign a
> > > > > non-DMA capable device.
> > > > > 
> > > > > So how do you suggest to approach this?
> > > > Well, in my head assign_device ideally should do the following:
> > > > 1. Assign IOMMU if it is configured for the device
> > > 
> > > ... with this approach you are at the risk to let the user passthrough
> > > a device that cannot be protected.
> > > 
> > > > 2. Assign SCMI access rights
> > > > (Not related to this patch series, but...)
> > > > 3. Assign IRQs
> > > > 4. Assign IO memory ranges.
> > > > Points 3. and 4. would allow us to not provide additional irq=[] and
> > > > iomem=[] entries in a guest config.
> > > 
> > > That could only work if your guest is using the same layout as the
> > > host.
> > 
> > Agreed. But in my experience, in most cases this is the true. We worked
> > with Renesas and IMX hardware and in both cases iomems were
> > mapped 1:1.
> > 
> > > Otherwise, there is a risk it will clash with other parts of the
> > > memory layout.
> > > 
> > 
> > > Today, guests started via the toolstack is only using a virtual
> > > layout, so you would first need to add support to use the host memory
> > > layout.
> > 
> > I can't say for all the possible configurations in the wild, but I'm
> > assuming that in most cases it will be fine to use 1:1 mapping. For
> > those more exotic cases it would be possible to implement some
> > configuration option like iomem_map=[mfn:gfn].
> Well, the Xen memory layout is not something that is stable nor AFAIK based on
> any memory layout. In fact, there is no such generic layout on Arm.
> 
> It is quite possible that it will work well with 1:1 MMIO on some platform
> (e.g. Renesas, IMX) but you can't expect to work for every Xen release or all
> the platforms.

Yeah this is a true problem. Thankfully with Penny's series we'll be
able to create domUs with the host memory layout (although dom0less
only but it is a step forward).

 
> > As Stefano pointed, right now user needs to provide 3 configuration
> > options to pass a device to a guest: dt_dev, irq, iomem. But in fact,
> > Xen is already knowing about irq and iomem from device tree.
> 
> I understand and this is not ideal. I would be happy to consider your
> approach. However, this will have to enabled only when the guest is re-using
> the host layout.

It looks like we all agree that today configuring device assignment with
Xen is too complicated and would like for it to be simpler. This thread
has some excellent ideas on how to address the issue. Skip to the end if
you are only interested in this patch series.


# Future Ideas

A great suggestion by Julien is to start supporting the dom0less partial
device tree format in xl/libxl as well so that we can have a single
"device_tree" option in dom.cfg instead of 4 (device_tree, iomem, irqs,
dtdev).

Even with that implemented, the user has still to provide a partial dtb,
xen,reg and xen,path. I think this is a great step forward and we should
do that, if nothing else to make it easier to switch between dom0less
and normal domU configurations. But the number of options and
information that the user has to provide is still similar to what we
have today.

After that, I think we need something along the lines of what Volodymyr
suggested. Let's say that the user only provides "dtdev" and
"device_tree" in dom.cfg.  We could:

- read interrupts from the  "interrupts" property like we do for dom0less
- read "xen,reg" for the mapping, if "xen,reg" is missing, read "reg"
  and assume 1:1 (we could try the mapping and print an error on
  failure, or we could only do 1:1 if the domain is entirely 1:1)
- optionally read "xen,path" to populate dtdev
- if an IOMMU configuration is present, then also configure the IOMMU
  for the device, if not then "xen,force-assign-without-iommu" must be
  present
- assign SCMI access rights


Now we only have:
- device_tree in dom.cfg
- dtdev in dom.cfg (or xen,path in the partial DTB)
- xen,force-assign-without-iommu in the partial DTB


It would be good if we could remove "xen,force-assign-without-iommu"
because at this stage it is the only Xen-specific property remaining in
the partial DTB. If we could get rid of it, it would make it easier to
write/generate the partial DTB because it becomes a strict subset of the
corresponding host node. It would enable us to automatically generate it
(we are going to do experiments with it at Xilinx in the next few
months) and it would be easier to explain to users how to write it.
The partial DTB usually starts as a copy of the corresponding host node
plus some edits. The fewer edits are required, the better.

But it is difficult because of the reasons mentioned by Julien. In Xen
we cannot know if a device is not behind an IOMMU because is not a DMA
master (so safe to assign) or because the system simply doesn't have
enough IOMMU coverage (so not safe to assign). Thankfully the hardware
has been improving in recent years and there are more and more platforms
with full IOMMU coverage. I think we should make it easier for users on
these well-behave platforms.

At least, we could move the "xen,force-assign-without-iommu" option from
the partial DTB to the VM config file dom.cfg. Something like:

force-assign-without-iommu="true"
or
platform-iommu-safe="true"

That way, it is global rather than per-device and doesn't have to be
added by the user by hand when creating the partial DTB.


# This patch series

Now in regards to this specific series and the SCMI options today, I
think it is OK to have a per-domain sci="scmi_smc", but I think we
should try to avoid another detailed list of device paths or list of
IDs in addition to dtdev.

dtdev already specifies the device tree nodes to be assigned to the
guest. Based on that list we can also do SCMI assignment.

As Julien pointed out, the issue is: what if a device needs SCMI
assignment but is not a DMA master (hence there is no IOMMU
configuration on the host)?

Attempting to assign a DMA mastering device without IOMMU protection is
not just unsafe, it will actively cause memory corruptions in most
cases. It is an erroneous configuration.

Of course we should try to stop people from running erroneous
configurations, but we should do so without penalizing all users.

With that in mind, also considering that dtdev is the list of devices to
be assigned, I think dtdev should be the list of all devices, even
non-DMA masters. It makes a lot of sense also because today is really
difficult to explain to a user that "yes, dtdev is the devices to be
assigned but no, if the device is a UART you don't have to add it to
dtdev because it is not a DMA master". It would be a lot easier if the
list of assigned devices was comprehensive, including both DMA masters
and not DMA masters.

So I think we should either:

a) extend dtdev to cover all devices, including non-DMA masters
b) or add a new "device_assignment" property which is like dtdev but is
   the list of both DMA masters and non-DMA masters

Either way, when non-DMA masters are present in the
dtdev/device_assignment list we could either:
    c) require force-assign-without-iommu="true" in dom.cfg
    d) or print a warning like:
    "WARNING: device assignment safety for device XXX cannot be
    verified. Please make sure XXX is not a DMA mastering device."


All these options are good I think. My preference is a) + d), so extend
dtdev and print a warning if non-DMA masters are in the list. We don't
necessarily need a new hypercall but that is also an option.

I think this discussion was a long time coming and I am glad we are
having it now. We have a lot of room for improvement! I don't want to
scope-creep this patch series, but I hope that this last bit could be
done as part of this series if we find agreement in the community.
Stefano Stabellini Dec. 23, 2021, 7:06 p.m. UTC | #18
On Wed, 22 Dec 2021, Stefano Stabellini wrote:
> # Future Ideas
> 
> A great suggestion by Julien is to start supporting the dom0less partial
> device tree format in xl/libxl as well so that we can have a single
> "device_tree" option in dom.cfg instead of 4 (device_tree, iomem, irqs,
> dtdev).
> 
> Even with that implemented, the user has still to provide a partial dtb,
> xen,reg and xen,path. I think this is a great step forward and we should
> do that, if nothing else to make it easier to switch between dom0less
> and normal domU configurations. But the number of options and
> information that the user has to provide is still similar to what we
> have today.

I have just realized that if we start to parse the partial DTB in
xl/libxl the same way that we do for dom0less guests (parse "xen,path",
"xen,reg", and "interrupts", making dtdev, irqs and iomem optional)
actually we can achieve the goal below thanks to the combination:
"xen,path" + "xen,force-assign-without-iommu".

In other words, with dom0less we already have a way to specify the link
to the host node even if the device is not a DMA master. We can do that
by specifying both xen,path and xen,force-assign-without-iommu for a
device.

This is just FYI. I am not suggesting we should introduce dom0less-style
partial DTBs in order to get SCMI support in guests (although it would
be great to have). I think the best way forward for this series is one
of the combinations below, like a) + d), or a) + c) 


[...]

> So I think we should either:
> 
> a) extend dtdev to cover all devices, including non-DMA masters
> b) or add a new "device_assignment" property which is like dtdev but is
>    the list of both DMA masters and non-DMA masters
> 
> Either way, when non-DMA masters are present in the
> dtdev/device_assignment list we could either:
>     c) require force-assign-without-iommu="true" in dom.cfg
>     d) or print a warning like:
>     "WARNING: device assignment safety for device XXX cannot be
>     verified. Please make sure XXX is not a DMA mastering device."
Oleksii Moisieiev Dec. 23, 2021, 7:11 p.m. UTC | #19
Hi Stefano,

On Wed, Dec 22, 2021 at 06:23:13PM -0800, Stefano Stabellini wrote:
> On Wed, 22 Dec 2021, Julien Grall wrote:
> > > > > > > To me dtdev and XEN_DOMCTL_assign_device are appropriate because
> > > > > > > they
> > > > > > > signify device assignment of one or more devices. We are not adding
> > > > > > > any
> > > > > > > additional "meaning" to them. It is just that we'll automatically
> > > > > > > detect
> > > > > > > and generate any SCMI configurations based on the list of assigned
> > > > > > > devices. Because if SCMI is enabled and a device is assigned to the
> > > > > > > guest, then I think we want to add the device to the SCMI list of
> > > > > > > devices automatically.
> > > > > > 
> > > > > > I am OK with re-using dtdev/XEN_DOMCTL_assign_device however there is
> > > > > > a pitfall with that approach.
> > > > > > 
> > > > > > At the moment, they are only used for device protected by the
> > > > > > IOMMU. If the device is not protected by the IOMMU then it will return
> > > > > > an error.
> > > > > IIRC there was a change, that allowed to assign device without a
> > > > > IOMMU. At
> > > > > least we discussed this internally.
> > > > 
> > > > I am not aware of any upstream. Do you have a pointer if there is any
> > > > public discussion?
> > > 
> > > No, this is the first public discussion on this topic.
> > > 
> > > > 
> > > > > > 
> > > > > > Now, with your approach we may have a device that is not protected by
> > > > > > the IOMMU but require to a SCMI configuration.
> > > > > You need to protect only DMA-capable devices.
> > > > 
> > > > Xen doesn't know if the device is DMA-capable or not. So...
> > > > 
> > > 
> > > But it knows if there is a iommus=<> node present for the device.
> > 
> > Not really. Not all the platforms have IOMMUs and not all the platforms with
> > IOMMU have DMA-capable device protected by an IOMMU.
> > 
> > > 
> > > > > 
> > > > > > I don't think it would be sensible to just return "succeed" here
> > > > > > because technically you are asking to assign a non-protected
> > > > > > device. But at the same time, it would prevent a user to assign a
> > > > > > non-DMA capable device.
> > > > > > 
> > > > > > So how do you suggest to approach this?
> > > > > Well, in my head assign_device ideally should do the following:
> > > > > 1. Assign IOMMU if it is configured for the device
> > > > 
> > > > ... with this approach you are at the risk to let the user passthrough
> > > > a device that cannot be protected.
> > > > 
> > > > > 2. Assign SCMI access rights
> > > > > (Not related to this patch series, but...)
> > > > > 3. Assign IRQs
> > > > > 4. Assign IO memory ranges.
> > > > > Points 3. and 4. would allow us to not provide additional irq=[] and
> > > > > iomem=[] entries in a guest config.
> > > > 
> > > > That could only work if your guest is using the same layout as the
> > > > host.
> > > 
> > > Agreed. But in my experience, in most cases this is the true. We worked
> > > with Renesas and IMX hardware and in both cases iomems were
> > > mapped 1:1.
> > > 
> > > > Otherwise, there is a risk it will clash with other parts of the
> > > > memory layout.
> > > > 
> > > 
> > > > Today, guests started via the toolstack is only using a virtual
> > > > layout, so you would first need to add support to use the host memory
> > > > layout.
> > > 
> > > I can't say for all the possible configurations in the wild, but I'm
> > > assuming that in most cases it will be fine to use 1:1 mapping. For
> > > those more exotic cases it would be possible to implement some
> > > configuration option like iomem_map=[mfn:gfn].
> > Well, the Xen memory layout is not something that is stable nor AFAIK based on
> > any memory layout. In fact, there is no such generic layout on Arm.
> > 
> > It is quite possible that it will work well with 1:1 MMIO on some platform
> > (e.g. Renesas, IMX) but you can't expect to work for every Xen release or all
> > the platforms.
> 
> Yeah this is a true problem. Thankfully with Penny's series we'll be
> able to create domUs with the host memory layout (although dom0less
> only but it is a step forward).
> 
>  
> > > As Stefano pointed, right now user needs to provide 3 configuration
> > > options to pass a device to a guest: dt_dev, irq, iomem. But in fact,
> > > Xen is already knowing about irq and iomem from device tree.
> > 
> > I understand and this is not ideal. I would be happy to consider your
> > approach. However, this will have to enabled only when the guest is re-using
> > the host layout.
> 
> It looks like we all agree that today configuring device assignment with
> Xen is too complicated and would like for it to be simpler. This thread
> has some excellent ideas on how to address the issue. Skip to the end if
> you are only interested in this patch series.
> 
> 
> # Future Ideas
> 
> A great suggestion by Julien is to start supporting the dom0less partial
> device tree format in xl/libxl as well so that we can have a single
> "device_tree" option in dom.cfg instead of 4 (device_tree, iomem, irqs,
> dtdev).
> 
> Even with that implemented, the user has still to provide a partial dtb,
> xen,reg and xen,path. I think this is a great step forward and we should
> do that, if nothing else to make it easier to switch between dom0less
> and normal domU configurations. But the number of options and
> information that the user has to provide is still similar to what we
> have today.
> 
> After that, I think we need something along the lines of what Volodymyr
> suggested. Let's say that the user only provides "dtdev" and
> "device_tree" in dom.cfg.  We could:
> 
> - read interrupts from the  "interrupts" property like we do for dom0less
> - read "xen,reg" for the mapping, if "xen,reg" is missing, read "reg"
>   and assume 1:1 (we could try the mapping and print an error on
>   failure, or we could only do 1:1 if the domain is entirely 1:1)
> - optionally read "xen,path" to populate dtdev
> - if an IOMMU configuration is present, then also configure the IOMMU
>   for the device, if not then "xen,force-assign-without-iommu" must be
>   present
> - assign SCMI access rights
> 
> 
> Now we only have:
> - device_tree in dom.cfg
> - dtdev in dom.cfg (or xen,path in the partial DTB)
> - xen,force-assign-without-iommu in the partial DTB
> 
> 
> It would be good if we could remove "xen,force-assign-without-iommu"
> because at this stage it is the only Xen-specific property remaining in
> the partial DTB. If we could get rid of it, it would make it easier to
> write/generate the partial DTB because it becomes a strict subset of the
> corresponding host node. It would enable us to automatically generate it
> (we are going to do experiments with it at Xilinx in the next few
> months) and it would be easier to explain to users how to write it.
> The partial DTB usually starts as a copy of the corresponding host node
> plus some edits. The fewer edits are required, the better.
> 
> But it is difficult because of the reasons mentioned by Julien. In Xen
> we cannot know if a device is not behind an IOMMU because is not a DMA
> master (so safe to assign) or because the system simply doesn't have
> enough IOMMU coverage (so not safe to assign). Thankfully the hardware
> has been improving in recent years and there are more and more platforms
> with full IOMMU coverage. I think we should make it easier for users on
> these well-behave platforms.
> 
> At least, we could move the "xen,force-assign-without-iommu" option from
> the partial DTB to the VM config file dom.cfg. Something like:
> 
> force-assign-without-iommu="true"
> or
> platform-iommu-safe="true"
> 
> That way, it is global rather than per-device and doesn't have to be
> added by the user by hand when creating the partial DTB.
> 
> 
> # This patch series
> 
> Now in regards to this specific series and the SCMI options today, I
> think it is OK to have a per-domain sci="scmi_smc", but I think we
> should try to avoid another detailed list of device paths or list of
> IDs in addition to dtdev.
> 
> dtdev already specifies the device tree nodes to be assigned to the
> guest. Based on that list we can also do SCMI assignment.
> 
> As Julien pointed out, the issue is: what if a device needs SCMI
> assignment but is not a DMA master (hence there is no IOMMU
> configuration on the host)?
> 
> Attempting to assign a DMA mastering device without IOMMU protection is
> not just unsafe, it will actively cause memory corruptions in most
> cases. It is an erroneous configuration.
> 
> Of course we should try to stop people from running erroneous
> configurations, but we should do so without penalizing all users.
> 
> With that in mind, also considering that dtdev is the list of devices to
> be assigned, I think dtdev should be the list of all devices, even
> non-DMA masters. It makes a lot of sense also because today is really
> difficult to explain to a user that "yes, dtdev is the devices to be
> assigned but no, if the device is a UART you don't have to add it to
> dtdev because it is not a DMA master". It would be a lot easier if the
> list of assigned devices was comprehensive, including both DMA masters
> and not DMA masters.
> 
> So I think we should either:
> 
> a) extend dtdev to cover all devices, including non-DMA masters
> b) or add a new "device_assignment" property which is like dtdev but is
>    the list of both DMA masters and non-DMA masters
> 
> Either way, when non-DMA masters are present in the
> dtdev/device_assignment list we could either:
>     c) require force-assign-without-iommu="true" in dom.cfg
>     d) or print a warning like:
>     "WARNING: device assignment safety for device XXX cannot be
>     verified. Please make sure XXX is not a DMA mastering device."
> 
> 
> All these options are good I think. My preference is a) + d), so extend
> dtdev and print a warning if non-DMA masters are in the list. We don't
> necessarily need a new hypercall but that is also an option.
> 
> I think this discussion was a long time coming and I am glad we are
> having it now. We have a lot of room for improvement! I don't want to
> scope-creep this patch series, but I hope that this last bit could be
> done as part of this series if we find agreement in the community.

For me a) + d) looks good. I will implement it in v2 if everybody ok
with this approach.
Julien Grall Dec. 24, 2021, 1:30 p.m. UTC | #20
Hi,

On 23/12/2021 20:06, Stefano Stabellini wrote:
> On Wed, 22 Dec 2021, Stefano Stabellini wrote:
>> # Future Ideas
>>
>> A great suggestion by Julien is to start supporting the dom0less partial
>> device tree format in xl/libxl as well so that we can have a single
>> "device_tree" option in dom.cfg instead of 4 (device_tree, iomem, irqs,
>> dtdev).
>>
>> Even with that implemented, the user has still to provide a partial dtb,
>> xen,reg and xen,path. I think this is a great step forward and we should
>> do that, if nothing else to make it easier to switch between dom0less
>> and normal domU configurations. But the number of options and
>> information that the user has to provide is still similar to what we
>> have today.
> 
> I have just realized that if we start to parse the partial DTB in
> xl/libxl the same way that we do for dom0less guests (parse "xen,path",
> "xen,reg", and "interrupts", making dtdev, irqs and iomem optional)
> actually we can achieve the goal below thanks to the combination:
> "xen,path" + "xen,force-assign-without-iommu".
> 
> In other words, with dom0less we already have a way to specify the link
> to the host node even if the device is not a DMA master. We can do that
> by specifying both xen,path and xen,force-assign-without-iommu for a
> device.
> 
> This is just FYI. I am not suggesting we should introduce dom0less-style
> partial DTBs in order to get SCMI support in guests (although it would
> be great to have). I think the best way forward for this series is one
> of the combinations below, like a) + d), or a) + c)

I strongly prefer a) + c) because a warning is easy to miss/ignore. At 
least with the extra property the user made an action to think about it 
and agree that this is the way do it.

It is also easier to spot if we ask the user to provide the 
configuration file.

Cheers,
Oleksii Moisieiev Jan. 19, 2022, 9:40 a.m. UTC | #21
Hi Julien,

On Fri, Dec 24, 2021 at 02:30:50PM +0100, Julien Grall wrote:
> Hi,
> 
> On 23/12/2021 20:06, Stefano Stabellini wrote:
> > On Wed, 22 Dec 2021, Stefano Stabellini wrote:
> > > # Future Ideas
> > > 
> > > A great suggestion by Julien is to start supporting the dom0less partial
> > > device tree format in xl/libxl as well so that we can have a single
> > > "device_tree" option in dom.cfg instead of 4 (device_tree, iomem, irqs,
> > > dtdev).
> > > 
> > > Even with that implemented, the user has still to provide a partial dtb,
> > > xen,reg and xen,path. I think this is a great step forward and we should
> > > do that, if nothing else to make it easier to switch between dom0less
> > > and normal domU configurations. But the number of options and
> > > information that the user has to provide is still similar to what we
> > > have today.
> > 
> > I have just realized that if we start to parse the partial DTB in
> > xl/libxl the same way that we do for dom0less guests (parse "xen,path",
> > "xen,reg", and "interrupts", making dtdev, irqs and iomem optional)
> > actually we can achieve the goal below thanks to the combination:
> > "xen,path" + "xen,force-assign-without-iommu".
> > 
> > In other words, with dom0less we already have a way to specify the link
> > to the host node even if the device is not a DMA master. We can do that
> > by specifying both xen,path and xen,force-assign-without-iommu for a
> > device.
> > 
> > This is just FYI. I am not suggesting we should introduce dom0less-style
> > partial DTBs in order to get SCMI support in guests (although it would
> > be great to have). I think the best way forward for this series is one
> > of the combinations below, like a) + d), or a) + c)
> 
> I strongly prefer a) + c) because a warning is easy to miss/ignore. At least
> with the extra property the user made an action to think about it and agree
> that this is the way do it.
> 
> It is also easier to spot if we ask the user to provide the configuration
> file.
> 

Let me share my thoughts about c), which is:
c) require force-assign-without-iommu="true" in dom.cfg

Adding this parameter to domain config means removing
xen,force-assign-without-iommu param from partial DTB. This will affect
dom0less configuration, which I can't test for now without extra effort.

What I suggest is to implement a) + d) in this patch series, which is:
    a) extend dtdev to cover all devices, including non-DMA masters
    d) or print a warning like:
    "WARNING: device assignment safety for device XXX cannot be
    verified. Please make sure XXX is not a DMA mastering device."

And introduce a) + c) with the next patch series where dom0less scmi
support will be done.
Maybe leave a comment in code that force-assign-without-iommu config parameter
should be implemened.

What do you think about this?

--
Best regards,
Oleksii.
Stefano Stabellini Jan. 20, 2022, 1:53 a.m. UTC | #22
On Wed, 19 Jan 2022, Oleksii Moisieiev wrote:
> On Fri, Dec 24, 2021 at 02:30:50PM +0100, Julien Grall wrote:
> > Hi,
> > 
> > On 23/12/2021 20:06, Stefano Stabellini wrote:
> > > On Wed, 22 Dec 2021, Stefano Stabellini wrote:
> > > > # Future Ideas
> > > > 
> > > > A great suggestion by Julien is to start supporting the dom0less partial
> > > > device tree format in xl/libxl as well so that we can have a single
> > > > "device_tree" option in dom.cfg instead of 4 (device_tree, iomem, irqs,
> > > > dtdev).
> > > > 
> > > > Even with that implemented, the user has still to provide a partial dtb,
> > > > xen,reg and xen,path. I think this is a great step forward and we should
> > > > do that, if nothing else to make it easier to switch between dom0less
> > > > and normal domU configurations. But the number of options and
> > > > information that the user has to provide is still similar to what we
> > > > have today.
> > > 
> > > I have just realized that if we start to parse the partial DTB in
> > > xl/libxl the same way that we do for dom0less guests (parse "xen,path",
> > > "xen,reg", and "interrupts", making dtdev, irqs and iomem optional)
> > > actually we can achieve the goal below thanks to the combination:
> > > "xen,path" + "xen,force-assign-without-iommu".
> > > 
> > > In other words, with dom0less we already have a way to specify the link
> > > to the host node even if the device is not a DMA master. We can do that
> > > by specifying both xen,path and xen,force-assign-without-iommu for a
> > > device.
> > > 
> > > This is just FYI. I am not suggesting we should introduce dom0less-style
> > > partial DTBs in order to get SCMI support in guests (although it would
> > > be great to have). I think the best way forward for this series is one
> > > of the combinations below, like a) + d), or a) + c)
> > 
> > I strongly prefer a) + c) because a warning is easy to miss/ignore. At least
> > with the extra property the user made an action to think about it and agree
> > that this is the way do it.
> > 
> > It is also easier to spot if we ask the user to provide the configuration
> > file.
> > 
> 
> Let me share my thoughts about c), which is:
> c) require force-assign-without-iommu="true" in dom.cfg
> 
> Adding this parameter to domain config means removing
> xen,force-assign-without-iommu param from partial DTB.

Why? No I don't think so.


> This will affect dom0less configuration, which I can't test for now
> without extra effort.

We are just talking about adding:

force-assign-without-iommu="true"

to the xl config file. For instance:

dtdev = [ "/amba/serial@ff000000" ]
iomem = ["0xff000,1"]
force-assign-without-iommu="true"

It is unrelated to the dom0less partial DTB. There is no need to test
dom0less to do this.
Oleksii Moisieiev Jan. 20, 2022, 10:27 a.m. UTC | #23
On Wed, Jan 19, 2022 at 05:53:55PM -0800, Stefano Stabellini wrote:
> On Wed, 19 Jan 2022, Oleksii Moisieiev wrote:
> > On Fri, Dec 24, 2021 at 02:30:50PM +0100, Julien Grall wrote:
> > > Hi,
> > > 
> > > On 23/12/2021 20:06, Stefano Stabellini wrote:
> > > > On Wed, 22 Dec 2021, Stefano Stabellini wrote:
> > > > > # Future Ideas
> > > > > 
> > > > > A great suggestion by Julien is to start supporting the dom0less partial
> > > > > device tree format in xl/libxl as well so that we can have a single
> > > > > "device_tree" option in dom.cfg instead of 4 (device_tree, iomem, irqs,
> > > > > dtdev).
> > > > > 
> > > > > Even with that implemented, the user has still to provide a partial dtb,
> > > > > xen,reg and xen,path. I think this is a great step forward and we should
> > > > > do that, if nothing else to make it easier to switch between dom0less
> > > > > and normal domU configurations. But the number of options and
> > > > > information that the user has to provide is still similar to what we
> > > > > have today.
> > > > 
> > > > I have just realized that if we start to parse the partial DTB in
> > > > xl/libxl the same way that we do for dom0less guests (parse "xen,path",
> > > > "xen,reg", and "interrupts", making dtdev, irqs and iomem optional)
> > > > actually we can achieve the goal below thanks to the combination:
> > > > "xen,path" + "xen,force-assign-without-iommu".
> > > > 
> > > > In other words, with dom0less we already have a way to specify the link
> > > > to the host node even if the device is not a DMA master. We can do that
> > > > by specifying both xen,path and xen,force-assign-without-iommu for a
> > > > device.
> > > > 
> > > > This is just FYI. I am not suggesting we should introduce dom0less-style
> > > > partial DTBs in order to get SCMI support in guests (although it would
> > > > be great to have). I think the best way forward for this series is one
> > > > of the combinations below, like a) + d), or a) + c)
> > > 
> > > I strongly prefer a) + c) because a warning is easy to miss/ignore. At least
> > > with the extra property the user made an action to think about it and agree
> > > that this is the way do it.
> > > 
> > > It is also easier to spot if we ask the user to provide the configuration
> > > file.
> > > 
> > 
> > Let me share my thoughts about c), which is:
> > c) require force-assign-without-iommu="true" in dom.cfg
> > 
> > Adding this parameter to domain config means removing
> > xen,force-assign-without-iommu param from partial DTB.
> 
> Why? No I don't think so.
> 
> 
> > This will affect dom0less configuration, which I can't test for now
> > without extra effort.
> 
> We are just talking about adding:
> 
> force-assign-without-iommu="true"
> 
> to the xl config file. For instance:
> 
> dtdev = [ "/amba/serial@ff000000" ]
> iomem = ["0xff000,1"]
> force-assign-without-iommu="true"
> 
> It is unrelated to the dom0less partial DTB. There is no need to test
> dom0less to do this.
> 

Oh. In this case I would be happy to add it in this patch series.

Best regards,
Oleksii.
diff mbox series

Patch

diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 07b96e6671..cdd14f465f 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -1238,6 +1238,9 @@  int xc_domain_getvnuma(xc_interface *xch,
 int xc_domain_soft_reset(xc_interface *xch,
                          uint32_t domid);
 
+int xc_domain_add_sci_device(xc_interface *xch,
+                              uint32_t domid, char *path);
+
 #if defined(__i386__) || defined(__x86_64__)
 /*
  * PC BIOS standard E820 types and structure.
diff --git a/tools/include/xenguest.h b/tools/include/xenguest.h
index 61d0a82f48..35c611ac73 100644
--- a/tools/include/xenguest.h
+++ b/tools/include/xenguest.h
@@ -242,6 +242,8 @@  struct xc_dom_image {
 
     /* Number of vCPUs */
     unsigned int max_vcpus;
+
+    xen_pfn_t sci_shmem_gfn;
 };
 
 /* --- arch specific hooks ----------------------------------------- */
diff --git a/tools/libs/ctrl/xc_domain.c b/tools/libs/ctrl/xc_domain.c
index b155d6afd2..07bb390e17 100644
--- a/tools/libs/ctrl/xc_domain.c
+++ b/tools/libs/ctrl/xc_domain.c
@@ -2206,6 +2206,29 @@  int xc_domain_soft_reset(xc_interface *xch,
     domctl.domain = domid;
     return do_domctl(xch, &domctl);
 }
+
+int xc_domain_add_sci_device(xc_interface *xch,
+                              uint32_t domid, char *path)
+{
+    size_t size = strlen(path);
+    int rc;
+    DECLARE_DOMCTL;
+    DECLARE_HYPERCALL_BOUNCE(path, size, XC_HYPERCALL_BUFFER_BOUNCE_IN);
+
+    if ( xc_hypercall_bounce_pre(xch, path) )
+        return -1;
+
+    domctl.cmd = XEN_DOMCTL_add_sci_device;
+    domctl.domain = domid;
+    domctl.u.sci_device_op.size = size;
+    set_xen_guest_handle(domctl.u.sci_device_op.path, path);
+    rc = do_domctl(xch, &domctl);
+
+    xc_hypercall_bounce_post(xch, path);
+
+    return rc;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libs/guest/xg_dom_arm.c b/tools/libs/guest/xg_dom_arm.c
index 5e3b76355e..368a670c46 100644
--- a/tools/libs/guest/xg_dom_arm.c
+++ b/tools/libs/guest/xg_dom_arm.c
@@ -25,11 +25,12 @@ 
 
 #include "xg_private.h"
 
-#define NR_MAGIC_PAGES 4
+#define NR_MAGIC_PAGES 5
 #define CONSOLE_PFN_OFFSET 0
 #define XENSTORE_PFN_OFFSET 1
 #define MEMACCESS_PFN_OFFSET 2
 #define VUART_PFN_OFFSET 3
+#define SCI_SHMEM_OFFSET 4
 
 #define LPAE_SHIFT 9
 
@@ -69,11 +70,13 @@  static int alloc_magic_pages(struct xc_dom_image *dom)
     dom->console_pfn = base + CONSOLE_PFN_OFFSET;
     dom->xenstore_pfn = base + XENSTORE_PFN_OFFSET;
     dom->vuart_gfn = base + VUART_PFN_OFFSET;
+    dom->sci_shmem_gfn = base + SCI_SHMEM_OFFSET;
 
     xc_clear_domain_page(dom->xch, dom->guest_domid, dom->console_pfn);
     xc_clear_domain_page(dom->xch, dom->guest_domid, dom->xenstore_pfn);
     xc_clear_domain_page(dom->xch, dom->guest_domid, base + MEMACCESS_PFN_OFFSET);
     xc_clear_domain_page(dom->xch, dom->guest_domid, dom->vuart_gfn);
+    xc_clear_domain_page(dom->xch, dom->guest_domid, dom->sci_shmem_gfn);
 
     xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_CONSOLE_PFN,
             dom->console_pfn);
diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
index eef1de0939..73ef591822 100644
--- a/tools/libs/light/libxl_arm.c
+++ b/tools/libs/light/libxl_arm.c
@@ -8,6 +8,8 @@ 
 #include <assert.h>
 #include <xen/device_tree_defs.h>
 
+#define SCMI_NODE_PATH      "/firmware/scmi"
+
 static const char *gicv_to_string(libxl_gic_version gic_version)
 {
     switch (gic_version) {
@@ -101,6 +103,19 @@  int libxl__arch_domain_prepare_config(libxl__gc *gc,
         return ERROR_FAIL;
     }
 
+    switch (d_config->b_info.sci) {
+    case LIBXL_SCI_TYPE_NONE:
+        config->arch.sci_type = XEN_DOMCTL_CONFIG_SCI_NONE;
+        break;
+    case LIBXL_SCI_TYPE_SCMI_SMC:
+        config->arch.sci_type = XEN_DOMCTL_CONFIG_SCI_SCMI_SMC;
+        break;
+    default:
+        LOG(ERROR, "Unknown SCI type %d",
+            d_config->b_info.sci);
+        return ERROR_FAIL;
+    }
+
     return 0;
 }
 
@@ -122,6 +137,7 @@  int libxl__arch_domain_save_config(libxl__gc *gc,
     }
 
     state->clock_frequency = config->arch.clock_frequency;
+    state->sci_agent_paddr = config->arch.sci_agent_paddr;
 
     return 0;
 }
@@ -502,9 +518,6 @@  static int make_optee_node(libxl__gc *gc, void *fdt)
     int res;
     LOG(DEBUG, "Creating OP-TEE node in dtb");
 
-    res = fdt_begin_node(fdt, "firmware");
-    if (res) return res;
-
     res = fdt_begin_node(fdt, "optee");
     if (res) return res;
 
@@ -517,9 +530,6 @@  static int make_optee_node(libxl__gc *gc, void *fdt)
     res = fdt_end_node(fdt);
     if (res) return res;
 
-    res = fdt_end_node(fdt);
-    if (res) return res;
-
     return 0;
 }
 
@@ -902,10 +912,9 @@  static int copy_node(libxl__gc *gc, void *fdt, void *pfdt,
     return 0;
 }
 
-static int copy_node_by_path(libxl__gc *gc, const char *path,
-                             void *fdt, void *pfdt)
+static int get_path_nodeoff(const char *path, void *pfdt)
 {
-    int nodeoff, r;
+    int nodeoff;
     const char *name = strrchr(path, '/');
 
     if (!name)
@@ -925,12 +934,101 @@  static int copy_node_by_path(libxl__gc *gc, const char *path,
     if (strcmp(fdt_get_name(pfdt, nodeoff, NULL), name))
         return -FDT_ERR_NOTFOUND;
 
+    return nodeoff;
+}
+
+static int copy_node_by_path(libxl__gc *gc, const char *path,
+                             void *fdt, void *pfdt)
+{
+    int nodeoff, r;
+
+    nodeoff = get_path_nodeoff(path, pfdt);
+    if (nodeoff < 0)
+        return nodeoff;
+
     r = copy_node(gc, fdt, pfdt, nodeoff, 0);
     if (r) return r;
 
     return 0;
 }
 
+static int get_node_phandle(const char *path, void *pfdt, uint32_t *phandle)
+{
+    int nodeoff;
+    const char *name = strrchr(path, '/');
+
+    if (!name)
+        return -FDT_ERR_INTERNAL;
+
+    name++;
+    nodeoff = fdt_path_offset(pfdt, path);
+    if (nodeoff < 0)
+        return nodeoff;
+
+    *phandle = fdt_get_phandle(pfdt, nodeoff);
+    return 0;
+}
+
+static int make_scmi_shmem_node(libxl__gc *gc, void *fdt, void *pfdt,
+                           struct xc_dom_image *dom)
+{
+    int res;
+    char buf[64];
+    uint32_t phandle = 0;
+
+    res = get_node_phandle("/scp-shmem", pfdt, &phandle);
+    if (res) return res;
+
+    snprintf(buf, sizeof(buf), "scp-shmem@%lx",
+             dom->sci_shmem_gfn << XC_PAGE_SHIFT);
+    res = fdt_begin_node(fdt, buf);
+    if (res) return res;
+
+    res = fdt_property_compat(gc, fdt, 1, "arm,scmi-shmem");
+    if (res) return res;
+
+    res = fdt_property_regs(gc, fdt, GUEST_ROOT_ADDRESS_CELLS,
+                    GUEST_ROOT_SIZE_CELLS, 1,
+                    dom->sci_shmem_gfn << XC_PAGE_SHIFT, XC_PAGE_SHIFT);
+    if (res) return res;
+
+    LOG(DEBUG, "scmi: setting phandle = %u\n", phandle);
+    res = fdt_property_cell(fdt, "phandle", phandle);
+    if (res) return res;
+
+    res = fdt_end_node(fdt);
+    if (res) return res;
+
+    return 0;
+}
+
+static int make_firmware_node(libxl__gc *gc, void *fdt, void *pfdt, int tee,
+                              int sci)
+{
+    int res;
+
+    if ((tee != LIBXL_TEE_TYPE_OPTEE) && (sci != LIBXL_SCI_TYPE_NONE))
+        return 0;
+
+    res = fdt_begin_node(fdt, "firmware");
+    if (res) return res;
+
+    if (tee == LIBXL_TEE_TYPE_OPTEE) {
+       res = make_optee_node(gc, fdt);
+       if (res) return res;
+    }
+
+    if (sci == LIBXL_SCI_TYPE_SCMI_SMC) {
+        res = copy_node_by_path(gc, SCMI_NODE_PATH, fdt, pfdt);
+        if (res) return res;
+    }
+
+    res = fdt_end_node(fdt);
+    if (res) return res;
+
+    return 0;
+}
+
 /*
  * The partial device tree is not copied entirely. Only the relevant bits are
  * copied to the guest device tree:
@@ -1088,8 +1186,10 @@  next_resize:
         if (info->arch_arm.vuart == LIBXL_VUART_TYPE_SBSA_UART)
             FDT( make_vpl011_uart_node(gc, fdt, ainfo, dom) );
 
-        if (info->tee == LIBXL_TEE_TYPE_OPTEE)
-            FDT( make_optee_node(gc, fdt) );
+        FDT( make_firmware_node(gc, fdt, pfdt, info->tee, info->sci) );
+
+        if (info->sci == LIBXL_SCI_TYPE_SCMI_SMC)
+            FDT( make_scmi_shmem_node(gc, fdt, pfdt, dom) );
 
         if (d_config->num_pcidevs)
             FDT( make_vpci_node(gc, fdt, ainfo, dom) );
diff --git a/tools/libs/light/libxl_create.c b/tools/libs/light/libxl_create.c
index dcd09d32ba..c7372fd344 100644
--- a/tools/libs/light/libxl_create.c
+++ b/tools/libs/light/libxl_create.c
@@ -596,6 +596,37 @@  out:
     return ret;
 }
 
+static int map_sci_page(libxl__gc *gc, uint32_t domid, uint64_t paddr,
+                         uint64_t guest_addr)
+{
+    int ret;
+    uint64_t _paddr_pfn = paddr >> XC_PAGE_SHIFT;
+    uint64_t _guest_pfn = guest_addr >> XC_PAGE_SHIFT;
+
+    LOG(DEBUG, "iomem %"PRIx64, _paddr_pfn);
+
+    ret = xc_domain_iomem_permission(CTX->xch, domid, _paddr_pfn, 1, 1);
+    if (ret < 0) {
+        LOG(ERROR,
+              "failed give domain access to iomem page %"PRIx64,
+             _paddr_pfn);
+        return ret;
+    }
+
+    ret = xc_domain_memory_mapping(CTX->xch, domid,
+                                   _guest_pfn, _paddr_pfn,
+                                   1, 1);
+    if (ret < 0) {
+        LOG(ERROR,
+              "failed to map to domain iomem page %"PRIx64
+              " to guest address %"PRIx64,
+              _paddr_pfn, _guest_pfn);
+        return ret;
+    }
+
+    return 0;
+}
+
 int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
                        libxl__domain_build_state *state,
                        uint32_t *domid, bool soft_reset)
@@ -762,6 +793,16 @@  int libxl__domain_make(libxl__gc *gc, libxl_domain_config *d_config,
         goto out;
     }
 
+    if (state->sci_agent_paddr != 0) {
+        ret = map_sci_page(gc, *domid, state->sci_agent_paddr,
+                            state->sci_shmem_gfn << XC_PAGE_SHIFT);
+        if (ret < 0) {
+            LOGED(ERROR, *domid, "map SCI page fail");
+            rc = ERROR_FAIL;
+            goto out;
+        }
+    }
+
     dom_path = libxl__xs_get_dompath(gc, *domid);
     if (!dom_path) {
         rc = ERROR_FAIL;
@@ -1817,17 +1858,24 @@  static void libxl__add_dtdevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
 {
     AO_GC;
     libxl__ao_device *aodev = libxl__multidev_prepare(multidev);
-    int i, rc = 0;
+    int i, rc = 0, rc_sci = 0;
 
     for (i = 0; i < d_config->num_dtdevs; i++) {
         const libxl_device_dtdev *dtdev = &d_config->dtdevs[i];
 
         LOGD(DEBUG, domid, "Assign device \"%s\" to domain", dtdev->path);
         rc = xc_assign_dt_device(CTX->xch, domid, dtdev->path);
-        if (rc < 0) {
-            LOGD(ERROR, domid, "xc_assign_dtdevice failed: %d", rc);
+        rc_sci = xc_domain_add_sci_device(CTX->xch, domid, dtdev->path);
+
+        if ((rc < 0) && (rc_sci < 0)) {
+            LOGD(ERROR, domid, "xc_assign_dt_device failed: %d; "
+                 "xc_domain_add_sci_device failed: %d",
+                 rc, rc_sci);
             goto out;
         }
+
+        if (rc)
+            rc = rc_sci;
     }
 
 out:
diff --git a/tools/libs/light/libxl_dom.c b/tools/libs/light/libxl_dom.c
index fe9f760f71..b1d288a8b9 100644
--- a/tools/libs/light/libxl_dom.c
+++ b/tools/libs/light/libxl_dom.c
@@ -710,6 +710,7 @@  int libxl__build_pv(libxl__gc *gc, uint32_t domid,
         state->console_mfn = dom->console_pfn;
         state->store_mfn = dom->xenstore_pfn;
         state->vuart_gfn = dom->vuart_gfn;
+        state->sci_shmem_gfn = dom->sci_shmem_gfn;
     } else {
         state->console_mfn = xc_dom_p2m(dom, dom->console_pfn);
         state->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h
index 0b4671318c..f9f9cc633a 100644
--- a/tools/libs/light/libxl_internal.h
+++ b/tools/libs/light/libxl_internal.h
@@ -1407,6 +1407,10 @@  typedef struct {
     /* Whether this domain is being migrated/restored, or booting fresh.  Only
      * applicable to the primary domain, not support domains (e.g. stub QEMU). */
     bool restore;
+
+    /* sci channel paddr to be set to device-tree node */
+    uint64_t sci_agent_paddr;
+    xen_pfn_t sci_shmem_gfn;
 } libxl__domain_build_state;
 
 _hidden void libxl__domain_build_state_init(libxl__domain_build_state *s);
diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
index 6245af6d0b..ba200407da 100644
--- a/xen/arch/arm/domctl.c
+++ b/xen/arch/arm/domctl.c
@@ -4,6 +4,7 @@ 
  * Copyright (c) 2012, Citrix Systems
  */
 
+#include <asm/sci/sci.h>
 #include <xen/errno.h>
 #include <xen/guest_access.h>
 #include <xen/hypercall.h>
@@ -175,6 +176,20 @@  long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
 
         return rc;
     }
+    case XEN_DOMCTL_add_sci_device:
+    {
+        int rc;
+        struct dt_device_node *dev;
+
+        rc = dt_find_node_by_gpath(domctl->u.sci_device_op.path,
+                                   domctl->u.sci_device_op.size,
+                                   &dev);
+        if ( rc )
+            return rc;
+
+        return sci_add_dt_device(d, dev);
+    }
+
     default:
     {
         int rc;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index b85e6170b0..671c72c3e8 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -1177,6 +1177,13 @@  struct xen_domctl_vmtrace_op {
 #define XEN_DOMCTL_vmtrace_get_option         5
 #define XEN_DOMCTL_vmtrace_set_option         6
 };
+
+/* XEN_DOMCTL_add_sci_device: set sci device permissions */
+struct xen_domctl_sci_device_op {
+    uint32_t size; /* Length of the path */
+    XEN_GUEST_HANDLE_64(char) path; /* path to the device tree node */
+};
+
 typedef struct xen_domctl_vmtrace_op xen_domctl_vmtrace_op_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_vmtrace_op_t);
 
@@ -1265,6 +1272,7 @@  struct xen_domctl {
 #define XEN_DOMCTL_get_cpu_policy                82
 #define XEN_DOMCTL_set_cpu_policy                83
 #define XEN_DOMCTL_vmtrace_op                    84
+#define XEN_DOMCTL_add_sci_device                85
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1326,6 +1334,7 @@  struct xen_domctl {
         struct xen_domctl_psr_alloc         psr_alloc;
         struct xen_domctl_vuart_op          vuart_op;
         struct xen_domctl_vmtrace_op        vmtrace_op;
+        struct xen_domctl_sci_device_op     sci_device_op;
         uint8_t                             pad[128];
     } u;
 };