diff mbox series

[QEMU,v5] hw/arm/sysbus-fdt: Add support for instantiating generic devices

Message ID 20190103094211.28473-1-geert+renesas@glider.be (mailing list archive)
State Under Review
Delegated to: Geert Uytterhoeven
Headers show
Series [QEMU,v5] hw/arm/sysbus-fdt: Add support for instantiating generic devices | expand

Commit Message

Geert Uytterhoeven Jan. 3, 2019, 9:42 a.m. UTC
Add a fallback for instantiating generic devices without a type-specific
or compatible-specific instantiation method.  This will be used when no
other match is found.

Generic device instantiation avoids having to write device-specific
instantiation methods for each and every "simple" device using only a
set of generic properties.  Devices that need more specialized handling
can still provide their own instantiation methods.

The generic instantiation method creates a device node with remapped
"reg" and (optional) "interrupts" properties, and copies properties from
the host, if deemed appropriate:
  - In general, properties without phandles are safe to be copied.
    Unfortunately, the FDT does not carry type information, hence an
    explicit whitelist is used, which can be extended when needed.
  - Properties related to power management (power and/or clock domain),
    isolation, and pin control, are handled by the host, and must not to
    be copied.

Devices nodes with subnodes are rejected, as subnodes cannot easily be
handled in a generic way.

This has been tested on a Renesas Salvator-XS board (R-Car H3 ES2.0)
with SATA, using:

    -device vfio-platform,host=ee300000.sata

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
Note: Also tested with GPIO, which needs "vfio: No-IOMMU mode support":

    -device vfio-platform,host=e6055400.gpio

v5:
  - Replace copying of a fixed list of properties (and ignoring all
    others), by scanning all properties on the host, and deciding if
    they need to be ignored, copied, or rejected,
  - Reject device nodes with subnodes,
  - Handle edge interrupts,

v3:
  - New.
---
 hw/arm/sysbus-fdt.c | 238 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 238 insertions(+)

Comments

Eric Auger Jan. 9, 2019, 3:55 p.m. UTC | #1
Hi Geert,

On 1/3/19 10:42 AM, Geert Uytterhoeven wrote:
> Add a fallback for instantiating generic devices without a type-specific
> or compatible-specific instantiation method.  This will be used when no
> other match is found.
> 
> Generic device instantiation avoids having to write device-specific
> instantiation methods for each and every "simple" device using only a
> set of generic properties.  Devices that need more specialized handling
> can still provide their own instantiation methods.
> 
> The generic instantiation method creates a device node with remapped
> "reg" and (optional) "interrupts" properties, and copies properties from
> the host, if deemed appropriate:
>   - In general, properties without phandles are safe to be copied.
>     Unfortunately, the FDT does not carry type information, hence an
>     explicit whitelist is used, which can be extended when needed.
>   - Properties related to power management (power and/or clock domain),
>     isolation, and pin control, are handled by the host, and must not to
>     be copied.
> 
> Devices nodes with subnodes are rejected, as subnodes cannot easily be
> handled in a generic way.
> 
> This has been tested on a Renesas Salvator-XS board (R-Car H3 ES2.0)
> with SATA, using:
> 
>     -device vfio-platform,host=ee300000.sata
> 
> Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
> ---
> Note: Also tested with GPIO, which needs "vfio: No-IOMMU mode support":
> 
>     -device vfio-platform,host=e6055400.gpio
> 
> v5:
>   - Replace copying of a fixed list of properties (and ignoring all
>     others), by scanning all properties on the host, and deciding if
>     they need to be ignored, copied, or rejected,
>   - Reject device nodes with subnodes,
>   - Handle edge interrupts,
> 
> v3:
>   - New.
> ---
>  hw/arm/sysbus-fdt.c | 238 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 238 insertions(+)
> 
> diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c
> index ad698d4832c694be..52de8c9a040c282a 100644
> --- a/hw/arm/sysbus-fdt.c
> +++ b/hw/arm/sysbus-fdt.c
> @@ -28,6 +28,9 @@
>  #ifdef CONFIG_LINUX
>  #include <linux/vfio.h>
>  #endif
> +#ifdef CONFIG_FNMATCH
> +#include <fnmatch.h>
> +#endif
>  #include "hw/arm/sysbus-fdt.h"
>  #include "qemu/error-report.h"
>  #include "sysemu/device_tree.h"
> @@ -415,6 +418,232 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, void *opaque)
>      return 0;
>  }
>  
> +enum GenericPropertyAction {
> +    PROP_IGNORE,
> +    PROP_WARN,
> +    PROP_COPY,
> +    PROP_REJECT,
> +};
> +
> +static struct {
> +    const char *name;
> +    enum GenericPropertyAction action;
> +} generic_properties[] = {
> +    { "name",            PROP_IGNORE }, /* handled automatically */
> +    { "phandle",         PROP_IGNORE }, /* not needed for the generic case */
> +
> +    /* The following are copied and remapped by dedicated code */
> +    { "reg",             PROP_IGNORE },
> +    { "interrupts",      PROP_IGNORE },
Shouldn't interrupt-parent be safely ignored as remapped?
> +
> +    /* The following are handled by the host */
> +    { "power-domains",   PROP_IGNORE }, /* power management (+ opt. clocks) */
> +    { "iommus",          PROP_IGNORE }, /* isolation */
> +    { "resets",          PROP_IGNORE }, /* isolation */
> +    { "pinctrl-*",       PROP_IGNORE }, /* pin control */
> +
> +    /* Ignoring the following may or may not work, hence the warning */
> +    { "gpio-ranges",     PROP_WARN },   /* no support for pinctrl yet */
> +    { "dmas",            PROP_WARN },   /* no support for external DMACs yet */
I would be tempted to simply reject things that may not work.
> +
> +    /* The following are irrelevant, as corresponding specifiers are ignored */
> +    { "clock-names",     PROP_IGNORE },
> +    { "reset-names",     PROP_IGNORE },
> +    { "dma-names",       PROP_IGNORE },
> +
> +    /* Whitelist of properties not taking phandles, and thus safe to copy */
> +    { "compatible",      PROP_COPY },
> +    { "status",          PROP_COPY },
> +    { "reg-names",       PROP_COPY },
> +    { "interrupt-names", PROP_COPY },
> +    { "#*-cells",        PROP_COPY },
> +};
> +
> +#ifndef CONFIG_FNMATCH
> +/* Fall back to exact string matching instead of allowing wildcards */
> +static inline int fnmatch(const char *pattern, const char *string, int flags)
> +{
> +        return strcmp(pattern, string);
> +}
> +#endif
> +
> +static enum GenericPropertyAction get_generic_property_action(const char *name)
> +{
> +    unsigned int i;
> +
> +    for (i = 0; i < ARRAY_SIZE(generic_properties); i++) {
> +        if (!fnmatch(generic_properties[i].name, name, 0)) {
> +            return generic_properties[i].action;
> +        }
> +    }
> +
> +    /*
> +     * Unfortunately DT properties do not carry type information,
> +     * so we have to assume everything else contains a phandle,
> +     * and must be rejected
> +     */
> +    return PROP_REJECT;
> +}
> +
> +/**
> + * copy_generic_node
> + *
> + * Copy the generic part of a DT node from the host
> + */
> +static void copy_generic_node(void *host_fdt, void *guest_fdt, char *host_path,
> +                              char *guest_path)
> +{
> +    int node, prop, len;
> +    const void *data;
> +    const char *name;
> +    enum GenericPropertyAction action;
> +
> +    node = fdt_path_offset(host_fdt, host_path);
> +    if (node < 0) {
> +        error_report("Cannot find node %s: %s", host_path, fdt_strerror(node));
> +        exit(1);
> +    }
> +
> +    /* Submodes are not yet supported */
> +    if (fdt_first_subnode(host_fdt, node) >= 0) {
> +        error_report("%s has unsupported subnodes", host_path);
> +        goto unsupported;
> +    }
> +
> +    /* Copy generic properties */
> +    fdt_for_each_property_offset(prop, host_fdt, node) {
> +        data = fdt_getprop_by_offset(host_fdt, prop, &name, &len);
> +        if (!data) {
> +            error_report("Cannot get property of %s: %s", host_path,
> +                         fdt_strerror(len));
> +            exit(1);
> +        }
> +
> +        if (!len) {
> +            /* Zero-sized properties are safe to copy */
> +            action = PROP_COPY;
> +        } else if (!strcmp(name, "clocks")) {
> +            /* Reject "clocks" if "power-domains" is not present */
Could you elaborate on clock management with and without power-domain.
If clock handles can be found on host side, don't we need to generate
clock nodes on guest side (as what was done with the amd xgmac). And in
that case don't you need clock-names prop too?

Please can you explain how the fact power-domain is not present
simplifies the problem. It is not obvious to me.

> +            action = fdt_getprop(host_fdt, node, "power-domains", NULL)
> +                     ? PROP_WARN : PROP_REJECT;
> +        } else {
> +            action = get_generic_property_action(name);
> +        }
> +
> +        switch (action) {
> +        case PROP_WARN:
> +            warn_report("%s: Ignoring %s property", host_path, name);
> +        case PROP_IGNORE:
> +            break;
> +
> +        case PROP_COPY:
> +            qemu_fdt_setprop(guest_fdt, guest_path, name, data, len);
> +            break;
> +
> +        case PROP_REJECT:
> +            error_report("%s has unsupported %s property", host_path, name);
> +            goto unsupported;
> +        }
> +    }
> +    return;
> +
> +unsupported:
> +    error_report("You can ask a wizard to enhance me");
maybe report which property causes the assignment abort
> +    exit(1);
> +}
> +
> +/**
> + * add_generic_fdt_node
> + *
> + * Generates a generic DT node by copying properties from the host
> + */
> +static int add_generic_fdt_node(SysBusDevice *sbdev, void *opaque)
> +{
> +    PlatformBusFDTData *data = opaque;
> +    VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
> +    const char *parent_node = data->pbus_node_name;
> +    void *guest_fdt = data->fdt, *host_fdt;
> +    VFIODevice *vbasedev = &vdev->vbasedev;
> +    char **node_path, *node_name, *dt_name;
> +    PlatformBusDevice *pbus = data->pbus;
> +    uint32_t *reg_attr, *irq_attr;
> +    uint64_t mmio_base;
> +    int i, irq_number;
> +    VFIOINTp *intp;
> +
> +    host_fdt = load_device_tree_from_sysfs();
> +
> +    dt_name = sysfs_to_dt_name(vbasedev->name);
> +    if (!dt_name) {
> +        error_report("%s incorrect sysfs device name %s", __func__,
> +                     vbasedev->name);
> +        exit(1);
> +    }
> +    node_path = qemu_fdt_node_path(host_fdt, dt_name, vdev->compat,
> +                                   &error_fatal);
> +    if (!node_path || !node_path[0]) {
> +        error_report("%s unable to retrieve node path for %s/%s", __func__,
> +                     dt_name, vdev->compat);
> +        exit(1);
> +    }
> +
> +    if (node_path[1]) {
> +        error_report("%s more than one node matching %s/%s!", __func__,
> +                     dt_name, vdev->compat);
> +        exit(1);
> +    }
The above part now is duplicated with code in add_amd_xgbe_fdt_node().
couldn't we factorize this into an helper such like
char [*]*get_host_node_path(VFIODevice *vbasedev).
> +
> +    mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
> +    node_name = g_strdup_printf("%s/%s@%" PRIx64, parent_node,
> +                               vbasedev->name, mmio_base);
> +    qemu_fdt_add_subnode(guest_fdt, node_name);
> +
> +    /* Copy generic parts from host */
> +    copy_generic_node(host_fdt, guest_fdt, node_path[0], node_name);
> +
> +    /* Copy reg (remapped) */
> +    reg_attr = g_new(uint32_t, vbasedev->num_regions * 2);
> +    for (i = 0; i < vbasedev->num_regions; i++) {
> +        mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i);
> +        reg_attr[2 * i] = cpu_to_be32(mmio_base);
> +        reg_attr[2 * i + 1] = cpu_to_be32(
> +                                memory_region_size(vdev->regions[i]->mem));
> +    }
> +    qemu_fdt_setprop(guest_fdt, node_name, "reg", reg_attr,
> +                     vbasedev->num_regions * 2 * sizeof(uint32_t));
> +
> +    /* Copy interrupts (remapped) */
> +    if (vbasedev->num_irqs) {
> +        irq_attr = g_new(uint32_t, vbasedev->num_irqs * 3);
> +        for (i = 0; i < vbasedev->num_irqs; i++) {
> +            irq_number = platform_bus_get_irqn(pbus, sbdev, i) +
> +                         data->irq_start;
> +            irq_attr[3 * i] = cpu_to_be32(GIC_FDT_IRQ_TYPE_SPI);
> +            irq_attr[3 * i + 1] = cpu_to_be32(irq_number);
> +            QLIST_FOREACH(intp, &vdev->intp_list, next) {
> +                if (intp->pin == i) {
> +                    break;
> +                }
> +            }
> +            if (intp->flags & VFIO_IRQ_INFO_AUTOMASKED) {
> +                irq_attr[3 * i + 2] = cpu_to_be32(GIC_FDT_IRQ_FLAGS_LEVEL_HI);
> +            } else {
> +                irq_attr[3 * i + 2] = cpu_to_be32(GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
> +            }
> +        }
> +        qemu_fdt_setprop(guest_fdt, node_name, "interrupts",
> +                         irq_attr, vbasedev->num_irqs * 3 * sizeof(uint32_t));
> +        g_free(irq_attr);
> +    }
> +
> +    g_free(reg_attr);
> +    g_free(node_name);
> +    g_strfreev(node_path);
> +    g_free(dt_name);
> +    g_free(host_fdt);
> +    return 0;
> +}
> +
>  /* DT compatible matching */
>  static bool vfio_platform_match(SysBusDevice *sbdev,
>                                  const BindingEntry *entry)
> @@ -423,6 +652,11 @@ static bool vfio_platform_match(SysBusDevice *sbdev,
>      const char *compat;
>      unsigned int n;
>  
> +    if (!entry->compat) {
> +        /* Generic DT fallback */
> +        return true;
> +    }
> +
>      for (n = vdev->num_compat, compat = vdev->compat; n > 0;
>           n--, compat += strlen(compat) + 1) {
>          if (!strcmp(entry->compat, compat)) {
> @@ -459,6 +693,10 @@ static const BindingEntry bindings[] = {
>      VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node),
>  #endif
>      TYPE_BINDING(TYPE_RAMFB_DEVICE, no_fdt_node),
> +#ifdef CONFIG_LINUX
> +    /* Generic DT fallback must be last */
> +    VFIO_PLATFORM_BINDING(NULL, add_generic_fdt_node),
> +#endif
>      TYPE_BINDING("", NULL), /* last element */
>  };
>  
> 
Could you share the SATA node that was generated with the generic function.

Thanks

Eric
Peter Maydell Jan. 9, 2019, 4:03 p.m. UTC | #2
On Wed, 9 Jan 2019 at 15:55, Auger Eric <eric.auger@redhat.com> wrote:
>
> Hi Geert,
>
> On 1/3/19 10:42 AM, Geert Uytterhoeven wrote:
> > Add a fallback for instantiating generic devices without a type-specific
> > or compatible-specific instantiation method.  This will be used when no
> > other match is found.
> >
> > Generic device instantiation avoids having to write device-specific
> > instantiation methods for each and every "simple" device using only a
> > set of generic properties.  Devices that need more specialized handling
> > can still provide their own instantiation methods.

> > +    /* Ignoring the following may or may not work, hence the warning */
> > +    { "gpio-ranges",     PROP_WARN },   /* no support for pinctrl yet */
> > +    { "dmas",            PROP_WARN },   /* no support for external DMACs yet */
> I would be tempted to simply reject things that may not work.

More generally, this whole feature seems to be "allow things that
might not work", isn't it? Otherwise we could just have explicit
whitelists for the devices we want to allow passthrough of and
that we've tested to work.

I have to say I'm not really very enthusiastic about
enhancing this to allow random device passthrough,
because it encourages further use of this. If people
want hardware that can be passed through they should
put it behind a bus that can be probed and can go
behind an IOMMU, ie pci or some equivalent. That
is a supportable hardware mechanism. All this
machinery feels very heath-robinson...

thanks
-- PMM
Geert Uytterhoeven Jan. 9, 2019, 4:15 p.m. UTC | #3
Hi Eric,

Thanks for your comments!

On Wed, Jan 9, 2019 at 4:56 PM Auger Eric <eric.auger@redhat.com> wrote:
> On 1/3/19 10:42 AM, Geert Uytterhoeven wrote:
> > Add a fallback for instantiating generic devices without a type-specific
> > or compatible-specific instantiation method.  This will be used when no
> > other match is found.
> >
> > Generic device instantiation avoids having to write device-specific
> > instantiation methods for each and every "simple" device using only a
> > set of generic properties.  Devices that need more specialized handling
> > can still provide their own instantiation methods.
> >
> > The generic instantiation method creates a device node with remapped
> > "reg" and (optional) "interrupts" properties, and copies properties from
> > the host, if deemed appropriate:
> >   - In general, properties without phandles are safe to be copied.
> >     Unfortunately, the FDT does not carry type information, hence an
> >     explicit whitelist is used, which can be extended when needed.
> >   - Properties related to power management (power and/or clock domain),
> >     isolation, and pin control, are handled by the host, and must not to
> >     be copied.
> >
> > Devices nodes with subnodes are rejected, as subnodes cannot easily be
> > handled in a generic way.
> >
> > This has been tested on a Renesas Salvator-XS board (R-Car H3 ES2.0)
> > with SATA, using:
> >
> >     -device vfio-platform,host=ee300000.sata
> >
> > Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
> > ---
> > Note: Also tested with GPIO, which needs "vfio: No-IOMMU mode support":
> >
> >     -device vfio-platform,host=e6055400.gpio
> >
> > v5:
> >   - Replace copying of a fixed list of properties (and ignoring all
> >     others), by scanning all properties on the host, and deciding if
> >     they need to be ignored, copied, or rejected,
> >   - Reject device nodes with subnodes,
> >   - Handle edge interrupts,
> >
> > v3:
> >   - New.

> > --- a/hw/arm/sysbus-fdt.c
> > +++ b/hw/arm/sysbus-fdt.c

> > +static struct {
> > +    const char *name;
> > +    enum GenericPropertyAction action;
> > +} generic_properties[] = {
> > +    { "name",            PROP_IGNORE }, /* handled automatically */
> > +    { "phandle",         PROP_IGNORE }, /* not needed for the generic case */
> > +
> > +    /* The following are copied and remapped by dedicated code */
> > +    { "reg",             PROP_IGNORE },
> > +    { "interrupts",      PROP_IGNORE },
> Shouldn't interrupt-parent be safely ignored as remapped?

Probably. Typically interrupt-parent is present at a higher level.
And interrupts-extended should be ignored, too.

> > +
> > +    /* The following are handled by the host */
> > +    { "power-domains",   PROP_IGNORE }, /* power management (+ opt. clocks) */
> > +    { "iommus",          PROP_IGNORE }, /* isolation */
> > +    { "resets",          PROP_IGNORE }, /* isolation */
> > +    { "pinctrl-*",       PROP_IGNORE }, /* pin control */
> > +
> > +    /* Ignoring the following may or may not work, hence the warning */
> > +    { "gpio-ranges",     PROP_WARN },   /* no support for pinctrl yet */
> > +    { "dmas",            PROP_WARN },   /* no support for external DMACs yet */
> I would be tempted to simply reject things that may not work.

I kept gpio-ranges, as it works with my rcar-gpio proof of concept
(depends on with no-iommu support).
Without dmas, drivers are supposed to fall back to PIO.  If a driver
doesn't support that, it will complain.

> > +
> > +    /* The following are irrelevant, as corresponding specifiers are ignored */
> > +    { "clock-names",     PROP_IGNORE },
> > +    { "reset-names",     PROP_IGNORE },
> > +    { "dma-names",       PROP_IGNORE },
> > +
> > +    /* Whitelist of properties not taking phandles, and thus safe to copy */
> > +    { "compatible",      PROP_COPY },
> > +    { "status",          PROP_COPY },
> > +    { "reg-names",       PROP_COPY },
> > +    { "interrupt-names", PROP_COPY },
> > +    { "#*-cells",        PROP_COPY },
> > +};
> > +
> > +#ifndef CONFIG_FNMATCH
> > +/* Fall back to exact string matching instead of allowing wildcards */
> > +static inline int fnmatch(const char *pattern, const char *string, int flags)
> > +{
> > +        return strcmp(pattern, string);
> > +}
> > +#endif

> > +/**
> > + * copy_generic_node
> > + *
> > + * Copy the generic part of a DT node from the host
> > + */
> > +static void copy_generic_node(void *host_fdt, void *guest_fdt, char *host_path,
> > +                              char *guest_path)
> > +{
> > +    int node, prop, len;
> > +    const void *data;
> > +    const char *name;
> > +    enum GenericPropertyAction action;
> > +
> > +    node = fdt_path_offset(host_fdt, host_path);
> > +    if (node < 0) {
> > +        error_report("Cannot find node %s: %s", host_path, fdt_strerror(node));
> > +        exit(1);
> > +    }
> > +
> > +    /* Submodes are not yet supported */
> > +    if (fdt_first_subnode(host_fdt, node) >= 0) {
> > +        error_report("%s has unsupported subnodes", host_path);
> > +        goto unsupported;
> > +    }
> > +
> > +    /* Copy generic properties */
> > +    fdt_for_each_property_offset(prop, host_fdt, node) {
> > +        data = fdt_getprop_by_offset(host_fdt, prop, &name, &len);
> > +        if (!data) {
> > +            error_report("Cannot get property of %s: %s", host_path,
> > +                         fdt_strerror(len));
> > +            exit(1);
> > +        }
> > +
> > +        if (!len) {
> > +            /* Zero-sized properties are safe to copy */
> > +            action = PROP_COPY;
> > +        } else if (!strcmp(name, "clocks")) {
> > +            /* Reject "clocks" if "power-domains" is not present */
> Could you elaborate on clock management with and without power-domain.
> If clock handles can be found on host side, don't we need to generate
> clock nodes on guest side (as what was done with the amd xgmac). And in
> that case don't you need clock-names prop too?
>
> Please can you explain how the fact power-domain is not present
> simplifies the problem. It is not obvious to me.

In the presence of a power-domains property, it's possible the clocks are
used for power management only (if the device is part of a clock domain).
In that case, the guest doesn't need to manage the clock.
Qemu will still print a warning, as it cannot be 100% sure that the clocks
are not needed.

In the absence of a power-domains property, it is assumed the clocks are
needed, and the device node is rejected.
Qemu could be enhanced to inspect all clocks and copy fixed-rate clocks,
like is done for xgmac, but that can be added later, if someone has a need
for it.

For an initial version, I try to keep it generic, but not too complicated ;-)

For complex cases, you can always write a device-specific instantiation
method.

> > +            action = fdt_getprop(host_fdt, node, "power-domains", NULL)
> > +                     ? PROP_WARN : PROP_REJECT;
> > +        } else {
> > +            action = get_generic_property_action(name);
> > +        }
> > +
> > +        switch (action) {
> > +        case PROP_WARN:
> > +            warn_report("%s: Ignoring %s property", host_path, name);
> > +        case PROP_IGNORE:
> > +            break;
> > +
> > +        case PROP_COPY:
> > +            qemu_fdt_setprop(guest_fdt, guest_path, name, data, len);
> > +            break;
> > +
> > +        case PROP_REJECT:
> > +            error_report("%s has unsupported %s property", host_path, name);
> > +            goto unsupported;
> > +        }
> > +    }
> > +    return;
> > +
> > +unsupported:
> > +    error_report("You can ask a wizard to enhance me");
> maybe report which property causes the assignment abort

That's already done above.

> > +    exit(1);
> > +}
> > +
> > +/**
> > + * add_generic_fdt_node
> > + *
> > + * Generates a generic DT node by copying properties from the host
> > + */
> > +static int add_generic_fdt_node(SysBusDevice *sbdev, void *opaque)
> > +{
> > +    PlatformBusFDTData *data = opaque;
> > +    VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
> > +    const char *parent_node = data->pbus_node_name;
> > +    void *guest_fdt = data->fdt, *host_fdt;
> > +    VFIODevice *vbasedev = &vdev->vbasedev;
> > +    char **node_path, *node_name, *dt_name;
> > +    PlatformBusDevice *pbus = data->pbus;
> > +    uint32_t *reg_attr, *irq_attr;
> > +    uint64_t mmio_base;
> > +    int i, irq_number;
> > +    VFIOINTp *intp;
> > +
> > +    host_fdt = load_device_tree_from_sysfs();
> > +
> > +    dt_name = sysfs_to_dt_name(vbasedev->name);
> > +    if (!dt_name) {
> > +        error_report("%s incorrect sysfs device name %s", __func__,
> > +                     vbasedev->name);
> > +        exit(1);
> > +    }
> > +    node_path = qemu_fdt_node_path(host_fdt, dt_name, vdev->compat,
> > +                                   &error_fatal);
> > +    if (!node_path || !node_path[0]) {
> > +        error_report("%s unable to retrieve node path for %s/%s", __func__,
> > +                     dt_name, vdev->compat);
> > +        exit(1);
> > +    }
> > +
> > +    if (node_path[1]) {
> > +        error_report("%s more than one node matching %s/%s!", __func__,
> > +                     dt_name, vdev->compat);
> > +        exit(1);
> > +    }
> The above part now is duplicated with code in add_amd_xgbe_fdt_node().
> couldn't we factorize this into an helper such like
> char [*]*get_host_node_path(VFIODevice *vbasedev).

Sure.

> Could you share the SATA node that was generated with the generic function.

Sure. The original one on the host is
(from arch/arm64/boot/dts/renesas/r8a7795-salvator-xs.dts):

        sata@ee300000 {
                compatible = "renesas,sata-r8a7795", "renesas,rcar-gen3-sata";
                reg = <0 0xee300000 0 0x200000>;
                interrupts = <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&cpg CPG_MOD 815>;
                power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
                resets = <&cpg 815>;
                status = "okay";
                iommus = <&ipmmu_hc 2>;
        };

The one on the guest is (wrapped in the platform subnode):

        platform@c000000 {
                interrupt-parent = <0x8001>;
                #address-cells = <0x1>;
                #size-cells = <0x1>;
                compatible = "qemu,platform", "simple-bus";
                ranges = <0x0 0x0 0xc000000 0x2000000>;

                ee300000.sata@0 {
                        status = "okay";
                        reg = <0x0 0x200000>;
                        interrupts = <0x0 0x70 0x4>;
                        compatible = "renesas,sata-r8a7795",
"renesas,rcar-gen3-sata";
                };
        };

Gr{oetje,eeting}s,

                        Geert
Geert Uytterhoeven Jan. 9, 2019, 4:30 p.m. UTC | #4
Hi Peter,

Thanks for your comments!

On Wed, Jan 9, 2019 at 5:03 PM Peter Maydell <peter.maydell@linaro.org> wrote:
> On Wed, 9 Jan 2019 at 15:55, Auger Eric <eric.auger@redhat.com> wrote:
> > On 1/3/19 10:42 AM, Geert Uytterhoeven wrote:
> > > Add a fallback for instantiating generic devices without a type-specific
> > > or compatible-specific instantiation method.  This will be used when no
> > > other match is found.
> > >
> > > Generic device instantiation avoids having to write device-specific
> > > instantiation methods for each and every "simple" device using only a
> > > set of generic properties.  Devices that need more specialized handling
> > > can still provide their own instantiation methods.
>
> > > +    /* Ignoring the following may or may not work, hence the warning */
> > > +    { "gpio-ranges",     PROP_WARN },   /* no support for pinctrl yet */
> > > +    { "dmas",            PROP_WARN },   /* no support for external DMACs yet */
> > I would be tempted to simply reject things that may not work.
>
> More generally, this whole feature seems to be "allow things that
> might not work", isn't it? Otherwise we could just have explicit

I can remove the two PROP_WARN properties above from the list, if you prefer.
Exporting rcar-sata still works fine after that.

> whitelists for the devices we want to allow passthrough of and
> that we've tested to work.

In the end, this will become a loooooong list (SoC x devices)...

> I have to say I'm not really very enthusiastic about
> enhancing this to allow random device passthrough,
> because it encourages further use of this. If people
> want hardware that can be passed through they should
> put it behind a bus that can be probed and can go
> behind an IOMMU, ie pci or some equivalent. That
> is a supportable hardware mechanism. All this
> machinery feels very heath-robinson...

As no-iommu suppport is not upstream (in Qemu; it is upstream in Linux,
perhaps it should be removed?), all devices using DMA require being
behind an IOMMU.

Reality is that on embedded, on-SoC devices are usually not on a PCI bus.
But IOMMUs are present, and virtualization is wanted.

Thanks!

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Eric Auger Jan. 9, 2019, 5:13 p.m. UTC | #5
Hi Geert,

On 1/9/19 5:15 PM, Geert Uytterhoeven wrote:
> Hi Eric,
> 
> Thanks for your comments!
> 
> On Wed, Jan 9, 2019 at 4:56 PM Auger Eric <eric.auger@redhat.com> wrote:
>> On 1/3/19 10:42 AM, Geert Uytterhoeven wrote:
>>> Add a fallback for instantiating generic devices without a type-specific
>>> or compatible-specific instantiation method.  This will be used when no
>>> other match is found.
>>>
>>> Generic device instantiation avoids having to write device-specific
>>> instantiation methods for each and every "simple" device using only a
>>> set of generic properties.  Devices that need more specialized handling
>>> can still provide their own instantiation methods.
>>>
>>> The generic instantiation method creates a device node with remapped
>>> "reg" and (optional) "interrupts" properties, and copies properties from
>>> the host, if deemed appropriate:
>>>   - In general, properties without phandles are safe to be copied.
>>>     Unfortunately, the FDT does not carry type information, hence an
>>>     explicit whitelist is used, which can be extended when needed.
>>>   - Properties related to power management (power and/or clock domain),
>>>     isolation, and pin control, are handled by the host, and must not to
>>>     be copied.
>>>
>>> Devices nodes with subnodes are rejected, as subnodes cannot easily be
>>> handled in a generic way.
>>>
>>> This has been tested on a Renesas Salvator-XS board (R-Car H3 ES2.0)
>>> with SATA, using:
>>>
>>>     -device vfio-platform,host=ee300000.sata
>>>
>>> Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
>>> ---
>>> Note: Also tested with GPIO, which needs "vfio: No-IOMMU mode support":
>>>
>>>     -device vfio-platform,host=e6055400.gpio
>>>
>>> v5:
>>>   - Replace copying of a fixed list of properties (and ignoring all
>>>     others), by scanning all properties on the host, and deciding if
>>>     they need to be ignored, copied, or rejected,
>>>   - Reject device nodes with subnodes,
>>>   - Handle edge interrupts,
>>>
>>> v3:
>>>   - New.
> 
>>> --- a/hw/arm/sysbus-fdt.c
>>> +++ b/hw/arm/sysbus-fdt.c
> 
>>> +static struct {
>>> +    const char *name;
>>> +    enum GenericPropertyAction action;
>>> +} generic_properties[] = {
>>> +    { "name",            PROP_IGNORE }, /* handled automatically */
>>> +    { "phandle",         PROP_IGNORE }, /* not needed for the generic case */
>>> +
>>> +    /* The following are copied and remapped by dedicated code */
>>> +    { "reg",             PROP_IGNORE },
>>> +    { "interrupts",      PROP_IGNORE },
>> Shouldn't interrupt-parent be safely ignored as remapped?
> 
> Probably. Typically interrupt-parent is present at a higher level.
> And interrupts-extended should be ignored, too.
> 
>>> +
>>> +    /* The following are handled by the host */
>>> +    { "power-domains",   PROP_IGNORE }, /* power management (+ opt. clocks) */
>>> +    { "iommus",          PROP_IGNORE }, /* isolation */
>>> +    { "resets",          PROP_IGNORE }, /* isolation */
>>> +    { "pinctrl-*",       PROP_IGNORE }, /* pin control */
>>> +
>>> +    /* Ignoring the following may or may not work, hence the warning */
>>> +    { "gpio-ranges",     PROP_WARN },   /* no support for pinctrl yet */
>>> +    { "dmas",            PROP_WARN },   /* no support for external DMACs yet */
>> I would be tempted to simply reject things that may not work.
> 
> I kept gpio-ranges, as it works with my rcar-gpio proof of concept
> (depends on with no-iommu support).
> Without dmas, drivers are supposed to fall back to PIO.  If a driver
> doesn't support that, it will complain.
In general I am concerned about allowing things we are not 100% sure
they will work. I would rather say either the node is sufficiently
simple and we can afford relying on a very simple and generic node
creation function or we may request a specific node creation function.
> 
>>> +
>>> +    /* The following are irrelevant, as corresponding specifiers are ignored */
>>> +    { "clock-names",     PROP_IGNORE },
>>> +    { "reset-names",     PROP_IGNORE },
>>> +    { "dma-names",       PROP_IGNORE },
>>> +
>>> +    /* Whitelist of properties not taking phandles, and thus safe to copy */
>>> +    { "compatible",      PROP_COPY },
>>> +    { "status",          PROP_COPY },
>>> +    { "reg-names",       PROP_COPY },
>>> +    { "interrupt-names", PROP_COPY },
>>> +    { "#*-cells",        PROP_COPY },
>>> +};
>>> +
>>> +#ifndef CONFIG_FNMATCH
>>> +/* Fall back to exact string matching instead of allowing wildcards */
>>> +static inline int fnmatch(const char *pattern, const char *string, int flags)
>>> +{
>>> +        return strcmp(pattern, string);
>>> +}
>>> +#endif
> 
>>> +/**
>>> + * copy_generic_node
>>> + *
>>> + * Copy the generic part of a DT node from the host
>>> + */
>>> +static void copy_generic_node(void *host_fdt, void *guest_fdt, char *host_path,
>>> +                              char *guest_path)
>>> +{
>>> +    int node, prop, len;
>>> +    const void *data;
>>> +    const char *name;
>>> +    enum GenericPropertyAction action;
>>> +
>>> +    node = fdt_path_offset(host_fdt, host_path);
>>> +    if (node < 0) {
>>> +        error_report("Cannot find node %s: %s", host_path, fdt_strerror(node));
>>> +        exit(1);
>>> +    }
>>> +
>>> +    /* Submodes are not yet supported */
>>> +    if (fdt_first_subnode(host_fdt, node) >= 0) {
>>> +        error_report("%s has unsupported subnodes", host_path);
>>> +        goto unsupported;
>>> +    }
>>> +
>>> +    /* Copy generic properties */
>>> +    fdt_for_each_property_offset(prop, host_fdt, node) {
>>> +        data = fdt_getprop_by_offset(host_fdt, prop, &name, &len);
>>> +        if (!data) {
>>> +            error_report("Cannot get property of %s: %s", host_path,
>>> +                         fdt_strerror(len));
>>> +            exit(1);
>>> +        }
>>> +
>>> +        if (!len) {
>>> +            /* Zero-sized properties are safe to copy */
>>> +            action = PROP_COPY;
>>> +        } else if (!strcmp(name, "clocks")) {
>>> +            /* Reject "clocks" if "power-domains" is not present */
>> Could you elaborate on clock management with and without power-domain.
>> If clock handles can be found on host side, don't we need to generate
>> clock nodes on guest side (as what was done with the amd xgmac). And in
>> that case don't you need clock-names prop too?
>>
>> Please can you explain how the fact power-domain is not present
>> simplifies the problem. It is not obvious to me.
> 
> In the presence of a power-domains property, it's possible the clocks are
> used for power management only (if the device is part of a clock domain).
> In that case, the guest doesn't need to manage the clock.
> Qemu will still print a warning, as it cannot be 100% sure that the clocks
> are not needed.
Thank you for the explanation. possible but 100% sure. Is it acceptable?
But I now understand this would kill your SATA use case.
> 
> In the absence of a power-domains property, it is assumed the clocks are
> needed, and the device node is rejected.
> Qemu could be enhanced to inspect all clocks and copy fixed-rate clocks,
> like is done for xgmac, but that can be added later, if someone has a need
> for it.
> 
> For an initial version, I try to keep it generic, but not too complicated ;-)
> 
> For complex cases, you can always write a device-specific instantiation
> method.
sure
> 
>>> +            action = fdt_getprop(host_fdt, node, "power-domains", NULL)
>>> +                     ? PROP_WARN : PROP_REJECT;
>>> +        } else {
>>> +            action = get_generic_property_action(name);
>>> +        }
>>> +
>>> +        switch (action) {
>>> +        case PROP_WARN:
>>> +            warn_report("%s: Ignoring %s property", host_path, name);
>>> +        case PROP_IGNORE:
>>> +            break;
>>> +
>>> +        case PROP_COPY:
>>> +            qemu_fdt_setprop(guest_fdt, guest_path, name, data, len);
>>> +            break;
>>> +
>>> +        case PROP_REJECT:
>>> +            error_report("%s has unsupported %s property", host_path, name);
>>> +            goto unsupported;
>>> +        }
>>> +    }
>>> +    return;
>>> +
>>> +unsupported:
>>> +    error_report("You can ask a wizard to enhance me");
>> maybe report which property causes the assignment abort
> 
> That's already done above.
oups sorry


> 
>>> +    exit(1);
>>> +}
>>> +
>>> +/**
>>> + * add_generic_fdt_node
>>> + *
>>> + * Generates a generic DT node by copying properties from the host
>>> + */
>>> +static int add_generic_fdt_node(SysBusDevice *sbdev, void *opaque)
>>> +{
>>> +    PlatformBusFDTData *data = opaque;
>>> +    VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
>>> +    const char *parent_node = data->pbus_node_name;
>>> +    void *guest_fdt = data->fdt, *host_fdt;
>>> +    VFIODevice *vbasedev = &vdev->vbasedev;
>>> +    char **node_path, *node_name, *dt_name;
>>> +    PlatformBusDevice *pbus = data->pbus;
>>> +    uint32_t *reg_attr, *irq_attr;
>>> +    uint64_t mmio_base;
>>> +    int i, irq_number;
>>> +    VFIOINTp *intp;
>>> +
>>> +    host_fdt = load_device_tree_from_sysfs();
>>> +
>>> +    dt_name = sysfs_to_dt_name(vbasedev->name);
>>> +    if (!dt_name) {
>>> +        error_report("%s incorrect sysfs device name %s", __func__,
>>> +                     vbasedev->name);
>>> +        exit(1);
>>> +    }
>>> +    node_path = qemu_fdt_node_path(host_fdt, dt_name, vdev->compat,
>>> +                                   &error_fatal);
>>> +    if (!node_path || !node_path[0]) {
>>> +        error_report("%s unable to retrieve node path for %s/%s", __func__,
>>> +                     dt_name, vdev->compat);
>>> +        exit(1);
>>> +    }
>>> +
>>> +    if (node_path[1]) {
>>> +        error_report("%s more than one node matching %s/%s!", __func__,
>>> +                     dt_name, vdev->compat);
>>> +        exit(1);
>>> +    }
>> The above part now is duplicated with code in add_amd_xgbe_fdt_node().
>> couldn't we factorize this into an helper such like
>> char [*]*get_host_node_path(VFIODevice *vbasedev).
> 
> Sure.
> 
>> Could you share the SATA node that was generated with the generic function.
> 
> Sure. The original one on the host is
> (from arch/arm64/boot/dts/renesas/r8a7795-salvator-xs.dts):
> 
>         sata@ee300000 {
>                 compatible = "renesas,sata-r8a7795", "renesas,rcar-gen3-sata";
>                 reg = <0 0xee300000 0 0x200000>;
>                 interrupts = <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>;
>                 clocks = <&cpg CPG_MOD 815>;
>                 power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
thanks for sharing

Eric
>                 resets = <&cpg 815>;
>                 status = "okay";
>                 iommus = <&ipmmu_hc 2>;
>         };
> 
> The one on the guest is (wrapped in the platform subnode):
> 
>         platform@c000000 {
>                 interrupt-parent = <0x8001>;
>                 #address-cells = <0x1>;
>                 #size-cells = <0x1>;
>                 compatible = "qemu,platform", "simple-bus";
>                 ranges = <0x0 0x0 0xc000000 0x2000000>;
> 
>                 ee300000.sata@0 {
>                         status = "okay";
>                         reg = <0x0 0x200000>;
>                         interrupts = <0x0 0x70 0x4>;
>                         compatible = "renesas,sata-r8a7795",

> "renesas,rcar-gen3-sata";
>                 };
>         };
> 
> Gr{oetje,eeting}s,
> 
>                         Geert
>
Eric Auger Jan. 9, 2019, 5:16 p.m. UTC | #6
Hi Geert,

On 1/9/19 4:55 PM, Auger Eric wrote:
> Hi Geert,
> 
> On 1/3/19 10:42 AM, Geert Uytterhoeven wrote:
>> Add a fallback for instantiating generic devices without a type-specific
>> or compatible-specific instantiation method.  This will be used when no
>> other match is found.
>>
>> Generic device instantiation avoids having to write device-specific
>> instantiation methods for each and every "simple" device using only a
>> set of generic properties.  Devices that need more specialized handling
>> can still provide their own instantiation methods.
>>
>> The generic instantiation method creates a device node with remapped
>> "reg" and (optional) "interrupts" properties, and copies properties from
>> the host, if deemed appropriate:
>>   - In general, properties without phandles are safe to be copied.
>>     Unfortunately, the FDT does not carry type information, hence an
>>     explicit whitelist is used, which can be extended when needed.
>>   - Properties related to power management (power and/or clock domain),
>>     isolation, and pin control, are handled by the host, and must not to
>>     be copied.
>>
>> Devices nodes with subnodes are rejected, as subnodes cannot easily be
>> handled in a generic way.
>>
>> This has been tested on a Renesas Salvator-XS board (R-Car H3 ES2.0)
>> with SATA, using:
>>
>>     -device vfio-platform,host=ee300000.sata
>>
>> Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
>> ---
>> Note: Also tested with GPIO, which needs "vfio: No-IOMMU mode support":
>>
>>     -device vfio-platform,host=e6055400.gpio
>>
>> v5:
>>   - Replace copying of a fixed list of properties (and ignoring all
>>     others), by scanning all properties on the host, and deciding if
>>     they need to be ignored, copied, or rejected,
>>   - Reject device nodes with subnodes,
>>   - Handle edge interrupts,
>>
>> v3:
>>   - New.
>> ---
>>  hw/arm/sysbus-fdt.c | 238 ++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 238 insertions(+)
>>
>> diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c
>> index ad698d4832c694be..52de8c9a040c282a 100644
>> --- a/hw/arm/sysbus-fdt.c
>> +++ b/hw/arm/sysbus-fdt.c
>> @@ -28,6 +28,9 @@
>>  #ifdef CONFIG_LINUX
>>  #include <linux/vfio.h>
>>  #endif
>> +#ifdef CONFIG_FNMATCH
>> +#include <fnmatch.h>
>> +#endif
>>  #include "hw/arm/sysbus-fdt.h"
>>  #include "qemu/error-report.h"
>>  #include "sysemu/device_tree.h"
>> @@ -415,6 +418,232 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, void *opaque)
>>      return 0;
>>  }
>>  
>> +enum GenericPropertyAction {
>> +    PROP_IGNORE,
>> +    PROP_WARN,
>> +    PROP_COPY,
>> +    PROP_REJECT,
>> +};
>> +
>> +static struct {
>> +    const char *name;
>> +    enum GenericPropertyAction action;
>> +} generic_properties[] = {
>> +    { "name",            PROP_IGNORE }, /* handled automatically */
>> +    { "phandle",         PROP_IGNORE }, /* not needed for the generic case */
>> +
>> +    /* The following are copied and remapped by dedicated code */
>> +    { "reg",             PROP_IGNORE },
>> +    { "interrupts",      PROP_IGNORE },
> Shouldn't interrupt-parent be safely ignored as remapped?
>> +
>> +    /* The following are handled by the host */
>> +    { "power-domains",   PROP_IGNORE }, /* power management (+ opt. clocks) */
>> +    { "iommus",          PROP_IGNORE }, /* isolation */
>> +    { "resets",          PROP_IGNORE }, /* isolation */
>> +    { "pinctrl-*",       PROP_IGNORE }, /* pin control */
>> +
>> +    /* Ignoring the following may or may not work, hence the warning */
>> +    { "gpio-ranges",     PROP_WARN },   /* no support for pinctrl yet */
>> +    { "dmas",            PROP_WARN },   /* no support for external DMACs yet */
> I would be tempted to simply reject things that may not work.
>> +
>> +    /* The following are irrelevant, as corresponding specifiers are ignored */
>> +    { "clock-names",     PROP_IGNORE },
>> +    { "reset-names",     PROP_IGNORE },
>> +    { "dma-names",       PROP_IGNORE },
>> +
>> +    /* Whitelist of properties not taking phandles, and thus safe to copy */
>> +    { "compatible",      PROP_COPY },
>> +    { "status",          PROP_COPY },
>> +    { "reg-names",       PROP_COPY },
>> +    { "interrupt-names", PROP_COPY },
>> +    { "#*-cells",        PROP_COPY },
>> +};
>> +
>> +#ifndef CONFIG_FNMATCH
>> +/* Fall back to exact string matching instead of allowing wildcards */
>> +static inline int fnmatch(const char *pattern, const char *string, int flags)
>> +{
>> +        return strcmp(pattern, string);
>> +}
>> +#endif
>> +
>> +static enum GenericPropertyAction get_generic_property_action(const char *name)
>> +{
>> +    unsigned int i;
>> +
>> +    for (i = 0; i < ARRAY_SIZE(generic_properties); i++) {
>> +        if (!fnmatch(generic_properties[i].name, name, 0)) {
>> +            return generic_properties[i].action;
>> +        }
>> +    }
>> +
>> +    /*
>> +     * Unfortunately DT properties do not carry type information,
>> +     * so we have to assume everything else contains a phandle,
>> +     * and must be rejected
>> +     */
>> +    return PROP_REJECT;
>> +}
>> +
>> +/**
>> + * copy_generic_node
>> + *
>> + * Copy the generic part of a DT node from the host
>> + */
>> +static void copy_generic_node(void *host_fdt, void *guest_fdt, char *host_path,
>> +                              char *guest_path)
>> +{
>> +    int node, prop, len;
>> +    const void *data;
>> +    const char *name;
>> +    enum GenericPropertyAction action;
>> +
>> +    node = fdt_path_offset(host_fdt, host_path);
>> +    if (node < 0) {
>> +        error_report("Cannot find node %s: %s", host_path, fdt_strerror(node));
>> +        exit(1);
>> +    }
>> +
>> +    /* Submodes are not yet supported */
>> +    if (fdt_first_subnode(host_fdt, node) >= 0) {
>> +        error_report("%s has unsupported subnodes", host_path);
>> +        goto unsupported;
>> +    }
>> +
>> +    /* Copy generic properties */
>> +    fdt_for_each_property_offset(prop, host_fdt, node) {
>> +        data = fdt_getprop_by_offset(host_fdt, prop, &name, &len);
>> +        if (!data) {
>> +            error_report("Cannot get property of %s: %s", host_path,
>> +                         fdt_strerror(len));
>> +            exit(1);
>> +        }
>> +
>> +        if (!len) {
>> +            /* Zero-sized properties are safe to copy */
>> +            action = PROP_COPY;
>> +        } else if (!strcmp(name, "clocks")) {
>> +            /* Reject "clocks" if "power-domains" is not present */
> Could you elaborate on clock management with and without power-domain.
> If clock handles can be found on host side, don't we need to generate
> clock nodes on guest side (as what was done with the amd xgmac). And in
> that case don't you need clock-names prop too?
> 
> Please can you explain how the fact power-domain is not present
> simplifies the problem. It is not obvious to me.
> 
>> +            action = fdt_getprop(host_fdt, node, "power-domains", NULL)
>> +                     ? PROP_WARN : PROP_REJECT;
>> +        } else {
>> +            action = get_generic_property_action(name);
>> +        }
>> +
>> +        switch (action) {
>> +        case PROP_WARN:
>> +            warn_report("%s: Ignoring %s property", host_path, name);
>> +        case PROP_IGNORE:
>> +            break;
>> +
>> +        case PROP_COPY:
>> +            qemu_fdt_setprop(guest_fdt, guest_path, name, data, len);
>> +            break;
>> +
>> +        case PROP_REJECT:
>> +            error_report("%s has unsupported %s property", host_path, name);
>> +            goto unsupported;
>> +        }
>> +    }
>> +    return;
>> +
>> +unsupported:
>> +    error_report("You can ask a wizard to enhance me");
> maybe report which property causes the assignment abort
>> +    exit(1);
>> +}
>> +
>> +/**
>> + * add_generic_fdt_node
>> + *
>> + * Generates a generic DT node by copying properties from the host
>> + */
>> +static int add_generic_fdt_node(SysBusDevice *sbdev, void *opaque)
>> +{
>> +    PlatformBusFDTData *data = opaque;
>> +    VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
>> +    const char *parent_node = data->pbus_node_name;
>> +    void *guest_fdt = data->fdt, *host_fdt;
>> +    VFIODevice *vbasedev = &vdev->vbasedev;
>> +    char **node_path, *node_name, *dt_name;
>> +    PlatformBusDevice *pbus = data->pbus;
>> +    uint32_t *reg_attr, *irq_attr;
>> +    uint64_t mmio_base;
>> +    int i, irq_number;
>> +    VFIOINTp *intp;
>> +
>> +    host_fdt = load_device_tree_from_sysfs();
>> +
>> +    dt_name = sysfs_to_dt_name(vbasedev->name);
>> +    if (!dt_name) {
>> +        error_report("%s incorrect sysfs device name %s", __func__,
>> +                     vbasedev->name);
>> +        exit(1);
>> +    }
>> +    node_path = qemu_fdt_node_path(host_fdt, dt_name, vdev->compat,
>> +                                   &error_fatal);
>> +    if (!node_path || !node_path[0]) {
>> +        error_report("%s unable to retrieve node path for %s/%s", __func__,
>> +                     dt_name, vdev->compat);
>> +        exit(1);
>> +    }
>> +
>> +    if (node_path[1]) {
>> +        error_report("%s more than one node matching %s/%s!", __func__,
>> +                     dt_name, vdev->compat);
>> +        exit(1);
>> +    }
> The above part now is duplicated with code in add_amd_xgbe_fdt_node().
> couldn't we factorize this into an helper such like
> char [*]*get_host_node_path(VFIODevice *vbasedev).
>> +
>> +    mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
>> +    node_name = g_strdup_printf("%s/%s@%" PRIx64, parent_node,
>> +                               vbasedev->name, mmio_base);
>> +    qemu_fdt_add_subnode(guest_fdt, node_name);
>> +
>> +    /* Copy generic parts from host */
>> +    copy_generic_node(host_fdt, guest_fdt, node_path[0], node_name);
>> +
>> +    /* Copy reg (remapped) */
>> +    reg_attr = g_new(uint32_t, vbasedev->num_regions * 2);
>> +    for (i = 0; i < vbasedev->num_regions; i++) {
>> +        mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i);
>> +        reg_attr[2 * i] = cpu_to_be32(mmio_base);
>> +        reg_attr[2 * i + 1] = cpu_to_be32(
>> +                                memory_region_size(vdev->regions[i]->mem));
>> +    }
>> +    qemu_fdt_setprop(guest_fdt, node_name, "reg", reg_attr,
>> +                     vbasedev->num_regions * 2 * sizeof(uint32_t));
>> +
>> +    /* Copy interrupts (remapped) */
>> +    if (vbasedev->num_irqs) {
>> +        irq_attr = g_new(uint32_t, vbasedev->num_irqs * 3);
>> +        for (i = 0; i < vbasedev->num_irqs; i++) {
>> +            irq_number = platform_bus_get_irqn(pbus, sbdev, i) +
>> +                         data->irq_start;
>> +            irq_attr[3 * i] = cpu_to_be32(GIC_FDT_IRQ_TYPE_SPI);
>> +            irq_attr[3 * i + 1] = cpu_to_be32(irq_number);
>> +            QLIST_FOREACH(intp, &vdev->intp_list, next) {
>> +                if (intp->pin == i) {
>> +                    break;
>> +                }
>> +            }
>> +            if (intp->flags & VFIO_IRQ_INFO_AUTOMASKED) {
>> +                irq_attr[3 * i + 2] = cpu_to_be32(GIC_FDT_IRQ_FLAGS_LEVEL_HI);
>> +            } else {
>> +                irq_attr[3 * i + 2] = cpu_to_be32(GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
>> +            }
>> +        }
>> +        qemu_fdt_setprop(guest_fdt, node_name, "interrupts",
>> +                         irq_attr, vbasedev->num_irqs * 3 * sizeof(uint32_t));
>> +        g_free(irq_attr);
while we are at it, I think we should factorize the "reg" and
"interrupts" property generation code and share it with the amd xgmac
node creation function. There are duplicate.

Thanks

Eric
>> +    }
>> +
>> +    g_free(reg_attr);
>> +    g_free(node_name);
>> +    g_strfreev(node_path);
>> +    g_free(dt_name);
>> +    g_free(host_fdt);
>> +    return 0;
>> +}
>> +
>>  /* DT compatible matching */
>>  static bool vfio_platform_match(SysBusDevice *sbdev,
>>                                  const BindingEntry *entry)
>> @@ -423,6 +652,11 @@ static bool vfio_platform_match(SysBusDevice *sbdev,
>>      const char *compat;
>>      unsigned int n;
>>  
>> +    if (!entry->compat) {
>> +        /* Generic DT fallback */
>> +        return true;
>> +    }
>> +
>>      for (n = vdev->num_compat, compat = vdev->compat; n > 0;
>>           n--, compat += strlen(compat) + 1) {
>>          if (!strcmp(entry->compat, compat)) {
>> @@ -459,6 +693,10 @@ static const BindingEntry bindings[] = {
>>      VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node),
>>  #endif
>>      TYPE_BINDING(TYPE_RAMFB_DEVICE, no_fdt_node),
>> +#ifdef CONFIG_LINUX
>> +    /* Generic DT fallback must be last */
>> +    VFIO_PLATFORM_BINDING(NULL, add_generic_fdt_node),
>> +#endif
>>      TYPE_BINDING("", NULL), /* last element */
>>  };
>>  
>>
> Could you share the SATA node that was generated with the generic function.
> 
> Thanks
> 
> Eric
>
Peter Maydell Jan. 9, 2019, 11:28 p.m. UTC | #7
So I should start out upfront by saying that I'm aware that
the reality is that people do want to do passthrough with
this kind of hardware, and that's not an unreasonable
thing to do. I just don't really like the way that pushes
the software into having to do ugly things...

Overall I'll let Eric call the shots about how well this
feature fits into our sysbus-fdt support; he knows this
code much better than I do. (I would have preferred us
not to have sysbus-fdt passthrough at all, in an
ideal world :-))

On Wed, 9 Jan 2019 at 16:30, Geert Uytterhoeven <geert@linux-m68k.org> wrote:
> On Wed, Jan 9, 2019 at 5:03 PM Peter Maydell <peter.maydell@linaro.org> wrote:
> > whitelists for the devices we want to allow passthrough of and
> > that we've tested to work.
>
> In the end, this will become a loooooong list (SoC x devices)...

Well, yes, but it deters people from trying to do passthrough
with hardware that's not designed in a way that makes
passthrough reasonably supportable at a software level.

> Reality is that on embedded, on-SoC devices are usually not on a PCI bus.
> But IOMMUs are present, and virtualization is wanted.

I don't insist on PCI. Probeable, and consistent in
terms of what they provide and how you interact with
them, is what we want. Embedded on-SoC devices are
generally neither. (The host kernel could in theory
provide an API that exposed only devices that were safely
pass-through-able in a consistent way, I suppose, but it
doesn't, or we wouldn't be having to fish through the
device tree nodes making guesses about what's safe to
allow and what isn't and having heuristics about not
providing clocks info being ok if we think the clock
might only be used for power management...)

thanks
-- PMM
diff mbox series

Patch

diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c
index ad698d4832c694be..52de8c9a040c282a 100644
--- a/hw/arm/sysbus-fdt.c
+++ b/hw/arm/sysbus-fdt.c
@@ -28,6 +28,9 @@ 
 #ifdef CONFIG_LINUX
 #include <linux/vfio.h>
 #endif
+#ifdef CONFIG_FNMATCH
+#include <fnmatch.h>
+#endif
 #include "hw/arm/sysbus-fdt.h"
 #include "qemu/error-report.h"
 #include "sysemu/device_tree.h"
@@ -415,6 +418,232 @@  static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, void *opaque)
     return 0;
 }
 
+enum GenericPropertyAction {
+    PROP_IGNORE,
+    PROP_WARN,
+    PROP_COPY,
+    PROP_REJECT,
+};
+
+static struct {
+    const char *name;
+    enum GenericPropertyAction action;
+} generic_properties[] = {
+    { "name",            PROP_IGNORE }, /* handled automatically */
+    { "phandle",         PROP_IGNORE }, /* not needed for the generic case */
+
+    /* The following are copied and remapped by dedicated code */
+    { "reg",             PROP_IGNORE },
+    { "interrupts",      PROP_IGNORE },
+
+    /* The following are handled by the host */
+    { "power-domains",   PROP_IGNORE }, /* power management (+ opt. clocks) */
+    { "iommus",          PROP_IGNORE }, /* isolation */
+    { "resets",          PROP_IGNORE }, /* isolation */
+    { "pinctrl-*",       PROP_IGNORE }, /* pin control */
+
+    /* Ignoring the following may or may not work, hence the warning */
+    { "gpio-ranges",     PROP_WARN },   /* no support for pinctrl yet */
+    { "dmas",            PROP_WARN },   /* no support for external DMACs yet */
+
+    /* The following are irrelevant, as corresponding specifiers are ignored */
+    { "clock-names",     PROP_IGNORE },
+    { "reset-names",     PROP_IGNORE },
+    { "dma-names",       PROP_IGNORE },
+
+    /* Whitelist of properties not taking phandles, and thus safe to copy */
+    { "compatible",      PROP_COPY },
+    { "status",          PROP_COPY },
+    { "reg-names",       PROP_COPY },
+    { "interrupt-names", PROP_COPY },
+    { "#*-cells",        PROP_COPY },
+};
+
+#ifndef CONFIG_FNMATCH
+/* Fall back to exact string matching instead of allowing wildcards */
+static inline int fnmatch(const char *pattern, const char *string, int flags)
+{
+        return strcmp(pattern, string);
+}
+#endif
+
+static enum GenericPropertyAction get_generic_property_action(const char *name)
+{
+    unsigned int i;
+
+    for (i = 0; i < ARRAY_SIZE(generic_properties); i++) {
+        if (!fnmatch(generic_properties[i].name, name, 0)) {
+            return generic_properties[i].action;
+        }
+    }
+
+    /*
+     * Unfortunately DT properties do not carry type information,
+     * so we have to assume everything else contains a phandle,
+     * and must be rejected
+     */
+    return PROP_REJECT;
+}
+
+/**
+ * copy_generic_node
+ *
+ * Copy the generic part of a DT node from the host
+ */
+static void copy_generic_node(void *host_fdt, void *guest_fdt, char *host_path,
+                              char *guest_path)
+{
+    int node, prop, len;
+    const void *data;
+    const char *name;
+    enum GenericPropertyAction action;
+
+    node = fdt_path_offset(host_fdt, host_path);
+    if (node < 0) {
+        error_report("Cannot find node %s: %s", host_path, fdt_strerror(node));
+        exit(1);
+    }
+
+    /* Submodes are not yet supported */
+    if (fdt_first_subnode(host_fdt, node) >= 0) {
+        error_report("%s has unsupported subnodes", host_path);
+        goto unsupported;
+    }
+
+    /* Copy generic properties */
+    fdt_for_each_property_offset(prop, host_fdt, node) {
+        data = fdt_getprop_by_offset(host_fdt, prop, &name, &len);
+        if (!data) {
+            error_report("Cannot get property of %s: %s", host_path,
+                         fdt_strerror(len));
+            exit(1);
+        }
+
+        if (!len) {
+            /* Zero-sized properties are safe to copy */
+            action = PROP_COPY;
+        } else if (!strcmp(name, "clocks")) {
+            /* Reject "clocks" if "power-domains" is not present */
+            action = fdt_getprop(host_fdt, node, "power-domains", NULL)
+                     ? PROP_WARN : PROP_REJECT;
+        } else {
+            action = get_generic_property_action(name);
+        }
+
+        switch (action) {
+        case PROP_WARN:
+            warn_report("%s: Ignoring %s property", host_path, name);
+        case PROP_IGNORE:
+            break;
+
+        case PROP_COPY:
+            qemu_fdt_setprop(guest_fdt, guest_path, name, data, len);
+            break;
+
+        case PROP_REJECT:
+            error_report("%s has unsupported %s property", host_path, name);
+            goto unsupported;
+        }
+    }
+    return;
+
+unsupported:
+    error_report("You can ask a wizard to enhance me");
+    exit(1);
+}
+
+/**
+ * add_generic_fdt_node
+ *
+ * Generates a generic DT node by copying properties from the host
+ */
+static int add_generic_fdt_node(SysBusDevice *sbdev, void *opaque)
+{
+    PlatformBusFDTData *data = opaque;
+    VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
+    const char *parent_node = data->pbus_node_name;
+    void *guest_fdt = data->fdt, *host_fdt;
+    VFIODevice *vbasedev = &vdev->vbasedev;
+    char **node_path, *node_name, *dt_name;
+    PlatformBusDevice *pbus = data->pbus;
+    uint32_t *reg_attr, *irq_attr;
+    uint64_t mmio_base;
+    int i, irq_number;
+    VFIOINTp *intp;
+
+    host_fdt = load_device_tree_from_sysfs();
+
+    dt_name = sysfs_to_dt_name(vbasedev->name);
+    if (!dt_name) {
+        error_report("%s incorrect sysfs device name %s", __func__,
+                     vbasedev->name);
+        exit(1);
+    }
+    node_path = qemu_fdt_node_path(host_fdt, dt_name, vdev->compat,
+                                   &error_fatal);
+    if (!node_path || !node_path[0]) {
+        error_report("%s unable to retrieve node path for %s/%s", __func__,
+                     dt_name, vdev->compat);
+        exit(1);
+    }
+
+    if (node_path[1]) {
+        error_report("%s more than one node matching %s/%s!", __func__,
+                     dt_name, vdev->compat);
+        exit(1);
+    }
+
+    mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
+    node_name = g_strdup_printf("%s/%s@%" PRIx64, parent_node,
+                               vbasedev->name, mmio_base);
+    qemu_fdt_add_subnode(guest_fdt, node_name);
+
+    /* Copy generic parts from host */
+    copy_generic_node(host_fdt, guest_fdt, node_path[0], node_name);
+
+    /* Copy reg (remapped) */
+    reg_attr = g_new(uint32_t, vbasedev->num_regions * 2);
+    for (i = 0; i < vbasedev->num_regions; i++) {
+        mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i);
+        reg_attr[2 * i] = cpu_to_be32(mmio_base);
+        reg_attr[2 * i + 1] = cpu_to_be32(
+                                memory_region_size(vdev->regions[i]->mem));
+    }
+    qemu_fdt_setprop(guest_fdt, node_name, "reg", reg_attr,
+                     vbasedev->num_regions * 2 * sizeof(uint32_t));
+
+    /* Copy interrupts (remapped) */
+    if (vbasedev->num_irqs) {
+        irq_attr = g_new(uint32_t, vbasedev->num_irqs * 3);
+        for (i = 0; i < vbasedev->num_irqs; i++) {
+            irq_number = platform_bus_get_irqn(pbus, sbdev, i) +
+                         data->irq_start;
+            irq_attr[3 * i] = cpu_to_be32(GIC_FDT_IRQ_TYPE_SPI);
+            irq_attr[3 * i + 1] = cpu_to_be32(irq_number);
+            QLIST_FOREACH(intp, &vdev->intp_list, next) {
+                if (intp->pin == i) {
+                    break;
+                }
+            }
+            if (intp->flags & VFIO_IRQ_INFO_AUTOMASKED) {
+                irq_attr[3 * i + 2] = cpu_to_be32(GIC_FDT_IRQ_FLAGS_LEVEL_HI);
+            } else {
+                irq_attr[3 * i + 2] = cpu_to_be32(GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
+            }
+        }
+        qemu_fdt_setprop(guest_fdt, node_name, "interrupts",
+                         irq_attr, vbasedev->num_irqs * 3 * sizeof(uint32_t));
+        g_free(irq_attr);
+    }
+
+    g_free(reg_attr);
+    g_free(node_name);
+    g_strfreev(node_path);
+    g_free(dt_name);
+    g_free(host_fdt);
+    return 0;
+}
+
 /* DT compatible matching */
 static bool vfio_platform_match(SysBusDevice *sbdev,
                                 const BindingEntry *entry)
@@ -423,6 +652,11 @@  static bool vfio_platform_match(SysBusDevice *sbdev,
     const char *compat;
     unsigned int n;
 
+    if (!entry->compat) {
+        /* Generic DT fallback */
+        return true;
+    }
+
     for (n = vdev->num_compat, compat = vdev->compat; n > 0;
          n--, compat += strlen(compat) + 1) {
         if (!strcmp(entry->compat, compat)) {
@@ -459,6 +693,10 @@  static const BindingEntry bindings[] = {
     VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node),
 #endif
     TYPE_BINDING(TYPE_RAMFB_DEVICE, no_fdt_node),
+#ifdef CONFIG_LINUX
+    /* Generic DT fallback must be last */
+    VFIO_PLATFORM_BINDING(NULL, add_generic_fdt_node),
+#endif
     TYPE_BINDING("", NULL), /* last element */
 };