diff mbox series

[v4,4/7] iommu/arm: Introduce iommu_add_dt_pci_sideband_ids API

Message ID 20230607030220.22698-5-stewart.hildebrand@amd.com (mailing list archive)
State New, archived
Headers show
Series SMMU handling for PCIe Passthrough on ARM | expand

Commit Message

Stewart Hildebrand June 7, 2023, 3:02 a.m. UTC
From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>

The main purpose of this patch is to add a way to register PCI device
(which is behind the IOMMU) using the generic PCI-IOMMU DT bindings [1]
before assigning that device to a domain.

This behaves similarly to the existing iommu_add_dt_device API, except it
handles PCI devices, and it is to be invoked from the add_device hook in the
SMMU driver.

The function of_map_id to translate an ID through a downstream mapping
(which is also suitable for mapping Requester ID) was borrowed from Linux
(v5.10-rc6) and updated according to the Xen code base.

[1] https://www.kernel.org/doc/Documentation/devicetree/bindings/pci/pci-iommu.txt

Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
---
v3->v4:
* wrap #include <asm/acpi.h> and if ( acpi_disabled ) in #ifdef CONFIG_ACPI
* fix Michal's remarks about style, parenthesis, and print formats
* remove !ops->dt_xlate check since it is already in iommu_dt_xlate helper
* rename s/iommu_dt_pci_map_id/dt_map_id/ because it is generic, not specific
  to iommu
* update commit description

v2->v3:
* new patch title (was: iommu/arm: Introduce iommu_add_dt_pci_device API)
* renamed function
  from: iommu_add_dt_pci_device
  to: iommu_add_dt_pci_sideband_ids
* removed stale ops->add_device check
* iommu.h: add empty stub iommu_add_dt_pci_sideband_ids for !HAS_DEVICE_TREE
* iommu.h: add iommu_add_pci_sideband_ids helper
* iommu.h: don't wrap prototype in #ifdef CONFIG_HAS_PCI
* s/iommu_fwspec_free(pci_to_dev(pdev))/iommu_fwspec_free(dev)/

v1->v2:
* remove extra devfn parameter since pdev fully describes the device
* remove ops->add_device() call from iommu_add_dt_pci_device(). Instead, rely on
  the existing iommu call in iommu_add_device().
* move the ops->add_device and ops->dt_xlate checks earlier

downstream->v1:
* rebase
* add const qualifier to struct dt_device_node *np arg in dt_map_id()
* add const qualifier to struct dt_device_node *np declaration in iommu_add_pci_device()
* use stdint.h types instead of u8/u32/etc...
* rename functions:
  s/dt_iommu_xlate/iommu_dt_xlate/
  s/dt_map_id/iommu_dt_pci_map_id/
  s/iommu_add_pci_device/iommu_add_dt_pci_device/
* add device_is_protected check in iommu_add_dt_pci_device
* wrap prototypes in CONFIG_HAS_PCI

(cherry picked from commit 734e3bf6ee77e7947667ab8fa96c25b349c2e1da from
 the downstream branch poc/pci-passthrough from
 https://gitlab.com/xen-project/people/bmarquis/xen-arm-poc.git)
---
 xen/drivers/passthrough/device_tree.c | 134 ++++++++++++++++++++++++++
 xen/include/xen/device_tree.h         |  25 +++++
 xen/include/xen/iommu.h               |  22 ++++-
 3 files changed, 180 insertions(+), 1 deletion(-)

Comments

Jan Beulich June 7, 2023, 7:59 a.m. UTC | #1
On 07.06.2023 05:02, Stewart Hildebrand wrote:
> --- a/xen/include/xen/iommu.h
> +++ b/xen/include/xen/iommu.h
> @@ -26,6 +26,9 @@
>  #include <xen/spinlock.h>
>  #include <public/domctl.h>
>  #include <public/hvm/ioreq.h>
> +#ifdef CONFIG_ACPI
> +#include <asm/acpi.h>
> +#endif

This header is supposed to be usable without #ifdef, and then ...

> @@ -228,12 +232,28 @@ int iommu_release_dt_devices(struct domain *d);
>   *      (IOMMU is not enabled/present or device is not connected to it).
>   */
>  int iommu_add_dt_device(struct dt_device_node *np);
> +int iommu_add_dt_pci_sideband_ids(struct pci_dev *pdev);
>  
>  int iommu_do_dt_domctl(struct xen_domctl *, struct domain *,
>                         XEN_GUEST_HANDLE_PARAM(xen_domctl_t));
>  
> +#else /* !HAS_DEVICE_TREE */
> +static inline int iommu_add_dt_pci_sideband_ids(struct pci_dev *pdev)
> +{
> +    return 0;
> +}
>  #endif /* HAS_DEVICE_TREE */
>  
> +static inline int iommu_add_pci_sideband_ids(struct pci_dev *pdev)
> +{
> +    int ret = 0;
> +#ifdef CONFIG_ACPI
> +    if ( acpi_disabled )
> +#endif

... you don't need #ifdef here either.

> +        ret = iommu_add_dt_pci_sideband_ids(pdev);
> +    return ret;
> +}

Also (nit) please follow (partly unwritten, I admit) style guidelines:
A blank line between declaration(s) and statement(s), and another one
ahead of a function's main "return".

Jan
Stewart Hildebrand June 7, 2023, 12:45 p.m. UTC | #2
On 6/7/23 03:59, Jan Beulich wrote:
> On 07.06.2023 05:02, Stewart Hildebrand wrote:
>> --- a/xen/include/xen/iommu.h
>> +++ b/xen/include/xen/iommu.h
>> @@ -26,6 +26,9 @@
>>  #include <xen/spinlock.h>
>>  #include <public/domctl.h>
>>  #include <public/hvm/ioreq.h>
>> +#ifdef CONFIG_ACPI
>> +#include <asm/acpi.h>
>> +#endif
> 
> This header is supposed to be usable without #ifdef, and then ...

You suggested adding the #ifdef

https://lists.xenproject.org/archives/html/xen-devel/2023-05/msg01409.html

Please clarify

>> @@ -228,12 +232,28 @@ int iommu_release_dt_devices(struct domain *d);
>>   *      (IOMMU is not enabled/present or device is not connected to it).
>>   */
>>  int iommu_add_dt_device(struct dt_device_node *np);
>> +int iommu_add_dt_pci_sideband_ids(struct pci_dev *pdev);
>>
>>  int iommu_do_dt_domctl(struct xen_domctl *, struct domain *,
>>                         XEN_GUEST_HANDLE_PARAM(xen_domctl_t));
>>
>> +#else /* !HAS_DEVICE_TREE */
>> +static inline int iommu_add_dt_pci_sideband_ids(struct pci_dev *pdev)
>> +{
>> +    return 0;
>> +}
>>  #endif /* HAS_DEVICE_TREE */
>>
>> +static inline int iommu_add_pci_sideband_ids(struct pci_dev *pdev)
>> +{
>> +    int ret = 0;
>> +#ifdef CONFIG_ACPI
>> +    if ( acpi_disabled )
>> +#endif
> 
> ... you don't need #ifdef here either.
> 
>> +        ret = iommu_add_dt_pci_sideband_ids(pdev);
>> +    return ret;
>> +}
> 
> Also (nit) please follow (partly unwritten, I admit) style guidelines:
> A blank line between declaration(s) and statement(s), and another one
> ahead of a function's main "return".

OK
Jan Beulich June 7, 2023, 1:41 p.m. UTC | #3
On 07.06.2023 14:45, Stewart Hildebrand wrote:
> On 6/7/23 03:59, Jan Beulich wrote:
>> On 07.06.2023 05:02, Stewart Hildebrand wrote:
>>> --- a/xen/include/xen/iommu.h
>>> +++ b/xen/include/xen/iommu.h
>>> @@ -26,6 +26,9 @@
>>>  #include <xen/spinlock.h>
>>>  #include <public/domctl.h>
>>>  #include <public/hvm/ioreq.h>
>>> +#ifdef CONFIG_ACPI
>>> +#include <asm/acpi.h>
>>> +#endif
>>
>> This header is supposed to be usable without #ifdef, and then ...
> 
> You suggested adding the #ifdef
> 
> https://lists.xenproject.org/archives/html/xen-devel/2023-05/msg01409.html
> 
> Please clarify

Hmm, indeed. Which is a sign that neither solution is really nice. I
guess the one with #ifdef is slightly preferable because it'll avoid
(as said in the earlier reply you refer to above) the need for every
arch, even if ignorant of ACPI altogether, needing to have an
asm/acpi.h. I guess we really need to deal with all of this better
in / by having suitable common code.

Jan
Julien Grall June 29, 2023, 10:37 p.m. UTC | #4
Hi Stewart,

I haven't yet fully reviewed the code. However, I have one question...

On 07/06/2023 04:02, Stewart Hildebrand wrote:
> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>

> +static inline int iommu_add_dt_pci_sideband_ids(struct pci_dev *pdev)
> +{
> +    return 0;
> +}
>   #endif /* HAS_DEVICE_TREE */
>   
> +static inline int iommu_add_pci_sideband_ids(struct pci_dev *pdev)
> +{
> +    int ret = 0;

Shouldn't this return NO_IOMMU when booting on ACPI platform?

> +#ifdef CONFIG_ACPI
> +    if ( acpi_disabled )
> +#endif
> +        ret = iommu_add_dt_pci_sideband_ids(pdev);
> +    return ret;
> +}
> +
>   struct page_info;
>   
>   /*

Cheers,
Julien Grall July 4, 2023, 9:35 a.m. UTC | #5
Hi,

On 07/06/2023 04:02, Stewart Hildebrand wrote:
> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
> 
> The main purpose of this patch is to add a way to register PCI device
> (which is behind the IOMMU) using the generic PCI-IOMMU DT bindings [1]
> before assigning that device to a domain.
> 
> This behaves similarly to the existing iommu_add_dt_device API, except it
> handles PCI devices, and it is to be invoked from the add_device hook in the
> SMMU driver.
> 
> The function of_map_id to translate an ID through a downstream mapping

You called the function dt_map_id in Xen.

> (which is also suitable for mapping Requester ID) was borrowed from Linux
> (v5.10-rc6) and updated according to the Xen code base.
> 
> [1] https://www.kernel.org/doc/Documentation/devicetree/bindings/pci/pci-iommu.txt
> 
> Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
> ---
> v3->v4:
> * wrap #include <asm/acpi.h> and if ( acpi_disabled ) in #ifdef CONFIG_ACPI
> * fix Michal's remarks about style, parenthesis, and print formats
> * remove !ops->dt_xlate check since it is already in iommu_dt_xlate helper
> * rename s/iommu_dt_pci_map_id/dt_map_id/ because it is generic, not specific
>    to iommu
> * update commit description
> 
> v2->v3:
> * new patch title (was: iommu/arm: Introduce iommu_add_dt_pci_device API)
> * renamed function
>    from: iommu_add_dt_pci_device
>    to: iommu_add_dt_pci_sideband_ids
> * removed stale ops->add_device check
> * iommu.h: add empty stub iommu_add_dt_pci_sideband_ids for !HAS_DEVICE_TREE
> * iommu.h: add iommu_add_pci_sideband_ids helper
> * iommu.h: don't wrap prototype in #ifdef CONFIG_HAS_PCI
> * s/iommu_fwspec_free(pci_to_dev(pdev))/iommu_fwspec_free(dev)/
> 
> v1->v2:
> * remove extra devfn parameter since pdev fully describes the device
> * remove ops->add_device() call from iommu_add_dt_pci_device(). Instead, rely on
>    the existing iommu call in iommu_add_device().
> * move the ops->add_device and ops->dt_xlate checks earlier
> 
> downstream->v1:
> * rebase
> * add const qualifier to struct dt_device_node *np arg in dt_map_id()
> * add const qualifier to struct dt_device_node *np declaration in iommu_add_pci_device()
> * use stdint.h types instead of u8/u32/etc...
> * rename functions:
>    s/dt_iommu_xlate/iommu_dt_xlate/
>    s/dt_map_id/iommu_dt_pci_map_id/
>    s/iommu_add_pci_device/iommu_add_dt_pci_device/
> * add device_is_protected check in iommu_add_dt_pci_device
> * wrap prototypes in CONFIG_HAS_PCI
> 
> (cherry picked from commit 734e3bf6ee77e7947667ab8fa96c25b349c2e1da from
>   the downstream branch poc/pci-passthrough from
>   https://gitlab.com/xen-project/people/bmarquis/xen-arm-poc.git)
> ---
>   xen/drivers/passthrough/device_tree.c | 134 ++++++++++++++++++++++++++
>   xen/include/xen/device_tree.h         |  25 +++++
>   xen/include/xen/iommu.h               |  22 ++++-
>   3 files changed, 180 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/drivers/passthrough/device_tree.c b/xen/drivers/passthrough/device_tree.c
> index ff9e66ebf92a..bd0aed5df651 100644
> --- a/xen/drivers/passthrough/device_tree.c
> +++ b/xen/drivers/passthrough/device_tree.c
> @@ -154,6 +154,140 @@ static int iommu_dt_xlate(struct device *dev,
>       return ops->dt_xlate(dev, iommu_spec);
>   }
>   
> +#ifdef CONFIG_HAS_PCI

The code below doesn't seem to be specific to PCI.

> +int dt_map_id(const struct dt_device_node *np, uint32_t id,
> +              const char *map_name, const char *map_mask_name,
> +              struct dt_device_node **target, uint32_t *id_out)

AFAICT, this function can also be used outside of the IOMMU code. So 
shouldn't this be implemented in common/device_tree.c?


> +{
> +    uint32_t map_mask, masked_id, map_len;
> +    const __be32 *map = NULL;
> +
> +    if ( !np || !map_name || (!target && !id_out) )
> +        return -EINVAL;
> +
> +    map = dt_get_property(np, map_name, &map_len);
> +    if ( !map )
> +    {
> +        if ( target )
> +            return -ENODEV;
> +
> +        /* Otherwise, no map implies no translation */
> +        *id_out = id;
> +        return 0;
> +    }
> +
> +    if ( !map_len || (map_len % (4 * sizeof(*map))) )
> +    {
> +        printk(XENLOG_ERR "%s: Error: Bad %s length: %u\n", np->full_name,
> +               map_name, map_len);

I think it would be helpful if you add the function name in the error 
message.

> +        return -EINVAL;
> +    }
> +
> +    /* The default is to select all bits. */
> +    map_mask = 0xffffffff;

Please add a U. That said, I would switch to GENMASK(31, 0) so it is 
easier to check the value.

> +
> +    /*
> +     * Can be overridden by "{iommu,msi}-map-mask" property.
> +     * If df_property_read_u32() fails, the default is used.

s/df/dt/

> +     */
> +    if ( map_mask_name )
> +        dt_property_read_u32(np, map_mask_name, &map_mask);
> +
> +    masked_id = map_mask & id;
> +    for ( ; (int)map_len > 0; map_len -= 4 * sizeof(*map), map += 4 )

Why do you cast map_len to 'int'?

> +    {
> +        struct dt_device_node *phandle_node;
> +        uint32_t id_base = be32_to_cpup(map + 0);
> +        uint32_t phandle = be32_to_cpup(map + 1);
> +        uint32_t out_base = be32_to_cpup(map + 2);
> +        uint32_t id_len = be32_to_cpup(map + 3);
> +
> +        if ( id_base & ~map_mask )
> +        {
> +            printk(XENLOG_ERR "%s: Invalid %s translation - %s-mask (0x%"PRIx32") ignores id-base (0x%"PRIx32")\n",
> +                   np->full_name, map_name, map_name, map_mask, id_base);

Same here about adding the function name.

> +            return -EFAULT;
> +        }
> +
> +        if ( (masked_id < id_base) || (masked_id >= (id_base + id_len)) )
> +            continue;
> +
> +        phandle_node = dt_find_node_by_phandle(phandle);
> +        if ( !phandle_node )
> +            return -ENODEV;
> +
> +        if ( target )
> +        {
> +            if ( !*target )
> +                *target = phandle_node;
> +
> +            if ( *target != phandle_node )
> +                continue;
> +        }
> +
> +        if ( id_out )
> +            *id_out = masked_id - id_base + out_base;
> +
> +        printk(XENLOG_DEBUG "%s: %s, using mask %08"PRIx32", id-base: %08"PRIx32", out-base: %08"PRIx32", length: %08"PRIx32", id: %08"PRIx32" -> %08"PRIx32"\n",

How about using dprintk()?

> +               np->full_name, map_name, map_mask, id_base, out_base, id_len, id,
> +               masked_id - id_base + out_base);
> +        return 0;
> +    }
> +
> +    printk(XENLOG_ERR "%s: no %s translation for id 0x%"PRIx32" on %s\n",
> +           np->full_name, map_name, id, (target && *target) ? (*target)->full_name : NULL);

Same here about adding the function name.

> +
> +    /*
> +     * NOTE: Linux bypasses translation without returning an error here,
> +     * but should we behave in the same way on Xen? Restrict for now.
> +     */

Can you outline why we would want to bypass the translation?

> +    return -EFAULT;
> +}
> +
> +int iommu_add_dt_pci_sideband_ids(struct pci_dev *pdev)
> +{
> +    const struct iommu_ops *ops = iommu_get_ops();
> +    struct dt_phandle_args iommu_spec = { .args_count = 1 };
> +    struct device *dev = pci_to_dev(pdev);
> +    const struct dt_device_node *np;
> +    int rc = NO_IOMMU;

AFAICT, the initial value will never be read. So is it necessary?

> +
> +    if ( !iommu_enabled )
> +        return NO_IOMMU;
> +
> +    if ( !ops )
> +        return -EINVAL;
> +
> +    if ( device_is_protected(dev) )
> +        return 0;

These two lines are a bit odd to read because you would think that if 
the device is protected, then you want to continue translation. So can 
you add a comment explaining what this check means?

> +
> +    if ( dev_iommu_fwspec_get(dev) )
> +        return -EEXIST;
> +
> +    np = pci_find_host_bridge_node(pdev);
> +    if ( !np )
> +        return -ENODEV;
> +
> +    /*
> +     * According to the Documentation/devicetree/bindings/pci/pci-iommu.txt
> +     * from Linux.
> +     */
> +    rc = dt_map_id(np, PCI_BDF(pdev->bus, pdev->devfn), "iommu-map",
> +                   "iommu-map-mask", &iommu_spec.np, iommu_spec.args);
> +    if ( rc )
> +        return (rc == -ENODEV) ? NO_IOMMU : rc;
> +
> +    rc = iommu_dt_xlate(dev, &iommu_spec);
> +    if ( rc < 0 )
> +    {
> +        iommu_fwspec_free(dev);
> +        return -EINVAL;
> +    }
> +
> +    return rc;
> +}
> +#endif /* CONFIG_HAS_PCI */
> +
>   int iommu_add_dt_device(struct dt_device_node *np)
>   {
>       const struct iommu_ops *ops = iommu_get_ops();
> diff --git a/xen/include/xen/device_tree.h b/xen/include/xen/device_tree.h
> index c2f315140560..8385cd538a58 100644
> --- a/xen/include/xen/device_tree.h
> +++ b/xen/include/xen/device_tree.h
> @@ -892,6 +892,31 @@ int dt_count_phandle_with_args(const struct dt_device_node *np,
>    */
>   int dt_get_pci_domain_nr(struct dt_device_node *node);
>   
> +#ifdef CONFIG_HAS_PCI

In Xen, we don't usually add #ifdef for prototype only.

> +/**
> + * dt_map_id - Translate an ID through a downstream mapping.
> + * @np: root complex device node.
> + * @id: device ID to map.
> + * @map_name: property name of the map to use.
> + * @map_mask_name: optional property name of the mask to use.
> + * @target: optional pointer to a target device node.
> + * @id_out: optional pointer to receive the translated ID.
> + *
> + * Given a device ID, look up the appropriate implementation-defined
> + * platform ID and/or the target device which receives transactions on that
> + * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or
> + * @id_out may be NULL if only the other is required. If @target points to
> + * a non-NULL device node pointer, only entries targeting that node will be
> + * matched; if it points to a NULL value, it will receive the device node of
> + * the first matching target phandle, with a reference held.
> + *
> + * Return: 0 on success or a standard error code on failure.
> + */
> +int dt_map_id(const struct dt_device_node *np, uint32_t id,
> +              const char *map_name, const char *map_mask_name,
> +              struct dt_device_node **target, uint32_t *id_out);
> +#endif /* CONFIG_HAS_PCI */
> +
>   struct dt_device_node *dt_find_node_by_phandle(dt_phandle handle);
>   
>   #ifdef CONFIG_DEVICE_TREE_DEBUG
> diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
> index 405db59971c5..3cac177840f7 100644
> --- a/xen/include/xen/iommu.h
> +++ b/xen/include/xen/iommu.h
> @@ -26,6 +26,9 @@
>   #include <xen/spinlock.h>
>   #include <public/domctl.h>
>   #include <public/hvm/ioreq.h>
> +#ifdef CONFIG_ACPI
> +#include <asm/acpi.h>
> +#endif
>   #include <asm/device.h>
>   
>   TYPE_SAFE(uint64_t, dfn);
> @@ -219,7 +222,8 @@ int iommu_dt_domain_init(struct domain *d);
>   int iommu_release_dt_devices(struct domain *d);
>   
>   /*
> - * Helper to add master device to the IOMMU using generic IOMMU DT bindings.
> + * Helpers to add master device to the IOMMU using generic (PCI-)IOMMU
> + * DT bindings.
>    *
>    * Return values:
>    *  0 : device is protected by an IOMMU
> @@ -228,12 +232,28 @@ int iommu_release_dt_devices(struct domain *d);
>    *      (IOMMU is not enabled/present or device is not connected to it).
>    */
>   int iommu_add_dt_device(struct dt_device_node *np);
> +int iommu_add_dt_pci_sideband_ids(struct pci_dev *pdev);
>   
>   int iommu_do_dt_domctl(struct xen_domctl *, struct domain *,
>                          XEN_GUEST_HANDLE_PARAM(xen_domctl_t));
>   
> +#else /* !HAS_DEVICE_TREE */
> +static inline int iommu_add_dt_pci_sideband_ids(struct pci_dev *pdev)
> +{
> +    return 0;
Shouldn't this return an error because we wouldn't be able to add the 
Device?

> +}
>   #endif /* HAS_DEVICE_TREE */
>   
> +static inline int iommu_add_pci_sideband_ids(struct pci_dev *pdev)
> +{
> +    int ret = 0;

Same here.

> +#ifdef CONFIG_ACPI
> +    if ( acpi_disabled )
> +#endif
> +        ret = iommu_add_dt_pci_sideband_ids(pdev);
> +    return ret;
> +}
> +
>   struct page_info;
>   
>   /*

Cheers,
Stewart Hildebrand Sept. 29, 2023, 9:03 p.m. UTC | #6
On 7/4/23 05:35, Julien Grall wrote:
> Hi,
> 
> On 07/06/2023 04:02, Stewart Hildebrand wrote:
>> From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
>>
>> The main purpose of this patch is to add a way to register PCI device
>> (which is behind the IOMMU) using the generic PCI-IOMMU DT bindings [1]
>> before assigning that device to a domain.
>>
>> This behaves similarly to the existing iommu_add_dt_device API, except it
>> handles PCI devices, and it is to be invoked from the add_device hook in the
>> SMMU driver.
>>
>> The function of_map_id to translate an ID through a downstream mapping
> 
> You called the function dt_map_id in Xen.

I will fix

>> (which is also suitable for mapping Requester ID) was borrowed from Linux
>> (v5.10-rc6) and updated according to the Xen code base.
>>
>> [1] https://www.kernel.org/doc/Documentation/devicetree/bindings/pci/pci-iommu.txt
>>
>> Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
>> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
>> ---
>> v3->v4:
>> * wrap #include <asm/acpi.h> and if ( acpi_disabled ) in #ifdef CONFIG_ACPI
>> * fix Michal's remarks about style, parenthesis, and print formats
>> * remove !ops->dt_xlate check since it is already in iommu_dt_xlate helper
>> * rename s/iommu_dt_pci_map_id/dt_map_id/ because it is generic, not specific
>>    to iommu
>> * update commit description
>>
>> v2->v3:
>> * new patch title (was: iommu/arm: Introduce iommu_add_dt_pci_device API)
>> * renamed function
>>    from: iommu_add_dt_pci_device
>>    to: iommu_add_dt_pci_sideband_ids
>> * removed stale ops->add_device check
>> * iommu.h: add empty stub iommu_add_dt_pci_sideband_ids for !HAS_DEVICE_TREE
>> * iommu.h: add iommu_add_pci_sideband_ids helper
>> * iommu.h: don't wrap prototype in #ifdef CONFIG_HAS_PCI
>> * s/iommu_fwspec_free(pci_to_dev(pdev))/iommu_fwspec_free(dev)/
>>
>> v1->v2:
>> * remove extra devfn parameter since pdev fully describes the device
>> * remove ops->add_device() call from iommu_add_dt_pci_device(). Instead, rely on
>>    the existing iommu call in iommu_add_device().
>> * move the ops->add_device and ops->dt_xlate checks earlier
>>
>> downstream->v1:
>> * rebase
>> * add const qualifier to struct dt_device_node *np arg in dt_map_id()
>> * add const qualifier to struct dt_device_node *np declaration in iommu_add_pci_device()
>> * use stdint.h types instead of u8/u32/etc...
>> * rename functions:
>>    s/dt_iommu_xlate/iommu_dt_xlate/
>>    s/dt_map_id/iommu_dt_pci_map_id/
>>    s/iommu_add_pci_device/iommu_add_dt_pci_device/
>> * add device_is_protected check in iommu_add_dt_pci_device
>> * wrap prototypes in CONFIG_HAS_PCI
>>
>> (cherry picked from commit 734e3bf6ee77e7947667ab8fa96c25b349c2e1da from
>>   the downstream branch poc/pci-passthrough from
>>   https://gitlab.com/xen-project/people/bmarquis/xen-arm-poc.git)
>> ---
>>   xen/drivers/passthrough/device_tree.c | 134 ++++++++++++++++++++++++++
>>   xen/include/xen/device_tree.h         |  25 +++++
>>   xen/include/xen/iommu.h               |  22 ++++-
>>   3 files changed, 180 insertions(+), 1 deletion(-)
>>
>> diff --git a/xen/drivers/passthrough/device_tree.c b/xen/drivers/passthrough/device_tree.c
>> index ff9e66ebf92a..bd0aed5df651 100644
>> --- a/xen/drivers/passthrough/device_tree.c
>> +++ b/xen/drivers/passthrough/device_tree.c
>> @@ -154,6 +154,140 @@ static int iommu_dt_xlate(struct device *dev,
>>       return ops->dt_xlate(dev, iommu_spec);
>>   }
>>
>> +#ifdef CONFIG_HAS_PCI
> 
> The code below doesn't seem to be specific to PCI.

I will move dt_map_id() outside the #ifdef CONFIG_HAS_PCI ...

> 
>> +int dt_map_id(const struct dt_device_node *np, uint32_t id,
>> +              const char *map_name, const char *map_mask_name,
>> +              struct dt_device_node **target, uint32_t *id_out)
> 
> AFAICT, this function can also be used outside of the IOMMU code. So
> shouldn't this be implemented in common/device_tree.c?

... to common/device_tree.c.

>> +{
>> +    uint32_t map_mask, masked_id, map_len;
>> +    const __be32 *map = NULL;
>> +
>> +    if ( !np || !map_name || (!target && !id_out) )
>> +        return -EINVAL;
>> +
>> +    map = dt_get_property(np, map_name, &map_len);
>> +    if ( !map )
>> +    {
>> +        if ( target )
>> +            return -ENODEV;
>> +
>> +        /* Otherwise, no map implies no translation */
>> +        *id_out = id;
>> +        return 0;
>> +    }
>> +
>> +    if ( !map_len || (map_len % (4 * sizeof(*map))) )
>> +    {
>> +        printk(XENLOG_ERR "%s: Error: Bad %s length: %u\n", np->full_name,
>> +               map_name, map_len);
> 
> I think it would be helpful if you add the function name in the error
> message.

Will do

>> +        return -EINVAL;
>> +    }
>> +
>> +    /* The default is to select all bits. */
>> +    map_mask = 0xffffffff;
> 
> Please add a U. That said, I would switch to GENMASK(31, 0) so it is
> easier to check the value.

OK, I will use GENMASK and add #include <xen/bitops.h>.

>> +
>> +    /*
>> +     * Can be overridden by "{iommu,msi}-map-mask" property.
>> +     * If df_property_read_u32() fails, the default is used.
> 
> s/df/dt/

Will fix, thanks for spotting this

>> +     */
>> +    if ( map_mask_name )
>> +        dt_property_read_u32(np, map_mask_name, &map_mask);
>> +
>> +    masked_id = map_mask & id;
>> +    for ( ; (int)map_len > 0; map_len -= 4 * sizeof(*map), map += 4 )
> 
> Why do you cast map_len to 'int'?

No good reason, I will remove the cast.

>> +    {
>> +        struct dt_device_node *phandle_node;
>> +        uint32_t id_base = be32_to_cpup(map + 0);
>> +        uint32_t phandle = be32_to_cpup(map + 1);
>> +        uint32_t out_base = be32_to_cpup(map + 2);
>> +        uint32_t id_len = be32_to_cpup(map + 3);
>> +
>> +        if ( id_base & ~map_mask )
>> +        {
>> +            printk(XENLOG_ERR "%s: Invalid %s translation - %s-mask (0x%"PRIx32") ignores id-base (0x%"PRIx32")\n",
>> +                   np->full_name, map_name, map_name, map_mask, id_base);
> 
> Same here about adding the function name.

Will do

>> +            return -EFAULT;
>> +        }
>> +
>> +        if ( (masked_id < id_base) || (masked_id >= (id_base + id_len)) )
>> +            continue;
>> +
>> +        phandle_node = dt_find_node_by_phandle(phandle);
>> +        if ( !phandle_node )
>> +            return -ENODEV;
>> +
>> +        if ( target )
>> +        {
>> +            if ( !*target )
>> +                *target = phandle_node;
>> +
>> +            if ( *target != phandle_node )
>> +                continue;
>> +        }
>> +
>> +        if ( id_out )
>> +            *id_out = masked_id - id_base + out_base;
>> +
>> +        printk(XENLOG_DEBUG "%s: %s, using mask %08"PRIx32", id-base: %08"PRIx32", out-base: %08"PRIx32", length: %08"PRIx32", id: %08"PRIx32" -> %08"PRIx32"\n",
> 
> How about using dprintk()?

OK

>> +               np->full_name, map_name, map_mask, id_base, out_base, id_len, id,
>> +               masked_id - id_base + out_base);
>> +        return 0;
>> +    }
>> +
>> +    printk(XENLOG_ERR "%s: no %s translation for id 0x%"PRIx32" on %s\n",
>> +           np->full_name, map_name, id, (target && *target) ? (*target)->full_name : NULL);
> 
> Same here about adding the function name.

I'll change this one to dprintk too, since it's not necessarily an error condition to reach here (see below).

>> +
>> +    /*
>> +     * NOTE: Linux bypasses translation without returning an error here,
>> +     * but should we behave in the same way on Xen? Restrict for now.
>> +     */
> 
> Can you outline why we would want to bypass the translation?

We can reach here if the iommu-map property doesn't cover all possible RIDs, or is otherwise malformed in some way. E.g. you might have iommu-map = <0x0 &smmu 0x8000 0x8000>;. RIDs 0x0-0x7fff will get translated, but RIDs 0x8000-0xffff won't. Also, RIDs 0x8000-0xffff won't have the &smmu phandle reference.

Reference the iommu-map DT binding [1] linked in the commit message for more info.

On an ARM platform, it would probably be an error to not apply a RID -> stream ID translation. However, this code is not ARM specific. Also, in case of no translation, the SMMU driver will fail to add the device, because the *target = phandle_node; line above would not get executed. So I think we can safely change it to return 0, which is the same as of_map_id in Linux.

>> +    return -EFAULT;
>> +}
>> +
>> +int iommu_add_dt_pci_sideband_ids(struct pci_dev *pdev)
>> +{
>> +    const struct iommu_ops *ops = iommu_get_ops();
>> +    struct dt_phandle_args iommu_spec = { .args_count = 1 };
>> +    struct device *dev = pci_to_dev(pdev);
>> +    const struct dt_device_node *np;
>> +    int rc = NO_IOMMU;
> 
> AFAICT, the initial value will never be read. So is it necessary?

You're right, no need to initialize. I will fix.

>> +
>> +    if ( !iommu_enabled )
>> +        return NO_IOMMU;
>> +
>> +    if ( !ops )
>> +        return -EINVAL;
>> +
>> +    if ( device_is_protected(dev) )
>> +        return 0;
> 
> These two lines are a bit odd to read because you would think that if
> the device is protected, then you want to continue translation. So can
> you add a comment explaining what this check means?

Since dropping ("xen/arm: Move is_protected flag to struct device") the is_protected check is no longer relevant. I will remove it.

>> +
>> +    if ( dev_iommu_fwspec_get(dev) )
>> +        return -EEXIST;
>> +
>> +    np = pci_find_host_bridge_node(pdev);
>> +    if ( !np )
>> +        return -ENODEV;
>> +
>> +    /*
>> +     * According to the Documentation/devicetree/bindings/pci/pci-iommu.txt
>> +     * from Linux.
>> +     */
>> +    rc = dt_map_id(np, PCI_BDF(pdev->bus, pdev->devfn), "iommu-map",
>> +                   "iommu-map-mask", &iommu_spec.np, iommu_spec.args);
>> +    if ( rc )
>> +        return (rc == -ENODEV) ? NO_IOMMU : rc;
>> +
>> +    rc = iommu_dt_xlate(dev, &iommu_spec);
>> +    if ( rc < 0 )
>> +    {
>> +        iommu_fwspec_free(dev);
>> +        return -EINVAL;
>> +    }
>> +
>> +    return rc;
>> +}
>> +#endif /* CONFIG_HAS_PCI */
>> +
>>   int iommu_add_dt_device(struct dt_device_node *np)
>>   {
>>       const struct iommu_ops *ops = iommu_get_ops();
>> diff --git a/xen/include/xen/device_tree.h b/xen/include/xen/device_tree.h
>> index c2f315140560..8385cd538a58 100644
>> --- a/xen/include/xen/device_tree.h
>> +++ b/xen/include/xen/device_tree.h
>> @@ -892,6 +892,31 @@ int dt_count_phandle_with_args(const struct dt_device_node *np,
>>    */
>>   int dt_get_pci_domain_nr(struct dt_device_node *node);
>>
>> +#ifdef CONFIG_HAS_PCI
> 
> In Xen, we don't usually add #ifdef for prototype only.

OK, I will remove the #ifdef

>> +/**
>> + * dt_map_id - Translate an ID through a downstream mapping.
>> + * @np: root complex device node.
>> + * @id: device ID to map.
>> + * @map_name: property name of the map to use.
>> + * @map_mask_name: optional property name of the mask to use.
>> + * @target: optional pointer to a target device node.
>> + * @id_out: optional pointer to receive the translated ID.
>> + *
>> + * Given a device ID, look up the appropriate implementation-defined
>> + * platform ID and/or the target device which receives transactions on that
>> + * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or
>> + * @id_out may be NULL if only the other is required. If @target points to
>> + * a non-NULL device node pointer, only entries targeting that node will be
>> + * matched; if it points to a NULL value, it will receive the device node of
>> + * the first matching target phandle, with a reference held.
>> + *
>> + * Return: 0 on success or a standard error code on failure.
>> + */
>> +int dt_map_id(const struct dt_device_node *np, uint32_t id,
>> +              const char *map_name, const char *map_mask_name,
>> +              struct dt_device_node **target, uint32_t *id_out);
>> +#endif /* CONFIG_HAS_PCI */
>> +
>>   struct dt_device_node *dt_find_node_by_phandle(dt_phandle handle);
>>
>>   #ifdef CONFIG_DEVICE_TREE_DEBUG
>> diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
>> index 405db59971c5..3cac177840f7 100644
>> --- a/xen/include/xen/iommu.h
>> +++ b/xen/include/xen/iommu.h
>> @@ -26,6 +26,9 @@
>>   #include <xen/spinlock.h>
>>   #include <public/domctl.h>
>>   #include <public/hvm/ioreq.h>
>> +#ifdef CONFIG_ACPI
>> +#include <asm/acpi.h>
>> +#endif
>>   #include <asm/device.h>
>>
>>   TYPE_SAFE(uint64_t, dfn);
>> @@ -219,7 +222,8 @@ int iommu_dt_domain_init(struct domain *d);
>>   int iommu_release_dt_devices(struct domain *d);
>>
>>   /*
>> - * Helper to add master device to the IOMMU using generic IOMMU DT bindings.
>> + * Helpers to add master device to the IOMMU using generic (PCI-)IOMMU
>> + * DT bindings.
>>    *
>>    * Return values:
>>    *  0 : device is protected by an IOMMU
>> @@ -228,12 +232,28 @@ int iommu_release_dt_devices(struct domain *d);
>>    *      (IOMMU is not enabled/present or device is not connected to it).
>>    */
>>   int iommu_add_dt_device(struct dt_device_node *np);
>> +int iommu_add_dt_pci_sideband_ids(struct pci_dev *pdev);
>>
>>   int iommu_do_dt_domctl(struct xen_domctl *, struct domain *,
>>                          XEN_GUEST_HANDLE_PARAM(xen_domctl_t));
>>
>> +#else /* !HAS_DEVICE_TREE */
>> +static inline int iommu_add_dt_pci_sideband_ids(struct pci_dev *pdev)
>> +{
>> +    return 0;
> Shouldn't this return an error because we wouldn't be able to add the
> Device?

Yep, I will fix

>> +}
>>   #endif /* HAS_DEVICE_TREE */
>>
>> +static inline int iommu_add_pci_sideband_ids(struct pci_dev *pdev)
>> +{
>> +    int ret = 0;
> 
> Same here.

OK

>> +#ifdef CONFIG_ACPI
>> +    if ( acpi_disabled )
>> +#endif
>> +        ret = iommu_add_dt_pci_sideband_ids(pdev);
>> +    return ret;
>> +}
>> +
>>   struct page_info;
>>
>>   /*
> 
> Cheers,
> 
> -- 
> Julien Grall
diff mbox series

Patch

diff --git a/xen/drivers/passthrough/device_tree.c b/xen/drivers/passthrough/device_tree.c
index ff9e66ebf92a..bd0aed5df651 100644
--- a/xen/drivers/passthrough/device_tree.c
+++ b/xen/drivers/passthrough/device_tree.c
@@ -154,6 +154,140 @@  static int iommu_dt_xlate(struct device *dev,
     return ops->dt_xlate(dev, iommu_spec);
 }
 
+#ifdef CONFIG_HAS_PCI
+int dt_map_id(const struct dt_device_node *np, uint32_t id,
+              const char *map_name, const char *map_mask_name,
+              struct dt_device_node **target, uint32_t *id_out)
+{
+    uint32_t map_mask, masked_id, map_len;
+    const __be32 *map = NULL;
+
+    if ( !np || !map_name || (!target && !id_out) )
+        return -EINVAL;
+
+    map = dt_get_property(np, map_name, &map_len);
+    if ( !map )
+    {
+        if ( target )
+            return -ENODEV;
+
+        /* Otherwise, no map implies no translation */
+        *id_out = id;
+        return 0;
+    }
+
+    if ( !map_len || (map_len % (4 * sizeof(*map))) )
+    {
+        printk(XENLOG_ERR "%s: Error: Bad %s length: %u\n", np->full_name,
+               map_name, map_len);
+        return -EINVAL;
+    }
+
+    /* The default is to select all bits. */
+    map_mask = 0xffffffff;
+
+    /*
+     * Can be overridden by "{iommu,msi}-map-mask" property.
+     * If df_property_read_u32() fails, the default is used.
+     */
+    if ( map_mask_name )
+        dt_property_read_u32(np, map_mask_name, &map_mask);
+
+    masked_id = map_mask & id;
+    for ( ; (int)map_len > 0; map_len -= 4 * sizeof(*map), map += 4 )
+    {
+        struct dt_device_node *phandle_node;
+        uint32_t id_base = be32_to_cpup(map + 0);
+        uint32_t phandle = be32_to_cpup(map + 1);
+        uint32_t out_base = be32_to_cpup(map + 2);
+        uint32_t id_len = be32_to_cpup(map + 3);
+
+        if ( id_base & ~map_mask )
+        {
+            printk(XENLOG_ERR "%s: Invalid %s translation - %s-mask (0x%"PRIx32") ignores id-base (0x%"PRIx32")\n",
+                   np->full_name, map_name, map_name, map_mask, id_base);
+            return -EFAULT;
+        }
+
+        if ( (masked_id < id_base) || (masked_id >= (id_base + id_len)) )
+            continue;
+
+        phandle_node = dt_find_node_by_phandle(phandle);
+        if ( !phandle_node )
+            return -ENODEV;
+
+        if ( target )
+        {
+            if ( !*target )
+                *target = phandle_node;
+
+            if ( *target != phandle_node )
+                continue;
+        }
+
+        if ( id_out )
+            *id_out = masked_id - id_base + out_base;
+
+        printk(XENLOG_DEBUG "%s: %s, using mask %08"PRIx32", id-base: %08"PRIx32", out-base: %08"PRIx32", length: %08"PRIx32", id: %08"PRIx32" -> %08"PRIx32"\n",
+               np->full_name, map_name, map_mask, id_base, out_base, id_len, id,
+               masked_id - id_base + out_base);
+        return 0;
+    }
+
+    printk(XENLOG_ERR "%s: no %s translation for id 0x%"PRIx32" on %s\n",
+           np->full_name, map_name, id, (target && *target) ? (*target)->full_name : NULL);
+
+    /*
+     * NOTE: Linux bypasses translation without returning an error here,
+     * but should we behave in the same way on Xen? Restrict for now.
+     */
+    return -EFAULT;
+}
+
+int iommu_add_dt_pci_sideband_ids(struct pci_dev *pdev)
+{
+    const struct iommu_ops *ops = iommu_get_ops();
+    struct dt_phandle_args iommu_spec = { .args_count = 1 };
+    struct device *dev = pci_to_dev(pdev);
+    const struct dt_device_node *np;
+    int rc = NO_IOMMU;
+
+    if ( !iommu_enabled )
+        return NO_IOMMU;
+
+    if ( !ops )
+        return -EINVAL;
+
+    if ( device_is_protected(dev) )
+        return 0;
+
+    if ( dev_iommu_fwspec_get(dev) )
+        return -EEXIST;
+
+    np = pci_find_host_bridge_node(pdev);
+    if ( !np )
+        return -ENODEV;
+
+    /*
+     * According to the Documentation/devicetree/bindings/pci/pci-iommu.txt
+     * from Linux.
+     */
+    rc = dt_map_id(np, PCI_BDF(pdev->bus, pdev->devfn), "iommu-map",
+                   "iommu-map-mask", &iommu_spec.np, iommu_spec.args);
+    if ( rc )
+        return (rc == -ENODEV) ? NO_IOMMU : rc;
+
+    rc = iommu_dt_xlate(dev, &iommu_spec);
+    if ( rc < 0 )
+    {
+        iommu_fwspec_free(dev);
+        return -EINVAL;
+    }
+
+    return rc;
+}
+#endif /* CONFIG_HAS_PCI */
+
 int iommu_add_dt_device(struct dt_device_node *np)
 {
     const struct iommu_ops *ops = iommu_get_ops();
diff --git a/xen/include/xen/device_tree.h b/xen/include/xen/device_tree.h
index c2f315140560..8385cd538a58 100644
--- a/xen/include/xen/device_tree.h
+++ b/xen/include/xen/device_tree.h
@@ -892,6 +892,31 @@  int dt_count_phandle_with_args(const struct dt_device_node *np,
  */
 int dt_get_pci_domain_nr(struct dt_device_node *node);
 
+#ifdef CONFIG_HAS_PCI
+/**
+ * dt_map_id - Translate an ID through a downstream mapping.
+ * @np: root complex device node.
+ * @id: device ID to map.
+ * @map_name: property name of the map to use.
+ * @map_mask_name: optional property name of the mask to use.
+ * @target: optional pointer to a target device node.
+ * @id_out: optional pointer to receive the translated ID.
+ *
+ * Given a device ID, look up the appropriate implementation-defined
+ * platform ID and/or the target device which receives transactions on that
+ * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or
+ * @id_out may be NULL if only the other is required. If @target points to
+ * a non-NULL device node pointer, only entries targeting that node will be
+ * matched; if it points to a NULL value, it will receive the device node of
+ * the first matching target phandle, with a reference held.
+ *
+ * Return: 0 on success or a standard error code on failure.
+ */
+int dt_map_id(const struct dt_device_node *np, uint32_t id,
+              const char *map_name, const char *map_mask_name,
+              struct dt_device_node **target, uint32_t *id_out);
+#endif /* CONFIG_HAS_PCI */
+
 struct dt_device_node *dt_find_node_by_phandle(dt_phandle handle);
 
 #ifdef CONFIG_DEVICE_TREE_DEBUG
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 405db59971c5..3cac177840f7 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -26,6 +26,9 @@ 
 #include <xen/spinlock.h>
 #include <public/domctl.h>
 #include <public/hvm/ioreq.h>
+#ifdef CONFIG_ACPI
+#include <asm/acpi.h>
+#endif
 #include <asm/device.h>
 
 TYPE_SAFE(uint64_t, dfn);
@@ -219,7 +222,8 @@  int iommu_dt_domain_init(struct domain *d);
 int iommu_release_dt_devices(struct domain *d);
 
 /*
- * Helper to add master device to the IOMMU using generic IOMMU DT bindings.
+ * Helpers to add master device to the IOMMU using generic (PCI-)IOMMU
+ * DT bindings.
  *
  * Return values:
  *  0 : device is protected by an IOMMU
@@ -228,12 +232,28 @@  int iommu_release_dt_devices(struct domain *d);
  *      (IOMMU is not enabled/present or device is not connected to it).
  */
 int iommu_add_dt_device(struct dt_device_node *np);
+int iommu_add_dt_pci_sideband_ids(struct pci_dev *pdev);
 
 int iommu_do_dt_domctl(struct xen_domctl *, struct domain *,
                        XEN_GUEST_HANDLE_PARAM(xen_domctl_t));
 
+#else /* !HAS_DEVICE_TREE */
+static inline int iommu_add_dt_pci_sideband_ids(struct pci_dev *pdev)
+{
+    return 0;
+}
 #endif /* HAS_DEVICE_TREE */
 
+static inline int iommu_add_pci_sideband_ids(struct pci_dev *pdev)
+{
+    int ret = 0;
+#ifdef CONFIG_ACPI
+    if ( acpi_disabled )
+#endif
+        ret = iommu_add_dt_pci_sideband_ids(pdev);
+    return ret;
+}
+
 struct page_info;
 
 /*