diff mbox series

[v2,11/11] xen/arm: Translate virtual PCI bus topology for guests

Message ID 20210923125501.234252-12-andr2000@gmail.com (mailing list archive)
State Superseded
Headers show
Series PCI devices passthrough on Arm, part 3 | expand

Commit Message

Oleksandr Andrushchenko Sept. 23, 2021, 12:55 p.m. UTC
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>

There are three  originators for the PCI configuration space access:
1. The domain that owns physical host bridge: MMIO handlers are
there so we can update vPCI register handlers with the values
written by the hardware domain, e.g. physical view of the registers
vs guest's view on the configuration space.
2. Guest access to the passed through PCI devices: we need to properly
map virtual bus topology to the physical one, e.g. pass the configuration
space access to the corresponding physical devices.
3. Emulated host PCI bridge access. It doesn't exist in the physical
topology, e.g. it can't be mapped to some physical host bridge.
So, all access to the host bridge itself needs to be trapped and
emulated.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>

---
New in v2
---
 xen/arch/arm/domain.c         |  1 +
 xen/arch/arm/vpci.c           | 87 +++++++++++++++++++++++++++++++----
 xen/arch/arm/vpci.h           |  3 ++
 xen/drivers/passthrough/pci.c | 25 ++++++++++
 xen/include/asm-arm/pci.h     |  1 +
 xen/include/xen/pci.h         |  1 +
 xen/include/xen/sched.h       |  2 +
 7 files changed, 111 insertions(+), 9 deletions(-)

Comments

Jan Beulich Sept. 27, 2021, 11:31 a.m. UTC | #1
On 23.09.2021 14:55, Oleksandr Andrushchenko wrote:
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
> 
> There are three  originators for the PCI configuration space access:
> 1. The domain that owns physical host bridge: MMIO handlers are
> there so we can update vPCI register handlers with the values
> written by the hardware domain, e.g. physical view of the registers
> vs guest's view on the configuration space.
> 2. Guest access to the passed through PCI devices: we need to properly
> map virtual bus topology to the physical one, e.g. pass the configuration
> space access to the corresponding physical devices.
> 3. Emulated host PCI bridge access. It doesn't exist in the physical
> topology, e.g. it can't be mapped to some physical host bridge.
> So, all access to the host bridge itself needs to be trapped and
> emulated.
> 
> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
> 
> ---
> New in v2
> ---
>  xen/arch/arm/domain.c         |  1 +
>  xen/arch/arm/vpci.c           | 87 +++++++++++++++++++++++++++++++----
>  xen/arch/arm/vpci.h           |  3 ++
>  xen/drivers/passthrough/pci.c | 25 ++++++++++
>  xen/include/asm-arm/pci.h     |  1 +
>  xen/include/xen/pci.h         |  1 +
>  xen/include/xen/sched.h       |  2 +
>  7 files changed, 111 insertions(+), 9 deletions(-)
> 
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index c7b25bc70439..c0ad6ad682d2 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -797,6 +797,7 @@ void arch_domain_destroy(struct domain *d)
>                         get_order_from_bytes(d->arch.efi_acpi_len));
>  #endif
>      domain_io_free(d);
> +    domain_vpci_free(d);
>  }
>  
>  void arch_domain_shutdown(struct domain *d)
> diff --git a/xen/arch/arm/vpci.c b/xen/arch/arm/vpci.c
> index 14947e975d69..012f958960d1 100644
> --- a/xen/arch/arm/vpci.c
> +++ b/xen/arch/arm/vpci.c
> @@ -17,6 +17,14 @@
>  
>  #define REGISTER_OFFSET(addr)  ( (addr) & 0x00000fff)
>  
> +struct vpci_mmio_priv {
> +    /*
> +     * Set to true if the MMIO handlers were set up for the emulated
> +     * ECAM host PCI bridge.
> +     */
> +    bool is_virt_ecam;
> +};
> +
>  /* Do some sanity checks. */
>  static bool vpci_mmio_access_allowed(unsigned int reg, unsigned int len)
>  {
> @@ -38,6 +46,7 @@ static int vpci_mmio_read(struct vcpu *v, mmio_info_t *info,
>      pci_sbdf_t sbdf;
>      unsigned long data = ~0UL;
>      unsigned int size = 1U << info->dabt.size;
> +    struct vpci_mmio_priv *priv = (struct vpci_mmio_priv *)p;
>  
>      sbdf.sbdf = MMCFG_BDF(info->gpa);
>      reg = REGISTER_OFFSET(info->gpa);
> @@ -45,6 +54,13 @@ static int vpci_mmio_read(struct vcpu *v, mmio_info_t *info,
>      if ( !vpci_mmio_access_allowed(reg, size) )
>          return 0;
>  
> +    /*
> +     * For the passed through devices we need to map their virtual SBDF
> +     * to the physical PCI device being passed through.
> +     */
> +    if ( priv->is_virt_ecam && !pci_translate_virtual_device(v, &sbdf) )
> +            return 1;
> +
>      data = vpci_read(sbdf, reg, min(4u, size));
>      if ( size == 8 )
>          data |= (uint64_t)vpci_read(sbdf, reg + 4, 4) << 32;
> @@ -61,6 +77,7 @@ static int vpci_mmio_write(struct vcpu *v, mmio_info_t *info,
>      pci_sbdf_t sbdf;
>      unsigned long data = r;
>      unsigned int size = 1U << info->dabt.size;
> +    struct vpci_mmio_priv *priv = (struct vpci_mmio_priv *)p;
>  
>      sbdf.sbdf = MMCFG_BDF(info->gpa);
>      reg = REGISTER_OFFSET(info->gpa);
> @@ -68,6 +85,13 @@ static int vpci_mmio_write(struct vcpu *v, mmio_info_t *info,
>      if ( !vpci_mmio_access_allowed(reg, size) )
>          return 0;
>  
> +    /*
> +     * For the passed through devices we need to map their virtual SBDF
> +     * to the physical PCI device being passed through.
> +     */
> +    if ( priv->is_virt_ecam && !pci_translate_virtual_device(v, &sbdf) )
> +            return 1;
> +
>      vpci_write(sbdf, reg, min(4u, size), data);
>      if ( size == 8 )
>          vpci_write(sbdf, reg + 4, 4, data >> 32);
> @@ -80,13 +104,48 @@ static const struct mmio_handler_ops vpci_mmio_handler = {
>      .write = vpci_mmio_write,
>  };
>  
> +/*
> + * There are three  originators for the PCI configuration space access:
> + * 1. The domain that owns physical host bridge: MMIO handlers are
> + *    there so we can update vPCI register handlers with the values
> + *    written by the hardware domain, e.g. physical view of the registers/
> + *    configuration space.
> + * 2. Guest access to the passed through PCI devices: we need to properly
> + *    map virtual bus topology to the physical one, e.g. pass the configuration
> + *    space access to the corresponding physical devices.
> + * 3. Emulated host PCI bridge access. It doesn't exist in the physical
> + *    topology, e.g. it can't be mapped to some physical host bridge.
> + *    So, all access to the host bridge itself needs to be trapped and
> + *    emulated.
> + */
>  static int vpci_setup_mmio_handler(struct domain *d,
>                                     struct pci_host_bridge *bridge)
>  {
> -    struct pci_config_window *cfg = bridge->cfg;
> +    struct vpci_mmio_priv *priv;
> +
> +    priv = xzalloc(struct vpci_mmio_priv);
> +    if ( !priv )
> +        return -ENOMEM;
> +
> +    priv->is_virt_ecam = !is_hardware_domain(d);
>  
> -    register_mmio_handler(d, &vpci_mmio_handler,
> -                          cfg->phys_addr, cfg->size, NULL);
> +    if ( is_hardware_domain(d) )
> +    {
> +        struct pci_config_window *cfg = bridge->cfg;
> +
> +        bridge->mmio_priv = priv;
> +        register_mmio_handler(d, &vpci_mmio_handler,
> +                              cfg->phys_addr, cfg->size,
> +                              priv);
> +    }
> +    else
> +    {
> +        d->vpci_mmio_priv = priv;
> +        /* Guest domains use what is programmed in their device tree. */
> +        register_mmio_handler(d, &vpci_mmio_handler,
> +                              GUEST_VPCI_ECAM_BASE, GUEST_VPCI_ECAM_SIZE,
> +                              priv);
> +    }
>      return 0;
>  }
>  
> @@ -95,13 +154,16 @@ int domain_vpci_init(struct domain *d)
>      if ( !has_vpci(d) )
>          return 0;
>  
> -    if ( is_hardware_domain(d) )
> -        return pci_host_iterate_bridges(d, vpci_setup_mmio_handler);
> -
> -    /* Guest domains use what is programmed in their device tree. */
> -    register_mmio_handler(d, &vpci_mmio_handler,
> -                          GUEST_VPCI_ECAM_BASE, GUEST_VPCI_ECAM_SIZE, NULL);
> +    return pci_host_iterate_bridges(d, vpci_setup_mmio_handler);
> +}
>  
> +static int domain_vpci_free_cb(struct domain *d,
> +                               struct pci_host_bridge *bridge)
> +{
> +    if ( is_hardware_domain(d) )
> +        XFREE(bridge->mmio_priv);
> +    else
> +        XFREE(d->vpci_mmio_priv);
>      return 0;
>  }
>  
> @@ -124,6 +186,13 @@ int domain_vpci_get_num_mmio_handlers(struct domain *d)
>      return count;
>  }
>  
> +void domain_vpci_free(struct domain *d)
> +{
> +    if ( !has_vpci(d) )
> +        return;
> +
> +    pci_host_iterate_bridges(d, domain_vpci_free_cb);
> +}
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/arch/arm/vpci.h b/xen/arch/arm/vpci.h
> index 27a2b069abd2..38e5a28c0d95 100644
> --- a/xen/arch/arm/vpci.h
> +++ b/xen/arch/arm/vpci.h
> @@ -18,6 +18,7 @@
>  #ifdef CONFIG_HAS_VPCI
>  int domain_vpci_init(struct domain *d);
>  int domain_vpci_get_num_mmio_handlers(struct domain *d);
> +void domain_vpci_free(struct domain *d);
>  #else
>  static inline int domain_vpci_init(struct domain *d)
>  {
> @@ -28,6 +29,8 @@ static inline int domain_vpci_get_num_mmio_handlers(struct domain *d)
>  {
>      return 0;
>  }
> +
> +static inline void domain_vpci_free(struct domain *d) { }
>  #endif
>  
>  #endif /* __ARCH_ARM_VPCI_H__ */
> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
> index 4552ace855e0..579c6947cc35 100644
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -890,6 +890,31 @@ int pci_remove_virtual_device(struct domain *d, const struct pci_dev *pdev)
>      return 0;
>  }
>  
> +/*
> + * Find the physical device which is mapped to the virtual device
> + * and translate virtual SBDF to the physical one.
> + */
> +bool pci_translate_virtual_device(struct vcpu *v, pci_sbdf_t *sbdf)

Why struct vcpu, when you only need ...

> +{
> +    struct domain *d = v->domain;

... this? It's also not really logical for this function to take a
struct vcpu, as the translation should be uniform within a domain.

Also - const please (as said elsewhere before, ideally wherever possible
and sensible).

> +    struct vpci_dev *vdev;
> +    bool found = false;
> +
> +    pcidevs_lock();
> +    list_for_each_entry ( vdev, &d->vdev_list, list )
> +    {
> +        if ( vdev->sbdf.sbdf == sbdf->sbdf )
> +        {
> +            /* Replace virtual SBDF with the physical one. */
> +            *sbdf = vdev->pdev->sbdf;
> +            found = true;
> +            break;
> +        }
> +    }

For a DomU with just one or at most a couple of devices, such a brute
force lookup may be fine. What about Dom0 though? The physical topology
gets split at the segment level, so maybe this would by a reasonable
granularity here as well?

> +    pcidevs_unlock();
> +    return found;

Nit: Blank line please ahead of the main "return" of a function.

> +}
> +
>  /* Caller should hold the pcidevs_lock */
>  static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus,
>                             uint8_t devfn)

Seeing this function in context (which patch 2 adds without any #ifdef
around it afaics), will this new function needlessly be built on x86 as
well? (I didn't look at other intermediate patches yet, so please
forgive if I've missed the addition of an #ifdef.)

Jan
Oleksandr Andrushchenko Sept. 27, 2021, 12:08 p.m. UTC | #2
On 27.09.21 14:31, Jan Beulich wrote:
> On 23.09.2021 14:55, Oleksandr Andrushchenko wrote:
>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>
>> There are three  originators for the PCI configuration space access:
>> 1. The domain that owns physical host bridge: MMIO handlers are
>> there so we can update vPCI register handlers with the values
>> written by the hardware domain, e.g. physical view of the registers
>> vs guest's view on the configuration space.
>> 2. Guest access to the passed through PCI devices: we need to properly
>> map virtual bus topology to the physical one, e.g. pass the configuration
>> space access to the corresponding physical devices.
>> 3. Emulated host PCI bridge access. It doesn't exist in the physical
>> topology, e.g. it can't be mapped to some physical host bridge.
>> So, all access to the host bridge itself needs to be trapped and
>> emulated.
>>
>> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>
>> ---
>> New in v2
>> ---
>>   xen/arch/arm/domain.c         |  1 +
>>   xen/arch/arm/vpci.c           | 87 +++++++++++++++++++++++++++++++----
>>   xen/arch/arm/vpci.h           |  3 ++
>>   xen/drivers/passthrough/pci.c | 25 ++++++++++
>>   xen/include/asm-arm/pci.h     |  1 +
>>   xen/include/xen/pci.h         |  1 +
>>   xen/include/xen/sched.h       |  2 +
>>   7 files changed, 111 insertions(+), 9 deletions(-)
>>
>> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
>> index c7b25bc70439..c0ad6ad682d2 100644
>> --- a/xen/arch/arm/domain.c
>> +++ b/xen/arch/arm/domain.c
>> @@ -797,6 +797,7 @@ void arch_domain_destroy(struct domain *d)
>>                          get_order_from_bytes(d->arch.efi_acpi_len));
>>   #endif
>>       domain_io_free(d);
>> +    domain_vpci_free(d);
>>   }
>>   
>>   void arch_domain_shutdown(struct domain *d)
>> diff --git a/xen/arch/arm/vpci.c b/xen/arch/arm/vpci.c
>> index 14947e975d69..012f958960d1 100644
>> --- a/xen/arch/arm/vpci.c
>> +++ b/xen/arch/arm/vpci.c
>> @@ -17,6 +17,14 @@
>>   
>>   #define REGISTER_OFFSET(addr)  ( (addr) & 0x00000fff)
>>   
>> +struct vpci_mmio_priv {
>> +    /*
>> +     * Set to true if the MMIO handlers were set up for the emulated
>> +     * ECAM host PCI bridge.
>> +     */
>> +    bool is_virt_ecam;
>> +};
>> +
>>   /* Do some sanity checks. */
>>   static bool vpci_mmio_access_allowed(unsigned int reg, unsigned int len)
>>   {
>> @@ -38,6 +46,7 @@ static int vpci_mmio_read(struct vcpu *v, mmio_info_t *info,
>>       pci_sbdf_t sbdf;
>>       unsigned long data = ~0UL;
>>       unsigned int size = 1U << info->dabt.size;
>> +    struct vpci_mmio_priv *priv = (struct vpci_mmio_priv *)p;
>>   
>>       sbdf.sbdf = MMCFG_BDF(info->gpa);
>>       reg = REGISTER_OFFSET(info->gpa);
>> @@ -45,6 +54,13 @@ static int vpci_mmio_read(struct vcpu *v, mmio_info_t *info,
>>       if ( !vpci_mmio_access_allowed(reg, size) )
>>           return 0;
>>   
>> +    /*
>> +     * For the passed through devices we need to map their virtual SBDF
>> +     * to the physical PCI device being passed through.
>> +     */
>> +    if ( priv->is_virt_ecam && !pci_translate_virtual_device(v, &sbdf) )
>> +            return 1;
>> +
>>       data = vpci_read(sbdf, reg, min(4u, size));
>>       if ( size == 8 )
>>           data |= (uint64_t)vpci_read(sbdf, reg + 4, 4) << 32;
>> @@ -61,6 +77,7 @@ static int vpci_mmio_write(struct vcpu *v, mmio_info_t *info,
>>       pci_sbdf_t sbdf;
>>       unsigned long data = r;
>>       unsigned int size = 1U << info->dabt.size;
>> +    struct vpci_mmio_priv *priv = (struct vpci_mmio_priv *)p;
>>   
>>       sbdf.sbdf = MMCFG_BDF(info->gpa);
>>       reg = REGISTER_OFFSET(info->gpa);
>> @@ -68,6 +85,13 @@ static int vpci_mmio_write(struct vcpu *v, mmio_info_t *info,
>>       if ( !vpci_mmio_access_allowed(reg, size) )
>>           return 0;
>>   
>> +    /*
>> +     * For the passed through devices we need to map their virtual SBDF
>> +     * to the physical PCI device being passed through.
>> +     */
>> +    if ( priv->is_virt_ecam && !pci_translate_virtual_device(v, &sbdf) )
>> +            return 1;
>> +
>>       vpci_write(sbdf, reg, min(4u, size), data);
>>       if ( size == 8 )
>>           vpci_write(sbdf, reg + 4, 4, data >> 32);
>> @@ -80,13 +104,48 @@ static const struct mmio_handler_ops vpci_mmio_handler = {
>>       .write = vpci_mmio_write,
>>   };
>>   
>> +/*
>> + * There are three  originators for the PCI configuration space access:
>> + * 1. The domain that owns physical host bridge: MMIO handlers are
>> + *    there so we can update vPCI register handlers with the values
>> + *    written by the hardware domain, e.g. physical view of the registers/
>> + *    configuration space.
>> + * 2. Guest access to the passed through PCI devices: we need to properly
>> + *    map virtual bus topology to the physical one, e.g. pass the configuration
>> + *    space access to the corresponding physical devices.
>> + * 3. Emulated host PCI bridge access. It doesn't exist in the physical
>> + *    topology, e.g. it can't be mapped to some physical host bridge.
>> + *    So, all access to the host bridge itself needs to be trapped and
>> + *    emulated.
>> + */
>>   static int vpci_setup_mmio_handler(struct domain *d,
>>                                      struct pci_host_bridge *bridge)
>>   {
>> -    struct pci_config_window *cfg = bridge->cfg;
>> +    struct vpci_mmio_priv *priv;
>> +
>> +    priv = xzalloc(struct vpci_mmio_priv);
>> +    if ( !priv )
>> +        return -ENOMEM;
>> +
>> +    priv->is_virt_ecam = !is_hardware_domain(d);
>>   
>> -    register_mmio_handler(d, &vpci_mmio_handler,
>> -                          cfg->phys_addr, cfg->size, NULL);
>> +    if ( is_hardware_domain(d) )
>> +    {
>> +        struct pci_config_window *cfg = bridge->cfg;
>> +
>> +        bridge->mmio_priv = priv;
>> +        register_mmio_handler(d, &vpci_mmio_handler,
>> +                              cfg->phys_addr, cfg->size,
>> +                              priv);
>> +    }
>> +    else
>> +    {
>> +        d->vpci_mmio_priv = priv;
>> +        /* Guest domains use what is programmed in their device tree. */
>> +        register_mmio_handler(d, &vpci_mmio_handler,
>> +                              GUEST_VPCI_ECAM_BASE, GUEST_VPCI_ECAM_SIZE,
>> +                              priv);
>> +    }
>>       return 0;
>>   }
>>   
>> @@ -95,13 +154,16 @@ int domain_vpci_init(struct domain *d)
>>       if ( !has_vpci(d) )
>>           return 0;
>>   
>> -    if ( is_hardware_domain(d) )
>> -        return pci_host_iterate_bridges(d, vpci_setup_mmio_handler);
>> -
>> -    /* Guest domains use what is programmed in their device tree. */
>> -    register_mmio_handler(d, &vpci_mmio_handler,
>> -                          GUEST_VPCI_ECAM_BASE, GUEST_VPCI_ECAM_SIZE, NULL);
>> +    return pci_host_iterate_bridges(d, vpci_setup_mmio_handler);
>> +}
>>   
>> +static int domain_vpci_free_cb(struct domain *d,
>> +                               struct pci_host_bridge *bridge)
>> +{
>> +    if ( is_hardware_domain(d) )
>> +        XFREE(bridge->mmio_priv);
>> +    else
>> +        XFREE(d->vpci_mmio_priv);
>>       return 0;
>>   }
>>   
>> @@ -124,6 +186,13 @@ int domain_vpci_get_num_mmio_handlers(struct domain *d)
>>       return count;
>>   }
>>   
>> +void domain_vpci_free(struct domain *d)
>> +{
>> +    if ( !has_vpci(d) )
>> +        return;
>> +
>> +    pci_host_iterate_bridges(d, domain_vpci_free_cb);
>> +}
>>   /*
>>    * Local variables:
>>    * mode: C
>> diff --git a/xen/arch/arm/vpci.h b/xen/arch/arm/vpci.h
>> index 27a2b069abd2..38e5a28c0d95 100644
>> --- a/xen/arch/arm/vpci.h
>> +++ b/xen/arch/arm/vpci.h
>> @@ -18,6 +18,7 @@
>>   #ifdef CONFIG_HAS_VPCI
>>   int domain_vpci_init(struct domain *d);
>>   int domain_vpci_get_num_mmio_handlers(struct domain *d);
>> +void domain_vpci_free(struct domain *d);
>>   #else
>>   static inline int domain_vpci_init(struct domain *d)
>>   {
>> @@ -28,6 +29,8 @@ static inline int domain_vpci_get_num_mmio_handlers(struct domain *d)
>>   {
>>       return 0;
>>   }
>> +
>> +static inline void domain_vpci_free(struct domain *d) { }
>>   #endif
>>   
>>   #endif /* __ARCH_ARM_VPCI_H__ */
>> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
>> index 4552ace855e0..579c6947cc35 100644
>> --- a/xen/drivers/passthrough/pci.c
>> +++ b/xen/drivers/passthrough/pci.c
>> @@ -890,6 +890,31 @@ int pci_remove_virtual_device(struct domain *d, const struct pci_dev *pdev)
>>       return 0;
>>   }
>>   
>> +/*
>> + * Find the physical device which is mapped to the virtual device
>> + * and translate virtual SBDF to the physical one.
>> + */
>> +bool pci_translate_virtual_device(struct vcpu *v, pci_sbdf_t *sbdf)
> Why struct vcpu, when you only need ...
>
>> +{
>> +    struct domain *d = v->domain;
> ... this? It's also not really logical for this function to take a
> struct vcpu, as the translation should be uniform within a domain.
Agree, struct domain is just enough
>
> Also - const please (as said elsewhere before, ideally wherever possible
> and sensible).
Ok
>
>> +    struct vpci_dev *vdev;
>> +    bool found = false;
>> +
>> +    pcidevs_lock();
>> +    list_for_each_entry ( vdev, &d->vdev_list, list )
>> +    {
>> +        if ( vdev->sbdf.sbdf == sbdf->sbdf )
>> +        {
>> +            /* Replace virtual SBDF with the physical one. */
>> +            *sbdf = vdev->pdev->sbdf;
>> +            found = true;
>> +            break;
>> +        }
>> +    }
> For a DomU with just one or at most a couple of devices, such a brute
> force lookup may be fine. What about Dom0 though? The physical topology
> gets split at the segment level, so maybe this would by a reasonable
> granularity here as well?

Not sure I am following why topology matters here. We are just trying to

match one SBDF (as seen by the guest) to other SBDF (physical,

as seen by Dom0), so we can proxy DomU's configuration space access

to the proper device in Dom0.

>
>> +    pcidevs_unlock();
>> +    return found;
> Nit: Blank line please ahead of the main "return" of a function.
Sure
>
>> +}
>> +
>>   /* Caller should hold the pcidevs_lock */
>>   static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus,
>>                              uint8_t devfn)
> Seeing this function in context (which patch 2 adds without any #ifdef
> around it afaics),

I believe you are talking about vpci_deassign_device here

vpci_{assign|deassign}_device seem to be not called on x86 PVH as of now,

this is true.

>   will this new function needlessly be built on x86 as
> well?

It will at the moment. But in order to avoid ifdefery I would like

to still implement it as an empty function for x86.

>   (I didn't look at other intermediate patches yet, so please
> forgive if I've missed the addition of an #ifdef.)

So, I can gate this with HAS_VPCI_GUEST_SUPPORT in patch 2

(HAS_VPCI_GUEST_SUPPORT is introduced in patch 4, so I'll move it to 2)

Does this sound good?

>
> Jan
>
Thank you,

Oleksandr
Jan Beulich Sept. 27, 2021, 1:34 p.m. UTC | #3
On 27.09.2021 14:08, Oleksandr Andrushchenko wrote:
> On 27.09.21 14:31, Jan Beulich wrote:
>> On 23.09.2021 14:55, Oleksandr Andrushchenko wrote:
>>> --- a/xen/drivers/passthrough/pci.c
>>> +++ b/xen/drivers/passthrough/pci.c
>>> @@ -890,6 +890,31 @@ int pci_remove_virtual_device(struct domain *d, const struct pci_dev *pdev)
>>>       return 0;
>>>   }
>>>   
>>> +/*
>>> + * Find the physical device which is mapped to the virtual device
>>> + * and translate virtual SBDF to the physical one.
>>> + */
>>> +bool pci_translate_virtual_device(struct vcpu *v, pci_sbdf_t *sbdf)
>> Why struct vcpu, when you only need ...
>>
>>> +{
>>> +    struct domain *d = v->domain;
>> ... this? It's also not really logical for this function to take a
>> struct vcpu, as the translation should be uniform within a domain.
> Agree, struct domain is just enough
>>
>> Also - const please (as said elsewhere before, ideally wherever possible
>> and sensible).
> Ok
>>
>>> +    struct vpci_dev *vdev;
>>> +    bool found = false;
>>> +
>>> +    pcidevs_lock();
>>> +    list_for_each_entry ( vdev, &d->vdev_list, list )
>>> +    {
>>> +        if ( vdev->sbdf.sbdf == sbdf->sbdf )
>>> +        {
>>> +            /* Replace virtual SBDF with the physical one. */
>>> +            *sbdf = vdev->pdev->sbdf;
>>> +            found = true;
>>> +            break;
>>> +        }
>>> +    }
>> For a DomU with just one or at most a couple of devices, such a brute
>> force lookup may be fine. What about Dom0 though? The physical topology
>> gets split at the segment level, so maybe this would by a reasonable
>> granularity here as well?
> 
> Not sure I am following why topology matters here. We are just trying to
> match one SBDF (as seen by the guest) to other SBDF (physical,
> as seen by Dom0), so we can proxy DomU's configuration space access
> to the proper device in Dom0.

Topology here matters only in so far as I've suggested to have separate
lists per segment, to reduce look times. Other methods of avoiding a
fully linear search are of course possible as well.

>>> +    pcidevs_unlock();
>>> +    return found;
>> Nit: Blank line please ahead of the main "return" of a function.
> Sure
>>
>>> +}
>>> +
>>>   /* Caller should hold the pcidevs_lock */
>>>   static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus,
>>>                              uint8_t devfn)
>> Seeing this function in context (which patch 2 adds without any #ifdef
>> around it afaics),
> 
> I believe you are talking about vpci_deassign_device here
> vpci_{assign|deassign}_device seem to be not called on x86 PVH as of now,
> this is true.
> 
>>   will this new function needlessly be built on x86 as
>> well?
> 
> It will at the moment. But in order to avoid ifdefery I would like
> to still implement it as an empty function for x86.
> 
>>   (I didn't look at other intermediate patches yet, so please
>> forgive if I've missed the addition of an #ifdef.)
> 
> So, I can gate this with HAS_VPCI_GUEST_SUPPORT in patch 2
> (HAS_VPCI_GUEST_SUPPORT is introduced in patch 4, so I'll move it to 2)
> Does this sound good?

Yes.

Jan
Oleksandr Andrushchenko Sept. 27, 2021, 1:43 p.m. UTC | #4
On 27.09.21 16:34, Jan Beulich wrote:
> On 27.09.2021 14:08, Oleksandr Andrushchenko wrote:
>> On 27.09.21 14:31, Jan Beulich wrote:
>>> On 23.09.2021 14:55, Oleksandr Andrushchenko wrote:
>>>> --- a/xen/drivers/passthrough/pci.c
>>>> +++ b/xen/drivers/passthrough/pci.c
>>>> @@ -890,6 +890,31 @@ int pci_remove_virtual_device(struct domain *d, const struct pci_dev *pdev)
>>>>        return 0;
>>>>    }
>>>>    
>>>> +/*
>>>> + * Find the physical device which is mapped to the virtual device
>>>> + * and translate virtual SBDF to the physical one.
>>>> + */
>>>> +bool pci_translate_virtual_device(struct vcpu *v, pci_sbdf_t *sbdf)
>>> Why struct vcpu, when you only need ...
>>>
>>>> +{
>>>> +    struct domain *d = v->domain;
>>> ... this? It's also not really logical for this function to take a
>>> struct vcpu, as the translation should be uniform within a domain.
>> Agree, struct domain is just enough
>>> Also - const please (as said elsewhere before, ideally wherever possible
>>> and sensible).
>> Ok
>>>> +    struct vpci_dev *vdev;
>>>> +    bool found = false;
>>>> +
>>>> +    pcidevs_lock();
>>>> +    list_for_each_entry ( vdev, &d->vdev_list, list )
>>>> +    {
>>>> +        if ( vdev->sbdf.sbdf == sbdf->sbdf )
>>>> +        {
>>>> +            /* Replace virtual SBDF with the physical one. */
>>>> +            *sbdf = vdev->pdev->sbdf;
>>>> +            found = true;
>>>> +            break;
>>>> +        }
>>>> +    }
>>> For a DomU with just one or at most a couple of devices, such a brute
>>> force lookup may be fine. What about Dom0 though? The physical topology
>>> gets split at the segment level, so maybe this would by a reasonable
>>> granularity here as well?
>> Not sure I am following why topology matters here. We are just trying to
>> match one SBDF (as seen by the guest) to other SBDF (physical,
>> as seen by Dom0), so we can proxy DomU's configuration space access
>> to the proper device in Dom0.
> Topology here matters only in so far as I've suggested to have separate
> lists per segment, to reduce look times. Other methods of avoiding a
> fully linear search are of course possible as well.

Ah, with that that respect then of course. But let's be realistic.

How many PCI devices are normally passed through to a guest?

I can assume this is probably less than 10 most of the time.

By assuming that the number of devices is small I see no profit,

but unneeded complexity in accounting virtual devices per segment

and performing the relevant lookup. So, I would go with a single list

and "brute force lookup" unless it is clearly seen that this needs to be

optimized.

>
>>>> +    pcidevs_unlock();
>>>> +    return found;
>>> Nit: Blank line please ahead of the main "return" of a function.
>> Sure
>>>> +}
>>>> +
>>>>    /* Caller should hold the pcidevs_lock */
>>>>    static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus,
>>>>                               uint8_t devfn)
>>> Seeing this function in context (which patch 2 adds without any #ifdef
>>> around it afaics),
>> I believe you are talking about vpci_deassign_device here
>> vpci_{assign|deassign}_device seem to be not called on x86 PVH as of now,
>> this is true.
>>
>>>    will this new function needlessly be built on x86 as
>>> well?
>> It will at the moment. But in order to avoid ifdefery I would like
>> to still implement it as an empty function for x86.
>>
>>>    (I didn't look at other intermediate patches yet, so please
>>> forgive if I've missed the addition of an #ifdef.)
>> So, I can gate this with HAS_VPCI_GUEST_SUPPORT in patch 2
>> (HAS_VPCI_GUEST_SUPPORT is introduced in patch 4, so I'll move it to 2)
>> Does this sound good?
> Yes.
I'll see how I can get rid of the code that x86 doesn't use
> Jan
>
Thank you,

Oleksandr
Jan Beulich Sept. 27, 2021, 1:51 p.m. UTC | #5
On 27.09.2021 15:43, Oleksandr Andrushchenko wrote:
> 
> On 27.09.21 16:34, Jan Beulich wrote:
>> On 27.09.2021 14:08, Oleksandr Andrushchenko wrote:
>>> On 27.09.21 14:31, Jan Beulich wrote:
>>>> On 23.09.2021 14:55, Oleksandr Andrushchenko wrote:
>>>>> --- a/xen/drivers/passthrough/pci.c
>>>>> +++ b/xen/drivers/passthrough/pci.c
>>>>> @@ -890,6 +890,31 @@ int pci_remove_virtual_device(struct domain *d, const struct pci_dev *pdev)
>>>>>        return 0;
>>>>>    }
>>>>>    
>>>>> +/*
>>>>> + * Find the physical device which is mapped to the virtual device
>>>>> + * and translate virtual SBDF to the physical one.
>>>>> + */
>>>>> +bool pci_translate_virtual_device(struct vcpu *v, pci_sbdf_t *sbdf)
>>>> Why struct vcpu, when you only need ...
>>>>
>>>>> +{
>>>>> +    struct domain *d = v->domain;
>>>> ... this? It's also not really logical for this function to take a
>>>> struct vcpu, as the translation should be uniform within a domain.
>>> Agree, struct domain is just enough
>>>> Also - const please (as said elsewhere before, ideally wherever possible
>>>> and sensible).
>>> Ok
>>>>> +    struct vpci_dev *vdev;
>>>>> +    bool found = false;
>>>>> +
>>>>> +    pcidevs_lock();
>>>>> +    list_for_each_entry ( vdev, &d->vdev_list, list )
>>>>> +    {
>>>>> +        if ( vdev->sbdf.sbdf == sbdf->sbdf )
>>>>> +        {
>>>>> +            /* Replace virtual SBDF with the physical one. */
>>>>> +            *sbdf = vdev->pdev->sbdf;
>>>>> +            found = true;
>>>>> +            break;
>>>>> +        }
>>>>> +    }
>>>> For a DomU with just one or at most a couple of devices, such a brute
>>>> force lookup may be fine. What about Dom0 though? The physical topology
>>>> gets split at the segment level, so maybe this would by a reasonable
>>>> granularity here as well?
>>> Not sure I am following why topology matters here. We are just trying to
>>> match one SBDF (as seen by the guest) to other SBDF (physical,
>>> as seen by Dom0), so we can proxy DomU's configuration space access
>>> to the proper device in Dom0.
>> Topology here matters only in so far as I've suggested to have separate
>> lists per segment, to reduce look times. Other methods of avoiding a
>> fully linear search are of course possible as well.
> 
> Ah, with that that respect then of course. But let's be realistic.
> How many PCI devices are normally passed through to a guest?
> I can assume this is probably less than 10 most of the time.
> By assuming that the number of devices is small I see no profit,
> but unneeded complexity in accounting virtual devices per segment
> and performing the relevant lookup. So, I would go with a single list
> and "brute force lookup" unless it is clearly seen that this needs to be
> optimized.


Just to repeat my initial reply: "For a DomU with just one or at most
a couple of devices, such a brute force lookup may be fine. What about
Dom0 though?" If the code uses the simpler form because it's only
going to be used for DomU, then that's fine for now. But such latent
issues will want recording - e.g. by TODO comments or at the very
least suitable pointing out in the description.

Jan
Oleksandr Andrushchenko Sept. 27, 2021, 2:04 p.m. UTC | #6
On 27.09.21 16:51, Jan Beulich wrote:
> On 27.09.2021 15:43, Oleksandr Andrushchenko wrote:
>> On 27.09.21 16:34, Jan Beulich wrote:
>>> On 27.09.2021 14:08, Oleksandr Andrushchenko wrote:
>>>> On 27.09.21 14:31, Jan Beulich wrote:
>>>>> On 23.09.2021 14:55, Oleksandr Andrushchenko wrote:
>>>>>> --- a/xen/drivers/passthrough/pci.c
>>>>>> +++ b/xen/drivers/passthrough/pci.c
>>>>>> @@ -890,6 +890,31 @@ int pci_remove_virtual_device(struct domain *d, const struct pci_dev *pdev)
>>>>>>         return 0;
>>>>>>     }
>>>>>>     
>>>>>> +/*
>>>>>> + * Find the physical device which is mapped to the virtual device
>>>>>> + * and translate virtual SBDF to the physical one.
>>>>>> + */
>>>>>> +bool pci_translate_virtual_device(struct vcpu *v, pci_sbdf_t *sbdf)
>>>>> Why struct vcpu, when you only need ...
>>>>>
>>>>>> +{
>>>>>> +    struct domain *d = v->domain;
>>>>> ... this? It's also not really logical for this function to take a
>>>>> struct vcpu, as the translation should be uniform within a domain.
>>>> Agree, struct domain is just enough
>>>>> Also - const please (as said elsewhere before, ideally wherever possible
>>>>> and sensible).
>>>> Ok
>>>>>> +    struct vpci_dev *vdev;
>>>>>> +    bool found = false;
>>>>>> +
>>>>>> +    pcidevs_lock();
>>>>>> +    list_for_each_entry ( vdev, &d->vdev_list, list )
>>>>>> +    {
>>>>>> +        if ( vdev->sbdf.sbdf == sbdf->sbdf )
>>>>>> +        {
>>>>>> +            /* Replace virtual SBDF with the physical one. */
>>>>>> +            *sbdf = vdev->pdev->sbdf;
>>>>>> +            found = true;
>>>>>> +            break;
>>>>>> +        }
>>>>>> +    }
>>>>> For a DomU with just one or at most a couple of devices, such a brute
>>>>> force lookup may be fine. What about Dom0 though? The physical topology
>>>>> gets split at the segment level, so maybe this would by a reasonable
>>>>> granularity here as well?
>>>> Not sure I am following why topology matters here. We are just trying to
>>>> match one SBDF (as seen by the guest) to other SBDF (physical,
>>>> as seen by Dom0), so we can proxy DomU's configuration space access
>>>> to the proper device in Dom0.
>>> Topology here matters only in so far as I've suggested to have separate
>>> lists per segment, to reduce look times. Other methods of avoiding a
>>> fully linear search are of course possible as well.
>> Ah, with that that respect then of course. But let's be realistic.
>> How many PCI devices are normally passed through to a guest?
>> I can assume this is probably less than 10 most of the time.
>> By assuming that the number of devices is small I see no profit,
>> but unneeded complexity in accounting virtual devices per segment
>> and performing the relevant lookup. So, I would go with a single list
>> and "brute force lookup" unless it is clearly seen that this needs to be
>> optimized.
>
> Just to repeat my initial reply: "For a DomU with just one or at most
> a couple of devices, such a brute force lookup may be fine. What about
> Dom0 though?" If the code uses the simpler form because it's only
> going to be used for DomU, then that's fine for now. But such latent
> issues will want recording - e.g. by TODO comments or at the very
> least suitable pointing out in the description.

As we do not emulate virtual bus topology for Dom0 then it is

clearly seen that the code may only have impact on DomUs.

But anyways, virtual bus topology for DomUs is emulated with

a single segment 0. We have a single list of virtual SBDFs,

again, for virtual segment 0, which maps those virtual SBDFs

to physical SBDFs. So, we go over the list of virtual devices

assigned to that guest and match the virtual SBDF in question to

its counterpart in Dom0. I can't see how this can be optimized or

needs that optimization because of the fact that Dom0 may have

multiple segments...

So, how would that comment look like?


>
> Jan
>
Thank you,

Oleksandr
Jan Beulich Sept. 27, 2021, 2:16 p.m. UTC | #7
On 27.09.2021 16:04, Oleksandr Andrushchenko wrote:
> 
> On 27.09.21 16:51, Jan Beulich wrote:
>> On 27.09.2021 15:43, Oleksandr Andrushchenko wrote:
>>> On 27.09.21 16:34, Jan Beulich wrote:
>>>> On 27.09.2021 14:08, Oleksandr Andrushchenko wrote:
>>>>> On 27.09.21 14:31, Jan Beulich wrote:
>>>>>> On 23.09.2021 14:55, Oleksandr Andrushchenko wrote:
>>>>>>> --- a/xen/drivers/passthrough/pci.c
>>>>>>> +++ b/xen/drivers/passthrough/pci.c
>>>>>>> @@ -890,6 +890,31 @@ int pci_remove_virtual_device(struct domain *d, const struct pci_dev *pdev)
>>>>>>>         return 0;
>>>>>>>     }
>>>>>>>     
>>>>>>> +/*
>>>>>>> + * Find the physical device which is mapped to the virtual device
>>>>>>> + * and translate virtual SBDF to the physical one.
>>>>>>> + */
>>>>>>> +bool pci_translate_virtual_device(struct vcpu *v, pci_sbdf_t *sbdf)
>>>>>> Why struct vcpu, when you only need ...
>>>>>>
>>>>>>> +{
>>>>>>> +    struct domain *d = v->domain;
>>>>>> ... this? It's also not really logical for this function to take a
>>>>>> struct vcpu, as the translation should be uniform within a domain.
>>>>> Agree, struct domain is just enough
>>>>>> Also - const please (as said elsewhere before, ideally wherever possible
>>>>>> and sensible).
>>>>> Ok
>>>>>>> +    struct vpci_dev *vdev;
>>>>>>> +    bool found = false;
>>>>>>> +
>>>>>>> +    pcidevs_lock();
>>>>>>> +    list_for_each_entry ( vdev, &d->vdev_list, list )
>>>>>>> +    {
>>>>>>> +        if ( vdev->sbdf.sbdf == sbdf->sbdf )
>>>>>>> +        {
>>>>>>> +            /* Replace virtual SBDF with the physical one. */
>>>>>>> +            *sbdf = vdev->pdev->sbdf;
>>>>>>> +            found = true;
>>>>>>> +            break;
>>>>>>> +        }
>>>>>>> +    }
>>>>>> For a DomU with just one or at most a couple of devices, such a brute
>>>>>> force lookup may be fine. What about Dom0 though? The physical topology
>>>>>> gets split at the segment level, so maybe this would by a reasonable
>>>>>> granularity here as well?
>>>>> Not sure I am following why topology matters here. We are just trying to
>>>>> match one SBDF (as seen by the guest) to other SBDF (physical,
>>>>> as seen by Dom0), so we can proxy DomU's configuration space access
>>>>> to the proper device in Dom0.
>>>> Topology here matters only in so far as I've suggested to have separate
>>>> lists per segment, to reduce look times. Other methods of avoiding a
>>>> fully linear search are of course possible as well.
>>> Ah, with that that respect then of course. But let's be realistic.
>>> How many PCI devices are normally passed through to a guest?
>>> I can assume this is probably less than 10 most of the time.
>>> By assuming that the number of devices is small I see no profit,
>>> but unneeded complexity in accounting virtual devices per segment
>>> and performing the relevant lookup. So, I would go with a single list
>>> and "brute force lookup" unless it is clearly seen that this needs to be
>>> optimized.
>>
>> Just to repeat my initial reply: "For a DomU with just one or at most
>> a couple of devices, such a brute force lookup may be fine. What about
>> Dom0 though?" If the code uses the simpler form because it's only
>> going to be used for DomU, then that's fine for now. But such latent
>> issues will want recording - e.g. by TODO comments or at the very
>> least suitable pointing out in the description.
> 
> As we do not emulate virtual bus topology for Dom0 then it is
> 
> clearly seen that the code may only have impact on DomUs.
> 
> But anyways, virtual bus topology for DomUs is emulated with
> 
> a single segment 0. We have a single list of virtual SBDFs,
> 
> again, for virtual segment 0, which maps those virtual SBDFs
> 
> to physical SBDFs. So, we go over the list of virtual devices
> 
> assigned to that guest and match the virtual SBDF in question to
> 
> its counterpart in Dom0. I can't see how this can be optimized or
> 
> needs that optimization because of the fact that Dom0 may have
> 
> multiple segments...
> 
> So, how would that comment look like?

Well - if the plan is that this code would never be used for Dom0,
then all is fine as is, I guess. But as you can see the Dom0 plans
on Arm wrt vPCI weren't clear to me at all.

Jan
Oleksandr Andrushchenko Sept. 27, 2021, 2:20 p.m. UTC | #8
On 27.09.21 17:16, Jan Beulich wrote:
> On 27.09.2021 16:04, Oleksandr Andrushchenko wrote:
>> On 27.09.21 16:51, Jan Beulich wrote:
>>> On 27.09.2021 15:43, Oleksandr Andrushchenko wrote:
>>>> On 27.09.21 16:34, Jan Beulich wrote:
>>>>> On 27.09.2021 14:08, Oleksandr Andrushchenko wrote:
>>>>>> On 27.09.21 14:31, Jan Beulich wrote:
>>>>>>> On 23.09.2021 14:55, Oleksandr Andrushchenko wrote:
>>>>>>>> --- a/xen/drivers/passthrough/pci.c
>>>>>>>> +++ b/xen/drivers/passthrough/pci.c
>>>>>>>> @@ -890,6 +890,31 @@ int pci_remove_virtual_device(struct domain *d, const struct pci_dev *pdev)
>>>>>>>>          return 0;
>>>>>>>>      }
>>>>>>>>      
>>>>>>>> +/*
>>>>>>>> + * Find the physical device which is mapped to the virtual device
>>>>>>>> + * and translate virtual SBDF to the physical one.
>>>>>>>> + */
>>>>>>>> +bool pci_translate_virtual_device(struct vcpu *v, pci_sbdf_t *sbdf)
>>>>>>> Why struct vcpu, when you only need ...
>>>>>>>
>>>>>>>> +{
>>>>>>>> +    struct domain *d = v->domain;
>>>>>>> ... this? It's also not really logical for this function to take a
>>>>>>> struct vcpu, as the translation should be uniform within a domain.
>>>>>> Agree, struct domain is just enough
>>>>>>> Also - const please (as said elsewhere before, ideally wherever possible
>>>>>>> and sensible).
>>>>>> Ok
>>>>>>>> +    struct vpci_dev *vdev;
>>>>>>>> +    bool found = false;
>>>>>>>> +
>>>>>>>> +    pcidevs_lock();
>>>>>>>> +    list_for_each_entry ( vdev, &d->vdev_list, list )
>>>>>>>> +    {
>>>>>>>> +        if ( vdev->sbdf.sbdf == sbdf->sbdf )
>>>>>>>> +        {
>>>>>>>> +            /* Replace virtual SBDF with the physical one. */
>>>>>>>> +            *sbdf = vdev->pdev->sbdf;
>>>>>>>> +            found = true;
>>>>>>>> +            break;
>>>>>>>> +        }
>>>>>>>> +    }
>>>>>>> For a DomU with just one or at most a couple of devices, such a brute
>>>>>>> force lookup may be fine. What about Dom0 though? The physical topology
>>>>>>> gets split at the segment level, so maybe this would by a reasonable
>>>>>>> granularity here as well?
>>>>>> Not sure I am following why topology matters here. We are just trying to
>>>>>> match one SBDF (as seen by the guest) to other SBDF (physical,
>>>>>> as seen by Dom0), so we can proxy DomU's configuration space access
>>>>>> to the proper device in Dom0.
>>>>> Topology here matters only in so far as I've suggested to have separate
>>>>> lists per segment, to reduce look times. Other methods of avoiding a
>>>>> fully linear search are of course possible as well.
>>>> Ah, with that that respect then of course. But let's be realistic.
>>>> How many PCI devices are normally passed through to a guest?
>>>> I can assume this is probably less than 10 most of the time.
>>>> By assuming that the number of devices is small I see no profit,
>>>> but unneeded complexity in accounting virtual devices per segment
>>>> and performing the relevant lookup. So, I would go with a single list
>>>> and "brute force lookup" unless it is clearly seen that this needs to be
>>>> optimized.
>>> Just to repeat my initial reply: "For a DomU with just one or at most
>>> a couple of devices, such a brute force lookup may be fine. What about
>>> Dom0 though?" If the code uses the simpler form because it's only
>>> going to be used for DomU, then that's fine for now. But such latent
>>> issues will want recording - e.g. by TODO comments or at the very
>>> least suitable pointing out in the description.
>> As we do not emulate virtual bus topology for Dom0 then it is
>>
>> clearly seen that the code may only have impact on DomUs.
>>
>> But anyways, virtual bus topology for DomUs is emulated with
>>
>> a single segment 0. We have a single list of virtual SBDFs,
>>
>> again, for virtual segment 0, which maps those virtual SBDFs
>>
>> to physical SBDFs. So, we go over the list of virtual devices
>>
>> assigned to that guest and match the virtual SBDF in question to
>>
>> its counterpart in Dom0. I can't see how this can be optimized or
>>
>> needs that optimization because of the fact that Dom0 may have
>>
>> multiple segments...
>>
>> So, how would that comment look like?
> Well - if the plan is that this code would never be used for Dom0,
> then all is fine as is, I guess. But as you can see the Dom0 plans
> on Arm wrt vPCI weren't clear to me at all.

It is all new to all of us ;) No problem.


> Jan
>
Thank you,

Oleksandr
diff mbox series

Patch

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index c7b25bc70439..c0ad6ad682d2 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -797,6 +797,7 @@  void arch_domain_destroy(struct domain *d)
                        get_order_from_bytes(d->arch.efi_acpi_len));
 #endif
     domain_io_free(d);
+    domain_vpci_free(d);
 }
 
 void arch_domain_shutdown(struct domain *d)
diff --git a/xen/arch/arm/vpci.c b/xen/arch/arm/vpci.c
index 14947e975d69..012f958960d1 100644
--- a/xen/arch/arm/vpci.c
+++ b/xen/arch/arm/vpci.c
@@ -17,6 +17,14 @@ 
 
 #define REGISTER_OFFSET(addr)  ( (addr) & 0x00000fff)
 
+struct vpci_mmio_priv {
+    /*
+     * Set to true if the MMIO handlers were set up for the emulated
+     * ECAM host PCI bridge.
+     */
+    bool is_virt_ecam;
+};
+
 /* Do some sanity checks. */
 static bool vpci_mmio_access_allowed(unsigned int reg, unsigned int len)
 {
@@ -38,6 +46,7 @@  static int vpci_mmio_read(struct vcpu *v, mmio_info_t *info,
     pci_sbdf_t sbdf;
     unsigned long data = ~0UL;
     unsigned int size = 1U << info->dabt.size;
+    struct vpci_mmio_priv *priv = (struct vpci_mmio_priv *)p;
 
     sbdf.sbdf = MMCFG_BDF(info->gpa);
     reg = REGISTER_OFFSET(info->gpa);
@@ -45,6 +54,13 @@  static int vpci_mmio_read(struct vcpu *v, mmio_info_t *info,
     if ( !vpci_mmio_access_allowed(reg, size) )
         return 0;
 
+    /*
+     * For the passed through devices we need to map their virtual SBDF
+     * to the physical PCI device being passed through.
+     */
+    if ( priv->is_virt_ecam && !pci_translate_virtual_device(v, &sbdf) )
+            return 1;
+
     data = vpci_read(sbdf, reg, min(4u, size));
     if ( size == 8 )
         data |= (uint64_t)vpci_read(sbdf, reg + 4, 4) << 32;
@@ -61,6 +77,7 @@  static int vpci_mmio_write(struct vcpu *v, mmio_info_t *info,
     pci_sbdf_t sbdf;
     unsigned long data = r;
     unsigned int size = 1U << info->dabt.size;
+    struct vpci_mmio_priv *priv = (struct vpci_mmio_priv *)p;
 
     sbdf.sbdf = MMCFG_BDF(info->gpa);
     reg = REGISTER_OFFSET(info->gpa);
@@ -68,6 +85,13 @@  static int vpci_mmio_write(struct vcpu *v, mmio_info_t *info,
     if ( !vpci_mmio_access_allowed(reg, size) )
         return 0;
 
+    /*
+     * For the passed through devices we need to map their virtual SBDF
+     * to the physical PCI device being passed through.
+     */
+    if ( priv->is_virt_ecam && !pci_translate_virtual_device(v, &sbdf) )
+            return 1;
+
     vpci_write(sbdf, reg, min(4u, size), data);
     if ( size == 8 )
         vpci_write(sbdf, reg + 4, 4, data >> 32);
@@ -80,13 +104,48 @@  static const struct mmio_handler_ops vpci_mmio_handler = {
     .write = vpci_mmio_write,
 };
 
+/*
+ * There are three  originators for the PCI configuration space access:
+ * 1. The domain that owns physical host bridge: MMIO handlers are
+ *    there so we can update vPCI register handlers with the values
+ *    written by the hardware domain, e.g. physical view of the registers/
+ *    configuration space.
+ * 2. Guest access to the passed through PCI devices: we need to properly
+ *    map virtual bus topology to the physical one, e.g. pass the configuration
+ *    space access to the corresponding physical devices.
+ * 3. Emulated host PCI bridge access. It doesn't exist in the physical
+ *    topology, e.g. it can't be mapped to some physical host bridge.
+ *    So, all access to the host bridge itself needs to be trapped and
+ *    emulated.
+ */
 static int vpci_setup_mmio_handler(struct domain *d,
                                    struct pci_host_bridge *bridge)
 {
-    struct pci_config_window *cfg = bridge->cfg;
+    struct vpci_mmio_priv *priv;
+
+    priv = xzalloc(struct vpci_mmio_priv);
+    if ( !priv )
+        return -ENOMEM;
+
+    priv->is_virt_ecam = !is_hardware_domain(d);
 
-    register_mmio_handler(d, &vpci_mmio_handler,
-                          cfg->phys_addr, cfg->size, NULL);
+    if ( is_hardware_domain(d) )
+    {
+        struct pci_config_window *cfg = bridge->cfg;
+
+        bridge->mmio_priv = priv;
+        register_mmio_handler(d, &vpci_mmio_handler,
+                              cfg->phys_addr, cfg->size,
+                              priv);
+    }
+    else
+    {
+        d->vpci_mmio_priv = priv;
+        /* Guest domains use what is programmed in their device tree. */
+        register_mmio_handler(d, &vpci_mmio_handler,
+                              GUEST_VPCI_ECAM_BASE, GUEST_VPCI_ECAM_SIZE,
+                              priv);
+    }
     return 0;
 }
 
@@ -95,13 +154,16 @@  int domain_vpci_init(struct domain *d)
     if ( !has_vpci(d) )
         return 0;
 
-    if ( is_hardware_domain(d) )
-        return pci_host_iterate_bridges(d, vpci_setup_mmio_handler);
-
-    /* Guest domains use what is programmed in their device tree. */
-    register_mmio_handler(d, &vpci_mmio_handler,
-                          GUEST_VPCI_ECAM_BASE, GUEST_VPCI_ECAM_SIZE, NULL);
+    return pci_host_iterate_bridges(d, vpci_setup_mmio_handler);
+}
 
+static int domain_vpci_free_cb(struct domain *d,
+                               struct pci_host_bridge *bridge)
+{
+    if ( is_hardware_domain(d) )
+        XFREE(bridge->mmio_priv);
+    else
+        XFREE(d->vpci_mmio_priv);
     return 0;
 }
 
@@ -124,6 +186,13 @@  int domain_vpci_get_num_mmio_handlers(struct domain *d)
     return count;
 }
 
+void domain_vpci_free(struct domain *d)
+{
+    if ( !has_vpci(d) )
+        return;
+
+    pci_host_iterate_bridges(d, domain_vpci_free_cb);
+}
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/arm/vpci.h b/xen/arch/arm/vpci.h
index 27a2b069abd2..38e5a28c0d95 100644
--- a/xen/arch/arm/vpci.h
+++ b/xen/arch/arm/vpci.h
@@ -18,6 +18,7 @@ 
 #ifdef CONFIG_HAS_VPCI
 int domain_vpci_init(struct domain *d);
 int domain_vpci_get_num_mmio_handlers(struct domain *d);
+void domain_vpci_free(struct domain *d);
 #else
 static inline int domain_vpci_init(struct domain *d)
 {
@@ -28,6 +29,8 @@  static inline int domain_vpci_get_num_mmio_handlers(struct domain *d)
 {
     return 0;
 }
+
+static inline void domain_vpci_free(struct domain *d) { }
 #endif
 
 #endif /* __ARCH_ARM_VPCI_H__ */
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 4552ace855e0..579c6947cc35 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -890,6 +890,31 @@  int pci_remove_virtual_device(struct domain *d, const struct pci_dev *pdev)
     return 0;
 }
 
+/*
+ * Find the physical device which is mapped to the virtual device
+ * and translate virtual SBDF to the physical one.
+ */
+bool pci_translate_virtual_device(struct vcpu *v, pci_sbdf_t *sbdf)
+{
+    struct domain *d = v->domain;
+    struct vpci_dev *vdev;
+    bool found = false;
+
+    pcidevs_lock();
+    list_for_each_entry ( vdev, &d->vdev_list, list )
+    {
+        if ( vdev->sbdf.sbdf == sbdf->sbdf )
+        {
+            /* Replace virtual SBDF with the physical one. */
+            *sbdf = vdev->pdev->sbdf;
+            found = true;
+            break;
+        }
+    }
+    pcidevs_unlock();
+    return found;
+}
+
 /* Caller should hold the pcidevs_lock */
 static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus,
                            uint8_t devfn)
diff --git a/xen/include/asm-arm/pci.h b/xen/include/asm-arm/pci.h
index b81f66e813ef..983a63be7572 100644
--- a/xen/include/asm-arm/pci.h
+++ b/xen/include/asm-arm/pci.h
@@ -72,6 +72,7 @@  struct pci_host_bridge {
     struct pci_config_window* cfg;   /* Pointer to the bridge config window */
     void *sysdata;                   /* Pointer to the config space window*/
     const struct pci_ops *ops;
+    void *mmio_priv;                 /* MMIO handler's private data. */
 };
 
 struct pci_ops {
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 35ae1d093921..0017de19e7c8 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -191,6 +191,7 @@  struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn);
 struct pci_dev *pci_get_pdev_by_domain(const struct domain *, int seg,
                                        int bus, int devfn);
 void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
+bool pci_translate_virtual_device(struct vcpu *v, pci_sbdf_t *sbdf);
 
 uint8_t pci_conf_read8(pci_sbdf_t sbdf, unsigned int reg);
 uint16_t pci_conf_read16(pci_sbdf_t sbdf, unsigned int reg);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index d304c7ebe766..0626820545cc 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -450,6 +450,8 @@  struct domain
      * to assign a unique SBDF to a passed through virtual PCI device.
      */
     int vpci_dev_next;
+    /* Virtual PCI MMIO handler's private data. */
+    void *vpci_mmio_priv;
 #endif
 
 #ifdef CONFIG_HAS_PASSTHROUGH