diff mbox

[v2,06/27] ARM: GICv3 ITS: introduce device mapping

Message ID 20170316112030.20419-7-andre.przywara@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andre Przywara March 16, 2017, 11:20 a.m. UTC
The ITS uses device IDs to map LPIs to a device. Dom0 will later use
those IDs, which we directly pass on to the host.
For this we have to map each device that Dom0 may request to a host
ITS device with the same identifier.
Allocate the respective memory and enter each device into an rbtree to
later be able to iterate over it or to easily teardown guests.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 xen/arch/arm/gic-v3-its.c        | 207 +++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/vgic-v3.c           |   3 +
 xen/include/asm-arm/domain.h     |   3 +
 xen/include/asm-arm/gic_v3_its.h |  18 ++++
 4 files changed, 231 insertions(+)

Comments

Julien Grall March 22, 2017, 5:29 p.m. UTC | #1
Hi Andre,

On 16/03/17 11:20, Andre Przywara wrote:
> The ITS uses device IDs to map LPIs to a device. Dom0 will later use
> those IDs, which we directly pass on to the host.
> For this we have to map each device that Dom0 may request to a host
> ITS device with the same identifier.
> Allocate the respective memory and enter each device into an rbtree to
> later be able to iterate over it or to easily teardown guests.
>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  xen/arch/arm/gic-v3-its.c        | 207 +++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/vgic-v3.c           |   3 +
>  xen/include/asm-arm/domain.h     |   3 +
>  xen/include/asm-arm/gic_v3_its.h |  18 ++++
>  4 files changed, 231 insertions(+)
>
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index 5c11b0d..60b15b5 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -21,6 +21,8 @@
>  #include <xen/lib.h>
>  #include <xen/delay.h>
>  #include <xen/mm.h>
> +#include <xen/rbtree.h>
> +#include <xen/sched.h>
>  #include <xen/sizes.h>
>  #include <asm/gic.h>
>  #include <asm/gic_v3_defs.h>
> @@ -32,6 +34,17 @@
>
>  LIST_HEAD(host_its_list);
>
> +struct its_devices {
> +    struct rb_node rbnode;
> +    struct host_its *hw_its;
> +    void *itt_addr;
> +    paddr_t guest_doorbell;

I think it would be worth to explain in the commit message why you need 
the guest_doorbell in the struct its_devices and how you plan to use it.

> +    uint32_t host_devid;
> +    uint32_t guest_devid;
> +    uint32_t eventids;
> +    uint32_t *host_lpis;
> +};
> +
>  bool gicv3_its_host_has_its(void)
>  {
>      return !list_empty(&host_its_list);
> @@ -149,6 +162,24 @@ static int its_send_cmd_mapc(struct host_its *its, uint32_t collection_id,
>      return its_send_command(its, cmd);
>  }
>
> +static int its_send_cmd_mapd(struct host_its *its, uint32_t deviceid,
> +                             uint8_t size_bits, paddr_t itt_addr, bool valid)
> +{
> +    uint64_t cmd[4];
> +
> +    if ( valid )
> +    {
> +        ASSERT(size_bits < 32);

It would be better if you do the check against the real number in 
hardware (i.e GITS_TYPER.ID_bits).


> +        ASSERT(!(itt_addr & ~GENMASK(51, 8)));
> +    }
> +    cmd[0] = GITS_CMD_MAPD | ((uint64_t)deviceid << 32);
> +    cmd[1] = valid ? size_bits : 0x00;

This is really confusing. The check was not on the previous version. So 
why do you need that?

Also, it would have been better to hide the "size - 1" in the helper 
avoiding to really on the caller to do the right thing.

> +    cmd[2] = valid ? (itt_addr | GITS_VALID_BIT) : 0x00;

Ditto about "valid? ...".

[...]

> +static struct host_its *gicv3_its_find_by_doorbell(paddr_t doorbell_address)
> +{
> +    struct host_its *hw_its;
> +
> +    list_for_each_entry(hw_its, &host_its_list, entry)
> +    {
> +        if ( hw_its->addr + ITS_DOORBELL_OFFSET == doorbell_address )

Why not storing the ITS address rather than the doorbell to avoid this 
check?


[...]

> +int gicv3_its_map_guest_device(struct domain *d,
> +                               paddr_t host_doorbell, uint32_t host_devid,
> +                               paddr_t guest_doorbell, uint32_t guest_devid,
> +                               uint32_t nr_events, bool valid)
> +{
> +    void *itt_addr = NULL;
> +    struct host_its *hw_its;
> +    struct its_devices *dev = NULL, *temp;
> +    struct rb_node **new = &d->arch.vgic.its_devices.rb_node, *parent = NULL;
> +    int ret = -ENOENT;
> +
> +    hw_its = gicv3_its_find_by_doorbell(host_doorbell);
> +    if ( !hw_its )
> +        return ret;
> +
> +    /* check for already existing mappings */
> +    spin_lock(&d->arch.vgic.its_devices_lock);
> +    while ( *new )
> +    {
> +        temp = rb_entry(*new, struct its_devices, rbnode);
> +
> +        parent = *new;
> +        if ( !compare_its_guest_devices(temp, guest_doorbell, guest_devid) )
> +        {
> +            if ( !valid )
> +                rb_erase(&temp->rbnode, &d->arch.vgic.its_devices);
> +
> +            spin_unlock(&d->arch.vgic.its_devices_lock);
> +
> +            if ( valid )

Again, a printk(XENLOG_GUEST...) here would be useful to know which host 
DeviceID was associated to the guest DeviceID.

> +                return -EBUSY;
> +
> +            return remove_mapped_guest_device(temp);

Just above you removed the device from the RB-tree but this function may 
fail and never free the memory. This means that memory will be leaked 
leading to a potential denial of service.

> +        }
> +
> +        if ( compare_its_guest_devices(temp, guest_doorbell, guest_devid) > 0 )
> +            new = &((*new)->rb_left);
> +        else
> +            new = &((*new)->rb_right);
> +    }
> +
> +    if ( !valid )
> +        goto out_unlock;
> +
> +    ret = -ENOMEM;
> +
> +    /* An Interrupt Translation Table needs to be 256-byte aligned. */
> +    itt_addr = _xzalloc(nr_events * hw_its->itte_size, 256);
> +    if ( !itt_addr )
> +        goto out_unlock;
> +
> +    dev = xzalloc(struct its_devices);
> +    if ( !dev )
> +        goto out_unlock;
> +
> +    ret = its_send_cmd_mapd(hw_its, host_devid, fls(nr_events - 1) - 1,

I don't understand why nr_events - 1. Can you explain?

[...]

> +/* Removing any connections a domain had to any ITS in the system. */
> +void gicv3_its_unmap_all_devices(struct domain *d)
> +{
> +    struct rb_node *victim;
> +    struct its_devices *dev;
> +
> +    /*
> +     * This is an easily readable, yet inefficient implementation.
> +     * It uses the provided iteration wrapper and erases each node, which
> +     * possibly triggers rebalancing.
> +     * This seems overkill since we are going to abolish the whole tree, but
> +     * avoids an open-coded re-implementation of the traversal functions with
> +     * some recursive function calls.
> +     * Performance does not matter here, since we are destroying a domain.

Again, this is slightly untrue. Performance matter when destroying a 
domain as Xen cannot be preempted. So if it takes too long, you will 
have an impact on the overall system.

However, I think it would be fair to assume that all device will be 
deassigned before the ITS is destroyed. So I would just drop this 
function. Note that we have the same assumption in the SMMU driver.

> +     */
> +restart:
> +    spin_lock(&d->arch.vgic.its_devices_lock);
> +    if ( (victim = rb_first(&d->arch.vgic.its_devices)) )
> +    {
> +        dev = rb_entry(victim, struct its_devices, rbnode);
> +        rb_erase(victim, &d->arch.vgic.its_devices);
> +
> +        spin_unlock(&d->arch.vgic.its_devices_lock);
> +
> +        remove_mapped_guest_device(dev);
> +
> +        goto restart;
> +    }
> +
> +    spin_unlock(&d->arch.vgic.its_devices_lock);
> +}
> +
>  /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
>  void gicv3_its_dt_init(const struct dt_device_node *node)
>  {
> @@ -455,6 +661,7 @@ void gicv3_its_dt_init(const struct dt_device_node *node)
>          its_data->addr = addr;
>          its_data->size = size;
>          its_data->dt_node = its;
> +        spin_lock_init(&its_data->cmd_lock);

This should be in patch #5.

>
>          printk("GICv3: Found ITS @0x%lx\n", addr);
>
> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
> index d61479d..1fadb00 100644
> --- a/xen/arch/arm/vgic-v3.c
> +++ b/xen/arch/arm/vgic-v3.c
> @@ -1450,6 +1450,9 @@ static int vgic_v3_domain_init(struct domain *d)
>      d->arch.vgic.nr_regions = rdist_count;
>      d->arch.vgic.rdist_regions = rdist_regions;
>
> +    spin_lock_init(&d->arch.vgic.its_devices_lock);
> +    d->arch.vgic.its_devices = RB_ROOT;

Again, the placement of those 2 lines are likely wrong. This should 
belong to the vITS and not the vgic-v3.

I think it would make sense to get a patch that introduces a skeleton 
for the vITS before this patch and start plumbing through.

> +
>      /*
>       * Domain 0 gets the hardware address.
>       * Guests get the virtual platform layout.

Cheers,
Stefano Stabellini March 22, 2017, 10:45 p.m. UTC | #2
On Thu, 16 Mar 2017, Andre Przywara wrote:
> The ITS uses device IDs to map LPIs to a device. Dom0 will later use
> those IDs, which we directly pass on to the host.
> For this we have to map each device that Dom0 may request to a host
> ITS device with the same identifier.
> Allocate the respective memory and enter each device into an rbtree to
> later be able to iterate over it or to easily teardown guests.
> 
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  xen/arch/arm/gic-v3-its.c        | 207 +++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/vgic-v3.c           |   3 +
>  xen/include/asm-arm/domain.h     |   3 +
>  xen/include/asm-arm/gic_v3_its.h |  18 ++++
>  4 files changed, 231 insertions(+)
> 
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index 5c11b0d..60b15b5 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -21,6 +21,8 @@
>  #include <xen/lib.h>
>  #include <xen/delay.h>
>  #include <xen/mm.h>
> +#include <xen/rbtree.h>
> +#include <xen/sched.h>
>  #include <xen/sizes.h>
>  #include <asm/gic.h>
>  #include <asm/gic_v3_defs.h>
> @@ -32,6 +34,17 @@
>  
>  LIST_HEAD(host_its_list);
>  
> +struct its_devices {
> +    struct rb_node rbnode;
> +    struct host_its *hw_its;
> +    void *itt_addr;
> +    paddr_t guest_doorbell;
> +    uint32_t host_devid;
> +    uint32_t guest_devid;
> +    uint32_t eventids;
> +    uint32_t *host_lpis;
> +};
> +
>  bool gicv3_its_host_has_its(void)
>  {
>      return !list_empty(&host_its_list);
> @@ -149,6 +162,24 @@ static int its_send_cmd_mapc(struct host_its *its, uint32_t collection_id,
>      return its_send_command(its, cmd);
>  }
>  
> +static int its_send_cmd_mapd(struct host_its *its, uint32_t deviceid,
> +                             uint8_t size_bits, paddr_t itt_addr, bool valid)
> +{
> +    uint64_t cmd[4];
> +
> +    if ( valid )
> +    {
> +        ASSERT(size_bits < 32);
> +        ASSERT(!(itt_addr & ~GENMASK(51, 8)));
> +    }
> +    cmd[0] = GITS_CMD_MAPD | ((uint64_t)deviceid << 32);
> +    cmd[1] = valid ? size_bits : 0x00;
> +    cmd[2] = valid ? (itt_addr | GITS_VALID_BIT) : 0x00;
> +    cmd[3] = 0x00;
> +
> +    return its_send_command(its, cmd);
> +}
> +
>  /* Set up the (1:1) collection mapping for the given host CPU. */
>  int gicv3_its_setup_collection(unsigned int cpu)
>  {
> @@ -379,6 +410,7 @@ static int gicv3_its_init_single_its(struct host_its *hw_its)
>      devid_bits = min(devid_bits, max_its_device_bits);
>      if ( reg & GITS_TYPER_PTA )
>          hw_its->flags |= HOST_ITS_USES_PTA;
> +    hw_its->itte_size = GITS_TYPER_ITT_SIZE(reg);
>  
>      for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
>      {
> @@ -428,6 +460,180 @@ int gicv3_its_init(void)
>      return 0;
>  }
>  
> +static int remove_mapped_guest_device(struct its_devices *dev)
> +{
> +    int ret;
> +
> +    if ( dev->hw_its )
> +    {
> +        int ret = its_send_cmd_mapd(dev->hw_its, dev->host_devid, 0, 0, false);
> +        if ( ret )
> +            return ret;
> +    }
> +
> +    ret = gicv3_its_wait_commands(dev->hw_its);
> +    if ( ret )
> +        return ret;
> +
> +    xfree(dev->itt_addr);
> +    xfree(dev);
> +
> +    return 0;
> +}
> +
> +static struct host_its *gicv3_its_find_by_doorbell(paddr_t doorbell_address)
> +{
> +    struct host_its *hw_its;
> +
> +    list_for_each_entry(hw_its, &host_its_list, entry)

Does this need to take a spinlock to protect host_its_list? I guess not
because the list is not modified after boot?


> +    {
> +        if ( hw_its->addr + ITS_DOORBELL_OFFSET == doorbell_address )
> +            return hw_its;
> +    }
> +
> +    return NULL;
> +}
> +
> +static int compare_its_guest_devices(struct its_devices *dev,
> +                                     paddr_t doorbell, uint32_t devid)
> +{
> +    if ( dev->guest_doorbell < doorbell )
> +        return -1;
> +
> +    if ( dev->guest_doorbell > doorbell )
> +        return 1;
> +
> +    if ( dev->guest_devid < devid )
> +        return -1;
> +
> +    if ( dev->guest_devid > devid )
> +        return 1;
> +
> +    return 0;
> +}
> +
> +/*
> + * Map a hardware device, identified by a certain host ITS and its device ID
> + * to domain d, a guest ITS (identified by its doorbell address) and device ID.
> + * Also provide the number of events (MSIs) needed for that device.
> + * This does not check if this particular hardware device is already mapped
> + * at another domain, it is expected that this would be done by the caller.
> + */
> +int gicv3_its_map_guest_device(struct domain *d,
> +                               paddr_t host_doorbell, uint32_t host_devid,
> +                               paddr_t guest_doorbell, uint32_t guest_devid,
> +                               uint32_t nr_events, bool valid)
> +{
> +    void *itt_addr = NULL;
> +    struct host_its *hw_its;
> +    struct its_devices *dev = NULL, *temp;
> +    struct rb_node **new = &d->arch.vgic.its_devices.rb_node, *parent = NULL;
> +    int ret = -ENOENT;
> +
> +    hw_its = gicv3_its_find_by_doorbell(host_doorbell);
> +    if ( !hw_its )
> +        return ret;
> +
> +    /* check for already existing mappings */
> +    spin_lock(&d->arch.vgic.its_devices_lock);
> +    while ( *new )
> +    {
> +        temp = rb_entry(*new, struct its_devices, rbnode);
> +
> +        parent = *new;
> +        if ( !compare_its_guest_devices(temp, guest_doorbell, guest_devid) )
> +        {
> +            if ( !valid )
> +                rb_erase(&temp->rbnode, &d->arch.vgic.its_devices);
> +
> +            spin_unlock(&d->arch.vgic.its_devices_lock);
> +
> +            if ( valid )
> +                return -EBUSY;
> +
> +            return remove_mapped_guest_device(temp);
> +        }
> +

Plese don't run compare_its_guest_devices twice with the same arguments,
just store the return value.


> +        if ( compare_its_guest_devices(temp, guest_doorbell, guest_devid) > 0 )
> +            new = &((*new)->rb_left);
> +        else
> +            new = &((*new)->rb_right);
> +    }
> +
> +    if ( !valid )
> +        goto out_unlock;
> +
> +    ret = -ENOMEM;
> +
> +    /* An Interrupt Translation Table needs to be 256-byte aligned. */
> +    itt_addr = _xzalloc(nr_events * hw_its->itte_size, 256);
> +    if ( !itt_addr )
> +        goto out_unlock;
> +
> +    dev = xzalloc(struct its_devices);
> +    if ( !dev )
> +        goto out_unlock;
> +
> +    ret = its_send_cmd_mapd(hw_its, host_devid, fls(nr_events - 1) - 1,
> +                            virt_to_maddr(itt_addr), true);
> +    if ( ret )
> +        goto out_unlock;
> +
> +    dev->itt_addr = itt_addr;
> +    dev->hw_its = hw_its;
> +    dev->guest_doorbell = guest_doorbell;
> +    dev->guest_devid = guest_devid;
> +    dev->host_devid = host_devid;
> +    dev->eventids = nr_events;
> +
> +    rb_link_node(&dev->rbnode, parent, new);
> +    rb_insert_color(&dev->rbnode, &d->arch.vgic.its_devices);
> +
> +    spin_unlock(&d->arch.vgic.its_devices_lock);
> +
> +    return 0;
> +
> +out_unlock:
> +    spin_unlock(&d->arch.vgic.its_devices_lock);
> +    if ( dev )
> +        xfree(dev->host_lpis);

Where is host_lpis allocated? Why is it freed here?


> +    xfree(itt_addr);
> +    xfree(dev);
> +    return ret;
> +}
> +
> +/* Removing any connections a domain had to any ITS in the system. */
> +void gicv3_its_unmap_all_devices(struct domain *d)
> +{
> +    struct rb_node *victim;
> +    struct its_devices *dev;
> +
> +    /*
> +     * This is an easily readable, yet inefficient implementation.
> +     * It uses the provided iteration wrapper and erases each node, which
> +     * possibly triggers rebalancing.
> +     * This seems overkill since we are going to abolish the whole tree, but
> +     * avoids an open-coded re-implementation of the traversal functions with
> +     * some recursive function calls.
> +     * Performance does not matter here, since we are destroying a domain.
> +     */
> +restart:
> +    spin_lock(&d->arch.vgic.its_devices_lock);
> +    if ( (victim = rb_first(&d->arch.vgic.its_devices)) )
> +    {
> +        dev = rb_entry(victim, struct its_devices, rbnode);
> +        rb_erase(victim, &d->arch.vgic.its_devices);
> +
> +        spin_unlock(&d->arch.vgic.its_devices_lock);
> +
> +        remove_mapped_guest_device(dev);
> +
> +        goto restart;
> +    }

There is no need to use goto here, a simple while loop should suffice.


> +    spin_unlock(&d->arch.vgic.its_devices_lock);
> +}
> +
>  /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
>  void gicv3_its_dt_init(const struct dt_device_node *node)
>  {
> @@ -455,6 +661,7 @@ void gicv3_its_dt_init(const struct dt_device_node *node)
>          its_data->addr = addr;
>          its_data->size = size;
>          its_data->dt_node = its;
> +        spin_lock_init(&its_data->cmd_lock);

This change should be in the previous patch


>          printk("GICv3: Found ITS @0x%lx\n", addr);
>  
> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
> index d61479d..1fadb00 100644
> --- a/xen/arch/arm/vgic-v3.c
> +++ b/xen/arch/arm/vgic-v3.c
> @@ -1450,6 +1450,9 @@ static int vgic_v3_domain_init(struct domain *d)
>      d->arch.vgic.nr_regions = rdist_count;
>      d->arch.vgic.rdist_regions = rdist_regions;
>  
> +    spin_lock_init(&d->arch.vgic.its_devices_lock);
> +    d->arch.vgic.its_devices = RB_ROOT;
> +
>      /*
>       * Domain 0 gets the hardware address.
>       * Guests get the virtual platform layout.
> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> index 2d6fbb1..00b9c1a 100644
> --- a/xen/include/asm-arm/domain.h
> +++ b/xen/include/asm-arm/domain.h
> @@ -11,6 +11,7 @@
>  #include <asm/gic.h>
>  #include <public/hvm/params.h>
>  #include <xen/serial.h>
> +#include <xen/rbtree.h>
>  
>  struct hvm_domain
>  {
> @@ -109,6 +110,8 @@ struct arch_domain
>          } *rdist_regions;
>          int nr_regions;                     /* Number of rdist regions */
>          uint32_t rdist_stride;              /* Re-Distributor stride */
> +        struct rb_root its_devices;         /* devices mapped to an ITS */
> +        spinlock_t its_devices_lock;        /* protects the its_devices tree */
>  #endif
>      } vgic;
>  
> diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
> index 8b493fb..3421ea0 100644
> --- a/xen/include/asm-arm/gic_v3_its.h
> +++ b/xen/include/asm-arm/gic_v3_its.h
> @@ -48,6 +48,10 @@
>  #define GITS_TYPER_DEVICE_ID_BITS(r)    (((r & GITS_TYPER_DEVIDS_MASK) >> \
>                                                 GITS_TYPER_DEVIDS_SHIFT) + 1)
>  #define GITS_TYPER_IDBITS_SHIFT         8
> +#define GITS_TYPER_ITT_SIZE_SHIFT       4
> +#define GITS_TYPER_ITT_SIZE_MASK        (0xfUL << GITS_TYPER_ITT_SIZE_SHIFT)
> +#define GITS_TYPER_ITT_SIZE(r)          ((((r) & GITS_TYPER_ITT_SIZE_MASK) >> \
> +                                                GITS_TYPER_ITT_SIZE_SHIFT) + 1)
>  
>  #define GITS_IIDR_VALUE                 0x34c
>  
> @@ -94,7 +98,10 @@
>  #define GITS_CMD_MOVALL                 0x0e
>  #define GITS_CMD_DISCARD                0x0f
>  
> +#define ITS_DOORBELL_OFFSET             0x10040
> +
>  #include <xen/device_tree.h>
> +#include <xen/rbtree.h>
>  
>  #define HOST_ITS_FLUSH_CMD_QUEUE        (1U << 0)
>  #define HOST_ITS_USES_PTA               (1U << 1)
> @@ -108,6 +115,7 @@ struct host_its {
>      void __iomem *its_base;
>      spinlock_t cmd_lock;
>      void *cmd_buf;
> +    unsigned int itte_size;
>      unsigned int flags;
>  };

I would move itte_size and its initialization to the patch that
introduced struct host_its.


> @@ -134,6 +142,16 @@ uint64_t gicv3_get_redist_address(unsigned int cpu, bool use_pta);
>  /* Map a collection for this host CPU to each host ITS. */
>  int gicv3_its_setup_collection(unsigned int cpu);
>  
> +/*
> + * Map a device on the host by allocating an ITT on the host (ITS).
> + * "nr_event" specifies how many events (interrupts) this device will need.
> + * Setting "valid" to false deallocates the device.
> + */
> +int gicv3_its_map_guest_device(struct domain *d,
> +                               paddr_t host_doorbell, uint32_t host_devid,
> +                               paddr_t guest_doorbell, uint32_t guest_devid,
> +                               uint32_t nr_events, bool valid);
> +
>  #else
>  
>  static LIST_HEAD(host_its_list);
> -- 
> 2.9.0
>
Vijay Kilari March 30, 2017, 11:17 a.m. UTC | #3
Hi Andre,

On Thu, Mar 16, 2017 at 4:50 PM, Andre Przywara <andre.przywara@arm.com> wrote:
> The ITS uses device IDs to map LPIs to a device. Dom0 will later use
> those IDs, which we directly pass on to the host.
> For this we have to map each device that Dom0 may request to a host
> ITS device with the same identifier.
> Allocate the respective memory and enter each device into an rbtree to
> later be able to iterate over it or to easily teardown guests.
>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  xen/arch/arm/gic-v3-its.c        | 207 +++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/vgic-v3.c           |   3 +
>  xen/include/asm-arm/domain.h     |   3 +
>  xen/include/asm-arm/gic_v3_its.h |  18 ++++
>  4 files changed, 231 insertions(+)
>
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index 5c11b0d..60b15b5 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -21,6 +21,8 @@
>  #include <xen/lib.h>
>  #include <xen/delay.h>
>  #include <xen/mm.h>
> +#include <xen/rbtree.h>
> +#include <xen/sched.h>
>  #include <xen/sizes.h>
>  #include <asm/gic.h>
>  #include <asm/gic_v3_defs.h>
> @@ -32,6 +34,17 @@
>
>  LIST_HEAD(host_its_list);
>
> +struct its_devices {
> +    struct rb_node rbnode;
> +    struct host_its *hw_its;
> +    void *itt_addr;
> +    paddr_t guest_doorbell;
> +    uint32_t host_devid;
> +    uint32_t guest_devid;
> +    uint32_t eventids;
> +    uint32_t *host_lpis;
> +};
> +
>  bool gicv3_its_host_has_its(void)
>  {
>      return !list_empty(&host_its_list);
> @@ -149,6 +162,24 @@ static int its_send_cmd_mapc(struct host_its *its, uint32_t collection_id,
>      return its_send_command(its, cmd);
>  }
>
> +static int its_send_cmd_mapd(struct host_its *its, uint32_t deviceid,
> +                             uint8_t size_bits, paddr_t itt_addr, bool valid)
> +{
> +    uint64_t cmd[4];
> +
> +    if ( valid )
> +    {
> +        ASSERT(size_bits < 32);
> +        ASSERT(!(itt_addr & ~GENMASK(51, 8)));
> +    }
> +    cmd[0] = GITS_CMD_MAPD | ((uint64_t)deviceid << 32);
> +    cmd[1] = valid ? size_bits : 0x00;
> +    cmd[2] = valid ? (itt_addr | GITS_VALID_BIT) : 0x00;
> +    cmd[3] = 0x00;
> +
> +    return its_send_command(its, cmd);
> +}
> +
>  /* Set up the (1:1) collection mapping for the given host CPU. */
>  int gicv3_its_setup_collection(unsigned int cpu)
>  {
> @@ -379,6 +410,7 @@ static int gicv3_its_init_single_its(struct host_its *hw_its)
>      devid_bits = min(devid_bits, max_its_device_bits);
>      if ( reg & GITS_TYPER_PTA )
>          hw_its->flags |= HOST_ITS_USES_PTA;
> +    hw_its->itte_size = GITS_TYPER_ITT_SIZE(reg);
>
>      for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
>      {
> @@ -428,6 +460,180 @@ int gicv3_its_init(void)
>      return 0;
>  }
>
> +static int remove_mapped_guest_device(struct its_devices *dev)
> +{
> +    int ret;
> +
> +    if ( dev->hw_its )
> +    {
> +        int ret = its_send_cmd_mapd(dev->hw_its, dev->host_devid, 0, 0, false);
> +        if ( ret )
> +            return ret;
> +    }
> +
> +    ret = gicv3_its_wait_commands(dev->hw_its);
> +    if ( ret )
> +        return ret;
> +
> +    xfree(dev->itt_addr);
> +    xfree(dev);
> +
> +    return 0;
> +}
> +
> +static struct host_its *gicv3_its_find_by_doorbell(paddr_t doorbell_address)
> +{
> +    struct host_its *hw_its;
> +
> +    list_for_each_entry(hw_its, &host_its_list, entry)
> +    {
> +        if ( hw_its->addr + ITS_DOORBELL_OFFSET == doorbell_address )
> +            return hw_its;
> +    }
> +
> +    return NULL;
> +}
> +
> +static int compare_its_guest_devices(struct its_devices *dev,
> +                                     paddr_t doorbell, uint32_t devid)
> +{
> +    if ( dev->guest_doorbell < doorbell )
> +        return -1;
> +
> +    if ( dev->guest_doorbell > doorbell )
> +        return 1;
> +
> +    if ( dev->guest_devid < devid )
> +        return -1;
> +
> +    if ( dev->guest_devid > devid )
> +        return 1;
> +
> +    return 0;
> +}
> +
> +/*
> + * Map a hardware device, identified by a certain host ITS and its device ID
> + * to domain d, a guest ITS (identified by its doorbell address) and device ID.
> + * Also provide the number of events (MSIs) needed for that device.
> + * This does not check if this particular hardware device is already mapped
> + * at another domain, it is expected that this would be done by the caller.
> + */
> +int gicv3_its_map_guest_device(struct domain *d,
> +                               paddr_t host_doorbell, uint32_t host_devid,
> +                               paddr_t guest_doorbell, uint32_t guest_devid,
> +                               uint32_t nr_events, bool valid)
> +{
> +    void *itt_addr = NULL;
> +    struct host_its *hw_its;
> +    struct its_devices *dev = NULL, *temp;
> +    struct rb_node **new = &d->arch.vgic.its_devices.rb_node, *parent = NULL;
> +    int ret = -ENOENT;
> +
> +    hw_its = gicv3_its_find_by_doorbell(host_doorbell);
> +    if ( !hw_its )
> +        return ret;
> +
> +    /* check for already existing mappings */
> +    spin_lock(&d->arch.vgic.its_devices_lock);
> +    while ( *new )
> +    {
> +        temp = rb_entry(*new, struct its_devices, rbnode);
> +
> +        parent = *new;
> +        if ( !compare_its_guest_devices(temp, guest_doorbell, guest_devid) )
> +        {
> +            if ( !valid )
> +                rb_erase(&temp->rbnode, &d->arch.vgic.its_devices);
> +
> +            spin_unlock(&d->arch.vgic.its_devices_lock);
> +
> +            if ( valid )
> +                return -EBUSY;
> +
> +            return remove_mapped_guest_device(temp);
> +        }
> +
> +        if ( compare_its_guest_devices(temp, guest_doorbell, guest_devid) > 0 )
> +            new = &((*new)->rb_left);
> +        else
> +            new = &((*new)->rb_right);
> +    }
> +
> +    if ( !valid )
> +        goto out_unlock;
> +
> +    ret = -ENOMEM;
> +
> +    /* An Interrupt Translation Table needs to be 256-byte aligned. */
> +    itt_addr = _xzalloc(nr_events * hw_its->itte_size, 256);

Here itt_addr should be calculated based number of IDs mapped with MAPTI.
Should be LPI_BLOCK size. Ex: If guest sends MAPD with nr_events 2 and
later MAPTI
is sent for Xen for mapping 32 events, causes memory corruption.

> +    if ( !itt_addr )
> +        goto out_unlock;
> +
> +    dev = xzalloc(struct its_devices);
> +    if ( !dev )
> +        goto out_unlock;
> +
> +    ret = its_send_cmd_mapd(hw_its, host_devid, fls(nr_events - 1) - 1,
> +                            virt_to_maddr(itt_addr), true);

     Here MAPD is sent with Size (nr_events) set by domain. However
MAPTI is sent for LPI block (32). ITS HW will throw 0xa05 (MAPVI_ID_OOR)
error indicating ID out of range.

> +    if ( ret )
> +        goto out_unlock;
> +
> +    dev->itt_addr = itt_addr;
> +    dev->hw_its = hw_its;
> +    dev->guest_doorbell = guest_doorbell;
> +    dev->guest_devid = guest_devid;
> +    dev->host_devid = host_devid;
> +    dev->eventids = nr_events;
> +
> +    rb_link_node(&dev->rbnode, parent, new);
> +    rb_insert_color(&dev->rbnode, &d->arch.vgic.its_devices);
> +
> +    spin_unlock(&d->arch.vgic.its_devices_lock);
> +
> +    return 0;
> +
> +out_unlock:
> +    spin_unlock(&d->arch.vgic.its_devices_lock);
> +    if ( dev )
> +        xfree(dev->host_lpis);
> +    xfree(itt_addr);
> +    xfree(dev);
> +    return ret;
> +}
> +
> +/* Removing any connections a domain had to any ITS in the system. */
> +void gicv3_its_unmap_all_devices(struct domain *d)
> +{
> +    struct rb_node *victim;
> +    struct its_devices *dev;
> +
> +    /*
> +     * This is an easily readable, yet inefficient implementation.
> +     * It uses the provided iteration wrapper and erases each node, which
> +     * possibly triggers rebalancing.
> +     * This seems overkill since we are going to abolish the whole tree, but
> +     * avoids an open-coded re-implementation of the traversal functions with
> +     * some recursive function calls.
> +     * Performance does not matter here, since we are destroying a domain.
> +     */
> +restart:
> +    spin_lock(&d->arch.vgic.its_devices_lock);
> +    if ( (victim = rb_first(&d->arch.vgic.its_devices)) )
> +    {
> +        dev = rb_entry(victim, struct its_devices, rbnode);
> +        rb_erase(victim, &d->arch.vgic.its_devices);
> +
> +        spin_unlock(&d->arch.vgic.its_devices_lock);
> +
> +        remove_mapped_guest_device(dev);
> +
> +        goto restart;
> +    }
> +
> +    spin_unlock(&d->arch.vgic.its_devices_lock);
> +}
> +
>  /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
>  void gicv3_its_dt_init(const struct dt_device_node *node)
>  {
> @@ -455,6 +661,7 @@ void gicv3_its_dt_init(const struct dt_device_node *node)
>          its_data->addr = addr;
>          its_data->size = size;
>          its_data->dt_node = its;
> +        spin_lock_init(&its_data->cmd_lock);
>
>          printk("GICv3: Found ITS @0x%lx\n", addr);
>
> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
> index d61479d..1fadb00 100644
> --- a/xen/arch/arm/vgic-v3.c
> +++ b/xen/arch/arm/vgic-v3.c
> @@ -1450,6 +1450,9 @@ static int vgic_v3_domain_init(struct domain *d)
>      d->arch.vgic.nr_regions = rdist_count;
>      d->arch.vgic.rdist_regions = rdist_regions;
>
> +    spin_lock_init(&d->arch.vgic.its_devices_lock);
> +    d->arch.vgic.its_devices = RB_ROOT;
> +
>      /*
>       * Domain 0 gets the hardware address.
>       * Guests get the virtual platform layout.
> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> index 2d6fbb1..00b9c1a 100644
> --- a/xen/include/asm-arm/domain.h
> +++ b/xen/include/asm-arm/domain.h
> @@ -11,6 +11,7 @@
>  #include <asm/gic.h>
>  #include <public/hvm/params.h>
>  #include <xen/serial.h>
> +#include <xen/rbtree.h>
>
>  struct hvm_domain
>  {
> @@ -109,6 +110,8 @@ struct arch_domain
>          } *rdist_regions;
>          int nr_regions;                     /* Number of rdist regions */
>          uint32_t rdist_stride;              /* Re-Distributor stride */
> +        struct rb_root its_devices;         /* devices mapped to an ITS */
> +        spinlock_t its_devices_lock;        /* protects the its_devices tree */
>  #endif
>      } vgic;
>
> diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
> index 8b493fb..3421ea0 100644
> --- a/xen/include/asm-arm/gic_v3_its.h
> +++ b/xen/include/asm-arm/gic_v3_its.h
> @@ -48,6 +48,10 @@
>  #define GITS_TYPER_DEVICE_ID_BITS(r)    (((r & GITS_TYPER_DEVIDS_MASK) >> \
>                                                 GITS_TYPER_DEVIDS_SHIFT) + 1)
>  #define GITS_TYPER_IDBITS_SHIFT         8
> +#define GITS_TYPER_ITT_SIZE_SHIFT       4
> +#define GITS_TYPER_ITT_SIZE_MASK        (0xfUL << GITS_TYPER_ITT_SIZE_SHIFT)
> +#define GITS_TYPER_ITT_SIZE(r)          ((((r) & GITS_TYPER_ITT_SIZE_MASK) >> \
> +                                                GITS_TYPER_ITT_SIZE_SHIFT) + 1)
>
>  #define GITS_IIDR_VALUE                 0x34c
>
> @@ -94,7 +98,10 @@
>  #define GITS_CMD_MOVALL                 0x0e
>  #define GITS_CMD_DISCARD                0x0f
>
> +#define ITS_DOORBELL_OFFSET             0x10040
> +
>  #include <xen/device_tree.h>
> +#include <xen/rbtree.h>
>
>  #define HOST_ITS_FLUSH_CMD_QUEUE        (1U << 0)
>  #define HOST_ITS_USES_PTA               (1U << 1)
> @@ -108,6 +115,7 @@ struct host_its {
>      void __iomem *its_base;
>      spinlock_t cmd_lock;
>      void *cmd_buf;
> +    unsigned int itte_size;
>      unsigned int flags;
>  };
>
> @@ -134,6 +142,16 @@ uint64_t gicv3_get_redist_address(unsigned int cpu, bool use_pta);
>  /* Map a collection for this host CPU to each host ITS. */
>  int gicv3_its_setup_collection(unsigned int cpu);
>
> +/*
> + * Map a device on the host by allocating an ITT on the host (ITS).
> + * "nr_event" specifies how many events (interrupts) this device will need.
> + * Setting "valid" to false deallocates the device.
> + */
> +int gicv3_its_map_guest_device(struct domain *d,
> +                               paddr_t host_doorbell, uint32_t host_devid,
> +                               paddr_t guest_doorbell, uint32_t guest_devid,
> +                               uint32_t nr_events, bool valid);
> +
>  #else
>
>  static LIST_HEAD(host_its_list);
> --
> 2.9.0
>
Andre Przywara April 3, 2017, 7:45 p.m. UTC | #4
Hi,

On 22/03/17 22:45, Stefano Stabellini wrote:
> On Thu, 16 Mar 2017, Andre Przywara wrote:
>> The ITS uses device IDs to map LPIs to a device. Dom0 will later use
>> those IDs, which we directly pass on to the host.
>> For this we have to map each device that Dom0 may request to a host
>> ITS device with the same identifier.
>> Allocate the respective memory and enter each device into an rbtree to
>> later be able to iterate over it or to easily teardown guests.
>>
>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>> ---
>>  xen/arch/arm/gic-v3-its.c        | 207 +++++++++++++++++++++++++++++++++++++++
>>  xen/arch/arm/vgic-v3.c           |   3 +
>>  xen/include/asm-arm/domain.h     |   3 +
>>  xen/include/asm-arm/gic_v3_its.h |  18 ++++
>>  4 files changed, 231 insertions(+)
>>
>> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
>> index 5c11b0d..60b15b5 100644
>> --- a/xen/arch/arm/gic-v3-its.c
>> +++ b/xen/arch/arm/gic-v3-its.c
>> @@ -21,6 +21,8 @@
>>  #include <xen/lib.h>
>>  #include <xen/delay.h>
>>  #include <xen/mm.h>
>> +#include <xen/rbtree.h>
>> +#include <xen/sched.h>
>>  #include <xen/sizes.h>
>>  #include <asm/gic.h>
>>  #include <asm/gic_v3_defs.h>
>> @@ -32,6 +34,17 @@
>>  
>>  LIST_HEAD(host_its_list);
>>  
>> +struct its_devices {
>> +    struct rb_node rbnode;
>> +    struct host_its *hw_its;
>> +    void *itt_addr;
>> +    paddr_t guest_doorbell;
>> +    uint32_t host_devid;
>> +    uint32_t guest_devid;
>> +    uint32_t eventids;
>> +    uint32_t *host_lpis;
>> +};
>> +
>>  bool gicv3_its_host_has_its(void)
>>  {
>>      return !list_empty(&host_its_list);
>> @@ -149,6 +162,24 @@ static int its_send_cmd_mapc(struct host_its *its, uint32_t collection_id,
>>      return its_send_command(its, cmd);
>>  }
>>  
>> +static int its_send_cmd_mapd(struct host_its *its, uint32_t deviceid,
>> +                             uint8_t size_bits, paddr_t itt_addr, bool valid)
>> +{
>> +    uint64_t cmd[4];
>> +
>> +    if ( valid )
>> +    {
>> +        ASSERT(size_bits < 32);
>> +        ASSERT(!(itt_addr & ~GENMASK(51, 8)));
>> +    }
>> +    cmd[0] = GITS_CMD_MAPD | ((uint64_t)deviceid << 32);
>> +    cmd[1] = valid ? size_bits : 0x00;
>> +    cmd[2] = valid ? (itt_addr | GITS_VALID_BIT) : 0x00;
>> +    cmd[3] = 0x00;
>> +
>> +    return its_send_command(its, cmd);
>> +}
>> +
>>  /* Set up the (1:1) collection mapping for the given host CPU. */
>>  int gicv3_its_setup_collection(unsigned int cpu)
>>  {
>> @@ -379,6 +410,7 @@ static int gicv3_its_init_single_its(struct host_its *hw_its)
>>      devid_bits = min(devid_bits, max_its_device_bits);
>>      if ( reg & GITS_TYPER_PTA )
>>          hw_its->flags |= HOST_ITS_USES_PTA;
>> +    hw_its->itte_size = GITS_TYPER_ITT_SIZE(reg);
>>  
>>      for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
>>      {
>> @@ -428,6 +460,180 @@ int gicv3_its_init(void)
>>      return 0;
>>  }
>>  
>> +static int remove_mapped_guest_device(struct its_devices *dev)
>> +{
>> +    int ret;
>> +
>> +    if ( dev->hw_its )
>> +    {
>> +        int ret = its_send_cmd_mapd(dev->hw_its, dev->host_devid, 0, 0, false);
>> +        if ( ret )
>> +            return ret;
>> +    }
>> +
>> +    ret = gicv3_its_wait_commands(dev->hw_its);
>> +    if ( ret )
>> +        return ret;
>> +
>> +    xfree(dev->itt_addr);
>> +    xfree(dev);
>> +
>> +    return 0;
>> +}
>> +
>> +static struct host_its *gicv3_its_find_by_doorbell(paddr_t doorbell_address)
>> +{
>> +    struct host_its *hw_its;
>> +
>> +    list_for_each_entry(hw_its, &host_its_list, entry)
> 
> Does this need to take a spinlock to protect host_its_list? I guess not
> because the list is not modified after boot?

Exactly, I added a comment in v4 explaining this.

>> +    {
>> +        if ( hw_its->addr + ITS_DOORBELL_OFFSET == doorbell_address )
>> +            return hw_its;
>> +    }
>> +
>> +    return NULL;
>> +}
>> +
>> +static int compare_its_guest_devices(struct its_devices *dev,
>> +                                     paddr_t doorbell, uint32_t devid)
>> +{
>> +    if ( dev->guest_doorbell < doorbell )
>> +        return -1;
>> +
>> +    if ( dev->guest_doorbell > doorbell )
>> +        return 1;
>> +
>> +    if ( dev->guest_devid < devid )
>> +        return -1;
>> +
>> +    if ( dev->guest_devid > devid )
>> +        return 1;
>> +
>> +    return 0;
>> +}
>> +
>> +/*
>> + * Map a hardware device, identified by a certain host ITS and its device ID
>> + * to domain d, a guest ITS (identified by its doorbell address) and device ID.
>> + * Also provide the number of events (MSIs) needed for that device.
>> + * This does not check if this particular hardware device is already mapped
>> + * at another domain, it is expected that this would be done by the caller.
>> + */
>> +int gicv3_its_map_guest_device(struct domain *d,
>> +                               paddr_t host_doorbell, uint32_t host_devid,
>> +                               paddr_t guest_doorbell, uint32_t guest_devid,
>> +                               uint32_t nr_events, bool valid)
>> +{
>> +    void *itt_addr = NULL;
>> +    struct host_its *hw_its;
>> +    struct its_devices *dev = NULL, *temp;
>> +    struct rb_node **new = &d->arch.vgic.its_devices.rb_node, *parent = NULL;
>> +    int ret = -ENOENT;
>> +
>> +    hw_its = gicv3_its_find_by_doorbell(host_doorbell);
>> +    if ( !hw_its )
>> +        return ret;
>> +
>> +    /* check for already existing mappings */
>> +    spin_lock(&d->arch.vgic.its_devices_lock);
>> +    while ( *new )
>> +    {
>> +        temp = rb_entry(*new, struct its_devices, rbnode);
>> +
>> +        parent = *new;
>> +        if ( !compare_its_guest_devices(temp, guest_doorbell, guest_devid) )
>> +        {
>> +            if ( !valid )
>> +                rb_erase(&temp->rbnode, &d->arch.vgic.its_devices);
>> +
>> +            spin_unlock(&d->arch.vgic.its_devices_lock);
>> +
>> +            if ( valid )
>> +                return -EBUSY;
>> +
>> +            return remove_mapped_guest_device(temp);
>> +        }
>> +
> 
> Plese don't run compare_its_guest_devices twice with the same arguments,
> just store the return value.

Fixed in v3.

>> +        if ( compare_its_guest_devices(temp, guest_doorbell, guest_devid) > 0 )
>> +            new = &((*new)->rb_left);
>> +        else
>> +            new = &((*new)->rb_right);
>> +    }
>> +
>> +    if ( !valid )
>> +        goto out_unlock;
>> +
>> +    ret = -ENOMEM;
>> +
>> +    /* An Interrupt Translation Table needs to be 256-byte aligned. */
>> +    itt_addr = _xzalloc(nr_events * hw_its->itte_size, 256);
>> +    if ( !itt_addr )
>> +        goto out_unlock;
>> +
>> +    dev = xzalloc(struct its_devices);
>> +    if ( !dev )
>> +        goto out_unlock;
>> +
>> +    ret = its_send_cmd_mapd(hw_its, host_devid, fls(nr_events - 1) - 1,
>> +                            virt_to_maddr(itt_addr), true);
>> +    if ( ret )
>> +        goto out_unlock;
>> +
>> +    dev->itt_addr = itt_addr;
>> +    dev->hw_its = hw_its;
>> +    dev->guest_doorbell = guest_doorbell;
>> +    dev->guest_devid = guest_devid;
>> +    dev->host_devid = host_devid;
>> +    dev->eventids = nr_events;
>> +
>> +    rb_link_node(&dev->rbnode, parent, new);
>> +    rb_insert_color(&dev->rbnode, &d->arch.vgic.its_devices);
>> +
>> +    spin_unlock(&d->arch.vgic.its_devices_lock);
>> +
>> +    return 0;
>> +
>> +out_unlock:
>> +    spin_unlock(&d->arch.vgic.its_devices_lock);
>> +    if ( dev )
>> +        xfree(dev->host_lpis);
> 
> Where is host_lpis allocated? Why is it freed here?

So I swapped this patch with the next one in v4, because this solves
this issue.

>> +    xfree(itt_addr);
>> +    xfree(dev);
>> +    return ret;
>> +}
>> +
>> +/* Removing any connections a domain had to any ITS in the system. */
>> +void gicv3_its_unmap_all_devices(struct domain *d)
>> +{
>> +    struct rb_node *victim;
>> +    struct its_devices *dev;
>> +
>> +    /*
>> +     * This is an easily readable, yet inefficient implementation.
>> +     * It uses the provided iteration wrapper and erases each node, which
>> +     * possibly triggers rebalancing.
>> +     * This seems overkill since we are going to abolish the whole tree, but
>> +     * avoids an open-coded re-implementation of the traversal functions with
>> +     * some recursive function calls.
>> +     * Performance does not matter here, since we are destroying a domain.
>> +     */
>> +restart:
>> +    spin_lock(&d->arch.vgic.its_devices_lock);
>> +    if ( (victim = rb_first(&d->arch.vgic.its_devices)) )
>> +    {
>> +        dev = rb_entry(victim, struct its_devices, rbnode);
>> +        rb_erase(victim, &d->arch.vgic.its_devices);
>> +
>> +        spin_unlock(&d->arch.vgic.its_devices_lock);
>> +
>> +        remove_mapped_guest_device(dev);
>> +
>> +        goto restart;
>> +    }
> 
> There is no need to use goto here, a simple while loop should suffice.

Fixed in v4.

>> +    spin_unlock(&d->arch.vgic.its_devices_lock);
>> +}
>> +
>>  /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
>>  void gicv3_its_dt_init(const struct dt_device_node *node)
>>  {
>> @@ -455,6 +661,7 @@ void gicv3_its_dt_init(const struct dt_device_node *node)
>>          its_data->addr = addr;
>>          its_data->size = size;
>>          its_data->dt_node = its;
>> +        spin_lock_init(&its_data->cmd_lock);
> 
> This change should be in the previous patch

Fixed in v4.

>>          printk("GICv3: Found ITS @0x%lx\n", addr);
>>  
>> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
>> index d61479d..1fadb00 100644
>> --- a/xen/arch/arm/vgic-v3.c
>> +++ b/xen/arch/arm/vgic-v3.c
>> @@ -1450,6 +1450,9 @@ static int vgic_v3_domain_init(struct domain *d)
>>      d->arch.vgic.nr_regions = rdist_count;
>>      d->arch.vgic.rdist_regions = rdist_regions;
>>  
>> +    spin_lock_init(&d->arch.vgic.its_devices_lock);
>> +    d->arch.vgic.its_devices = RB_ROOT;
>> +
>>      /*
>>       * Domain 0 gets the hardware address.
>>       * Guests get the virtual platform layout.
>> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
>> index 2d6fbb1..00b9c1a 100644
>> --- a/xen/include/asm-arm/domain.h
>> +++ b/xen/include/asm-arm/domain.h
>> @@ -11,6 +11,7 @@
>>  #include <asm/gic.h>
>>  #include <public/hvm/params.h>
>>  #include <xen/serial.h>
>> +#include <xen/rbtree.h>
>>  
>>  struct hvm_domain
>>  {
>> @@ -109,6 +110,8 @@ struct arch_domain
>>          } *rdist_regions;
>>          int nr_regions;                     /* Number of rdist regions */
>>          uint32_t rdist_stride;              /* Re-Distributor stride */
>> +        struct rb_root its_devices;         /* devices mapped to an ITS */
>> +        spinlock_t its_devices_lock;        /* protects the its_devices tree */
>>  #endif
>>      } vgic;
>>  
>> diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
>> index 8b493fb..3421ea0 100644
>> --- a/xen/include/asm-arm/gic_v3_its.h
>> +++ b/xen/include/asm-arm/gic_v3_its.h
>> @@ -48,6 +48,10 @@
>>  #define GITS_TYPER_DEVICE_ID_BITS(r)    (((r & GITS_TYPER_DEVIDS_MASK) >> \
>>                                                 GITS_TYPER_DEVIDS_SHIFT) + 1)
>>  #define GITS_TYPER_IDBITS_SHIFT         8
>> +#define GITS_TYPER_ITT_SIZE_SHIFT       4
>> +#define GITS_TYPER_ITT_SIZE_MASK        (0xfUL << GITS_TYPER_ITT_SIZE_SHIFT)
>> +#define GITS_TYPER_ITT_SIZE(r)          ((((r) & GITS_TYPER_ITT_SIZE_MASK) >> \
>> +                                                GITS_TYPER_ITT_SIZE_SHIFT) + 1)
>>  
>>  #define GITS_IIDR_VALUE                 0x34c
>>  
>> @@ -94,7 +98,10 @@
>>  #define GITS_CMD_MOVALL                 0x0e
>>  #define GITS_CMD_DISCARD                0x0f
>>  
>> +#define ITS_DOORBELL_OFFSET             0x10040
>> +
>>  #include <xen/device_tree.h>
>> +#include <xen/rbtree.h>
>>  
>>  #define HOST_ITS_FLUSH_CMD_QUEUE        (1U << 0)
>>  #define HOST_ITS_USES_PTA               (1U << 1)
>> @@ -108,6 +115,7 @@ struct host_its {
>>      void __iomem *its_base;
>>      spinlock_t cmd_lock;
>>      void *cmd_buf;
>> +    unsigned int itte_size;
>>      unsigned int flags;
>>  };
> 
> I would move itte_size and its initialization to the patch that
> introduced struct host_its.

Puh, OK. This wasn't as innocent as it looks like on the first glance,
so I created a new patch to introduce the host ITS init early and
separate this from the ITS table init.

Cheers,
Andre.


>> @@ -134,6 +142,16 @@ uint64_t gicv3_get_redist_address(unsigned int cpu, bool use_pta);
>>  /* Map a collection for this host CPU to each host ITS. */
>>  int gicv3_its_setup_collection(unsigned int cpu);
>>  
>> +/*
>> + * Map a device on the host by allocating an ITT on the host (ITS).
>> + * "nr_event" specifies how many events (interrupts) this device will need.
>> + * Setting "valid" to false deallocates the device.
>> + */
>> +int gicv3_its_map_guest_device(struct domain *d,
>> +                               paddr_t host_doorbell, uint32_t host_devid,
>> +                               paddr_t guest_doorbell, uint32_t guest_devid,
>> +                               uint32_t nr_events, bool valid);
>> +
>>  #else
>>  
>>  static LIST_HEAD(host_its_list);
>> -- 
>> 2.9.0
>>
Andre Przywara April 3, 2017, 8:08 p.m. UTC | #5
Hi,

On 22/03/17 17:29, Julien Grall wrote:
> Hi Andre,
> 
> On 16/03/17 11:20, Andre Przywara wrote:
>> The ITS uses device IDs to map LPIs to a device. Dom0 will later use
>> those IDs, which we directly pass on to the host.
>> For this we have to map each device that Dom0 may request to a host
>> ITS device with the same identifier.
>> Allocate the respective memory and enter each device into an rbtree to
>> later be able to iterate over it or to easily teardown guests.
>>
>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>> ---
>>  xen/arch/arm/gic-v3-its.c        | 207
>> +++++++++++++++++++++++++++++++++++++++
>>  xen/arch/arm/vgic-v3.c           |   3 +
>>  xen/include/asm-arm/domain.h     |   3 +
>>  xen/include/asm-arm/gic_v3_its.h |  18 ++++
>>  4 files changed, 231 insertions(+)
>>
>> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
>> index 5c11b0d..60b15b5 100644
>> --- a/xen/arch/arm/gic-v3-its.c
>> +++ b/xen/arch/arm/gic-v3-its.c
>> @@ -21,6 +21,8 @@
>>  #include <xen/lib.h>
>>  #include <xen/delay.h>
>>  #include <xen/mm.h>
>> +#include <xen/rbtree.h>
>> +#include <xen/sched.h>
>>  #include <xen/sizes.h>
>>  #include <asm/gic.h>
>>  #include <asm/gic_v3_defs.h>
>> @@ -32,6 +34,17 @@
>>
>>  LIST_HEAD(host_its_list);
>>
>> +struct its_devices {
>> +    struct rb_node rbnode;
>> +    struct host_its *hw_its;
>> +    void *itt_addr;
>> +    paddr_t guest_doorbell;
> 
> I think it would be worth to explain in the commit message why you need
> the guest_doorbell in the struct its_devices and how you plan to use it.

In v4 I now also elaborated on the reason in a comment (before the
struct), which I deem more useful than something in the commit message.

>> +    uint32_t host_devid;
>> +    uint32_t guest_devid;
>> +    uint32_t eventids;
>> +    uint32_t *host_lpis;
>> +};
>> +
>>  bool gicv3_its_host_has_its(void)
>>  {
>>      return !list_empty(&host_its_list);
>> @@ -149,6 +162,24 @@ static int its_send_cmd_mapc(struct host_its
>> *its, uint32_t collection_id,
>>      return its_send_command(its, cmd);
>>  }
>>
>> +static int its_send_cmd_mapd(struct host_its *its, uint32_t deviceid,
>> +                             uint8_t size_bits, paddr_t itt_addr,
>> bool valid)
>> +{
>> +    uint64_t cmd[4];
>> +
>> +    if ( valid )
>> +    {
>> +        ASSERT(size_bits < 32);
> 
> It would be better if you do the check against the real number in
> hardware (i.e GITS_TYPER.ID_bits).

Added in v4.

> 
>> +        ASSERT(!(itt_addr & ~GENMASK(51, 8)));
>> +    }
>> +    cmd[0] = GITS_CMD_MAPD | ((uint64_t)deviceid << 32);
>> +    cmd[1] = valid ? size_bits : 0x00;
> 
> This is really confusing. The check was not on the previous version. So
> why do you need that?

Admittedly I was taken away be some intention to check this here
properly. But since itt_addr and size are only valid with V=1, I removed
this in v3.

> Also, it would have been better to hide the "size - 1" in the helper
> avoiding to really on the caller to do the right thing.

I tend to agree, but then we have the awkward case where an unmap passes
0 in size, which then gets decremented by one. But you are right that
it's still saner this way, so I pass 1 now in the unmap call and do the
"-1" encoding in here.

>> +    cmd[2] = valid ? (itt_addr | GITS_VALID_BIT) : 0x00;
> 
> Ditto about "valid? ...".

Removed in v3.

> [...]
> 
>> +static struct host_its *gicv3_its_find_by_doorbell(paddr_t
>> doorbell_address)
>> +{
>> +    struct host_its *hw_its;
>> +
>> +    list_for_each_entry(hw_its, &host_its_list, entry)
>> +    {
>> +        if ( hw_its->addr + ITS_DOORBELL_OFFSET == doorbell_address )
> 
> Why not storing the ITS address rather than the doorbell to avoid this
> check?

Because the doorbell address is a nice architectural property of MSIs in
general. And we need this check anyway, it's just the addition of the
doorbell offset that is different.

> [...]
> 
>> +int gicv3_its_map_guest_device(struct domain *d,
>> +                               paddr_t host_doorbell, uint32_t
>> host_devid,
>> +                               paddr_t guest_doorbell, uint32_t
>> guest_devid,
>> +                               uint32_t nr_events, bool valid)
>> +{
>> +    void *itt_addr = NULL;
>> +    struct host_its *hw_its;
>> +    struct its_devices *dev = NULL, *temp;
>> +    struct rb_node **new = &d->arch.vgic.its_devices.rb_node, *parent
>> = NULL;
>> +    int ret = -ENOENT;
>> +
>> +    hw_its = gicv3_its_find_by_doorbell(host_doorbell);
>> +    if ( !hw_its )
>> +        return ret;
>> +
>> +    /* check for already existing mappings */
>> +    spin_lock(&d->arch.vgic.its_devices_lock);
>> +    while ( *new )
>> +    {
>> +        temp = rb_entry(*new, struct its_devices, rbnode);
>> +
>> +        parent = *new;
>> +        if ( !compare_its_guest_devices(temp, guest_doorbell,
>> guest_devid) )
>> +        {
>> +            if ( !valid )
>> +                rb_erase(&temp->rbnode, &d->arch.vgic.its_devices);
>> +
>> +            spin_unlock(&d->arch.vgic.its_devices_lock);
>> +
>> +            if ( valid )
> 
> Again, a printk(XENLOG_GUEST...) here would be useful to know which host
> DeviceID was associated to the guest DeviceID.

I added a gprintk(XENLOG_DEBUG, ), which I think is more appropriate (as
it may spam the console when some stupid guest is running). Let me know
if you want to have a different loglevel.

>> +                return -EBUSY;
>> +
>> +            return remove_mapped_guest_device(temp);
> 
> Just above you removed the device from the RB-tree but this function may
> fail and never free the memory. This means that memory will be leaked
> leading to a potential denial of service.

So I fixed this case in v4, though there is still a tiny chance of a
memleak: if the MAPD(V=0) command fails. We can't free the ITT table
then, really, because it still belongs to the ITS. I don't think we can
do much about it, though.
I free the other allocations of the memory now, anyway.

>> +        }
>> +
>> +        if ( compare_its_guest_devices(temp, guest_doorbell,
>> guest_devid) > 0 )
>> +            new = &((*new)->rb_left);
>> +        else
>> +            new = &((*new)->rb_right);
>> +    }
>> +
>> +    if ( !valid )
>> +        goto out_unlock;
>> +
>> +    ret = -ENOMEM;
>> +
>> +    /* An Interrupt Translation Table needs to be 256-byte aligned. */
>> +    itt_addr = _xzalloc(nr_events * hw_its->itte_size, 256);
>> +    if ( !itt_addr )
>> +        goto out_unlock;
>> +
>> +    dev = xzalloc(struct its_devices);
>> +    if ( !dev )
>> +        goto out_unlock;
>> +
>> +    ret = its_send_cmd_mapd(hw_its, host_devid, fls(nr_events - 1) - 1,
> 
> I don't understand why nr_events - 1. Can you explain?

Xen lacks an ilog2, so "fls" is the closest I could find. "fls" has this
slightly weird semantic (from the Linux source):
"Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32."
I think this translates into: "How many bits do I need to express this
number?". For our case the highest event number we need to encode is
"nr_events - 1", hence the subtraction.
So is it worth to introduce a:
	static inline int ilog2_64(uint64_t n) ...
in xen/include/asm-arm/bitops.h to document this?

> 
> [...]
> 
>> +/* Removing any connections a domain had to any ITS in the system. */
>> +void gicv3_its_unmap_all_devices(struct domain *d)
>> +{
>> +    struct rb_node *victim;
>> +    struct its_devices *dev;
>> +
>> +    /*
>> +     * This is an easily readable, yet inefficient implementation.
>> +     * It uses the provided iteration wrapper and erases each node,
>> which
>> +     * possibly triggers rebalancing.
>> +     * This seems overkill since we are going to abolish the whole
>> tree, but
>> +     * avoids an open-coded re-implementation of the traversal
>> functions with
>> +     * some recursive function calls.
>> +     * Performance does not matter here, since we are destroying a
>> domain.
> 
> Again, this is slightly untrue. Performance matter when destroying a
> domain as Xen cannot be preempted. So if it takes too long, you will
> have an impact on the overall system.

I reworded this sentence in v3, since you apparently misunderstood me.
By inefficient I meant sub-optimal, but this is not a _critical_ path,
so we don't care too much. The execution time is clearly bounded by the
number of devices. We simply shouldn't allow gazillion of devices on a
DomU if we care about those things.

> However, I think it would be fair to assume that all device will be
> deassigned before the ITS is destroyed. So I would just drop this
> function. Note that we have the same assumption in the SMMU driver.

Well, why not keep it as a safeguard, then? If a domain gets destroyed,
aren't we supposed to clean up?

>> +     */
>> +restart:
>> +    spin_lock(&d->arch.vgic.its_devices_lock);
>> +    if ( (victim = rb_first(&d->arch.vgic.its_devices)) )
>> +    {
>> +        dev = rb_entry(victim, struct its_devices, rbnode);
>> +        rb_erase(victim, &d->arch.vgic.its_devices);
>> +
>> +        spin_unlock(&d->arch.vgic.its_devices_lock);
>> +
>> +        remove_mapped_guest_device(dev);
>> +
>> +        goto restart;
>> +    }
>> +
>> +    spin_unlock(&d->arch.vgic.its_devices_lock);
>> +}
>> +
>>  /* Scan the DT for any ITS nodes and create a list of host ITSes out
>> of it. */
>>  void gicv3_its_dt_init(const struct dt_device_node *node)
>>  {
>> @@ -455,6 +661,7 @@ void gicv3_its_dt_init(const struct dt_device_node
>> *node)
>>          its_data->addr = addr;
>>          its_data->size = size;
>>          its_data->dt_node = its;
>> +        spin_lock_init(&its_data->cmd_lock);
> 
> This should be in patch #5.

Done in v4.

>>
>>          printk("GICv3: Found ITS @0x%lx\n", addr);
>>
>> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
>> index d61479d..1fadb00 100644
>> --- a/xen/arch/arm/vgic-v3.c
>> +++ b/xen/arch/arm/vgic-v3.c
>> @@ -1450,6 +1450,9 @@ static int vgic_v3_domain_init(struct domain *d)
>>      d->arch.vgic.nr_regions = rdist_count;
>>      d->arch.vgic.rdist_regions = rdist_regions;
>>
>> +    spin_lock_init(&d->arch.vgic.its_devices_lock);
>> +    d->arch.vgic.its_devices = RB_ROOT;
> 
> Again, the placement of those 2 lines are likely wrong. This should
> belong to the vITS and not the vgic-v3.

Well, it's a "domain which has an ITS" thing, as this is not per ITS. So
far we only have an per-ITS init function, as we don't iterate over the
host ITSes there anymore.
So I refrained from adding a separate function to initialize these
simple and generic data structures. Please let me know if you insist on
some ITS-per-domain init function.

Cheers,
Andre.

> I think it would make sense to get a patch that introduces a skeleton
> for the vITS before this patch and start plumbing through.
> 
>> +
>>      /*
>>       * Domain 0 gets the hardware address.
>>       * Guests get the virtual platform layout.
> 
> Cheers,
>
Julien Grall April 3, 2017, 8:41 p.m. UTC | #6
On 04/03/2017 09:08 PM, Andre Przywara wrote:
> Hi,

Hi Andre,

> On 22/03/17 17:29, Julien Grall wrote:
>>> +int gicv3_its_map_guest_device(struct domain *d,
>>> +                               paddr_t host_doorbell, uint32_t
>>> host_devid,
>>> +                               paddr_t guest_doorbell, uint32_t
>>> guest_devid,
>>> +                               uint32_t nr_events, bool valid)
>>> +{
>>> +    void *itt_addr = NULL;
>>> +    struct host_its *hw_its;
>>> +    struct its_devices *dev = NULL, *temp;
>>> +    struct rb_node **new = &d->arch.vgic.its_devices.rb_node, *parent
>>> = NULL;
>>> +    int ret = -ENOENT;
>>> +
>>> +    hw_its = gicv3_its_find_by_doorbell(host_doorbell);
>>> +    if ( !hw_its )
>>> +        return ret;
>>> +
>>> +    /* check for already existing mappings */
>>> +    spin_lock(&d->arch.vgic.its_devices_lock);
>>> +    while ( *new )
>>> +    {
>>> +        temp = rb_entry(*new, struct its_devices, rbnode);
>>> +
>>> +        parent = *new;
>>> +        if ( !compare_its_guest_devices(temp, guest_doorbell,
>>> guest_devid) )
>>> +        {
>>> +            if ( !valid )
>>> +                rb_erase(&temp->rbnode, &d->arch.vgic.its_devices);
>>> +
>>> +            spin_unlock(&d->arch.vgic.its_devices_lock);
>>> +
>>> +            if ( valid )
>>
>> Again, a printk(XENLOG_GUEST...) here would be useful to know which host
>> DeviceID was associated to the guest DeviceID.
>
> I added a gprintk(XENLOG_DEBUG, ), which I think is more appropriate (as
> it may spam the console when some stupid guest is running). Let me know
> if you want to have a different loglevel.

I don't think this is more appropriate. gprintk will print the domain ID 
of the current domain, whilst this function will be called by the 
toolstack in the future.

Furthemore, if you look at the implementation of gprintk you will notice 
that it is basically turning into printk(XENLOG_GUEST...) and adding 
information of the current vCPU.

What matters for ratelimiting is XENLOG_GUEST.

>
>>> +                return -EBUSY;
>>> +
>>> +            return remove_mapped_guest_device(temp);
>>
>> Just above you removed the device from the RB-tree but this function may
>> fail and never free the memory. This means that memory will be leaked
>> leading to a potential denial of service.
>
> So I fixed this case in v4, though there is still a tiny chance of a
> memleak: if the MAPD(V=0) command fails. We can't free the ITT table
> then, really, because it still belongs to the ITS. I don't think we can
> do much about it, though.

This is a leak and even tiny is quite worrying. How do you plan to 
address this in the future? What is the best thing to do?

> I free the other allocations of the memory now, anyway.
>
>>> +        }
>>> +
>>> +        if ( compare_its_guest_devices(temp, guest_doorbell,
>>> guest_devid) > 0 )
>>> +            new = &((*new)->rb_left);
>>> +        else
>>> +            new = &((*new)->rb_right);
>>> +    }
>>> +
>>> +    if ( !valid )
>>> +        goto out_unlock;
>>> +
>>> +    ret = -ENOMEM;
>>> +
>>> +    /* An Interrupt Translation Table needs to be 256-byte aligned. */
>>> +    itt_addr = _xzalloc(nr_events * hw_its->itte_size, 256);
>>> +    if ( !itt_addr )
>>> +        goto out_unlock;
>>> +
>>> +    dev = xzalloc(struct its_devices);
>>> +    if ( !dev )
>>> +        goto out_unlock;
>>> +
>>> +    ret = its_send_cmd_mapd(hw_its, host_devid, fls(nr_events - 1) - 1,
>>
>> I don't understand why nr_events - 1. Can you explain?
>
> Xen lacks an ilog2, so "fls" is the closest I could find. "fls" has this
> slightly weird semantic (from the Linux source):
> "Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32."
> I think this translates into: "How many bits do I need to express this
> number?". For our case the highest event number we need to encode is
> "nr_events - 1", hence the subtraction.
> So is it worth to introduce a:
> 	static inline int ilog2_64(uint64_t n) ...
> in xen/include/asm-arm/bitops.h to document this?

This might make easier to read the code.

>>
>> [...]
>>
>>> +/* Removing any connections a domain had to any ITS in the system. */
>>> +void gicv3_its_unmap_all_devices(struct domain *d)
>>> +{
>>> +    struct rb_node *victim;
>>> +    struct its_devices *dev;
>>> +
>>> +    /*
>>> +     * This is an easily readable, yet inefficient implementation.
>>> +     * It uses the provided iteration wrapper and erases each node,
>>> which
>>> +     * possibly triggers rebalancing.
>>> +     * This seems overkill since we are going to abolish the whole
>>> tree, but
>>> +     * avoids an open-coded re-implementation of the traversal
>>> functions with
>>> +     * some recursive function calls.
>>> +     * Performance does not matter here, since we are destroying a
>>> domain.
>>
>> Again, this is slightly untrue. Performance matter when destroying a
>> domain as Xen cannot be preempted. So if it takes too long, you will
>> have an impact on the overall system.
>
> I reworded this sentence in v3, since you apparently misunderstood me.
> By inefficient I meant sub-optimal, but this is not a _critical_ path,
> so we don't care too much. The execution time is clearly bounded by the
> number of devices. We simply shouldn't allow gazillion of devices on a
> DomU if we care about those things.

This is a very naive way of thinking how domain destruction is working 
on Xen. Domain destruction is likely to get time, you have to release 
all the resources (memory, devices...). So we usually split in smaller 
pieces to be able to handle interrupts or allowing preemption.

>
>> However, I think it would be fair to assume that all device will be
>> deassigned before the ITS is destroyed. So I would just drop this
>> function. Note that we have the same assumption in the SMMU driver.
>
> Well, why not keep it as a safeguard, then? If a domain gets destroyed,
> aren't we supposed to clean up?

See above. We use ASSERT(....) in the SMMU code to check this assumption.

>>>
>>>          printk("GICv3: Found ITS @0x%lx\n", addr);
>>>
>>> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
>>> index d61479d..1fadb00 100644
>>> --- a/xen/arch/arm/vgic-v3.c
>>> +++ b/xen/arch/arm/vgic-v3.c
>>> @@ -1450,6 +1450,9 @@ static int vgic_v3_domain_init(struct domain *d)
>>>      d->arch.vgic.nr_regions = rdist_count;
>>>      d->arch.vgic.rdist_regions = rdist_regions;
>>>
>>> +    spin_lock_init(&d->arch.vgic.its_devices_lock);
>>> +    d->arch.vgic.its_devices = RB_ROOT;
>>
>> Again, the placement of those 2 lines are likely wrong. This should
>> belong to the vITS and not the vgic-v3.
>
> Well, it's a "domain which has an ITS" thing, as this is not per ITS. So
> far we only have an per-ITS init function, as we don't iterate over the
> host ITSes there anymore.
> So I refrained from adding a separate function to initialize these
> simple and generic data structures. Please let me know if you insist on
> some ITS-per-domain init function.

Where is the limit to refrain from creating function then? What matter 
is logic...

Cheers,
Andre Przywara April 4, 2017, 9:57 a.m. UTC | #7
Hi,

On 03/04/17 21:41, Julien Grall wrote:
> 
> 
> On 04/03/2017 09:08 PM, Andre Przywara wrote:
>> Hi,
> 
> Hi Andre,
> 
>> On 22/03/17 17:29, Julien Grall wrote:
>>>> +int gicv3_its_map_guest_device(struct domain *d,
>>>> +                               paddr_t host_doorbell, uint32_t
>>>> host_devid,
>>>> +                               paddr_t guest_doorbell, uint32_t
>>>> guest_devid,
>>>> +                               uint32_t nr_events, bool valid)
>>>> +{
>>>> +    void *itt_addr = NULL;
>>>> +    struct host_its *hw_its;
>>>> +    struct its_devices *dev = NULL, *temp;
>>>> +    struct rb_node **new = &d->arch.vgic.its_devices.rb_node, *parent
>>>> = NULL;
>>>> +    int ret = -ENOENT;
>>>> +
>>>> +    hw_its = gicv3_its_find_by_doorbell(host_doorbell);
>>>> +    if ( !hw_its )
>>>> +        return ret;
>>>> +
>>>> +    /* check for already existing mappings */
>>>> +    spin_lock(&d->arch.vgic.its_devices_lock);
>>>> +    while ( *new )
>>>> +    {
>>>> +        temp = rb_entry(*new, struct its_devices, rbnode);
>>>> +
>>>> +        parent = *new;
>>>> +        if ( !compare_its_guest_devices(temp, guest_doorbell,
>>>> guest_devid) )
>>>> +        {
>>>> +            if ( !valid )
>>>> +                rb_erase(&temp->rbnode, &d->arch.vgic.its_devices);
>>>> +
>>>> +            spin_unlock(&d->arch.vgic.its_devices_lock);
>>>> +
>>>> +            if ( valid )
>>>
>>> Again, a printk(XENLOG_GUEST...) here would be useful to know which host
>>> DeviceID was associated to the guest DeviceID.
>>
>> I added a gprintk(XENLOG_DEBUG, ), which I think is more appropriate (as
>> it may spam the console when some stupid guest is running). Let me know
>> if you want to have a different loglevel.
> 
> I don't think this is more appropriate. gprintk will print the domain ID
> of the current domain, whilst this function will be called by the
> toolstack in the future.
> 
> Furthemore, if you look at the implementation of gprintk you will notice
> that it is basically turning into printk(XENLOG_GUEST...) and adding
> information of the current vCPU.
> 
> What matters for ratelimiting is XENLOG_GUEST.
> 
>>
>>>> +                return -EBUSY;
>>>> +
>>>> +            return remove_mapped_guest_device(temp);
>>>
>>> Just above you removed the device from the RB-tree but this function may
>>> fail and never free the memory. This means that memory will be leaked
>>> leading to a potential denial of service.
>>
>> So I fixed this case in v4, though there is still a tiny chance of a
>> memleak: if the MAPD(V=0) command fails. We can't free the ITT table
>> then, really, because it still belongs to the ITS. I don't think we can
>> do much about it, though.
> 
> This is a leak and even tiny is quite worrying. How do you plan to
> address this in the future? What is the best thing to do?

In an ideal world we would probably have a spillover queue for ITS
commands. Whenever a command can't be queued to the hardware ITS queue
immediately, the guest should be put to wait somehow and the commands
recorded, to be executed whenever the hardware command queue gets free
slots again.
However I think we can't really do this today, because we don't have a
good way of putting a guest to sleep when it trapped on an MMIO access.
When in the future device assignment / de-assignment is handled via a
hypercall, we can probably address this more easily.

As for the likeliness: I think this is extremely rare. With our large
command queue and the ITS running at multiple hundred MHz I don't think
we will ever run into this situation, really, especially not with just Dom0.

So for now are we OK with this comment noting the corner case? Maybe a TODO?

Cheers,
Andre.
diff mbox

Patch

diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index 5c11b0d..60b15b5 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -21,6 +21,8 @@ 
 #include <xen/lib.h>
 #include <xen/delay.h>
 #include <xen/mm.h>
+#include <xen/rbtree.h>
+#include <xen/sched.h>
 #include <xen/sizes.h>
 #include <asm/gic.h>
 #include <asm/gic_v3_defs.h>
@@ -32,6 +34,17 @@ 
 
 LIST_HEAD(host_its_list);
 
+struct its_devices {
+    struct rb_node rbnode;
+    struct host_its *hw_its;
+    void *itt_addr;
+    paddr_t guest_doorbell;
+    uint32_t host_devid;
+    uint32_t guest_devid;
+    uint32_t eventids;
+    uint32_t *host_lpis;
+};
+
 bool gicv3_its_host_has_its(void)
 {
     return !list_empty(&host_its_list);
@@ -149,6 +162,24 @@  static int its_send_cmd_mapc(struct host_its *its, uint32_t collection_id,
     return its_send_command(its, cmd);
 }
 
+static int its_send_cmd_mapd(struct host_its *its, uint32_t deviceid,
+                             uint8_t size_bits, paddr_t itt_addr, bool valid)
+{
+    uint64_t cmd[4];
+
+    if ( valid )
+    {
+        ASSERT(size_bits < 32);
+        ASSERT(!(itt_addr & ~GENMASK(51, 8)));
+    }
+    cmd[0] = GITS_CMD_MAPD | ((uint64_t)deviceid << 32);
+    cmd[1] = valid ? size_bits : 0x00;
+    cmd[2] = valid ? (itt_addr | GITS_VALID_BIT) : 0x00;
+    cmd[3] = 0x00;
+
+    return its_send_command(its, cmd);
+}
+
 /* Set up the (1:1) collection mapping for the given host CPU. */
 int gicv3_its_setup_collection(unsigned int cpu)
 {
@@ -379,6 +410,7 @@  static int gicv3_its_init_single_its(struct host_its *hw_its)
     devid_bits = min(devid_bits, max_its_device_bits);
     if ( reg & GITS_TYPER_PTA )
         hw_its->flags |= HOST_ITS_USES_PTA;
+    hw_its->itte_size = GITS_TYPER_ITT_SIZE(reg);
 
     for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
     {
@@ -428,6 +460,180 @@  int gicv3_its_init(void)
     return 0;
 }
 
+static int remove_mapped_guest_device(struct its_devices *dev)
+{
+    int ret;
+
+    if ( dev->hw_its )
+    {
+        int ret = its_send_cmd_mapd(dev->hw_its, dev->host_devid, 0, 0, false);
+        if ( ret )
+            return ret;
+    }
+
+    ret = gicv3_its_wait_commands(dev->hw_its);
+    if ( ret )
+        return ret;
+
+    xfree(dev->itt_addr);
+    xfree(dev);
+
+    return 0;
+}
+
+static struct host_its *gicv3_its_find_by_doorbell(paddr_t doorbell_address)
+{
+    struct host_its *hw_its;
+
+    list_for_each_entry(hw_its, &host_its_list, entry)
+    {
+        if ( hw_its->addr + ITS_DOORBELL_OFFSET == doorbell_address )
+            return hw_its;
+    }
+
+    return NULL;
+}
+
+static int compare_its_guest_devices(struct its_devices *dev,
+                                     paddr_t doorbell, uint32_t devid)
+{
+    if ( dev->guest_doorbell < doorbell )
+        return -1;
+
+    if ( dev->guest_doorbell > doorbell )
+        return 1;
+
+    if ( dev->guest_devid < devid )
+        return -1;
+
+    if ( dev->guest_devid > devid )
+        return 1;
+
+    return 0;
+}
+
+/*
+ * Map a hardware device, identified by a certain host ITS and its device ID
+ * to domain d, a guest ITS (identified by its doorbell address) and device ID.
+ * Also provide the number of events (MSIs) needed for that device.
+ * This does not check if this particular hardware device is already mapped
+ * at another domain, it is expected that this would be done by the caller.
+ */
+int gicv3_its_map_guest_device(struct domain *d,
+                               paddr_t host_doorbell, uint32_t host_devid,
+                               paddr_t guest_doorbell, uint32_t guest_devid,
+                               uint32_t nr_events, bool valid)
+{
+    void *itt_addr = NULL;
+    struct host_its *hw_its;
+    struct its_devices *dev = NULL, *temp;
+    struct rb_node **new = &d->arch.vgic.its_devices.rb_node, *parent = NULL;
+    int ret = -ENOENT;
+
+    hw_its = gicv3_its_find_by_doorbell(host_doorbell);
+    if ( !hw_its )
+        return ret;
+
+    /* check for already existing mappings */
+    spin_lock(&d->arch.vgic.its_devices_lock);
+    while ( *new )
+    {
+        temp = rb_entry(*new, struct its_devices, rbnode);
+
+        parent = *new;
+        if ( !compare_its_guest_devices(temp, guest_doorbell, guest_devid) )
+        {
+            if ( !valid )
+                rb_erase(&temp->rbnode, &d->arch.vgic.its_devices);
+
+            spin_unlock(&d->arch.vgic.its_devices_lock);
+
+            if ( valid )
+                return -EBUSY;
+
+            return remove_mapped_guest_device(temp);
+        }
+
+        if ( compare_its_guest_devices(temp, guest_doorbell, guest_devid) > 0 )
+            new = &((*new)->rb_left);
+        else
+            new = &((*new)->rb_right);
+    }
+
+    if ( !valid )
+        goto out_unlock;
+
+    ret = -ENOMEM;
+
+    /* An Interrupt Translation Table needs to be 256-byte aligned. */
+    itt_addr = _xzalloc(nr_events * hw_its->itte_size, 256);
+    if ( !itt_addr )
+        goto out_unlock;
+
+    dev = xzalloc(struct its_devices);
+    if ( !dev )
+        goto out_unlock;
+
+    ret = its_send_cmd_mapd(hw_its, host_devid, fls(nr_events - 1) - 1,
+                            virt_to_maddr(itt_addr), true);
+    if ( ret )
+        goto out_unlock;
+
+    dev->itt_addr = itt_addr;
+    dev->hw_its = hw_its;
+    dev->guest_doorbell = guest_doorbell;
+    dev->guest_devid = guest_devid;
+    dev->host_devid = host_devid;
+    dev->eventids = nr_events;
+
+    rb_link_node(&dev->rbnode, parent, new);
+    rb_insert_color(&dev->rbnode, &d->arch.vgic.its_devices);
+
+    spin_unlock(&d->arch.vgic.its_devices_lock);
+
+    return 0;
+
+out_unlock:
+    spin_unlock(&d->arch.vgic.its_devices_lock);
+    if ( dev )
+        xfree(dev->host_lpis);
+    xfree(itt_addr);
+    xfree(dev);
+    return ret;
+}
+
+/* Removing any connections a domain had to any ITS in the system. */
+void gicv3_its_unmap_all_devices(struct domain *d)
+{
+    struct rb_node *victim;
+    struct its_devices *dev;
+
+    /*
+     * This is an easily readable, yet inefficient implementation.
+     * It uses the provided iteration wrapper and erases each node, which
+     * possibly triggers rebalancing.
+     * This seems overkill since we are going to abolish the whole tree, but
+     * avoids an open-coded re-implementation of the traversal functions with
+     * some recursive function calls.
+     * Performance does not matter here, since we are destroying a domain.
+     */
+restart:
+    spin_lock(&d->arch.vgic.its_devices_lock);
+    if ( (victim = rb_first(&d->arch.vgic.its_devices)) )
+    {
+        dev = rb_entry(victim, struct its_devices, rbnode);
+        rb_erase(victim, &d->arch.vgic.its_devices);
+
+        spin_unlock(&d->arch.vgic.its_devices_lock);
+
+        remove_mapped_guest_device(dev);
+
+        goto restart;
+    }
+
+    spin_unlock(&d->arch.vgic.its_devices_lock);
+}
+
 /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
 void gicv3_its_dt_init(const struct dt_device_node *node)
 {
@@ -455,6 +661,7 @@  void gicv3_its_dt_init(const struct dt_device_node *node)
         its_data->addr = addr;
         its_data->size = size;
         its_data->dt_node = its;
+        spin_lock_init(&its_data->cmd_lock);
 
         printk("GICv3: Found ITS @0x%lx\n", addr);
 
diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
index d61479d..1fadb00 100644
--- a/xen/arch/arm/vgic-v3.c
+++ b/xen/arch/arm/vgic-v3.c
@@ -1450,6 +1450,9 @@  static int vgic_v3_domain_init(struct domain *d)
     d->arch.vgic.nr_regions = rdist_count;
     d->arch.vgic.rdist_regions = rdist_regions;
 
+    spin_lock_init(&d->arch.vgic.its_devices_lock);
+    d->arch.vgic.its_devices = RB_ROOT;
+
     /*
      * Domain 0 gets the hardware address.
      * Guests get the virtual platform layout.
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 2d6fbb1..00b9c1a 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -11,6 +11,7 @@ 
 #include <asm/gic.h>
 #include <public/hvm/params.h>
 #include <xen/serial.h>
+#include <xen/rbtree.h>
 
 struct hvm_domain
 {
@@ -109,6 +110,8 @@  struct arch_domain
         } *rdist_regions;
         int nr_regions;                     /* Number of rdist regions */
         uint32_t rdist_stride;              /* Re-Distributor stride */
+        struct rb_root its_devices;         /* devices mapped to an ITS */
+        spinlock_t its_devices_lock;        /* protects the its_devices tree */
 #endif
     } vgic;
 
diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
index 8b493fb..3421ea0 100644
--- a/xen/include/asm-arm/gic_v3_its.h
+++ b/xen/include/asm-arm/gic_v3_its.h
@@ -48,6 +48,10 @@ 
 #define GITS_TYPER_DEVICE_ID_BITS(r)    (((r & GITS_TYPER_DEVIDS_MASK) >> \
                                                GITS_TYPER_DEVIDS_SHIFT) + 1)
 #define GITS_TYPER_IDBITS_SHIFT         8
+#define GITS_TYPER_ITT_SIZE_SHIFT       4
+#define GITS_TYPER_ITT_SIZE_MASK        (0xfUL << GITS_TYPER_ITT_SIZE_SHIFT)
+#define GITS_TYPER_ITT_SIZE(r)          ((((r) & GITS_TYPER_ITT_SIZE_MASK) >> \
+                                                GITS_TYPER_ITT_SIZE_SHIFT) + 1)
 
 #define GITS_IIDR_VALUE                 0x34c
 
@@ -94,7 +98,10 @@ 
 #define GITS_CMD_MOVALL                 0x0e
 #define GITS_CMD_DISCARD                0x0f
 
+#define ITS_DOORBELL_OFFSET             0x10040
+
 #include <xen/device_tree.h>
+#include <xen/rbtree.h>
 
 #define HOST_ITS_FLUSH_CMD_QUEUE        (1U << 0)
 #define HOST_ITS_USES_PTA               (1U << 1)
@@ -108,6 +115,7 @@  struct host_its {
     void __iomem *its_base;
     spinlock_t cmd_lock;
     void *cmd_buf;
+    unsigned int itte_size;
     unsigned int flags;
 };
 
@@ -134,6 +142,16 @@  uint64_t gicv3_get_redist_address(unsigned int cpu, bool use_pta);
 /* Map a collection for this host CPU to each host ITS. */
 int gicv3_its_setup_collection(unsigned int cpu);
 
+/*
+ * Map a device on the host by allocating an ITT on the host (ITS).
+ * "nr_event" specifies how many events (interrupts) this device will need.
+ * Setting "valid" to false deallocates the device.
+ */
+int gicv3_its_map_guest_device(struct domain *d,
+                               paddr_t host_doorbell, uint32_t host_devid,
+                               paddr_t guest_doorbell, uint32_t guest_devid,
+                               uint32_t nr_events, bool valid);
+
 #else
 
 static LIST_HEAD(host_its_list);