diff mbox

[v8,18/27] ARM: vITS: handle MAPD command

Message ID 1491957874-31600-19-git-send-email-andre.przywara@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andre Przywara April 12, 2017, 12:44 a.m. UTC
The MAPD command maps a device by associating a memory region for
storing ITEs with a certain device ID. Since it features a valid bit,
MAPD also covers the "unmap" functionality, which we also cover here.
We store the given guest physical address in the device table, and, if
this command comes from Dom0, tell the host ITS driver about this new
mapping, so it can issue the corresponding host MAPD command and create
the required tables. We take care of rolling back actions should one
step fail.
Upon unmapping a device we make sure we clean up all associated
resources and release the memory again.
We use our existing guest memory access function to find the right ITT
entry and store the mapping there (in guest memory).

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 xen/arch/arm/gic-v3-its.c        |  60 +++++++++++++++++
 xen/arch/arm/gic-v3-lpi.c        |  18 +++++
 xen/arch/arm/vgic-v3-its.c       | 137 +++++++++++++++++++++++++++++++++++++++
 xen/include/asm-arm/gic_v3_its.h |   6 ++
 4 files changed, 221 insertions(+)

Comments

Julien Grall April 12, 2017, 3:21 p.m. UTC | #1
Hi Andre,

On 12/04/17 01:44, Andre Przywara wrote:
> The MAPD command maps a device by associating a memory region for
> storing ITEs with a certain device ID. Since it features a valid bit,
> MAPD also covers the "unmap" functionality, which we also cover here.
> We store the given guest physical address in the device table, and, if
> this command comes from Dom0, tell the host ITS driver about this new
> mapping, so it can issue the corresponding host MAPD command and create
> the required tables. We take care of rolling back actions should one
> step fail.
> Upon unmapping a device we make sure we clean up all associated
> resources and release the memory again.
> We use our existing guest memory access function to find the right ITT
> entry and store the mapping there (in guest memory).
>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  xen/arch/arm/gic-v3-its.c        |  60 +++++++++++++++++
>  xen/arch/arm/gic-v3-lpi.c        |  18 +++++
>  xen/arch/arm/vgic-v3-its.c       | 137 +++++++++++++++++++++++++++++++++++++++
>  xen/include/asm-arm/gic_v3_its.h |   6 ++
>  4 files changed, 221 insertions(+)
>
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index aebc257..900c9d1 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -800,6 +800,66 @@ out:
>      return ret;
>  }
>
> +/* Must be called with the its_device_lock held. */
> +static struct its_device *get_its_device(struct domain *d, paddr_t vdoorbell,
> +                                          uint32_t vdevid)
> +{
> +    struct rb_node *node = d->arch.vgic.its_devices.rb_node;
> +    struct its_device *dev;
> +
> +    ASSERT(spin_is_locked(&d->arch.vgic.its_devices_lock));
> +
> +    while (node)
> +    {
> +        int cmp;
> +
> +        dev = rb_entry(node, struct its_device, rbnode);
> +        cmp = compare_its_guest_devices(dev, vdoorbell, vdevid);
> +
> +        if ( !cmp )
> +            return dev;
> +
> +        if ( cmp > 0 )
> +            node = node->rb_left;
> +        else
> +            node = node->rb_right;
> +    }
> +
> +    return NULL;
> +}
> +
> +static uint32_t get_host_lpi(struct its_device *dev, uint32_t eventid)
> +{
> +    uint32_t host_lpi = INVALID_LPI;
> +
> +    if ( dev && (eventid < dev->eventids) )
> +        host_lpi = dev->host_lpi_blocks[eventid / LPI_BLOCK] +
> +                                       (eventid % LPI_BLOCK);
> +
> +    return host_lpi;
> +}
> +
> +int gicv3_remove_guest_event(struct domain *d, paddr_t vdoorbell_address,
> +                             uint32_t vdevid, uint32_t veventid)
> +{
> +    struct its_device *dev;
> +    uint32_t host_lpi = INVALID_LPI;
> +
> +    spin_lock(&d->arch.vgic.its_devices_lock);
> +    dev = get_its_device(d, vdoorbell_address, vdevid);
> +    if ( dev && veventid <= dev->eventids )
> +        host_lpi = get_host_lpi(dev, veventid);
> +    spin_unlock(&d->arch.vgic.its_devices_lock);
> +
> +    if ( host_lpi == INVALID_LPI )
> +        return -EINVAL;
> +
> +    gicv3_lpi_update_host_entry(host_lpi, d->domain_id,
> +                                INVALID_VCPU_ID, INVALID_LPI);
> +
> +    return 0;
> +}
> +
>  /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
>  void gicv3_its_dt_init(const struct dt_device_node *node)
>  {
> diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
> index 44f6315..d427539 100644
> --- a/xen/arch/arm/gic-v3-lpi.c
> +++ b/xen/arch/arm/gic-v3-lpi.c
> @@ -207,6 +207,24 @@ out:
>      irq_exit();
>  }
>
> +void gicv3_lpi_update_host_entry(uint32_t host_lpi, int domain_id,
> +                                 unsigned int vcpu_id, uint32_t virt_lpi)
> +{
> +    union host_lpi *hlpip, hlpi;
> +
> +    ASSERT(host_lpi >= LPI_OFFSET);
> +
> +    host_lpi -= LPI_OFFSET;
> +
> +    hlpip = &lpi_data.host_lpis[host_lpi / HOST_LPIS_PER_PAGE][host_lpi % HOST_LPIS_PER_PAGE];
> +
> +    hlpi.virt_lpi = virt_lpi;
> +    hlpi.dom_id = domain_id;
> +    hlpi.vcpu_id = vcpu_id;
> +
> +    write_u64_atomic(&hlpip->data, hlpi.data);
> +}
> +
>  static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
>  {
>      uint64_t val;
> diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
> index 6e505cb..104017e 100644
> --- a/xen/arch/arm/vgic-v3-its.c
> +++ b/xen/arch/arm/vgic-v3-its.c
> @@ -52,6 +52,7 @@
>   */
>  struct virt_its {
>      struct domain *d;
> +    paddr_t doorbell_address;
>      unsigned int devid_bits;
>      unsigned int evid_bits;
>      spinlock_t vcmd_lock;       /* Protects the virtual command buffer, which */
> @@ -166,6 +167,20 @@ static struct vcpu *get_vcpu_from_collection(struct virt_its *its,
>  #define DEV_TABLE_ENTRY(addr, bits)                     \
>          (((addr) & GENMASK(51, 8)) | (((bits) - 1) & GENMASK(4, 0)))
>
> +/* Set the address of an ITT for a given device ID. */
> +static int its_set_itt_address(struct virt_its *its, uint32_t devid,
> +                               paddr_t itt_address, uint32_t nr_bits)
> +{
> +    paddr_t addr = get_baser_phys_addr(its->baser_dev);
> +    uint64_t itt_entry = DEV_TABLE_ENTRY(itt_address, nr_bits);
> +
> +    if ( devid >= its->max_devices )
> +        return -ENOENT;
> +
> +    return vgic_access_guest_memory(its->d, addr + devid * sizeof(uint64_t),
> +                                    &itt_entry, sizeof(itt_entry), true);
> +}
> +
>  /*
>   * Lookup the address of the Interrupt Translation Table associated with
>   * that device ID.
> @@ -398,6 +413,125 @@ static int its_handle_mapc(struct virt_its *its, uint64_t *cmdptr)
>      return 0;
>  }
>
> +/* Must be called with the ITS lock held. */
> +static int its_discard_event(struct virt_its *its,
> +                             uint32_t vdevid, uint32_t vevid)
> +{
> +    struct pending_irq *p;
> +    unsigned long flags;
> +    struct vcpu *vcpu;
> +    uint32_t vlpi;
> +
> +    ASSERT(spin_is_locked(&its->its_lock));
> +
> +    if ( !read_itte_locked(its, vdevid, vevid, &vcpu, &vlpi) )
> +        return -ENOENT;
> +
> +    if ( vlpi == INVALID_LPI )
> +        return -ENOENT;
> +
> +    /* Lock this VCPU's VGIC to make sure nobody is using the pending_irq. */
> +    spin_lock_irqsave(&vcpu->arch.vgic.lock, flags);

There is an interesting issue happening with this code. You don't check 
the content of the memory provided by the guest. So a malicious guest 
could craft the memory in order to setup mapping with known vlpi and a 
different vCPU.

This would lead to use the wrong lock here and corrupt the list.

> +
> +    /* Remove the pending_irq from the tree. */
> +    write_lock(&its->d->arch.vgic.pend_lpi_tree_lock);
> +    p = radix_tree_delete(&its->d->arch.vgic.pend_lpi_tree, vlpi);
> +    write_unlock(&its->d->arch.vgic.pend_lpi_tree_lock);
> +
> +    if ( !p )
> +    {
> +        spin_unlock_irqrestore(&vcpu->arch.vgic.lock, flags);
> +
> +        return -ENOENT;
> +    }
> +
> +    /* Cleanup the pending_irq, to be reusable later. */
> +    list_del_init(&p->inflight);
> +    list_del_init(&p->lr_queue);
> +    p->status = 0;
> +    p->lr = GIC_INVALID_LR;
> +    p->irq = INVALID_LPI;

Can't we have a generic helper to do that?

> +
> +    spin_unlock_irqrestore(&vcpu->arch.vgic.lock, flags);
> +
> +    /* Remove the corresponding host LPI entry */
> +    return gicv3_remove_guest_event(its->d, its->doorbell_address,
> +                                    vdevid, vevid);
> +}
> +
> +static int its_unmap_device(struct virt_its *its, uint32_t devid)
> +{
> +    uint64_t itt, evid;
> +    int ret;
> +
> +    spin_lock(&its->its_lock);
> +
> +    ret = its_get_itt(its, devid, &itt);
> +    if ( ret )
> +    {
> +        spin_unlock(&its->its_lock);
> +        return ret;
> +    }
> +
> +    for ( evid = 0; evid < DEV_TABLE_ITT_SIZE(itt); evid++ )
> +        /* Don't care about errors here, clean up as much as possible. */
> +        its_discard_event(its, devid, evid);
> +
> +    spin_unlock(&its->its_lock);

This code can be long to execute as the number of event can be huge. How 
do you plan to handle that?

*hint* this likely means a comment + ASSERT + TODO in the cover letter 
*hint*

> +
> +    return 0;
> +}
> +
> +static int its_handle_mapd(struct virt_its *its, uint64_t *cmdptr)
> +{
> +    /* size and devid get validated by the functions called below. */
> +    uint32_t devid = its_cmd_get_deviceid(cmdptr);
> +    unsigned int size = its_cmd_get_size(cmdptr) + 1;
> +    bool valid = its_cmd_get_validbit(cmdptr);
> +    paddr_t itt_addr = its_cmd_get_ittaddr(cmdptr);
> +    int ret;
> +
> +    /* Sanitize the number of events. */
> +    if ( valid && (size > its->evid_bits) )
> +        return -1;
> +
> +    /*
> +     * There is no easy and clean way for Xen to know the ITS device ID of a
> +     * particular (PCI) device, so we have to rely on the guest telling
> +     * us about it. For *now* we are just using the device ID *Dom0* uses,
> +     * because the driver there has the actual knowledge.
> +     * Eventually this will be replaced with a dedicated hypercall to
> +     * announce pass-through of devices.
> +     */
> +    if ( is_hardware_domain(its->d) )
> +    {
> +        if ( !valid )
> +            /* Discard all events and remove pending LPIs. */
> +            its_unmap_device(its, devid);

Likely, this call will have to be done for all the guests.

> +
> +        /*
> +         * Dom0's ITSes are mapped 1:1, so both addresses are the same.
> +         * Also the device IDs are equal.
> +         */
> +        ret = gicv3_its_map_guest_device(its->d, its->doorbell_address, devid,
> +                                         its->doorbell_address, devid,
> +                                         BIT(size), valid);
> +        if ( ret && valid )
> +            return ret;
> +    }
> +
> +    spin_lock(&its->its_lock);
> +
> +    if ( valid )
> +        ret = its_set_itt_address(its, devid, itt_addr, size);
> +    else
> +        ret = its_set_itt_address(its, devid, INVALID_PADDR, 1);
> +
> +    spin_unlock(&its->its_lock);
> +
> +    return ret;
> +}
> +
>  #define ITS_CMD_BUFFER_SIZE(baser)      ((((baser) & 0xff) + 1) << 12)
>  #define ITS_CMD_OFFSET(reg)             ((reg) & GENMASK(19, 5))
>
> @@ -436,6 +570,9 @@ static int vgic_its_handle_cmds(struct domain *d, struct virt_its *its)
>          case GITS_CMD_MAPC:
>              ret = its_handle_mapc(its, command);
>              break;
> +        case GITS_CMD_MAPD:
> +            ret = its_handle_mapd(its, command);
> +            break;
>          case GITS_CMD_SYNC:
>              /* We handle ITS commands synchronously, so we ignore SYNC. */
>              break;
> diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
> index 40f4ef5..60ffdb6 100644
> --- a/xen/include/asm-arm/gic_v3_its.h
> +++ b/xen/include/asm-arm/gic_v3_its.h
> @@ -169,6 +169,12 @@ int gicv3_its_map_guest_device(struct domain *d,
>  int gicv3_allocate_host_lpi_block(struct domain *d, uint32_t *first_lpi);
>  void gicv3_free_host_lpi_block(uint32_t first_lpi);
>
> +int gicv3_remove_guest_event(struct domain *d, paddr_t vdoorbell_address,
> +                                     uint32_t vdevid, uint32_t veventid);
> +
> +void gicv3_lpi_update_host_entry(uint32_t host_lpi, int domain_id,
> +                                 unsigned int vcpu_id, uint32_t virt_lpi);
> +
>  #else
>
>  static inline void gicv3_its_dt_init(const struct dt_device_node *node)
>

Cheers,
Andre Przywara April 12, 2017, 5:03 p.m. UTC | #2
Hi,

On 12/04/17 16:21, Julien Grall wrote:
> Hi Andre,
> 
> On 12/04/17 01:44, Andre Przywara wrote:
>> The MAPD command maps a device by associating a memory region for
>> storing ITEs with a certain device ID. Since it features a valid bit,
>> MAPD also covers the "unmap" functionality, which we also cover here.
>> We store the given guest physical address in the device table, and, if
>> this command comes from Dom0, tell the host ITS driver about this new
>> mapping, so it can issue the corresponding host MAPD command and create
>> the required tables. We take care of rolling back actions should one
>> step fail.
>> Upon unmapping a device we make sure we clean up all associated
>> resources and release the memory again.
>> We use our existing guest memory access function to find the right ITT
>> entry and store the mapping there (in guest memory).
>>
>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>> ---
>>  xen/arch/arm/gic-v3-its.c        |  60 +++++++++++++++++
>>  xen/arch/arm/gic-v3-lpi.c        |  18 +++++
>>  xen/arch/arm/vgic-v3-its.c       | 137
>> +++++++++++++++++++++++++++++++++++++++
>>  xen/include/asm-arm/gic_v3_its.h |   6 ++
>>  4 files changed, 221 insertions(+)
>>
>> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
>> index aebc257..900c9d1 100644
>> --- a/xen/arch/arm/gic-v3-its.c
>> +++ b/xen/arch/arm/gic-v3-its.c
>> @@ -800,6 +800,66 @@ out:
>>      return ret;
>>  }
>>
>> +/* Must be called with the its_device_lock held. */
>> +static struct its_device *get_its_device(struct domain *d, paddr_t
>> vdoorbell,
>> +                                          uint32_t vdevid)
>> +{
>> +    struct rb_node *node = d->arch.vgic.its_devices.rb_node;
>> +    struct its_device *dev;
>> +
>> +    ASSERT(spin_is_locked(&d->arch.vgic.its_devices_lock));
>> +
>> +    while (node)
>> +    {
>> +        int cmp;
>> +
>> +        dev = rb_entry(node, struct its_device, rbnode);
>> +        cmp = compare_its_guest_devices(dev, vdoorbell, vdevid);
>> +
>> +        if ( !cmp )
>> +            return dev;
>> +
>> +        if ( cmp > 0 )
>> +            node = node->rb_left;
>> +        else
>> +            node = node->rb_right;
>> +    }
>> +
>> +    return NULL;
>> +}
>> +
>> +static uint32_t get_host_lpi(struct its_device *dev, uint32_t eventid)
>> +{
>> +    uint32_t host_lpi = INVALID_LPI;
>> +
>> +    if ( dev && (eventid < dev->eventids) )
>> +        host_lpi = dev->host_lpi_blocks[eventid / LPI_BLOCK] +
>> +                                       (eventid % LPI_BLOCK);
>> +
>> +    return host_lpi;
>> +}
>> +
>> +int gicv3_remove_guest_event(struct domain *d, paddr_t
>> vdoorbell_address,
>> +                             uint32_t vdevid, uint32_t veventid)
>> +{
>> +    struct its_device *dev;
>> +    uint32_t host_lpi = INVALID_LPI;
>> +
>> +    spin_lock(&d->arch.vgic.its_devices_lock);
>> +    dev = get_its_device(d, vdoorbell_address, vdevid);
>> +    if ( dev && veventid <= dev->eventids )
>> +        host_lpi = get_host_lpi(dev, veventid);
>> +    spin_unlock(&d->arch.vgic.its_devices_lock);
>> +
>> +    if ( host_lpi == INVALID_LPI )
>> +        return -EINVAL;
>> +
>> +    gicv3_lpi_update_host_entry(host_lpi, d->domain_id,
>> +                                INVALID_VCPU_ID, INVALID_LPI);
>> +
>> +    return 0;
>> +}
>> +
>>  /* Scan the DT for any ITS nodes and create a list of host ITSes out
>> of it. */
>>  void gicv3_its_dt_init(const struct dt_device_node *node)
>>  {
>> diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
>> index 44f6315..d427539 100644
>> --- a/xen/arch/arm/gic-v3-lpi.c
>> +++ b/xen/arch/arm/gic-v3-lpi.c
>> @@ -207,6 +207,24 @@ out:
>>      irq_exit();
>>  }
>>
>> +void gicv3_lpi_update_host_entry(uint32_t host_lpi, int domain_id,
>> +                                 unsigned int vcpu_id, uint32_t
>> virt_lpi)
>> +{
>> +    union host_lpi *hlpip, hlpi;
>> +
>> +    ASSERT(host_lpi >= LPI_OFFSET);
>> +
>> +    host_lpi -= LPI_OFFSET;
>> +
>> +    hlpip = &lpi_data.host_lpis[host_lpi /
>> HOST_LPIS_PER_PAGE][host_lpi % HOST_LPIS_PER_PAGE];
>> +
>> +    hlpi.virt_lpi = virt_lpi;
>> +    hlpi.dom_id = domain_id;
>> +    hlpi.vcpu_id = vcpu_id;
>> +
>> +    write_u64_atomic(&hlpip->data, hlpi.data);
>> +}
>> +
>>  static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
>>  {
>>      uint64_t val;
>> diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
>> index 6e505cb..104017e 100644
>> --- a/xen/arch/arm/vgic-v3-its.c
>> +++ b/xen/arch/arm/vgic-v3-its.c
>> @@ -52,6 +52,7 @@
>>   */
>>  struct virt_its {
>>      struct domain *d;
>> +    paddr_t doorbell_address;
>>      unsigned int devid_bits;
>>      unsigned int evid_bits;
>>      spinlock_t vcmd_lock;       /* Protects the virtual command
>> buffer, which */
>> @@ -166,6 +167,20 @@ static struct vcpu
>> *get_vcpu_from_collection(struct virt_its *its,
>>  #define DEV_TABLE_ENTRY(addr, bits)                     \
>>          (((addr) & GENMASK(51, 8)) | (((bits) - 1) & GENMASK(4, 0)))
>>
>> +/* Set the address of an ITT for a given device ID. */
>> +static int its_set_itt_address(struct virt_its *its, uint32_t devid,
>> +                               paddr_t itt_address, uint32_t nr_bits)
>> +{
>> +    paddr_t addr = get_baser_phys_addr(its->baser_dev);
>> +    uint64_t itt_entry = DEV_TABLE_ENTRY(itt_address, nr_bits);
>> +
>> +    if ( devid >= its->max_devices )
>> +        return -ENOENT;
>> +
>> +    return vgic_access_guest_memory(its->d, addr + devid *
>> sizeof(uint64_t),
>> +                                    &itt_entry, sizeof(itt_entry),
>> true);
>> +}
>> +
>>  /*
>>   * Lookup the address of the Interrupt Translation Table associated with
>>   * that device ID.
>> @@ -398,6 +413,125 @@ static int its_handle_mapc(struct virt_its *its,
>> uint64_t *cmdptr)
>>      return 0;
>>  }
>>
>> +/* Must be called with the ITS lock held. */
>> +static int its_discard_event(struct virt_its *its,
>> +                             uint32_t vdevid, uint32_t vevid)
>> +{
>> +    struct pending_irq *p;
>> +    unsigned long flags;
>> +    struct vcpu *vcpu;
>> +    uint32_t vlpi;
>> +
>> +    ASSERT(spin_is_locked(&its->its_lock));
>> +
>> +    if ( !read_itte_locked(its, vdevid, vevid, &vcpu, &vlpi) )
>> +        return -ENOENT;
>> +
>> +    if ( vlpi == INVALID_LPI )
>> +        return -ENOENT;
>> +
>> +    /* Lock this VCPU's VGIC to make sure nobody is using the
>> pending_irq. */
>> +    spin_lock_irqsave(&vcpu->arch.vgic.lock, flags);
> 
> There is an interesting issue happening with this code. You don't check
> the content of the memory provided by the guest. So a malicious guest
> could craft the memory in order to setup mapping with known vlpi and a
> different vCPU.
> 
> This would lead to use the wrong lock here and corrupt the list.

But this is true for basically most of the ITS table lookups. I added an
item to the cover letter to note this.

>> +
>> +    /* Remove the pending_irq from the tree. */
>> +    write_lock(&its->d->arch.vgic.pend_lpi_tree_lock);
>> +    p = radix_tree_delete(&its->d->arch.vgic.pend_lpi_tree, vlpi);
>> +    write_unlock(&its->d->arch.vgic.pend_lpi_tree_lock);
>> +
>> +    if ( !p )
>> +    {
>> +        spin_unlock_irqrestore(&vcpu->arch.vgic.lock, flags);
>> +
>> +        return -ENOENT;
>> +    }
>> +
>> +    /* Cleanup the pending_irq, to be reusable later. */
>> +    list_del_init(&p->inflight);
>> +    list_del_init(&p->lr_queue);
>> +    p->status = 0;
>> +    p->lr = GIC_INVALID_LR;
>> +    p->irq = INVALID_LPI;
> 
> Can't we have a generic helper to do that?

Added vgic_clear_pending_irq() next to vgic_init_pending_irq() in
vgic.c, if that is what you meant...

>> +
>> +    spin_unlock_irqrestore(&vcpu->arch.vgic.lock, flags);
>> +
>> +    /* Remove the corresponding host LPI entry */
>> +    return gicv3_remove_guest_event(its->d, its->doorbell_address,
>> +                                    vdevid, vevid);
>> +}
>> +
>> +static int its_unmap_device(struct virt_its *its, uint32_t devid)
>> +{
>> +    uint64_t itt, evid;
>> +    int ret;
>> +
>> +    spin_lock(&its->its_lock);
>> +
>> +    ret = its_get_itt(its, devid, &itt);
>> +    if ( ret )
>> +    {
>> +        spin_unlock(&its->its_lock);
>> +        return ret;
>> +    }
>> +
>> +    for ( evid = 0; evid < DEV_TABLE_ITT_SIZE(itt); evid++ )
>> +        /* Don't care about errors here, clean up as much as
>> possible. */
>> +        its_discard_event(its, devid, evid);
>> +
>> +    spin_unlock(&its->its_lock);
> 
> This code can be long to execute as the number of event can be huge. How
> do you plan to handle that?

It is assumed that DomUs get only a *very* limited and controlled number
of ITS resources, so the number of LPIs, number of devices and their
number of events will be very small, probably just enough as really
needed (event ID bits being 4 or so, for instance).

> *hint* this likely means a comment + ASSERT + TODO in the cover letter
> *hint*

Added.

>> +
>> +    return 0;
>> +}
>> +
>> +static int its_handle_mapd(struct virt_its *its, uint64_t *cmdptr)
>> +{
>> +    /* size and devid get validated by the functions called below. */
>> +    uint32_t devid = its_cmd_get_deviceid(cmdptr);
>> +    unsigned int size = its_cmd_get_size(cmdptr) + 1;
>> +    bool valid = its_cmd_get_validbit(cmdptr);
>> +    paddr_t itt_addr = its_cmd_get_ittaddr(cmdptr);
>> +    int ret;
>> +
>> +    /* Sanitize the number of events. */
>> +    if ( valid && (size > its->evid_bits) )
>> +        return -1;
>> +
>> +    /*
>> +     * There is no easy and clean way for Xen to know the ITS device
>> ID of a
>> +     * particular (PCI) device, so we have to rely on the guest telling
>> +     * us about it. For *now* we are just using the device ID *Dom0*
>> uses,
>> +     * because the driver there has the actual knowledge.
>> +     * Eventually this will be replaced with a dedicated hypercall to
>> +     * announce pass-through of devices.
>> +     */
>> +    if ( is_hardware_domain(its->d) )
>> +    {
>> +        if ( !valid )
>> +            /* Discard all events and remove pending LPIs. */
>> +            its_unmap_device(its, devid);
> 
> Likely, this call will have to be done for all the guests.

True.

>> +
>> +        /*
>> +         * Dom0's ITSes are mapped 1:1, so both addresses are the same.
>> +         * Also the device IDs are equal.
>> +         */
>> +        ret = gicv3_its_map_guest_device(its->d,
>> its->doorbell_address, devid,
>> +                                         its->doorbell_address, devid,
>> +                                         BIT(size), valid);
>> +        if ( ret && valid )
>> +            return ret;
>> +    }
>> +
>> +    spin_lock(&its->its_lock);
>> +
>> +    if ( valid )
>> +        ret = its_set_itt_address(its, devid, itt_addr, size);
>> +    else
>> +        ret = its_set_itt_address(its, devid, INVALID_PADDR, 1);
>> +
>> +    spin_unlock(&its->its_lock);
>> +
>> +    return ret;
>> +}
>> +
>>  #define ITS_CMD_BUFFER_SIZE(baser)      ((((baser) & 0xff) + 1) << 12)
>>  #define ITS_CMD_OFFSET(reg)             ((reg) & GENMASK(19, 5))
>>
>> @@ -436,6 +570,9 @@ static int vgic_its_handle_cmds(struct domain *d,
>> struct virt_its *its)
>>          case GITS_CMD_MAPC:
>>              ret = its_handle_mapc(its, command);
>>              break;
>> +        case GITS_CMD_MAPD:
>> +            ret = its_handle_mapd(its, command);
>> +            break;
>>          case GITS_CMD_SYNC:
>>              /* We handle ITS commands synchronously, so we ignore
>> SYNC. */
>>              break;
>> diff --git a/xen/include/asm-arm/gic_v3_its.h
>> b/xen/include/asm-arm/gic_v3_its.h
>> index 40f4ef5..60ffdb6 100644
>> --- a/xen/include/asm-arm/gic_v3_its.h
>> +++ b/xen/include/asm-arm/gic_v3_its.h
>> @@ -169,6 +169,12 @@ int gicv3_its_map_guest_device(struct domain *d,
>>  int gicv3_allocate_host_lpi_block(struct domain *d, uint32_t
>> *first_lpi);
>>  void gicv3_free_host_lpi_block(uint32_t first_lpi);
>>
>> +int gicv3_remove_guest_event(struct domain *d, paddr_t
>> vdoorbell_address,
>> +                                     uint32_t vdevid, uint32_t
>> veventid);
>> +
>> +void gicv3_lpi_update_host_entry(uint32_t host_lpi, int domain_id,
>> +                                 unsigned int vcpu_id, uint32_t
>> virt_lpi);
>> +
>>  #else
>>
>>  static inline void gicv3_its_dt_init(const struct dt_device_node *node)
>>
> 
> Cheers,
>
Julien Grall April 12, 2017, 5:05 p.m. UTC | #3
Hi Andre,

On 12/04/17 18:03, Andre Przywara wrote:
> On 12/04/17 16:21, Julien Grall wrote:
>> On 12/04/17 01:44, Andre Przywara wrote:
>>> +
>>> +    spin_unlock_irqrestore(&vcpu->arch.vgic.lock, flags);
>>> +
>>> +    /* Remove the corresponding host LPI entry */
>>> +    return gicv3_remove_guest_event(its->d, its->doorbell_address,
>>> +                                    vdevid, vevid);
>>> +}
>>> +
>>> +static int its_unmap_device(struct virt_its *its, uint32_t devid)
>>> +{
>>> +    uint64_t itt, evid;
>>> +    int ret;
>>> +
>>> +    spin_lock(&its->its_lock);
>>> +
>>> +    ret = its_get_itt(its, devid, &itt);
>>> +    if ( ret )
>>> +    {
>>> +        spin_unlock(&its->its_lock);
>>> +        return ret;
>>> +    }
>>> +
>>> +    for ( evid = 0; evid < DEV_TABLE_ITT_SIZE(itt); evid++ )
>>> +        /* Don't care about errors here, clean up as much as
>>> possible. */
>>> +        its_discard_event(its, devid, evid);
>>> +
>>> +    spin_unlock(&its->its_lock);
>>
>> This code can be long to execute as the number of event can be huge. How
>> do you plan to handle that?
>
> It is assumed that DomUs get only a *very* limited and controlled number
> of ITS resources, so the number of LPIs, number of devices and their
> number of events will be very small, probably just enough as really
> needed (event ID bits being 4 or so, for instance).

I really doubt this. I would not be surprised to see PCI device 
passthrough with hundreds of event. And then we will have a beloved XSA 
to handle...

>
>> *hint* this likely means a comment + ASSERT + TODO in the cover letter
>> *hint*
>
> Added.
>

Cheers,
Andrew Cooper April 12, 2017, 5:24 p.m. UTC | #4
On 12/04/17 18:05, Julien Grall wrote:
> I really doubt this. I would not be surprised to see PCI device
> passthrough with hundreds of event. And then we will have a beloved
> XSA to handle...

You and I clearly have a different idea of what beloved means :)

~Andrew (with a security team hat on)
Wei Liu April 12, 2017, 6:18 p.m. UTC | #5
On Wed, Apr 12, 2017 at 06:24:58PM +0100, Andrew Cooper wrote:
> On 12/04/17 18:05, Julien Grall wrote:
> > I really doubt this. I would not be surprised to see PCI device
> > passthrough with hundreds of event. And then we will have a beloved
> > XSA to handle...
> 
> You and I clearly have a different idea of what beloved means :)
> 

El Reg and co. love them, no? ;-)

/me ducks

> ~Andrew (with a security team hat on)
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> https://lists.xen.org/xen-devel
Andre Przywara May 10, 2017, 10:42 a.m. UTC | #6
Hi,

On 12/04/17 18:05, Julien Grall wrote:
> Hi Andre,
> 
> On 12/04/17 18:03, Andre Przywara wrote:
>> On 12/04/17 16:21, Julien Grall wrote:
>>> On 12/04/17 01:44, Andre Przywara wrote:
>>>> +
>>>> +    spin_unlock_irqrestore(&vcpu->arch.vgic.lock, flags);
>>>> +
>>>> +    /* Remove the corresponding host LPI entry */
>>>> +    return gicv3_remove_guest_event(its->d, its->doorbell_address,
>>>> +                                    vdevid, vevid);
>>>> +}
>>>> +
>>>> +static int its_unmap_device(struct virt_its *its, uint32_t devid)
>>>> +{
>>>> +    uint64_t itt, evid;
>>>> +    int ret;
>>>> +
>>>> +    spin_lock(&its->its_lock);
>>>> +
>>>> +    ret = its_get_itt(its, devid, &itt);
>>>> +    if ( ret )
>>>> +    {
>>>> +        spin_unlock(&its->its_lock);
>>>> +        return ret;
>>>> +    }
>>>> +
>>>> +    for ( evid = 0; evid < DEV_TABLE_ITT_SIZE(itt); evid++ )
>>>> +        /* Don't care about errors here, clean up as much as
>>>> possible. */
>>>> +        its_discard_event(its, devid, evid);
>>>> +
>>>> +    spin_unlock(&its->its_lock);
>>>
>>> This code can be long to execute as the number of event can be huge. How
>>> do you plan to handle that?
>>
>> It is assumed that DomUs get only a *very* limited and controlled number
>> of ITS resources, so the number of LPIs, number of devices and their
>> number of events will be very small, probably just enough as really
>> needed (event ID bits being 4 or so, for instance).
> 
> I really doubt this. I would not be surprised to see PCI device
> passthrough with hundreds of event.

But that's not *huge*. Chasing down the calls from here I don't see
anything which takes a long time to execute. So the execution time
becomes only an issue if we reach into the tens of thousands of events,
but I'd rely on the responsibility of a sysadmin when passing through
devices with that large number of MSI. I'd hope that PCI passthrough
would help here with providing some means of policy limits if users care
about this.

Cheers,
Andre.

> And then we will have a beloved XSA
> to handle...
> 
>>
>>> *hint* this likely means a comment + ASSERT + TODO in the cover letter
>>> *hint*
>>
>> Added.
>>
> 
> Cheers,
>
Julien Grall May 10, 2017, 11:30 a.m. UTC | #7
On 05/10/2017 11:42 AM, Andre Przywara wrote:
> Hi,

Hi Andre,

> On 12/04/17 18:05, Julien Grall wrote:
>> Hi Andre,
>>
>> On 12/04/17 18:03, Andre Przywara wrote:
>>> On 12/04/17 16:21, Julien Grall wrote:
>>>> On 12/04/17 01:44, Andre Przywara wrote:
>>>>> +
>>>>> +    spin_unlock_irqrestore(&vcpu->arch.vgic.lock, flags);
>>>>> +
>>>>> +    /* Remove the corresponding host LPI entry */
>>>>> +    return gicv3_remove_guest_event(its->d, its->doorbell_address,
>>>>> +                                    vdevid, vevid);
>>>>> +}
>>>>> +
>>>>> +static int its_unmap_device(struct virt_its *its, uint32_t devid)
>>>>> +{
>>>>> +    uint64_t itt, evid;
>>>>> +    int ret;
>>>>> +
>>>>> +    spin_lock(&its->its_lock);
>>>>> +
>>>>> +    ret = its_get_itt(its, devid, &itt);
>>>>> +    if ( ret )
>>>>> +    {
>>>>> +        spin_unlock(&its->its_lock);
>>>>> +        return ret;
>>>>> +    }
>>>>> +
>>>>> +    for ( evid = 0; evid < DEV_TABLE_ITT_SIZE(itt); evid++ )
>>>>> +        /* Don't care about errors here, clean up as much as
>>>>> possible. */
>>>>> +        its_discard_event(its, devid, evid);
>>>>> +
>>>>> +    spin_unlock(&its->its_lock);
>>>>
>>>> This code can be long to execute as the number of event can be huge. How
>>>> do you plan to handle that?
>>>
>>> It is assumed that DomUs get only a *very* limited and controlled number
>>> of ITS resources, so the number of LPIs, number of devices and their
>>> number of events will be very small, probably just enough as really
>>> needed (event ID bits being 4 or so, for instance).
>>
>> I really doubt this. I would not be surprised to see PCI device
>> passthrough with hundreds of event.
>
> But that's not *huge*. Chasing down the calls from here I don't see
> anything which takes a long time to execute. So the execution time
> becomes only an issue if we reach into the tens of thousands of events,
> but I'd rely on the responsibility of a sysadmin when passing through
> devices with that large number of MSI. I'd hope that PCI passthrough
> would help here with providing some means of policy limits if users care
> about this.

I am getting annoyed to repeat that again and again. Xen has to deal 
with *any* number of MSIs as long as the spec says it is possible and 
not expose a security issue. The problem can appear easily because of 
command time may be multiplied * 32K if the guest decides to queue 32K 
command.

How a sysadmin, which very likely does not know the internal Xen, would 
know that he should not passing through devices with that large number 
of MSI? Surely he will not be able to guess that the ITS code is not 
able to cope with any number without any documentation.

I really don't want to have to deal with XSAs because you knowingly hide 
a potential issue thinking someone else will fix for you.

Cheers,
diff mbox

Patch

diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index aebc257..900c9d1 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -800,6 +800,66 @@  out:
     return ret;
 }
 
+/* Must be called with the its_device_lock held. */
+static struct its_device *get_its_device(struct domain *d, paddr_t vdoorbell,
+                                          uint32_t vdevid)
+{
+    struct rb_node *node = d->arch.vgic.its_devices.rb_node;
+    struct its_device *dev;
+
+    ASSERT(spin_is_locked(&d->arch.vgic.its_devices_lock));
+
+    while (node)
+    {
+        int cmp;
+
+        dev = rb_entry(node, struct its_device, rbnode);
+        cmp = compare_its_guest_devices(dev, vdoorbell, vdevid);
+
+        if ( !cmp )
+            return dev;
+
+        if ( cmp > 0 )
+            node = node->rb_left;
+        else
+            node = node->rb_right;
+    }
+
+    return NULL;
+}
+
+static uint32_t get_host_lpi(struct its_device *dev, uint32_t eventid)
+{
+    uint32_t host_lpi = INVALID_LPI;
+
+    if ( dev && (eventid < dev->eventids) )
+        host_lpi = dev->host_lpi_blocks[eventid / LPI_BLOCK] +
+                                       (eventid % LPI_BLOCK);
+
+    return host_lpi;
+}
+
+int gicv3_remove_guest_event(struct domain *d, paddr_t vdoorbell_address,
+                             uint32_t vdevid, uint32_t veventid)
+{
+    struct its_device *dev;
+    uint32_t host_lpi = INVALID_LPI;
+
+    spin_lock(&d->arch.vgic.its_devices_lock);
+    dev = get_its_device(d, vdoorbell_address, vdevid);
+    if ( dev && veventid <= dev->eventids )
+        host_lpi = get_host_lpi(dev, veventid);
+    spin_unlock(&d->arch.vgic.its_devices_lock);
+
+    if ( host_lpi == INVALID_LPI )
+        return -EINVAL;
+
+    gicv3_lpi_update_host_entry(host_lpi, d->domain_id,
+                                INVALID_VCPU_ID, INVALID_LPI);
+
+    return 0;
+}
+
 /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
 void gicv3_its_dt_init(const struct dt_device_node *node)
 {
diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
index 44f6315..d427539 100644
--- a/xen/arch/arm/gic-v3-lpi.c
+++ b/xen/arch/arm/gic-v3-lpi.c
@@ -207,6 +207,24 @@  out:
     irq_exit();
 }
 
+void gicv3_lpi_update_host_entry(uint32_t host_lpi, int domain_id,
+                                 unsigned int vcpu_id, uint32_t virt_lpi)
+{
+    union host_lpi *hlpip, hlpi;
+
+    ASSERT(host_lpi >= LPI_OFFSET);
+
+    host_lpi -= LPI_OFFSET;
+
+    hlpip = &lpi_data.host_lpis[host_lpi / HOST_LPIS_PER_PAGE][host_lpi % HOST_LPIS_PER_PAGE];
+
+    hlpi.virt_lpi = virt_lpi;
+    hlpi.dom_id = domain_id;
+    hlpi.vcpu_id = vcpu_id;
+
+    write_u64_atomic(&hlpip->data, hlpi.data);
+}
+
 static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
 {
     uint64_t val;
diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
index 6e505cb..104017e 100644
--- a/xen/arch/arm/vgic-v3-its.c
+++ b/xen/arch/arm/vgic-v3-its.c
@@ -52,6 +52,7 @@ 
  */
 struct virt_its {
     struct domain *d;
+    paddr_t doorbell_address;
     unsigned int devid_bits;
     unsigned int evid_bits;
     spinlock_t vcmd_lock;       /* Protects the virtual command buffer, which */
@@ -166,6 +167,20 @@  static struct vcpu *get_vcpu_from_collection(struct virt_its *its,
 #define DEV_TABLE_ENTRY(addr, bits)                     \
         (((addr) & GENMASK(51, 8)) | (((bits) - 1) & GENMASK(4, 0)))
 
+/* Set the address of an ITT for a given device ID. */
+static int its_set_itt_address(struct virt_its *its, uint32_t devid,
+                               paddr_t itt_address, uint32_t nr_bits)
+{
+    paddr_t addr = get_baser_phys_addr(its->baser_dev);
+    uint64_t itt_entry = DEV_TABLE_ENTRY(itt_address, nr_bits);
+
+    if ( devid >= its->max_devices )
+        return -ENOENT;
+
+    return vgic_access_guest_memory(its->d, addr + devid * sizeof(uint64_t),
+                                    &itt_entry, sizeof(itt_entry), true);
+}
+
 /*
  * Lookup the address of the Interrupt Translation Table associated with
  * that device ID.
@@ -398,6 +413,125 @@  static int its_handle_mapc(struct virt_its *its, uint64_t *cmdptr)
     return 0;
 }
 
+/* Must be called with the ITS lock held. */
+static int its_discard_event(struct virt_its *its,
+                             uint32_t vdevid, uint32_t vevid)
+{
+    struct pending_irq *p;
+    unsigned long flags;
+    struct vcpu *vcpu;
+    uint32_t vlpi;
+
+    ASSERT(spin_is_locked(&its->its_lock));
+
+    if ( !read_itte_locked(its, vdevid, vevid, &vcpu, &vlpi) )
+        return -ENOENT;
+
+    if ( vlpi == INVALID_LPI )
+        return -ENOENT;
+
+    /* Lock this VCPU's VGIC to make sure nobody is using the pending_irq. */
+    spin_lock_irqsave(&vcpu->arch.vgic.lock, flags);
+
+    /* Remove the pending_irq from the tree. */
+    write_lock(&its->d->arch.vgic.pend_lpi_tree_lock);
+    p = radix_tree_delete(&its->d->arch.vgic.pend_lpi_tree, vlpi);
+    write_unlock(&its->d->arch.vgic.pend_lpi_tree_lock);
+
+    if ( !p )
+    {
+        spin_unlock_irqrestore(&vcpu->arch.vgic.lock, flags);
+
+        return -ENOENT;
+    }
+
+    /* Cleanup the pending_irq, to be reusable later. */
+    list_del_init(&p->inflight);
+    list_del_init(&p->lr_queue);
+    p->status = 0;
+    p->lr = GIC_INVALID_LR;
+    p->irq = INVALID_LPI;
+
+    spin_unlock_irqrestore(&vcpu->arch.vgic.lock, flags);
+
+    /* Remove the corresponding host LPI entry */
+    return gicv3_remove_guest_event(its->d, its->doorbell_address,
+                                    vdevid, vevid);
+}
+
+static int its_unmap_device(struct virt_its *its, uint32_t devid)
+{
+    uint64_t itt, evid;
+    int ret;
+
+    spin_lock(&its->its_lock);
+
+    ret = its_get_itt(its, devid, &itt);
+    if ( ret )
+    {
+        spin_unlock(&its->its_lock);
+        return ret;
+    }
+
+    for ( evid = 0; evid < DEV_TABLE_ITT_SIZE(itt); evid++ )
+        /* Don't care about errors here, clean up as much as possible. */
+        its_discard_event(its, devid, evid);
+
+    spin_unlock(&its->its_lock);
+
+    return 0;
+}
+
+static int its_handle_mapd(struct virt_its *its, uint64_t *cmdptr)
+{
+    /* size and devid get validated by the functions called below. */
+    uint32_t devid = its_cmd_get_deviceid(cmdptr);
+    unsigned int size = its_cmd_get_size(cmdptr) + 1;
+    bool valid = its_cmd_get_validbit(cmdptr);
+    paddr_t itt_addr = its_cmd_get_ittaddr(cmdptr);
+    int ret;
+
+    /* Sanitize the number of events. */
+    if ( valid && (size > its->evid_bits) )
+        return -1;
+
+    /*
+     * There is no easy and clean way for Xen to know the ITS device ID of a
+     * particular (PCI) device, so we have to rely on the guest telling
+     * us about it. For *now* we are just using the device ID *Dom0* uses,
+     * because the driver there has the actual knowledge.
+     * Eventually this will be replaced with a dedicated hypercall to
+     * announce pass-through of devices.
+     */
+    if ( is_hardware_domain(its->d) )
+    {
+        if ( !valid )
+            /* Discard all events and remove pending LPIs. */
+            its_unmap_device(its, devid);
+
+        /*
+         * Dom0's ITSes are mapped 1:1, so both addresses are the same.
+         * Also the device IDs are equal.
+         */
+        ret = gicv3_its_map_guest_device(its->d, its->doorbell_address, devid,
+                                         its->doorbell_address, devid,
+                                         BIT(size), valid);
+        if ( ret && valid )
+            return ret;
+    }
+
+    spin_lock(&its->its_lock);
+
+    if ( valid )
+        ret = its_set_itt_address(its, devid, itt_addr, size);
+    else
+        ret = its_set_itt_address(its, devid, INVALID_PADDR, 1);
+
+    spin_unlock(&its->its_lock);
+
+    return ret;
+}
+
 #define ITS_CMD_BUFFER_SIZE(baser)      ((((baser) & 0xff) + 1) << 12)
 #define ITS_CMD_OFFSET(reg)             ((reg) & GENMASK(19, 5))
 
@@ -436,6 +570,9 @@  static int vgic_its_handle_cmds(struct domain *d, struct virt_its *its)
         case GITS_CMD_MAPC:
             ret = its_handle_mapc(its, command);
             break;
+        case GITS_CMD_MAPD:
+            ret = its_handle_mapd(its, command);
+            break;
         case GITS_CMD_SYNC:
             /* We handle ITS commands synchronously, so we ignore SYNC. */
             break;
diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
index 40f4ef5..60ffdb6 100644
--- a/xen/include/asm-arm/gic_v3_its.h
+++ b/xen/include/asm-arm/gic_v3_its.h
@@ -169,6 +169,12 @@  int gicv3_its_map_guest_device(struct domain *d,
 int gicv3_allocate_host_lpi_block(struct domain *d, uint32_t *first_lpi);
 void gicv3_free_host_lpi_block(uint32_t first_lpi);
 
+int gicv3_remove_guest_event(struct domain *d, paddr_t vdoorbell_address,
+                                     uint32_t vdevid, uint32_t veventid);
+
+void gicv3_lpi_update_host_entry(uint32_t host_lpi, int domain_id,
+                                 unsigned int vcpu_id, uint32_t virt_lpi);
+
 #else
 
 static inline void gicv3_its_dt_init(const struct dt_device_node *node)