
[v9,19/28] ARM: vITS: handle MAPD command

Message ID 20170511175340.8448-20-andre.przywara@arm.com (mailing list archive)
State New, archived

Commit Message

Andre Przywara May 11, 2017, 5:53 p.m. UTC
The MAPD command maps a device by associating a memory region for
storing ITEs with a certain device ID. Since it features a valid bit,
MAPD also provides the "unmap" functionality, which we handle here as well.
We store the given guest physical address in the device table, and, if
this command comes from Dom0, tell the host ITS driver about this new
mapping, so it can issue the corresponding host MAPD command and create
the required tables. We take care of rolling back actions should one
step fail.
Upon unmapping a device we make sure we clean up all associated
resources and release the memory again.
We use our existing guest memory access function to find the right ITT
entry and store the mapping there (in guest memory).
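
For reference, the MAPD command fields decoded by the handler are laid
out as follows. This is only an orientation sketch following the GICv3
ITS command encoding; the macro names are made up and not part of this
patch:

    /* Each ITS command consists of four 64-bit doublewords, DW0..DW3. */
    #define ITS_CMD_MAPD         0x08                              /* DW0[7:0]   */
    #define MAPD_DEVICE_ID(cmd)  ((uint32_t)((cmd)[0] >> 32))      /* DW0[63:32] */
    #define MAPD_SIZE(cmd)       ((unsigned int)((cmd)[1] & 0x1f)) /* DW1[4:0]: EventID bits - 1 */
    #define MAPD_ITT_ADDR(cmd)   ((paddr_t)((cmd)[2] & GENMASK(51, 8))) /* DW2[51:8] */
    #define MAPD_VALID(cmd)      (!!((cmd)[2] & (1ULL << 63)))     /* DW2[63]: 0 means unmap */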

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 xen/arch/arm/gic-v3-its.c        |  18 +++++
 xen/arch/arm/gic-v3-lpi.c        |  18 +++++
 xen/arch/arm/vgic-v3-its.c       | 145 +++++++++++++++++++++++++++++++++++++++
 xen/include/asm-arm/gic_v3_its.h |   5 ++
 4 files changed, 186 insertions(+)

Comments

Julien Grall May 17, 2017, 6:07 p.m. UTC | #1
Hi Andre,

On 11/05/17 18:53, Andre Przywara wrote:
> The MAPD command maps a device by associating a memory region for
> storing ITEs with a certain device ID. Since it features a valid bit,
> MAPD also covers the "unmap" functionality, which we also cover here.
> We store the given guest physical address in the device table, and, if
> this command comes from Dom0, tell the host ITS driver about this new
> mapping, so it can issue the corresponding host MAPD command and create
> the required tables. We take care of rolling back actions should one
> step fail.
> Upon unmapping a device we make sure we clean up all associated
> resources and release the memory again.
> We use our existing guest memory access function to find the right ITT
> entry and store the mapping there (in guest memory).
>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  xen/arch/arm/gic-v3-its.c        |  18 +++++
>  xen/arch/arm/gic-v3-lpi.c        |  18 +++++
>  xen/arch/arm/vgic-v3-its.c       | 145 +++++++++++++++++++++++++++++++++++++++
>  xen/include/asm-arm/gic_v3_its.h |   5 ++
>  4 files changed, 186 insertions(+)
>
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index fd6a394..be4c3e0 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -869,6 +869,24 @@ struct pending_irq *gicv3_its_get_event_pending_irq(struct domain *d,
>      return get_event_pending_irq(d, vdoorbell_address, vdevid, veventid, NULL);
>  }
>
> +int gicv3_remove_guest_event(struct domain *d, paddr_t vdoorbell_address,
> +                             uint32_t vdevid, uint32_t veventid)
> +{
> +    uint32_t host_lpi = INVALID_LPI;
> +
> +    if ( !get_event_pending_irq(d, vdoorbell_address, vdevid, veventid,
> +                                &host_lpi) )
> +        return -EINVAL;
> +
> +    if ( host_lpi == INVALID_LPI )
> +        return -EINVAL;
> +
> +    gicv3_lpi_update_host_entry(host_lpi, d->domain_id,
> +                                INVALID_VCPU_ID, INVALID_LPI);
> +
> +    return 0;
> +}
> +
>  /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
>  void gicv3_its_dt_init(const struct dt_device_node *node)
>  {
> diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
> index 44f6315..d427539 100644
> --- a/xen/arch/arm/gic-v3-lpi.c
> +++ b/xen/arch/arm/gic-v3-lpi.c
> @@ -207,6 +207,24 @@ out:
>      irq_exit();
>  }
>
> +void gicv3_lpi_update_host_entry(uint32_t host_lpi, int domain_id,
> +                                 unsigned int vcpu_id, uint32_t virt_lpi)
> +{
> +    union host_lpi *hlpip, hlpi;
> +
> +    ASSERT(host_lpi >= LPI_OFFSET);
> +
> +    host_lpi -= LPI_OFFSET;
> +
> +    hlpip = &lpi_data.host_lpis[host_lpi / HOST_LPIS_PER_PAGE][host_lpi % HOST_LPIS_PER_PAGE];
> +
> +    hlpi.virt_lpi = virt_lpi;
> +    hlpi.dom_id = domain_id;
> +    hlpi.vcpu_id = vcpu_id;
> +
> +    write_u64_atomic(&hlpip->data, hlpi.data);
> +}
> +
>  static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
>  {
>      uint64_t val;
> diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
> index 8a200e9..731fe0c 100644
> --- a/xen/arch/arm/vgic-v3-its.c
> +++ b/xen/arch/arm/vgic-v3-its.c
> @@ -175,6 +175,21 @@ static struct vcpu *get_vcpu_from_collection(struct virt_its *its,
>  #define DEV_TABLE_ENTRY(addr, bits)                     \
>          (((addr) & GENMASK(51, 8)) | (((bits) - 1) & GENMASK(4, 0)))
>
> +/* Set the address of an ITT for a given device ID. */
> +static int its_set_itt_address(struct virt_its *its, uint32_t devid,
> +                               paddr_t itt_address, uint32_t nr_bits)
> +{
> +    paddr_t addr = get_baser_phys_addr(its->baser_dev);
> +    dev_table_entry_t itt_entry = DEV_TABLE_ENTRY(itt_address, nr_bits);
> +
> +    if ( devid >= its->max_devices )
> +        return -ENOENT;
> +
> +    return vgic_access_guest_memory(its->d,
> +                                    addr + devid * sizeof(dev_table_entry_t),
> +                                    &itt_entry, sizeof(itt_entry), true);
> +}
> +
>  /*
>   * Lookup the address of the Interrupt Translation Table associated with
>   * that device ID.
> @@ -414,6 +429,133 @@ out_unlock:
>      return ret;
>  }
>
> +/* Must be called with the ITS lock held. */
> +static int its_discard_event(struct virt_its *its,
> +                             uint32_t vdevid, uint32_t vevid)
> +{
> +    struct pending_irq *p;
> +    unsigned long flags;
> +    struct vcpu *vcpu;
> +    uint32_t vlpi;
> +
> +    ASSERT(spin_is_locked(&its->its_lock));
> +
> +    if ( !read_itte_locked(its, vdevid, vevid, &vcpu, &vlpi) )
> +        return -ENOENT;
> +
> +    if ( vlpi == INVALID_LPI )
> +        return -ENOENT;
> +
> +    /* Lock this VCPU's VGIC to make sure nobody is using the pending_irq. */
> +    spin_lock_irqsave(&vcpu->arch.vgic.lock, flags);

There is an interesting issue happening with this code. You don't check 
the content of the memory provided by the guest. So a malicious guest 
could craft the memory in order to set up a mapping with a known vLPI and 
a different vCPU.

This would lead to using the wrong lock here and corrupting the list.
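
For illustration, a minimal hardening sketch (assuming a pending_irq
field like lpi_vcpu_id that caches the target vCPU at MAPTI time; the
field name and the helper are hypothetical, not code from this series)
could cross-check the guest-derived vCPU against Xen-owned state before
the lock is taken:

    /*
     * Sanity check: does the vCPU read back from the guest's ITS tables
     * match the one Xen recorded for this event when it was mapped?
     */
    static bool its_event_vcpu_is_sane(struct virt_its *its, uint32_t vdevid,
                                       uint32_t vevid, const struct vcpu *vcpu)
    {
        struct pending_irq *p;

        p = gicv3_its_get_event_pending_irq(its->d, its->doorbell_address,
                                            vdevid, vevid);

        return p && p->lpi_vcpu_id == vcpu->vcpu_id;
    }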

> +
> +    /* Remove the pending_irq from the tree. */
> +    write_lock(&its->d->arch.vgic.pend_lpi_tree_lock);
> +    p = radix_tree_delete(&its->d->arch.vgic.pend_lpi_tree, vlpi);
> +    write_unlock(&its->d->arch.vgic.pend_lpi_tree_lock);
> +
> +    if ( !p )
> +    {
> +        spin_unlock_irqrestore(&vcpu->arch.vgic.lock, flags);
> +
> +        return -ENOENT;
> +    }
> +
> +    /* Cleanup the pending_irq and disconnect it from the LPI. */
> +    list_del_init(&p->inflight);
> +    list_del_init(&p->lr_queue);
> +    vgic_init_pending_irq(p, INVALID_LPI);
> +
> +    spin_unlock_irqrestore(&vcpu->arch.vgic.lock, flags);
> +
> +    /* Remove the corresponding host LPI entry */
> +    return gicv3_remove_guest_event(its->d, its->doorbell_address,
> +                                    vdevid, vevid);
> +}
> +
> +static int its_unmap_device(struct virt_its *its, uint32_t devid)
> +{
> +    dev_table_entry_t itt;
> +    uint64_t evid;
> +    int ret;
> +
> +    spin_lock(&its->its_lock);
> +
> +    ret = its_get_itt(its, devid, &itt);
> +    if ( ret )
> +    {
> +        spin_unlock(&its->its_lock);
> +        return ret;
> +    }
> +
> +    /*
> +     * For DomUs we need to check that the number of events per device
> +     * is really limited, otherwise looping over all events can take too
> +     * long for a guest. This ASSERT can then be removed if that is
> +     * covered.
> +     */
> +    ASSERT(is_hardware_domain(its->d));
> +
> +    for ( evid = 0; evid < DEV_TABLE_ITT_SIZE(itt); evid++ )
> +        /* Don't care about errors here, clean up as much as possible. */
> +        its_discard_event(its, devid, evid);
> +
> +    spin_unlock(&its->its_lock);
> +
> +    return 0;
> +}
> +
> +static int its_handle_mapd(struct virt_its *its, uint64_t *cmdptr)
> +{
> +    /* size and devid get validated by the functions called below. */
> +    uint32_t devid = its_cmd_get_deviceid(cmdptr);
> +    unsigned int size = its_cmd_get_size(cmdptr) + 1;
> +    bool valid = its_cmd_get_validbit(cmdptr);
> +    paddr_t itt_addr = its_cmd_get_ittaddr(cmdptr);
> +    int ret;
> +
> +    /* Sanitize the number of events. */
> +    if ( valid && (size > its->evid_bits) )
> +        return -1;
> +
> +    if ( !valid )
> +        /* Discard all events and remove pending LPIs. */
> +        its_unmap_device(its, devid);

its_unmap_device returns an error, but you don't check it. Please 
explain why.
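
One possible way to address this (an illustrative sketch only, not
necessarily the intended behaviour) would be to propagate the failure:

    if ( !valid )
    {
        /* Discard all events and remove pending LPIs. */
        ret = its_unmap_device(its, devid);
        if ( ret )
            return ret;
    }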

> +
> +    /*
> +     * There is no easy and clean way for Xen to know the ITS device ID of a
> +     * particular (PCI) device, so we have to rely on the guest telling
> +     * us about it. For *now* we are just using the device ID *Dom0* uses,
> +     * because the driver there has the actual knowledge.
> +     * Eventually this will be replaced with a dedicated hypercall to
> +     * announce pass-through of devices.
> +     */
> +    if ( is_hardware_domain(its->d) )
> +    {
> +
> +        /*
> +         * Dom0's ITSes are mapped 1:1, so both addresses are the same.
> +         * Also the device IDs are equal.
> +         */
> +        ret = gicv3_its_map_guest_device(its->d, its->doorbell_address, devid,
> +                                         its->doorbell_address, devid,
> +                                         BIT(size), valid);
> +        if ( ret && valid )
> +            return ret;
> +    }
> +
> +    spin_lock(&its->its_lock);
> +
> +    if ( valid )
> +        ret = its_set_itt_address(its, devid, itt_addr, size);
> +    else
> +        ret = its_set_itt_address(its, devid, INVALID_PADDR, 1);
> +
> +    spin_unlock(&its->its_lock);
> +
> +    return ret;
> +}
> +
>  #define ITS_CMD_BUFFER_SIZE(baser)      ((((baser) & 0xff) + 1) << 12)
>  #define ITS_CMD_OFFSET(reg)             ((reg) & GENMASK(19, 5))
>
> @@ -452,6 +594,9 @@ static int vgic_its_handle_cmds(struct domain *d, struct virt_its *its)
>          case GITS_CMD_MAPC:
>              ret = its_handle_mapc(its, command);
>              break;
> +        case GITS_CMD_MAPD:
> +            ret = its_handle_mapd(its, command);
> +            break;
>          case GITS_CMD_SYNC:
>              /* We handle ITS commands synchronously, so we ignore SYNC. */
>              break;
> diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
> index d162e89..6f94e65 100644
> --- a/xen/include/asm-arm/gic_v3_its.h
> +++ b/xen/include/asm-arm/gic_v3_its.h
> @@ -173,6 +173,11 @@ struct pending_irq *gicv3_its_get_event_pending_irq(struct domain *d,
>                                                      paddr_t vdoorbell_address,
>                                                      uint32_t vdevid,
>                                                      uint32_t veventid);
> +int gicv3_remove_guest_event(struct domain *d, paddr_t vdoorbell_address,
> +                                     uint32_t vdevid, uint32_t veventid);
> +void gicv3_lpi_update_host_entry(uint32_t host_lpi, int domain_id,
> +                                 unsigned int vcpu_id, uint32_t virt_lpi);
> +
>  #else
>
>  static inline void gicv3_its_dt_init(const struct dt_device_node *node)
>

Cheers,
Andre Przywara May 24, 2017, 9:10 a.m. UTC | #2
Hi,

On 17/05/17 19:07, Julien Grall wrote:
> Hi Andre,
> 
> On 11/05/17 18:53, Andre Przywara wrote:
>> The MAPD command maps a device by associating a memory region for
>> storing ITEs with a certain device ID. Since it features a valid bit,
>> MAPD also covers the "unmap" functionality, which we also cover here.
>> We store the given guest physical address in the device table, and, if
>> this command comes from Dom0, tell the host ITS driver about this new
>> mapping, so it can issue the corresponding host MAPD command and create
>> the required tables. We take care of rolling back actions should one
>> step fail.
>> Upon unmapping a device we make sure we clean up all associated
>> resources and release the memory again.
>> We use our existing guest memory access function to find the right ITT
>> entry and store the mapping there (in guest memory).
>>
>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>> ---
>>  xen/arch/arm/gic-v3-its.c        |  18 +++++
>>  xen/arch/arm/gic-v3-lpi.c        |  18 +++++
>>  xen/arch/arm/vgic-v3-its.c       | 145
>> +++++++++++++++++++++++++++++++++++++++
>>  xen/include/asm-arm/gic_v3_its.h |   5 ++
>>  4 files changed, 186 insertions(+)
>>
>> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
>> index fd6a394..be4c3e0 100644
>> --- a/xen/arch/arm/gic-v3-its.c
>> +++ b/xen/arch/arm/gic-v3-its.c
>> @@ -869,6 +869,24 @@ struct pending_irq
>> *gicv3_its_get_event_pending_irq(struct domain *d,
>>      return get_event_pending_irq(d, vdoorbell_address, vdevid,
>> veventid, NULL);
>>  }
>>
>> +int gicv3_remove_guest_event(struct domain *d, paddr_t
>> vdoorbell_address,
>> +                             uint32_t vdevid, uint32_t veventid)
>> +{
>> +    uint32_t host_lpi = INVALID_LPI;
>> +
>> +    if ( !get_event_pending_irq(d, vdoorbell_address, vdevid, veventid,
>> +                                &host_lpi) )
>> +        return -EINVAL;
>> +
>> +    if ( host_lpi == INVALID_LPI )
>> +        return -EINVAL;
>> +
>> +    gicv3_lpi_update_host_entry(host_lpi, d->domain_id,
>> +                                INVALID_VCPU_ID, INVALID_LPI);
>> +
>> +    return 0;
>> +}
>> +
>>  /* Scan the DT for any ITS nodes and create a list of host ITSes out
>> of it. */
>>  void gicv3_its_dt_init(const struct dt_device_node *node)
>>  {
>> diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
>> index 44f6315..d427539 100644
>> --- a/xen/arch/arm/gic-v3-lpi.c
>> +++ b/xen/arch/arm/gic-v3-lpi.c
>> @@ -207,6 +207,24 @@ out:
>>      irq_exit();
>>  }
>>
>> +void gicv3_lpi_update_host_entry(uint32_t host_lpi, int domain_id,
>> +                                 unsigned int vcpu_id, uint32_t
>> virt_lpi)
>> +{
>> +    union host_lpi *hlpip, hlpi;
>> +
>> +    ASSERT(host_lpi >= LPI_OFFSET);
>> +
>> +    host_lpi -= LPI_OFFSET;
>> +
>> +    hlpip = &lpi_data.host_lpis[host_lpi /
>> HOST_LPIS_PER_PAGE][host_lpi % HOST_LPIS_PER_PAGE];
>> +
>> +    hlpi.virt_lpi = virt_lpi;
>> +    hlpi.dom_id = domain_id;
>> +    hlpi.vcpu_id = vcpu_id;
>> +
>> +    write_u64_atomic(&hlpip->data, hlpi.data);
>> +}
>> +
>>  static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
>>  {
>>      uint64_t val;
>> diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
>> index 8a200e9..731fe0c 100644
>> --- a/xen/arch/arm/vgic-v3-its.c
>> +++ b/xen/arch/arm/vgic-v3-its.c
>> @@ -175,6 +175,21 @@ static struct vcpu
>> *get_vcpu_from_collection(struct virt_its *its,
>>  #define DEV_TABLE_ENTRY(addr, bits)                     \
>>          (((addr) & GENMASK(51, 8)) | (((bits) - 1) & GENMASK(4, 0)))
>>
>> +/* Set the address of an ITT for a given device ID. */
>> +static int its_set_itt_address(struct virt_its *its, uint32_t devid,
>> +                               paddr_t itt_address, uint32_t nr_bits)
>> +{
>> +    paddr_t addr = get_baser_phys_addr(its->baser_dev);
>> +    dev_table_entry_t itt_entry = DEV_TABLE_ENTRY(itt_address, nr_bits);
>> +
>> +    if ( devid >= its->max_devices )
>> +        return -ENOENT;
>> +
>> +    return vgic_access_guest_memory(its->d,
>> +                                    addr + devid *
>> sizeof(dev_table_entry_t),
>> +                                    &itt_entry, sizeof(itt_entry),
>> true);
>> +}
>> +
>>  /*
>>   * Lookup the address of the Interrupt Translation Table associated with
>>   * that device ID.
>> @@ -414,6 +429,133 @@ out_unlock:
>>      return ret;
>>  }
>>
>> +/* Must be called with the ITS lock held. */
>> +static int its_discard_event(struct virt_its *its,
>> +                             uint32_t vdevid, uint32_t vevid)
>> +{
>> +    struct pending_irq *p;
>> +    unsigned long flags;
>> +    struct vcpu *vcpu;
>> +    uint32_t vlpi;
>> +
>> +    ASSERT(spin_is_locked(&its->its_lock));
>> +
>> +    if ( !read_itte_locked(its, vdevid, vevid, &vcpu, &vlpi) )
>> +        return -ENOENT;
>> +
>> +    if ( vlpi == INVALID_LPI )
>> +        return -ENOENT;
>> +
>> +    /* Lock this VCPU's VGIC to make sure nobody is using the
>> pending_irq. */
>> +    spin_lock_irqsave(&vcpu->arch.vgic.lock, flags);
> 
> There is an interesting issue happening with this code. You don't check
> the content of the memory provided by the guest. So a malicious guest
> could craft the memory in order to setup mapping with known vlpi and a
> different vCPU.
> 
> This would lead to use the wrong lock here and corrupt the list.

What about this:
Right now (mostly due to the requirements of the INVALL implementation)
we store the VCPU ID in our struct pending_irq, populated upon MAPTI. So
originally this was just for caching (INVALL being the only user of
this), but I was wondering if we should move the actual instance of this
information to pending_irq instead of relying on the collection ID from
the ITS table. So we would never need to look up and trust the ITS
tables for this information anymore. Later with the VGIC rework we will
need this field anyway (even for SPIs).

I think this should address this threat, where a guest can manipulate Xen
by crafting the tables. Tinkering with the other information stored in
the tables should not harm Xen; the guest would just shoot itself in
the foot.
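
As a rough sketch of where that information would live (the field name
and type are assumptions, just for illustration):

    /*
     * Illustrative addition to Xen's struct pending_irq (Xen-owned state):
     * set on MAPTI, updated on MOVI, read by INVALL and on unmap, so the
     * guest's ITS tables never need to be consulted for the target vCPU.
     */
    uint16_t lpi_vcpu_id;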

Does that make sense?

Cheers,
Andre.

>> +
>> +    /* Remove the pending_irq from the tree. */
>> +    write_lock(&its->d->arch.vgic.pend_lpi_tree_lock);
>> +    p = radix_tree_delete(&its->d->arch.vgic.pend_lpi_tree, vlpi);
>> +    write_unlock(&its->d->arch.vgic.pend_lpi_tree_lock);
>> +
>> +    if ( !p )
>> +    {
>> +        spin_unlock_irqrestore(&vcpu->arch.vgic.lock, flags);
>> +
>> +        return -ENOENT;
>> +    }
>> +
>> +    /* Cleanup the pending_irq and disconnect it from the LPI. */
>> +    list_del_init(&p->inflight);
>> +    list_del_init(&p->lr_queue);
>> +    vgic_init_pending_irq(p, INVALID_LPI);
>> +
>> +    spin_unlock_irqrestore(&vcpu->arch.vgic.lock, flags);
>> +
>> +    /* Remove the corresponding host LPI entry */
>> +    return gicv3_remove_guest_event(its->d, its->doorbell_address,
>> +                                    vdevid, vevid);
>> +}
>> +
>> +static int its_unmap_device(struct virt_its *its, uint32_t devid)
>> +{
>> +    dev_table_entry_t itt;
>> +    uint64_t evid;
>> +    int ret;
>> +
>> +    spin_lock(&its->its_lock);
>> +
>> +    ret = its_get_itt(its, devid, &itt);
>> +    if ( ret )
>> +    {
>> +        spin_unlock(&its->its_lock);
>> +        return ret;
>> +    }
>> +
>> +    /*
>> +     * For DomUs we need to check that the number of events per device
>> +     * is really limited, otherwise looping over all events can take too
>> +     * long for a guest. This ASSERT can then be removed if that is
>> +     * covered.
>> +     */
>> +    ASSERT(is_hardware_domain(its->d));
>> +
>> +    for ( evid = 0; evid < DEV_TABLE_ITT_SIZE(itt); evid++ )
>> +        /* Don't care about errors here, clean up as much as
>> possible. */
>> +        its_discard_event(its, devid, evid);
>> +
>> +    spin_unlock(&its->its_lock);
>> +
>> +    return 0;
>> +}
>> +
>> +static int its_handle_mapd(struct virt_its *its, uint64_t *cmdptr)
>> +{
>> +    /* size and devid get validated by the functions called below. */
>> +    uint32_t devid = its_cmd_get_deviceid(cmdptr);
>> +    unsigned int size = its_cmd_get_size(cmdptr) + 1;
>> +    bool valid = its_cmd_get_validbit(cmdptr);
>> +    paddr_t itt_addr = its_cmd_get_ittaddr(cmdptr);
>> +    int ret;
>> +
>> +    /* Sanitize the number of events. */
>> +    if ( valid && (size > its->evid_bits) )
>> +        return -1;
>> +
>> +    if ( !valid )
>> +        /* Discard all events and remove pending LPIs. */
>> +        its_unmap_device(its, devid);
> 
> its_unmap_device is returning an error but you don't check it. Please
> explain why.
> 
>> +
>> +    /*
>> +     * There is no easy and clean way for Xen to know the ITS device
>> ID of a
>> +     * particular (PCI) device, so we have to rely on the guest telling
>> +     * us about it. For *now* we are just using the device ID *Dom0*
>> uses,
>> +     * because the driver there has the actual knowledge.
>> +     * Eventually this will be replaced with a dedicated hypercall to
>> +     * announce pass-through of devices.
>> +     */
>> +    if ( is_hardware_domain(its->d) )
>> +    {
>> +
>> +        /*
>> +         * Dom0's ITSes are mapped 1:1, so both addresses are the same.
>> +         * Also the device IDs are equal.
>> +         */
>> +        ret = gicv3_its_map_guest_device(its->d,
>> its->doorbell_address, devid,
>> +                                         its->doorbell_address, devid,
>> +                                         BIT(size), valid);
>> +        if ( ret && valid )
>> +            return ret;
>> +    }
>> +
>> +    spin_lock(&its->its_lock);
>> +
>> +    if ( valid )
>> +        ret = its_set_itt_address(its, devid, itt_addr, size);
>> +    else
>> +        ret = its_set_itt_address(its, devid, INVALID_PADDR, 1);
>> +
>> +    spin_unlock(&its->its_lock);
>> +
>> +    return ret;
>> +}
>> +
>>  #define ITS_CMD_BUFFER_SIZE(baser)      ((((baser) & 0xff) + 1) << 12)
>>  #define ITS_CMD_OFFSET(reg)             ((reg) & GENMASK(19, 5))
>>
>> @@ -452,6 +594,9 @@ static int vgic_its_handle_cmds(struct domain *d,
>> struct virt_its *its)
>>          case GITS_CMD_MAPC:
>>              ret = its_handle_mapc(its, command);
>>              break;
>> +        case GITS_CMD_MAPD:
>> +            ret = its_handle_mapd(its, command);
>> +            break;
>>          case GITS_CMD_SYNC:
>>              /* We handle ITS commands synchronously, so we ignore
>> SYNC. */
>>              break;
>> diff --git a/xen/include/asm-arm/gic_v3_its.h
>> b/xen/include/asm-arm/gic_v3_its.h
>> index d162e89..6f94e65 100644
>> --- a/xen/include/asm-arm/gic_v3_its.h
>> +++ b/xen/include/asm-arm/gic_v3_its.h
>> @@ -173,6 +173,11 @@ struct pending_irq
>> *gicv3_its_get_event_pending_irq(struct domain *d,
>>                                                      paddr_t
>> vdoorbell_address,
>>                                                      uint32_t vdevid,
>>                                                      uint32_t veventid);
>> +int gicv3_remove_guest_event(struct domain *d, paddr_t
>> vdoorbell_address,
>> +                                     uint32_t vdevid, uint32_t
>> veventid);
>> +void gicv3_lpi_update_host_entry(uint32_t host_lpi, int domain_id,
>> +                                 unsigned int vcpu_id, uint32_t
>> virt_lpi);
>> +
>>  #else
>>
>>  static inline void gicv3_its_dt_init(const struct dt_device_node *node)
>>
> 
> Cheers,
>
Julien Grall May 24, 2017, 9:56 a.m. UTC | #3
Hi Andre,

On 05/24/2017 10:10 AM, Andre Przywara wrote:
> On 17/05/17 19:07, Julien Grall wrote:
>>>  /*
>>>   * Lookup the address of the Interrupt Translation Table associated with
>>>   * that device ID.
>>> @@ -414,6 +429,133 @@ out_unlock:
>>>      return ret;
>>>  }
>>>
>>> +/* Must be called with the ITS lock held. */
>>> +static int its_discard_event(struct virt_its *its,
>>> +                             uint32_t vdevid, uint32_t vevid)
>>> +{
>>> +    struct pending_irq *p;
>>> +    unsigned long flags;
>>> +    struct vcpu *vcpu;
>>> +    uint32_t vlpi;
>>> +
>>> +    ASSERT(spin_is_locked(&its->its_lock));
>>> +
>>> +    if ( !read_itte_locked(its, vdevid, vevid, &vcpu, &vlpi) )
>>> +        return -ENOENT;
>>> +
>>> +    if ( vlpi == INVALID_LPI )
>>> +        return -ENOENT;
>>> +
>>> +    /* Lock this VCPU's VGIC to make sure nobody is using the
>>> pending_irq. */
>>> +    spin_lock_irqsave(&vcpu->arch.vgic.lock, flags);
>>
>> There is an interesting issue happening with this code. You don't check
>> the content of the memory provided by the guest. So a malicious guest
>> could craft the memory in order to setup mapping with known vlpi and a
>> different vCPU.
>>
>> This would lead to use the wrong lock here and corrupt the list.
>
> What about this:
> Right now (mostly due to the requirements of the INVALL implementation)
> we store the VCPU ID in our struct pending_irq, populated upon MAPTI. So
> originally this was just for caching (INVALL being the only user of
> this), but I was wondering if we should move the actual instance of this
> information to pending_irq instead of relying on the collection ID from
> the ITS table. So we would never need to look up and trust the ITS
> tables for this information anymore. Later with the VGIC rework we will
> need this field anyway (even for SPIs).
>
> I think this should solve this threat, where a guest can manipulate Xen
> by crafting the tables. Tinkering with the other information stored in
> the tables should not harm Xen, the guest would just shoot itself into
> the foot.
>
> Does that make sense?

I think so. If I understand correctly, with that solution we would not 
need to protect the memory provided by the guest?

Cheers.
Andre Przywara May 24, 2017, 1:09 p.m. UTC | #4
Hi,

On 24/05/17 10:56, Julien Grall wrote:
> Hi Andre,
> 
> On 05/24/2017 10:10 AM, Andre Przywara wrote:
>> On 17/05/17 19:07, Julien Grall wrote:
>>>>  /*
>>>>   * Lookup the address of the Interrupt Translation Table associated
>>>> with
>>>>   * that device ID.
>>>> @@ -414,6 +429,133 @@ out_unlock:
>>>>      return ret;
>>>>  }
>>>>
>>>> +/* Must be called with the ITS lock held. */
>>>> +static int its_discard_event(struct virt_its *its,
>>>> +                             uint32_t vdevid, uint32_t vevid)
>>>> +{
>>>> +    struct pending_irq *p;
>>>> +    unsigned long flags;
>>>> +    struct vcpu *vcpu;
>>>> +    uint32_t vlpi;
>>>> +
>>>> +    ASSERT(spin_is_locked(&its->its_lock));
>>>> +
>>>> +    if ( !read_itte_locked(its, vdevid, vevid, &vcpu, &vlpi) )
>>>> +        return -ENOENT;
>>>> +
>>>> +    if ( vlpi == INVALID_LPI )
>>>> +        return -ENOENT;
>>>> +
>>>> +    /* Lock this VCPU's VGIC to make sure nobody is using the
>>>> pending_irq. */
>>>> +    spin_lock_irqsave(&vcpu->arch.vgic.lock, flags);
>>>
>>> There is an interesting issue happening with this code. You don't check
>>> the content of the memory provided by the guest. So a malicious guest
>>> could craft the memory in order to setup mapping with known vlpi and a
>>> different vCPU.
>>>
>>> This would lead to use the wrong lock here and corrupt the list.
>>
>> What about this:
>> Right now (mostly due to the requirements of the INVALL implementation)
>> we store the VCPU ID in our struct pending_irq, populated upon MAPTI. So
>> originally this was just for caching (INVALL being the only user of
>> this), but I was wondering if we should move the actual instance of this
>> information to pending_irq instead of relying on the collection ID from
>> the ITS table. So we would never need to look up and trust the ITS
>> tables for this information anymore. Later with the VGIC rework we will
>> need this field anyway (even for SPIs).
>>
>> I think this should solve this threat, where a guest can manipulate Xen
>> by crafting the tables. Tinkering with the other information stored in
>> the tables should not harm Xen, the guest would just shoot itself into
>> the foot.
>>
>> Does that make sense?
> 
> I think so. If I understand correctly, with that solution we would not
> need to protect the memory provided by the guest?

Well, it gets better (though also a bit scary):
Currently we use the guest's ITS tables to translate a DeviceID/EventID
pair to a vLPI/vCPU pair. Now there is this new
gicv3_its_get_event_pending_irq() function, which also takes an ITS and
a DeviceID/EventID pair and gives us a struct pending_irq.
And here we have both the vLPI number and the VCPU number in there
already, so actually we don't need read_itte() anymore. And if we don't
read, we don't need to write. And if we don't write, we don't need to
access guest memory. So this seems to ripple through and allows us to
possibly dump the guest memory tables altogether.
Now we still use the collection table in guest memory, but I was
wondering if we could store the collection ID in the vcpu struct and use
some hashing scheme to do the reverse lookup. But that might be
something for some future cleanup / optimization series.
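
To make the read_itte() replacement concrete, a sketch of what the
lookup in its_discard_event() could then look like (the pending_irq
field names, in particular lpi_vcpu_id, are assumptions):

    struct pending_irq *p;
    struct vcpu *vcpu;

    p = gicv3_its_get_event_pending_irq(its->d, its->doorbell_address,
                                        vdevid, vevid);
    if ( !p || p->irq == INVALID_LPI || p->lpi_vcpu_id >= its->d->max_vcpus )
        return -ENOENT;

    vcpu = its->d->vcpu[p->lpi_vcpu_id];    /* Xen-owned, not guest memory */
    vlpi = p->irq;

    spin_lock_irqsave(&vcpu->arch.vgic.lock, flags);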

Do I miss something here? It sounds a bit scary that we can dump this
guest memory access scheme which gave us so many headaches and kept us
busy for some months now ...

Cheers,
Andre.
Stefano Stabellini May 25, 2017, 6:55 p.m. UTC | #5
On Wed, 24 May 2017, Andre Przywara wrote:
> Hi,
> 
> On 24/05/17 10:56, Julien Grall wrote:
> > Hi Andre,
> > 
> > On 05/24/2017 10:10 AM, Andre Przywara wrote:
> >> On 17/05/17 19:07, Julien Grall wrote:
> >>>>  /*
> >>>>   * Lookup the address of the Interrupt Translation Table associated
> >>>> with
> >>>>   * that device ID.
> >>>> @@ -414,6 +429,133 @@ out_unlock:
> >>>>      return ret;
> >>>>  }
> >>>>
> >>>> +/* Must be called with the ITS lock held. */
> >>>> +static int its_discard_event(struct virt_its *its,
> >>>> +                             uint32_t vdevid, uint32_t vevid)
> >>>> +{
> >>>> +    struct pending_irq *p;
> >>>> +    unsigned long flags;
> >>>> +    struct vcpu *vcpu;
> >>>> +    uint32_t vlpi;
> >>>> +
> >>>> +    ASSERT(spin_is_locked(&its->its_lock));
> >>>> +
> >>>> +    if ( !read_itte_locked(its, vdevid, vevid, &vcpu, &vlpi) )
> >>>> +        return -ENOENT;
> >>>> +
> >>>> +    if ( vlpi == INVALID_LPI )
> >>>> +        return -ENOENT;
> >>>> +
> >>>> +    /* Lock this VCPU's VGIC to make sure nobody is using the
> >>>> pending_irq. */
> >>>> +    spin_lock_irqsave(&vcpu->arch.vgic.lock, flags);
> >>>
> >>> There is an interesting issue happening with this code. You don't check
> >>> the content of the memory provided by the guest. So a malicious guest
> >>> could craft the memory in order to setup mapping with known vlpi and a
> >>> different vCPU.
> >>>
> >>> This would lead to use the wrong lock here and corrupt the list.
> >>
> >> What about this:
> >> Right now (mostly due to the requirements of the INVALL implementation)
> >> we store the VCPU ID in our struct pending_irq, populated upon MAPTI. So
> >> originally this was just for caching (INVALL being the only user of
> >> this), but I was wondering if we should move the actual instance of this
> >> information to pending_irq instead of relying on the collection ID from
> >> the ITS table. So we would never need to look up and trust the ITS
> >> tables for this information anymore. Later with the VGIC rework we will
> >> need this field anyway (even for SPIs).
> >>
> >> I think this should solve this threat, where a guest can manipulate Xen
> >> by crafting the tables. Tinkering with the other information stored in
> >> the tables should not harm Xen, the guest would just shoot itself into
> >> the foot.
> >>
> >> Does that make sense?
> > 
> > I think so. If I understand correctly, with that solution we would not
> > need to protect the memory provided by the guest?
> 
> Well, it gets better (though also a bit scary):
> Currently we use the guest's ITS tables to translate a DeviceID/EventID
> pair to a vLPI/vCPU pair. Now there is this new
> gicv3_its_get_event_pending_irq() function, which also takes an ITS and
> an DeviceID/EventID pair and gives us a struct pending_irq.
> And here we have both the vLPI number and the VCPU number in there
> already, so actually we don't need read_itte() anymore. And if we don't
> read, we don't need write. And if we don't write, we don't need to
> access guest memory. So this seems to ripple through and allows us to
> possibly dump the guest memory tables altogether.

Sounds like a good idea to me for DeviceID/EventID to vLPI/vCPU
translations.


> Now we still use the collection table in guest memory, but I was
> wondering if we could store the collection ID in the vcpu struct and use
> some hashing scheme to do the reverse lookup. But that might be
> something for some future cleanup / optimization series.

Leaving the security angle aside for a moment, I would prefer to keep
the guest memory accesses rather than adding another hashing scheme to
Xen for collection IDs.

Going back to security: it looks like it should be possible to check for
the validity of collection IDs without too much trouble?
Julien Grall May 25, 2017, 8:17 p.m. UTC | #6
On 25/05/2017 19:55, Stefano Stabellini wrote:
> On Wed, 24 May 2017, Andre Przywara wrote:
>> Hi,
>>
>> On 24/05/17 10:56, Julien Grall wrote:
>>> Hi Andre,
>>>
>>> On 05/24/2017 10:10 AM, Andre Przywara wrote:
>>>> On 17/05/17 19:07, Julien Grall wrote:
>>>>>>  /*
>>>>>>   * Lookup the address of the Interrupt Translation Table associated
>>>>>> with
>>>>>>   * that device ID.
>>>>>> @@ -414,6 +429,133 @@ out_unlock:
>>>>>>      return ret;
>>>>>>  }
>>>>>>
>>>>>> +/* Must be called with the ITS lock held. */
>>>>>> +static int its_discard_event(struct virt_its *its,
>>>>>> +                             uint32_t vdevid, uint32_t vevid)
>>>>>> +{
>>>>>> +    struct pending_irq *p;
>>>>>> +    unsigned long flags;
>>>>>> +    struct vcpu *vcpu;
>>>>>> +    uint32_t vlpi;
>>>>>> +
>>>>>> +    ASSERT(spin_is_locked(&its->its_lock));
>>>>>> +
>>>>>> +    if ( !read_itte_locked(its, vdevid, vevid, &vcpu, &vlpi) )
>>>>>> +        return -ENOENT;
>>>>>> +
>>>>>> +    if ( vlpi == INVALID_LPI )
>>>>>> +        return -ENOENT;
>>>>>> +
>>>>>> +    /* Lock this VCPU's VGIC to make sure nobody is using the
>>>>>> pending_irq. */
>>>>>> +    spin_lock_irqsave(&vcpu->arch.vgic.lock, flags);
>>>>>
>>>>> There is an interesting issue happening with this code. You don't check
>>>>> the content of the memory provided by the guest. So a malicious guest
>>>>> could craft the memory in order to setup mapping with known vlpi and a
>>>>> different vCPU.
>>>>>
>>>>> This would lead to use the wrong lock here and corrupt the list.
>>>>
>>>> What about this:
>>>> Right now (mostly due to the requirements of the INVALL implementation)
>>>> we store the VCPU ID in our struct pending_irq, populated upon MAPTI. So
>>>> originally this was just for caching (INVALL being the only user of
>>>> this), but I was wondering if we should move the actual instance of this
>>>> information to pending_irq instead of relying on the collection ID from
>>>> the ITS table. So we would never need to look up and trust the ITS
>>>> tables for this information anymore. Later with the VGIC rework we will
>>>> need this field anyway (even for SPIs).
>>>>
>>>> I think this should solve this threat, where a guest can manipulate Xen
>>>> by crafting the tables. Tinkering with the other information stored in
>>>> the tables should not harm Xen, the guest would just shoot itself into
>>>> the foot.
>>>>
>>>> Does that make sense?
>>>
>>> I think so. If I understand correctly, with that solution we would not
>>> need to protect the memory provided by the guest?
>>
>> Well, it gets better (though also a bit scary):
>> Currently we use the guest's ITS tables to translate a DeviceID/EventID
>> pair to a vLPI/vCPU pair. Now there is this new
>> gicv3_its_get_event_pending_irq() function, which also takes an ITS and
>> an DeviceID/EventID pair and gives us a struct pending_irq.
>> And here we have both the vLPI number and the VCPU number in there
>> already, so actually we don't need read_itte() anymore. And if we don't
>> read, we don't need write. And if we don't write, we don't need to
>> access guest memory. So this seems to ripple through and allows us to
>> possibly dump the guest memory tables altogether.
>
> Sounds like a good idea to me for DeviceID/EventID to vLPI/vCPU
> translations.
>
>
>> Now we still use the collection table in guest memory, but I was
>> wondering if we could store the collection ID in the vcpu struct and use
>> some hashing scheme to do the reverse lookup. But that might be
>> something for some future cleanup / optimization series.
>
> Leaving the security angle aside for a moment, I would prefer to keep
> the guest memory accesses rather than adding another hashing scheme to
> Xen for collection IDs.

The spec only requires you to implement max cpus + 1 collections. I don't 
think a hashing scheme would be necessary here. It is a simple array (1 
byte per entry today).
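
A sketch of such a Xen-owned collection array (the coll_table field,
the macro and the entry type are illustrative, not an actual proposal
in code form):

    /* As noted above, only max vCPUs + 1 collections need to be supported. */
    #define VIRT_ITS_NR_COLLECTIONS(d)   ((d)->max_vcpus + 1)

    static struct vcpu *coll_to_vcpu(const struct virt_its *its,
                                     uint16_t collid)
    {
        uint16_t vcpu_id;

        if ( collid >= VIRT_ITS_NR_COLLECTIONS(its->d) )
            return NULL;

        vcpu_id = its->coll_table[collid];          /* Xen-owned array */
        if ( vcpu_id >= its->d->max_vcpus )
            return NULL;

        return its->d->vcpu[vcpu_id];
    }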

>
> Going back to security: it looks like it should be possible to check for
> the validity of collection IDs without too much troubles?
> too?

If we store everything in the pending_irq, the use of the collection would 
be limited to a few commands (e.g. MOVI, MAPTI...). We don't much care if 
the guest modifies the collection table as long as we check that the vCPU 
is valid.

Cheers,
Stefano Stabellini May 25, 2017, 8:44 p.m. UTC | #7
On Thu, 25 May 2017, Julien Grall wrote:
> On 25/05/2017 19:55, Stefano Stabellini wrote:
> > On Wed, 24 May 2017, Andre Przywara wrote:
> > > Hi,
> > > 
> > > On 24/05/17 10:56, Julien Grall wrote:
> > > > Hi Andre,
> > > > 
> > > > On 05/24/2017 10:10 AM, Andre Przywara wrote:
> > > > > On 17/05/17 19:07, Julien Grall wrote:
> > > > > > >  /*
> > > > > > >   * Lookup the address of the Interrupt Translation Table
> > > > > > > associated
> > > > > > > with
> > > > > > >   * that device ID.
> > > > > > > @@ -414,6 +429,133 @@ out_unlock:
> > > > > > >      return ret;
> > > > > > >  }
> > > > > > > 
> > > > > > > +/* Must be called with the ITS lock held. */
> > > > > > > +static int its_discard_event(struct virt_its *its,
> > > > > > > +                             uint32_t vdevid, uint32_t vevid)
> > > > > > > +{
> > > > > > > +    struct pending_irq *p;
> > > > > > > +    unsigned long flags;
> > > > > > > +    struct vcpu *vcpu;
> > > > > > > +    uint32_t vlpi;
> > > > > > > +
> > > > > > > +    ASSERT(spin_is_locked(&its->its_lock));
> > > > > > > +
> > > > > > > +    if ( !read_itte_locked(its, vdevid, vevid, &vcpu, &vlpi) )
> > > > > > > +        return -ENOENT;
> > > > > > > +
> > > > > > > +    if ( vlpi == INVALID_LPI )
> > > > > > > +        return -ENOENT;
> > > > > > > +
> > > > > > > +    /* Lock this VCPU's VGIC to make sure nobody is using the
> > > > > > > pending_irq. */
> > > > > > > +    spin_lock_irqsave(&vcpu->arch.vgic.lock, flags);
> > > > > > 
> > > > > > There is an interesting issue happening with this code. You don't
> > > > > > check
> > > > > > the content of the memory provided by the guest. So a malicious
> > > > > > guest
> > > > > > could craft the memory in order to setup mapping with known vlpi and
> > > > > > a
> > > > > > different vCPU.
> > > > > > 
> > > > > > This would lead to use the wrong lock here and corrupt the list.
> > > > > 
> > > > > What about this:
> > > > > Right now (mostly due to the requirements of the INVALL
> > > > > implementation)
> > > > > we store the VCPU ID in our struct pending_irq, populated upon MAPTI.
> > > > > So
> > > > > originally this was just for caching (INVALL being the only user of
> > > > > this), but I was wondering if we should move the actual instance of
> > > > > this
> > > > > information to pending_irq instead of relying on the collection ID
> > > > > from
> > > > > the ITS table. So we would never need to look up and trust the ITS
> > > > > tables for this information anymore. Later with the VGIC rework we
> > > > > will
> > > > > need this field anyway (even for SPIs).
> > > > > 
> > > > > I think this should solve this threat, where a guest can manipulate
> > > > > Xen
> > > > > by crafting the tables. Tinkering with the other information stored in
> > > > > the tables should not harm Xen, the guest would just shoot itself into
> > > > > the foot.
> > > > > 
> > > > > Does that make sense?
> > > > 
> > > > I think so. If I understand correctly, with that solution we would not
> > > > need to protect the memory provided by the guest?
> > > 
> > > Well, it gets better (though also a bit scary):
> > > Currently we use the guest's ITS tables to translate a DeviceID/EventID
> > > pair to a vLPI/vCPU pair. Now there is this new
> > > gicv3_its_get_event_pending_irq() function, which also takes an ITS and
> > > an DeviceID/EventID pair and gives us a struct pending_irq.
> > > And here we have both the vLPI number and the VCPU number in there
> > > already, so actually we don't need read_itte() anymore. And if we don't
> > > read, we don't need write. And if we don't write, we don't need to
> > > access guest memory. So this seems to ripple through and allows us to
> > > possibly dump the guest memory tables altogether.
> > 
> > Sounds like a good idea to me for DeviceID/EventID to vLPI/vCPU
> > translations.
> > 
> > 
> > > Now we still use the collection table in guest memory, but I was
> > > wondering if we could store the collection ID in the vcpu struct and use
> > > some hashing scheme to do the reverse lookup. But that might be
> > > something for some future cleanup / optimization series.
> > 
> > Leaving the security angle aside for a moment, I would prefer to keep
> > the guest memory accesses rather than adding another hashing scheme to
> > Xen for collection IDs.
> 
> The spec only require you to implement max cpus + 1 collections. I don't think
> an hashing scheme would be necessary here. It is a simple array (1 byte per
> entry in today).
> 
> > 
> > Going back to security: it looks like it should be possible to check for
> > the validity of collection IDs without too much troubles?
> > too?
> 
> If we store everthing in the pending_irq the use of the collection would
> limited to a few commands (e.g MOVI, MAPTI...). We don't much care if the
> guest modify the collection table as long as we check the vCPU is valid.

That's what I thought. In that case we might as well keep the info in
guest memory.
Andre Przywara May 26, 2017, 8:16 a.m. UTC | #8
Hi,

On 25/05/17 21:44, Stefano Stabellini wrote:
> On Thu, 25 May 2017, Julien Grall wrote:
>> On 25/05/2017 19:55, Stefano Stabellini wrote:
>>> On Wed, 24 May 2017, Andre Przywara wrote:
>>>> Hi,
>>>>
>>>> On 24/05/17 10:56, Julien Grall wrote:
>>>>> Hi Andre,
>>>>>
>>>>> On 05/24/2017 10:10 AM, Andre Przywara wrote:
>>>>>> On 17/05/17 19:07, Julien Grall wrote:
>>>>>>>>  /*
>>>>>>>>   * Lookup the address of the Interrupt Translation Table
>>>>>>>> associated
>>>>>>>> with
>>>>>>>>   * that device ID.
>>>>>>>> @@ -414,6 +429,133 @@ out_unlock:
>>>>>>>>      return ret;
>>>>>>>>  }
>>>>>>>>
>>>>>>>> +/* Must be called with the ITS lock held. */
>>>>>>>> +static int its_discard_event(struct virt_its *its,
>>>>>>>> +                             uint32_t vdevid, uint32_t vevid)
>>>>>>>> +{
>>>>>>>> +    struct pending_irq *p;
>>>>>>>> +    unsigned long flags;
>>>>>>>> +    struct vcpu *vcpu;
>>>>>>>> +    uint32_t vlpi;
>>>>>>>> +
>>>>>>>> +    ASSERT(spin_is_locked(&its->its_lock));
>>>>>>>> +
>>>>>>>> +    if ( !read_itte_locked(its, vdevid, vevid, &vcpu, &vlpi) )
>>>>>>>> +        return -ENOENT;
>>>>>>>> +
>>>>>>>> +    if ( vlpi == INVALID_LPI )
>>>>>>>> +        return -ENOENT;
>>>>>>>> +
>>>>>>>> +    /* Lock this VCPU's VGIC to make sure nobody is using the
>>>>>>>> pending_irq. */
>>>>>>>> +    spin_lock_irqsave(&vcpu->arch.vgic.lock, flags);
>>>>>>>
>>>>>>> There is an interesting issue happening with this code. You don't
>>>>>>> check
>>>>>>> the content of the memory provided by the guest. So a malicious
>>>>>>> guest
>>>>>>> could craft the memory in order to setup mapping with known vlpi and
>>>>>>> a
>>>>>>> different vCPU.
>>>>>>>
>>>>>>> This would lead to use the wrong lock here and corrupt the list.
>>>>>>
>>>>>> What about this:
>>>>>> Right now (mostly due to the requirements of the INVALL
>>>>>> implementation)
>>>>>> we store the VCPU ID in our struct pending_irq, populated upon MAPTI.
>>>>>> So
>>>>>> originally this was just for caching (INVALL being the only user of
>>>>>> this), but I was wondering if we should move the actual instance of
>>>>>> this
>>>>>> information to pending_irq instead of relying on the collection ID
>>>>>> from
>>>>>> the ITS table. So we would never need to look up and trust the ITS
>>>>>> tables for this information anymore. Later with the VGIC rework we
>>>>>> will
>>>>>> need this field anyway (even for SPIs).
>>>>>>
>>>>>> I think this should solve this threat, where a guest can manipulate
>>>>>> Xen
>>>>>> by crafting the tables. Tinkering with the other information stored in
>>>>>> the tables should not harm Xen, the guest would just shoot itself into
>>>>>> the foot.
>>>>>>
>>>>>> Does that make sense?
>>>>>
>>>>> I think so. If I understand correctly, with that solution we would not
>>>>> need to protect the memory provided by the guest?
>>>>
>>>> Well, it gets better (though also a bit scary):
>>>> Currently we use the guest's ITS tables to translate a DeviceID/EventID
>>>> pair to a vLPI/vCPU pair. Now there is this new
>>>> gicv3_its_get_event_pending_irq() function, which also takes an ITS and
>>>> an DeviceID/EventID pair and gives us a struct pending_irq.
>>>> And here we have both the vLPI number and the VCPU number in there
>>>> already, so actually we don't need read_itte() anymore. And if we don't
>>>> read, we don't need write. And if we don't write, we don't need to
>>>> access guest memory. So this seems to ripple through and allows us to
>>>> possibly dump the guest memory tables altogether.
>>>
>>> Sounds like a good idea to me for DeviceID/EventID to vLPI/vCPU
>>> translations.
>>>
>>>
>>>> Now we still use the collection table in guest memory, but I was
>>>> wondering if we could store the collection ID in the vcpu struct and use
>>>> some hashing scheme to do the reverse lookup. But that might be
>>>> something for some future cleanup / optimization series.
>>>
>>> Leaving the security angle aside for a moment, I would prefer to keep
>>> the guest memory accesses rather than adding another hashing scheme to
>>> Xen for collection IDs.
>>
>> The spec only require you to implement max cpus + 1 collections. I don't think
>> an hashing scheme would be necessary here. It is a simple array (1 byte per
>> entry in today).
>>
>>>
>>> Going back to security: it looks like it should be possible to check for
>>> the validity of collection IDs without too much troubles?
>>> too?
>>
>> If we store everthing in the pending_irq the use of the collection would
>> limited to a few commands (e.g MOVI, MAPTI...). We don't much care if the
>> guest modify the collection table as long as we check the vCPU is valid.
> 
> That's what I thought. In that case we might as well keep the info in
> guest memory.

Well, if we have a chance to drop guest memory accesses from the ITS
altogether, we should consider this.
But while trying to implement this I saw that it requires quite some code
changes, which Julien suggested postponing until the rework. And I agree.
So I kept it as it is for now and added TODOs.

Cheers,
Andre

Patch

diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index fd6a394..be4c3e0 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -869,6 +869,24 @@  struct pending_irq *gicv3_its_get_event_pending_irq(struct domain *d,
     return get_event_pending_irq(d, vdoorbell_address, vdevid, veventid, NULL);
 }
 
+int gicv3_remove_guest_event(struct domain *d, paddr_t vdoorbell_address,
+                             uint32_t vdevid, uint32_t veventid)
+{
+    uint32_t host_lpi = INVALID_LPI;
+
+    if ( !get_event_pending_irq(d, vdoorbell_address, vdevid, veventid,
+                                &host_lpi) )
+        return -EINVAL;
+
+    if ( host_lpi == INVALID_LPI )
+        return -EINVAL;
+
+    gicv3_lpi_update_host_entry(host_lpi, d->domain_id,
+                                INVALID_VCPU_ID, INVALID_LPI);
+
+    return 0;
+}
+
 /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
 void gicv3_its_dt_init(const struct dt_device_node *node)
 {
diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
index 44f6315..d427539 100644
--- a/xen/arch/arm/gic-v3-lpi.c
+++ b/xen/arch/arm/gic-v3-lpi.c
@@ -207,6 +207,24 @@  out:
     irq_exit();
 }
 
+void gicv3_lpi_update_host_entry(uint32_t host_lpi, int domain_id,
+                                 unsigned int vcpu_id, uint32_t virt_lpi)
+{
+    union host_lpi *hlpip, hlpi;
+
+    ASSERT(host_lpi >= LPI_OFFSET);
+
+    host_lpi -= LPI_OFFSET;
+
+    hlpip = &lpi_data.host_lpis[host_lpi / HOST_LPIS_PER_PAGE][host_lpi % HOST_LPIS_PER_PAGE];
+
+    hlpi.virt_lpi = virt_lpi;
+    hlpi.dom_id = domain_id;
+    hlpi.vcpu_id = vcpu_id;
+
+    write_u64_atomic(&hlpip->data, hlpi.data);
+}
+
 static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
 {
     uint64_t val;
diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
index 8a200e9..731fe0c 100644
--- a/xen/arch/arm/vgic-v3-its.c
+++ b/xen/arch/arm/vgic-v3-its.c
@@ -175,6 +175,21 @@  static struct vcpu *get_vcpu_from_collection(struct virt_its *its,
 #define DEV_TABLE_ENTRY(addr, bits)                     \
         (((addr) & GENMASK(51, 8)) | (((bits) - 1) & GENMASK(4, 0)))
 
+/* Set the address of an ITT for a given device ID. */
+static int its_set_itt_address(struct virt_its *its, uint32_t devid,
+                               paddr_t itt_address, uint32_t nr_bits)
+{
+    paddr_t addr = get_baser_phys_addr(its->baser_dev);
+    dev_table_entry_t itt_entry = DEV_TABLE_ENTRY(itt_address, nr_bits);
+
+    if ( devid >= its->max_devices )
+        return -ENOENT;
+
+    return vgic_access_guest_memory(its->d,
+                                    addr + devid * sizeof(dev_table_entry_t),
+                                    &itt_entry, sizeof(itt_entry), true);
+}
+
 /*
  * Lookup the address of the Interrupt Translation Table associated with
  * that device ID.
@@ -414,6 +429,133 @@  out_unlock:
     return ret;
 }
 
+/* Must be called with the ITS lock held. */
+static int its_discard_event(struct virt_its *its,
+                             uint32_t vdevid, uint32_t vevid)
+{
+    struct pending_irq *p;
+    unsigned long flags;
+    struct vcpu *vcpu;
+    uint32_t vlpi;
+
+    ASSERT(spin_is_locked(&its->its_lock));
+
+    if ( !read_itte_locked(its, vdevid, vevid, &vcpu, &vlpi) )
+        return -ENOENT;
+
+    if ( vlpi == INVALID_LPI )
+        return -ENOENT;
+
+    /* Lock this VCPU's VGIC to make sure nobody is using the pending_irq. */
+    spin_lock_irqsave(&vcpu->arch.vgic.lock, flags);
+
+    /* Remove the pending_irq from the tree. */
+    write_lock(&its->d->arch.vgic.pend_lpi_tree_lock);
+    p = radix_tree_delete(&its->d->arch.vgic.pend_lpi_tree, vlpi);
+    write_unlock(&its->d->arch.vgic.pend_lpi_tree_lock);
+
+    if ( !p )
+    {
+        spin_unlock_irqrestore(&vcpu->arch.vgic.lock, flags);
+
+        return -ENOENT;
+    }
+
+    /* Cleanup the pending_irq and disconnect it from the LPI. */
+    list_del_init(&p->inflight);
+    list_del_init(&p->lr_queue);
+    vgic_init_pending_irq(p, INVALID_LPI);
+
+    spin_unlock_irqrestore(&vcpu->arch.vgic.lock, flags);
+
+    /* Remove the corresponding host LPI entry */
+    return gicv3_remove_guest_event(its->d, its->doorbell_address,
+                                    vdevid, vevid);
+}
+
+static int its_unmap_device(struct virt_its *its, uint32_t devid)
+{
+    dev_table_entry_t itt;
+    uint64_t evid;
+    int ret;
+
+    spin_lock(&its->its_lock);
+
+    ret = its_get_itt(its, devid, &itt);
+    if ( ret )
+    {
+        spin_unlock(&its->its_lock);
+        return ret;
+    }
+
+    /*
+     * For DomUs we need to check that the number of events per device
+     * is really limited, otherwise looping over all events can take too
+     * long for a guest. This ASSERT can then be removed if that is
+     * covered.
+     */
+    ASSERT(is_hardware_domain(its->d));
+
+    for ( evid = 0; evid < DEV_TABLE_ITT_SIZE(itt); evid++ )
+        /* Don't care about errors here, clean up as much as possible. */
+        its_discard_event(its, devid, evid);
+
+    spin_unlock(&its->its_lock);
+
+    return 0;
+}
+
+static int its_handle_mapd(struct virt_its *its, uint64_t *cmdptr)
+{
+    /* size and devid get validated by the functions called below. */
+    uint32_t devid = its_cmd_get_deviceid(cmdptr);
+    unsigned int size = its_cmd_get_size(cmdptr) + 1;
+    bool valid = its_cmd_get_validbit(cmdptr);
+    paddr_t itt_addr = its_cmd_get_ittaddr(cmdptr);
+    int ret;
+
+    /* Sanitize the number of events. */
+    if ( valid && (size > its->evid_bits) )
+        return -1;
+
+    if ( !valid )
+        /* Discard all events and remove pending LPIs. */
+        its_unmap_device(its, devid);
+
+    /*
+     * There is no easy and clean way for Xen to know the ITS device ID of a
+     * particular (PCI) device, so we have to rely on the guest telling
+     * us about it. For *now* we are just using the device ID *Dom0* uses,
+     * because the driver there has the actual knowledge.
+     * Eventually this will be replaced with a dedicated hypercall to
+     * announce pass-through of devices.
+     */
+    if ( is_hardware_domain(its->d) )
+    {
+
+        /*
+         * Dom0's ITSes are mapped 1:1, so both addresses are the same.
+         * Also the device IDs are equal.
+         */
+        ret = gicv3_its_map_guest_device(its->d, its->doorbell_address, devid,
+                                         its->doorbell_address, devid,
+                                         BIT(size), valid);
+        if ( ret && valid )
+            return ret;
+    }
+
+    spin_lock(&its->its_lock);
+
+    if ( valid )
+        ret = its_set_itt_address(its, devid, itt_addr, size);
+    else
+        ret = its_set_itt_address(its, devid, INVALID_PADDR, 1);
+
+    spin_unlock(&its->its_lock);
+
+    return ret;
+}
+
 #define ITS_CMD_BUFFER_SIZE(baser)      ((((baser) & 0xff) + 1) << 12)
 #define ITS_CMD_OFFSET(reg)             ((reg) & GENMASK(19, 5))
 
@@ -452,6 +594,9 @@  static int vgic_its_handle_cmds(struct domain *d, struct virt_its *its)
         case GITS_CMD_MAPC:
             ret = its_handle_mapc(its, command);
             break;
+        case GITS_CMD_MAPD:
+            ret = its_handle_mapd(its, command);
+            break;
         case GITS_CMD_SYNC:
             /* We handle ITS commands synchronously, so we ignore SYNC. */
             break;
diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
index d162e89..6f94e65 100644
--- a/xen/include/asm-arm/gic_v3_its.h
+++ b/xen/include/asm-arm/gic_v3_its.h
@@ -173,6 +173,11 @@  struct pending_irq *gicv3_its_get_event_pending_irq(struct domain *d,
                                                     paddr_t vdoorbell_address,
                                                     uint32_t vdevid,
                                                     uint32_t veventid);
+int gicv3_remove_guest_event(struct domain *d, paddr_t vdoorbell_address,
+                                     uint32_t vdevid, uint32_t veventid);
+void gicv3_lpi_update_host_entry(uint32_t host_lpi, int domain_id,
+                                 unsigned int vcpu_id, uint32_t virt_lpi);
+
 #else
 
 static inline void gicv3_its_dt_init(const struct dt_device_node *node)