diff mbox

[04/28] ARM: GICv3 ITS: allocate device and collection table

Message ID 20170130183153.28566-5-andre.przywara@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andre Przywara Jan. 30, 2017, 6:31 p.m. UTC
Each ITS maps a pair of a DeviceID (usually the PCI b/d/f triplet) and
an EventID (the MSI payload or interrupt ID) to a pair of LPI number
and collection ID, which points to the target CPU.
This mapping is stored in the device and collection tables, which software
has to provide for the ITS to use.
Allocate the required memory and hand it the ITS.
The maximum number of devices is limited to a compile-time constant
exposed in Kconfig.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 xen/arch/arm/Kconfig             |  14 +++++
 xen/arch/arm/gic-v3-its.c        | 129 +++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/gic-v3.c            |   5 ++
 xen/include/asm-arm/gic_v3_its.h |  55 ++++++++++++++++-
 4 files changed, 202 insertions(+), 1 deletion(-)

Comments

Julien Grall Feb. 6, 2017, 5:19 p.m. UTC | #1
Hi Andre,

On 30/01/17 18:31, Andre Przywara wrote:
> Each ITS maps a pair of a DeviceID (usually the PCI b/d/f triplet) and
> an EventID (the MSI payload or interrupt ID) to a pair of LPI number
> and collection ID, which points to the target CPU.
> This mapping is stored in the device and collection tables, which software
> has to provide for the ITS to use.
> Allocate the required memory and hand it the ITS.

s/hand it the/hand it to the/ ?

> The maximum number of devices is limited to a compile-time constant
> exposed in Kconfig.
>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  xen/arch/arm/Kconfig             |  14 +++++
>  xen/arch/arm/gic-v3-its.c        | 129 +++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/gic-v3.c            |   5 ++
>  xen/include/asm-arm/gic_v3_its.h |  55 ++++++++++++++++-
>  4 files changed, 202 insertions(+), 1 deletion(-)
>
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index 71734a1..81bc233 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -64,6 +64,20 @@ config MAX_PHYS_LPI_BITS
>            This can be overriden on the command line with the max_lpi_bits
>            parameter.
>
> +config MAX_PHYS_ITS_DEVICE_BITS
> +        depends on HAS_ITS
> +        int "Number of device bits the ITS supports"
> +        range 1 32
> +        default "10"
> +        help
> +          Specifies the maximum number of devices which want to use the ITS.
> +          Xen needs to allocates memory for the whole range very early.
> +          The allocation scheme may be sparse, so a much larger number must
> +          be supported to cover devices with a high bus number or those on
> +          separate bus segments.

As for MAX_PHYS_LPI_BITS, this Kconfig is questionable. The default 
value is arbitrary and may not fit everyone.

The way forward is to use the 2-level table if available to limit the 
memory usage. If only flat table is supported, then the user can use the 
command line option to limit it.

> +          This can be overriden on the command line with the max_its_device_bits

s/overriden/overridden/

> +          parameter.
> +
>  endmenu
>
>  menu "ARM errata workaround via the alternative framework"
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index ff0f571..c31fef6 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -20,9 +20,138 @@
>  #include <xen/lib.h>
>  #include <xen/device_tree.h>
>  #include <xen/libfdt/libfdt.h>
> +#include <xen/mm.h>
> +#include <xen/sizes.h>
>  #include <asm/gic.h>
>  #include <asm/gic_v3_defs.h>
>  #include <asm/gic_v3_its.h>
> +#include <asm/io.h>
> +
> +#define BASER_ATTR_MASK                                           \
> +        ((0x3UL << GITS_BASER_SHAREABILITY_SHIFT)               | \
> +         (0x7UL << GITS_BASER_OUTER_CACHEABILITY_SHIFT)         | \
> +         (0x7UL << GITS_BASER_INNER_CACHEABILITY_SHIFT))
> +#define BASER_RO_MASK   (GENMASK(58, 56) | GENMASK(52, 48))
> +
> +static uint64_t encode_phys_addr(paddr_t addr, int page_bits)
> +{
> +    uint64_t ret;
> +
> +    if ( page_bits < 16 )
> +        return (uint64_t)addr & GENMASK(47, page_bits);
> +
> +    ret = addr & GENMASK(47, 16);
> +    return ret | (addr & GENMASK(51, 48)) >> (48 - 12);
> +}
> +
> +#define PAGE_BITS(sz) ((sz) * 2 + PAGE_SHIFT)

I know that PAGE_SHIFT has been suggested by Stefano on the previous 
version. However, I think  this is wrong. The PAGE_BITS is not based on 
the page granularity of Xen, so I would much prefer to keep an 12 
hardcoded with a comment.

> +
> +static int its_map_baser(void __iomem *basereg, uint64_t regc, int nr_items)

s/int/unsigned int/

> +{
> +    uint64_t attr, reg;
> +    int entry_size = ((regc >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1;

s/int/unsigned int/

> +    int pagesz = 0, order, table_size;

s/int/unsigned int/

> +    void *buffer = NULL;
> +
> +    attr  = GIC_BASER_InnerShareable << GITS_BASER_SHAREABILITY_SHIFT;
> +    attr |= GIC_BASER_CACHE_SameAsInner << GITS_BASER_OUTER_CACHEABILITY_SHIFT;
> +    attr |= GIC_BASER_CACHE_RaWaWb << GITS_BASER_INNER_CACHEABILITY_SHIFT;
> +
> +    /*
> +     * Setup the BASE register with the attributes that we like. Then read
> +     * it back and see what sticks (page size, cacheability and shareability
> +     * attributes), retrying if necessary.
> +     */
> +    while ( 1 )

This loop is really confusing to read. A set of goto would probably make 
it more readable thanks to the labels labels.

> +    {
> +        table_size = ROUNDUP(nr_items * entry_size, BIT(PAGE_BITS(pagesz)));
> +        order = get_order_from_bytes(table_size);
> +
> +        if ( !buffer )
> +            buffer = alloc_xenheap_pages(order, 0);
> +        if ( !buffer )
> +            return -ENOMEM;
> +
> +        reg  = attr;
> +        reg |= (pagesz << GITS_BASER_PAGE_SIZE_SHIFT);
> +        reg |= table_size >> PAGE_BITS(pagesz);
> +        reg |= regc & BASER_RO_MASK;
> +        reg |= GITS_VALID_BIT;
> +        reg |= encode_phys_addr(virt_to_maddr(buffer), PAGE_BITS(pagesz));
> +
> +        writeq_relaxed(reg, basereg);
> +        regc = readl_relaxed(basereg);
> +
> +        /* The host didn't like our attributes, just use what it returned. */
> +        if ( (regc & BASER_ATTR_MASK) != attr )
> +        {
> +            /* If we can't map it shareable, drop cacheability as well. */
> +            if ( (regc & GITS_BASER_SHAREABILITY_MASK) == GIC_BASER_NonShareable )
> +            {
> +                regc &= ~GITS_BASER_INNER_CACHEABILITY_MASK;

So you drop cacheability, but you never clean & invalidate the cache. Is 
it normal?

> +                attr = regc & BASER_ATTR_MASK;
> +                continue;
> +            }
> +            attr = regc & BASER_ATTR_MASK;
> +        }
> +
> +        /* If the host accepted our page size, we are done. */
> +        if ( (regc & (3UL << GITS_BASER_PAGE_SIZE_SHIFT)) == pagesz )

This check looks wrong to me. The page size is encoded in bits 9:8 but I 
don't see any shift here.

Also a mask for the field would be useful.

> +            return 0;
> +
> +        /* None of the page sizes was accepted, give up */
> +        if ( pagesz >= 2 )
> +            break;
> +
> +        free_xenheap_pages(buffer, order);
> +        buffer = NULL;

If you move the check "if ( pagesz >= 2 )" here ...

> +
> +        pagesz++;
> +    }
> +
> +    if ( buffer )
> +        free_xenheap_pages(buffer, order);

... those 2 lines could be dropped.

> +
> +    return -EINVAL;
> +}
> +
> +static unsigned int max_its_device_bits = CONFIG_MAX_PHYS_ITS_DEVICE_BITS;
> +integer_param("max_its_device_bits", max_its_device_bits);

This new command line option needs to be documented in 
docs/misc/xen-command-line.markdown.

> +
> +int gicv3_its_init(struct host_its *hw_its)
> +{
> +    uint64_t reg;
> +    int i;
> +
> +    hw_its->its_base = ioremap_nocache(hw_its->addr, hw_its->size);
> +    if ( !hw_its->its_base )
> +        return -ENOMEM;
> +
> +    for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
> +    {
> +        void __iomem *basereg = hw_its->its_base + GITS_BASER0 + i * 8;
> +        int type;
> +
> +        reg = readq_relaxed(basereg);
> +        type = (reg & GITS_BASER_TYPE_MASK) >> GITS_BASER_TYPE_SHIFT;
> +        switch ( type )
> +        {
> +        case GITS_BASER_TYPE_NONE:
> +            continue;
> +        case GITS_BASER_TYPE_DEVICE:
> +            /* TODO: find some better way of limiting the number of devices */
> +            its_map_baser(basereg, reg, BIT(max_its_device_bits));

You will waste space if the platform support less DevID bits 
max_its_device_bits.

> +            break;
> +        case GITS_BASER_TYPE_COLLECTION:
> +            its_map_baser(basereg, reg, NR_CPUS);
> +            break;
> +        default:
> +            continue;
> +        }
> +    }
> +
> +    return 0;
> +}
>
>  /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
>  void gicv3_its_dt_init(const struct dt_device_node *node)
> diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
> index fcb86c8..440c079 100644
> --- a/xen/arch/arm/gic-v3.c
> +++ b/xen/arch/arm/gic-v3.c
> @@ -29,6 +29,7 @@
>  #include <xen/irq.h>
>  #include <xen/iocap.h>
>  #include <xen/sched.h>
> +#include <xen/err.h>
>  #include <xen/errno.h>
>  #include <xen/delay.h>
>  #include <xen/device_tree.h>
> @@ -1563,6 +1564,7 @@ static int __init gicv3_init(void)
>  {
>      int res, i;
>      uint32_t reg;
> +    struct host_its *hw_its;
>
>      if ( !cpu_has_gicv3 )
>      {
> @@ -1618,6 +1620,9 @@ static int __init gicv3_init(void)
>      res = gicv3_cpu_init();
>      gicv3_hyp_init();
>
> +    list_for_each_entry(hw_its, &host_its_list, entry)
> +        gicv3_its_init(hw_its);
> +

This loop could be handled in gic-v3-its.c.

>      spin_unlock(&gicv3.lock);
>
>      return res;

[...]

>  #ifndef __ASSEMBLY__
>  #include <xen/device_tree.h>
>
> @@ -27,6 +74,7 @@ struct host_its {
>      const struct dt_device_node *dt_node;
>      paddr_t addr;
>      paddr_t size;
> +    void __iomem *its_base;
>  };
>
>  extern struct list_head host_its_list;
> @@ -42,8 +90,9 @@ void gicv3_its_dt_init(const struct dt_device_node *node);
>  uint64_t gicv3_lpi_get_proptable(void);
>  uint64_t gicv3_lpi_allocate_pendtable(void);
>
> -/* Initialize the host structures for LPIs. */
> +/* Initialize the host structures for LPIs and the host ITSes. */
>  int gicv3_lpi_init_host_lpis(unsigned int nr_lpis);
> +int gicv3_its_init(struct host_its *hw_its);
>
>  #else
>
> @@ -62,6 +111,10 @@ static inline int gicv3_lpi_init_host_lpis(unsigned int nr_lpis)
>  {
>      return 0;
>  }

Newline here.

> +static inline int gicv3_its_init(struct host_its *hw_its)
> +{
> +    return 0;
> +}

Newline here.

>  #endif /* CONFIG_HAS_ITS */
>
>  #endif /* __ASSEMBLY__ */
>

Regards,
Julien Grall Feb. 6, 2017, 5:36 p.m. UTC | #2
On 30/01/17 18:31, Andre Przywara wrote:
> diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
> index fcb86c8..440c079 100644
> --- a/xen/arch/arm/gic-v3.c
> +++ b/xen/arch/arm/gic-v3.c
> @@ -29,6 +29,7 @@
>  #include <xen/irq.h>
>  #include <xen/iocap.h>
>  #include <xen/sched.h>
> +#include <xen/err.h>
>  #include <xen/errno.h>
>  #include <xen/delay.h>
>  #include <xen/device_tree.h>
> @@ -1563,6 +1564,7 @@ static int __init gicv3_init(void)
>  {
>      int res, i;
>      uint32_t reg;
> +    struct host_its *hw_its;
>
>      if ( !cpu_has_gicv3 )
>      {
> @@ -1618,6 +1620,9 @@ static int __init gicv3_init(void)
>      res = gicv3_cpu_init();
>      gicv3_hyp_init();
>
> +    list_for_each_entry(hw_its, &host_its_list, entry)
> +        gicv3_its_init(hw_its);

Also, it is probably a really bad idea to ignore any error from the ITS 
and not even printing an error. For the next version, I would add more 
error message to quickly find out what's going on from the log.

Cheers,
Julien Grall Feb. 6, 2017, 5:43 p.m. UTC | #3
Hi,

On 30/01/17 18:31, Andre Przywara wrote:
> +int gicv3_its_init(struct host_its *hw_its)
> +{
> +    uint64_t reg;
> +    int i;
> +
> +    hw_its->its_base = ioremap_nocache(hw_its->addr, hw_its->size);
> +    if ( !hw_its->its_base )
> +        return -ENOMEM;
> +
> +    for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
> +    {
> +        void __iomem *basereg = hw_its->its_base + GITS_BASER0 + i * 8;
> +        int type;
> +
> +        reg = readq_relaxed(basereg);
> +        type = (reg & GITS_BASER_TYPE_MASK) >> GITS_BASER_TYPE_SHIFT;
> +        switch ( type )
> +        {
> +        case GITS_BASER_TYPE_NONE:
> +            continue;
> +        case GITS_BASER_TYPE_DEVICE:
> +            /* TODO: find some better way of limiting the number of devices */
> +            its_map_baser(basereg, reg, BIT(max_its_device_bits));
> +            break;
> +        case GITS_BASER_TYPE_COLLECTION:
> +            its_map_baser(basereg, reg, NR_CPUS);

And I forgot to mention about the collection. Same remark as for the 
device collection, NR_CPUS is the maximum size.

Cheers,
Stefano Stabellini Feb. 14, 2017, 12:54 a.m. UTC | #4
On Mon, 30 Jan 2017, Andre Przywara wrote:
> Each ITS maps a pair of a DeviceID (usually the PCI b/d/f triplet) and
> an EventID (the MSI payload or interrupt ID) to a pair of LPI number
> and collection ID, which points to the target CPU.
> This mapping is stored in the device and collection tables, which software
> has to provide for the ITS to use.
> Allocate the required memory and hand it the ITS.
> The maximum number of devices is limited to a compile-time constant
> exposed in Kconfig.
> 
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  xen/arch/arm/Kconfig             |  14 +++++
>  xen/arch/arm/gic-v3-its.c        | 129 +++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/gic-v3.c            |   5 ++
>  xen/include/asm-arm/gic_v3_its.h |  55 ++++++++++++++++-
>  4 files changed, 202 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index 71734a1..81bc233 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -64,6 +64,20 @@ config MAX_PHYS_LPI_BITS
>            This can be overriden on the command line with the max_lpi_bits
>            parameter.
>  
> +config MAX_PHYS_ITS_DEVICE_BITS
> +        depends on HAS_ITS
> +        int "Number of device bits the ITS supports"
> +        range 1 32
> +        default "10"
> +        help
> +          Specifies the maximum number of devices which want to use the ITS.
> +          Xen needs to allocates memory for the whole range very early.
> +          The allocation scheme may be sparse, so a much larger number must
> +          be supported to cover devices with a high bus number or those on
> +          separate bus segments.
> +          This can be overriden on the command line with the max_its_device_bits
> +          parameter.
> +
>  endmenu
>  
>  menu "ARM errata workaround via the alternative framework"
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index ff0f571..c31fef6 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -20,9 +20,138 @@
>  #include <xen/lib.h>
>  #include <xen/device_tree.h>
>  #include <xen/libfdt/libfdt.h>
> +#include <xen/mm.h>
> +#include <xen/sizes.h>
>  #include <asm/gic.h>
>  #include <asm/gic_v3_defs.h>
>  #include <asm/gic_v3_its.h>
> +#include <asm/io.h>
> +
> +#define BASER_ATTR_MASK                                           \
> +        ((0x3UL << GITS_BASER_SHAREABILITY_SHIFT)               | \
> +         (0x7UL << GITS_BASER_OUTER_CACHEABILITY_SHIFT)         | \
> +         (0x7UL << GITS_BASER_INNER_CACHEABILITY_SHIFT))
> +#define BASER_RO_MASK   (GENMASK(58, 56) | GENMASK(52, 48))
> +
> +static uint64_t encode_phys_addr(paddr_t addr, int page_bits)
> +{
> +    uint64_t ret;
> +
> +    if ( page_bits < 16 )
> +        return (uint64_t)addr & GENMASK(47, page_bits);
> +
> +    ret = addr & GENMASK(47, 16);
> +    return ret | (addr & GENMASK(51, 48)) >> (48 - 12);
> +}
> +
> +#define PAGE_BITS(sz) ((sz) * 2 + PAGE_SHIFT)
> +
> +static int its_map_baser(void __iomem *basereg, uint64_t regc, int nr_items)
> +{
> +    uint64_t attr, reg;
> +    int entry_size = ((regc >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1;
> +    int pagesz = 0, order, table_size;
> +    void *buffer = NULL;
> +
> +    attr  = GIC_BASER_InnerShareable << GITS_BASER_SHAREABILITY_SHIFT;
> +    attr |= GIC_BASER_CACHE_SameAsInner << GITS_BASER_OUTER_CACHEABILITY_SHIFT;
> +    attr |= GIC_BASER_CACHE_RaWaWb << GITS_BASER_INNER_CACHEABILITY_SHIFT;
> +
> +    /*
> +     * Setup the BASE register with the attributes that we like. Then read
> +     * it back and see what sticks (page size, cacheability and shareability
> +     * attributes), retrying if necessary.
> +     */
> +    while ( 1 )
> +    {
> +        table_size = ROUNDUP(nr_items * entry_size, BIT(PAGE_BITS(pagesz)));
> +        order = get_order_from_bytes(table_size);
> +
> +        if ( !buffer )
> +            buffer = alloc_xenheap_pages(order, 0);
> +        if ( !buffer )
> +            return -ENOMEM;
> +
> +        reg  = attr;
> +        reg |= (pagesz << GITS_BASER_PAGE_SIZE_SHIFT);
> +        reg |= table_size >> PAGE_BITS(pagesz);
> +        reg |= regc & BASER_RO_MASK;
> +        reg |= GITS_VALID_BIT;
> +        reg |= encode_phys_addr(virt_to_maddr(buffer), PAGE_BITS(pagesz));
> +
> +        writeq_relaxed(reg, basereg);
> +        regc = readl_relaxed(basereg);
> +
> +        /* The host didn't like our attributes, just use what it returned. */
> +        if ( (regc & BASER_ATTR_MASK) != attr )
> +        {
> +            /* If we can't map it shareable, drop cacheability as well. */
> +            if ( (regc & GITS_BASER_SHAREABILITY_MASK) == GIC_BASER_NonShareable )
> +            {
> +                regc &= ~GITS_BASER_INNER_CACHEABILITY_MASK;
> +                attr = regc & BASER_ATTR_MASK;
> +                continue;

Why do we continue at this point? Shouldn't we go ahead to check if the
page size was accepted?


> +            }
> +            attr = regc & BASER_ATTR_MASK;
> +        }
> +
> +        /* If the host accepted our page size, we are done. */
> +        if ( (regc & (3UL << GITS_BASER_PAGE_SIZE_SHIFT)) == pagesz )
> +            return 0;
> +
> +        /* None of the page sizes was accepted, give up */
> +        if ( pagesz >= 2 )
> +            break;
> +
> +        free_xenheap_pages(buffer, order);
> +        buffer = NULL;
> +
> +        pagesz++;
> +    }
> +
> +    if ( buffer )
> +        free_xenheap_pages(buffer, order);
> +
> +    return -EINVAL;
> +}
> +
> +static unsigned int max_its_device_bits = CONFIG_MAX_PHYS_ITS_DEVICE_BITS;
> +integer_param("max_its_device_bits", max_its_device_bits);
> +
> +int gicv3_its_init(struct host_its *hw_its)
> +{
> +    uint64_t reg;
> +    int i;
> +
> +    hw_its->its_base = ioremap_nocache(hw_its->addr, hw_its->size);
> +    if ( !hw_its->its_base )
> +        return -ENOMEM;
> +
> +    for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
> +    {
> +        void __iomem *basereg = hw_its->its_base + GITS_BASER0 + i * 8;
> +        int type;
> +
> +        reg = readq_relaxed(basereg);
> +        type = (reg & GITS_BASER_TYPE_MASK) >> GITS_BASER_TYPE_SHIFT;
> +        switch ( type )
> +        {
> +        case GITS_BASER_TYPE_NONE:
> +            continue;
> +        case GITS_BASER_TYPE_DEVICE:
> +            /* TODO: find some better way of limiting the number of devices */
> +            its_map_baser(basereg, reg, BIT(max_its_device_bits));
> +            break;
> +        case GITS_BASER_TYPE_COLLECTION:
> +            its_map_baser(basereg, reg, NR_CPUS);
> +            break;
> +        default:
> +            continue;
> +        }
> +    }
> +
> +    return 0;
> +}
>  
>  /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
>  void gicv3_its_dt_init(const struct dt_device_node *node)
> diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
> index fcb86c8..440c079 100644
> --- a/xen/arch/arm/gic-v3.c
> +++ b/xen/arch/arm/gic-v3.c
> @@ -29,6 +29,7 @@
>  #include <xen/irq.h>
>  #include <xen/iocap.h>
>  #include <xen/sched.h>
> +#include <xen/err.h>
>  #include <xen/errno.h>
>  #include <xen/delay.h>
>  #include <xen/device_tree.h>
> @@ -1563,6 +1564,7 @@ static int __init gicv3_init(void)
>  {
>      int res, i;
>      uint32_t reg;
> +    struct host_its *hw_its;
>  
>      if ( !cpu_has_gicv3 )
>      {
> @@ -1618,6 +1620,9 @@ static int __init gicv3_init(void)
>      res = gicv3_cpu_init();
>      gicv3_hyp_init();
>  
> +    list_for_each_entry(hw_its, &host_its_list, entry)
> +        gicv3_its_init(hw_its);
> +
>      spin_unlock(&gicv3.lock);
>  
>      return res;
> diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
> index a66b6be..ed44bdb 100644
> --- a/xen/include/asm-arm/gic_v3_its.h
> +++ b/xen/include/asm-arm/gic_v3_its.h
> @@ -18,6 +18,53 @@
>  #ifndef __ASM_ARM_ITS_H__
>  #define __ASM_ARM_ITS_H__
>  
> +#define GITS_CTLR                       0x000
> +#define GITS_IIDR                       0x004
> +#define GITS_TYPER                      0x008
> +#define GITS_CBASER                     0x080
> +#define GITS_CWRITER                    0x088
> +#define GITS_CREADR                     0x090
> +#define GITS_BASER_NR_REGS              8
> +#define GITS_BASER0                     0x100
> +#define GITS_BASER1                     0x108
> +#define GITS_BASER2                     0x110
> +#define GITS_BASER3                     0x118
> +#define GITS_BASER4                     0x120
> +#define GITS_BASER5                     0x128
> +#define GITS_BASER6                     0x130
> +#define GITS_BASER7                     0x138
> +
> +/* Register bits */
> +#define GITS_VALID_BIT                  BIT_ULL(63)
> +
> +#define GITS_CTLR_QUIESCENT             BIT(31)
> +#define GITS_CTLR_ENABLE                BIT(0)
> +
> +#define GITS_IIDR_VALUE                 0x34c
> +
> +#define GITS_BASER_INDIRECT             BIT_ULL(62)
> +#define GITS_BASER_INNER_CACHEABILITY_SHIFT        59
> +#define GITS_BASER_TYPE_SHIFT           56
> +#define GITS_BASER_TYPE_MASK            (7ULL << GITS_BASER_TYPE_SHIFT)
> +#define GITS_BASER_OUTER_CACHEABILITY_SHIFT        53
> +#define GITS_BASER_TYPE_NONE            0UL
> +#define GITS_BASER_TYPE_DEVICE          1UL
> +#define GITS_BASER_TYPE_VCPU            2UL
> +#define GITS_BASER_TYPE_CPU             3UL
> +#define GITS_BASER_TYPE_COLLECTION      4UL
> +#define GITS_BASER_TYPE_RESERVED5       5UL
> +#define GITS_BASER_TYPE_RESERVED6       6UL
> +#define GITS_BASER_TYPE_RESERVED7       7UL
> +#define GITS_BASER_ENTRY_SIZE_SHIFT     48
> +#define GITS_BASER_SHAREABILITY_SHIFT   10
> +#define GITS_BASER_PAGE_SIZE_SHIFT      8
> +#define GITS_BASER_RO_MASK              (GITS_BASER_TYPE_MASK | \
> +                                        (31UL << GITS_BASER_ENTRY_SIZE_SHIFT) |\
> +                                        GITS_BASER_INDIRECT)
> +#define GITS_BASER_SHAREABILITY_MASK   (0x3ULL << GITS_BASER_SHAREABILITY_SHIFT)
> +#define GITS_BASER_OUTER_CACHEABILITY_MASK   (0x7ULL << GITS_BASER_OUTER_CACHEABILITY_SHIFT)
> +#define GITS_BASER_INNER_CACHEABILITY_MASK   (0x7ULL << GITS_BASER_INNER_CACHEABILITY_SHIFT)
> +
>  #ifndef __ASSEMBLY__
>  #include <xen/device_tree.h>
>  
> @@ -27,6 +74,7 @@ struct host_its {
>      const struct dt_device_node *dt_node;
>      paddr_t addr;
>      paddr_t size;
> +    void __iomem *its_base;
>  };
>  
>  extern struct list_head host_its_list;
> @@ -42,8 +90,9 @@ void gicv3_its_dt_init(const struct dt_device_node *node);
>  uint64_t gicv3_lpi_get_proptable(void);
>  uint64_t gicv3_lpi_allocate_pendtable(void);
>  
> -/* Initialize the host structures for LPIs. */
> +/* Initialize the host structures for LPIs and the host ITSes. */
>  int gicv3_lpi_init_host_lpis(unsigned int nr_lpis);
> +int gicv3_its_init(struct host_its *hw_its);
>  
>  #else
>  
> @@ -62,6 +111,10 @@ static inline int gicv3_lpi_init_host_lpis(unsigned int nr_lpis)
>  {
>      return 0;
>  }
> +static inline int gicv3_its_init(struct host_its *hw_its)
> +{
> +    return 0;
> +}
>  #endif /* CONFIG_HAS_ITS */
>  
>  #endif /* __ASSEMBLY__ */
> -- 
> 2.9.0
>
Stefano Stabellini Feb. 14, 2017, 12:55 a.m. UTC | #5
On Mon, 6 Feb 2017, Julien Grall wrote:
> > +static uint64_t encode_phys_addr(paddr_t addr, int page_bits)
> > +{
> > +    uint64_t ret;
> > +
> > +    if ( page_bits < 16 )
> > +        return (uint64_t)addr & GENMASK(47, page_bits);
> > +
> > +    ret = addr & GENMASK(47, 16);
> > +    return ret | (addr & GENMASK(51, 48)) >> (48 - 12);
> > +}
> > +
> > +#define PAGE_BITS(sz) ((sz) * 2 + PAGE_SHIFT)
> 
> I know that PAGE_SHIFT has been suggested by Stefano on the previous version.
> However, I think  this is wrong. The PAGE_BITS is not based on the page
> granularity of Xen, so I would much prefer to keep an 12 hardcoded with a
> comment.

OK
Shanker Donthineni Feb. 15, 2017, 6:31 p.m. UTC | #6
Hi Andre,


On 01/30/2017 12:31 PM, Andre Przywara wrote:
> Each ITS maps a pair of a DeviceID (usually the PCI b/d/f triplet) and
> an EventID (the MSI payload or interrupt ID) to a pair of LPI number
> and collection ID, which points to the target CPU.
> This mapping is stored in the device and collection tables, which software
> has to provide for the ITS to use.
> Allocate the required memory and hand it the ITS.
> The maximum number of devices is limited to a compile-time constant
> exposed in Kconfig.
>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>   xen/arch/arm/Kconfig             |  14 +++++
>   xen/arch/arm/gic-v3-its.c        | 129
> +++++++++++++++++++++++++++++++++++++++
>   xen/arch/arm/gic-v3.c            |   5 ++
>   xen/include/asm-arm/gic_v3_its.h |  55 ++++++++++++++++-
>   4 files changed, 202 insertions(+), 1 deletion(-)
>
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index 71734a1..81bc233 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -64,6 +64,20 @@ config MAX_PHYS_LPI_BITS
>             This can be overriden on the command line with the max_lpi_bits
>             parameter.
>
> +config MAX_PHYS_ITS_DEVICE_BITS
> +        depends on HAS_ITS
> +        int "Number of device bits the ITS supports"
> +        range 1 32
> +        default "10"
> +        help
> +          Specifies the maximum number of devices which want to use the
> ITS.
> +          Xen needs to allocates memory for the whole range very early.
> +          The allocation scheme may be sparse, so a much larger number must
> +          be supported to cover devices with a high bus number or those on
> +          separate bus segments.
> +          This can be overriden on the command line with the
> max_its_device_bits
> +          parameter.
> +
The number of DEVID bits that hardware supports is discoverable through 
a register field GITS_TYPER.Devbits. The XEN driver must all the devices 
that hardware says like in the Linux kernel. I'm seeing a XEN crash if I 
set DEVID to 32bits.
>   endmenu
>
>   menu "ARM errata workaround via the alternative framework"
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index ff0f571..c31fef6 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -20,9 +20,138 @@
>   #include <xen/lib.h>
>   #include <xen/device_tree.h>
>   #include <xen/libfdt/libfdt.h>
> +#include <xen/mm.h>
> +#include <xen/sizes.h>
>   #include <asm/gic.h>
>   #include <asm/gic_v3_defs.h>
>   #include <asm/gic_v3_its.h>
> +#include <asm/io.h>
> +
> +#define BASER_ATTR_MASK                                           \
> +        ((0x3UL << GITS_BASER_SHAREABILITY_SHIFT)               | \
> +         (0x7UL << GITS_BASER_OUTER_CACHEABILITY_SHIFT)         | \
> +         (0x7UL << GITS_BASER_INNER_CACHEABILITY_SHIFT))
> +#define BASER_RO_MASK   (GENMASK(58, 56) | GENMASK(52, 48))
> +
> +static uint64_t encode_phys_addr(paddr_t addr, int page_bits)
> +{
> +    uint64_t ret;
> +
> +    if ( page_bits < 16 )
> +        return (uint64_t)addr & GENMASK(47, page_bits);
> +
> +    ret = addr & GENMASK(47, 16);
> +    return ret | (addr & GENMASK(51, 48)) >> (48 - 12);
> +}
> +
> +#define PAGE_BITS(sz) ((sz) * 2 + PAGE_SHIFT)
> +
> +static int its_map_baser(void __iomem *basereg, uint64_t regc, int
> nr_items)
> +{
> +    uint64_t attr, reg;
> +    int entry_size = ((regc >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1;
> +    int pagesz = 0, order, table_size;
Better try using ITS page sizes in ote order 64K, 16K and 4K to cover 
maximum device as possible using a single level translation.
> +    void *buffer = NULL;
> +
> +    attr  = GIC_BASER_InnerShareable << GITS_BASER_SHAREABILITY_SHIFT;
> +    attr |= GIC_BASER_CACHE_SameAsInner <<
> GITS_BASER_OUTER_CACHEABILITY_SHIFT;
> +    attr |= GIC_BASER_CACHE_RaWaWb << GITS_BASER_INNER_CACHEABILITY_SHIFT;
> +
> +    /*
> +     * Setup the BASE register with the attributes that we like. Then read
> +     * it back and see what sticks (page size, cacheability and
> shareability
> +     * attributes), retrying if necessary.
> +     */
> +    while ( 1 )
> +    {
> +        table_size = ROUNDUP(nr_items * entry_size,
> BIT(PAGE_BITS(pagesz)));
> +        order = get_order_from_bytes(table_size);
> +
You can map a maximum of 256 ITS pages, order must be 'table_size >> 
PAGE_BITS(pagesz) <= 256'.

> +        if ( !buffer )
> +            buffer = alloc_xenheap_pages(order, 0);
> +        if ( !buffer )
> +            return -ENOMEM;
> +
> +        reg  = attr;
> +        reg |= (pagesz << GITS_BASER_PAGE_SIZE_SHIFT);
> +        reg |= table_size >> PAGE_BITS(pagesz);
Change to 'reg |= table_size >> (PAGE_BITS(pagesz)) & 0xff'
> +        reg |= regc & BASER_RO_MASK;
> +        reg |= GITS_VALID_BIT;
> +        reg |= encode_phys_addr(virt_to_maddr(buffer), PAGE_BITS(pagesz));
> +
> +        writeq_relaxed(reg, basereg);
> +        regc = readl_relaxed(basereg);
> +
> +        /* The host didn't like our attributes, just use what it returned.
> */
> +        if ( (regc & BASER_ATTR_MASK) != attr )
> +        {
> +            /* If we can't map it shareable, drop cacheability as well. */
> +            if ( (regc & GITS_BASER_SHAREABILITY_MASK) ==
> GIC_BASER_NonShareable )
> +            {
> +                regc &= ~GITS_BASER_INNER_CACHEABILITY_MASK;
> +                attr = regc & BASER_ATTR_MASK;
> +                continue;
> +            }
> +            attr = regc & BASER_ATTR_MASK;
> +        }
> +
> +        /* If the host accepted our page size, we are done. */
> +        if ( (regc & (3UL << GITS_BASER_PAGE_SIZE_SHIFT)) == pagesz )
> +            return 0;
> +
> +        /* None of the page sizes was accepted, give up */
> +        if ( pagesz >= 2 )
> +            break;
> +
> +        free_xenheap_pages(buffer, order);
> +        buffer = NULL;
> +
> +        pagesz++;
> +    }
> +
> +    if ( buffer )
> +        free_xenheap_pages(buffer, order);
> +
> +    return -EINVAL;
> +}
> +
> +static unsigned int max_its_device_bits = CONFIG_MAX_PHYS_ITS_DEVICE_BITS;
> +integer_param("max_its_device_bits", max_its_device_bits);
> +
> +int gicv3_its_init(struct host_its *hw_its)
> +{
> +    uint64_t reg;
> +    int i;
> +
> +    hw_its->its_base = ioremap_nocache(hw_its->addr, hw_its->size);
> +    if ( !hw_its->its_base )
> +        return -ENOMEM;
> +
> +    for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
> +    {
> +        void __iomem *basereg = hw_its->its_base + GITS_BASER0 + i * 8;
> +        int type;
> +
> +        reg = readq_relaxed(basereg);
> +        type = (reg & GITS_BASER_TYPE_MASK) >> GITS_BASER_TYPE_SHIFT;
> +        switch ( type )
> +        {
> +        case GITS_BASER_TYPE_NONE:
> +            continue;
It's redundant since the default case already has a continue statement.

> +        case GITS_BASER_TYPE_DEVICE:
> +            /* TODO: find some better way of limiting the number of devices
> */
> +            its_map_baser(basereg, reg, BIT(max_its_device_bits));
> +            break;
> +        case GITS_BASER_TYPE_COLLECTION:
> +            its_map_baser(basereg, reg, NR_CPUS);
> +            break;
> +        default:
> +            continue;
> +        }
> +    }
> +
> +    return 0;
> +}
>
>   /* Scan the DT for any ITS nodes and create a list of host ITSes out of it.
> */
>   void gicv3_its_dt_init(const struct dt_device_node *node)
> diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
> index fcb86c8..440c079 100644
> --- a/xen/arch/arm/gic-v3.c
> +++ b/xen/arch/arm/gic-v3.c
> @@ -29,6 +29,7 @@
>   #include <xen/irq.h>
>   #include <xen/iocap.h>
>   #include <xen/sched.h>
> +#include <xen/err.h>
>   #include <xen/errno.h>
>   #include <xen/delay.h>
>   #include <xen/device_tree.h>
> @@ -1563,6 +1564,7 @@ static int __init gicv3_init(void)
>   {
>       int res, i;
>       uint32_t reg;
> +    struct host_its *hw_its;
>
>       if ( !cpu_has_gicv3 )
>       {
> @@ -1618,6 +1620,9 @@ static int __init gicv3_init(void)
>       res = gicv3_cpu_init();
>       gicv3_hyp_init();
>
> +    list_for_each_entry(hw_its, &host_its_list, entry)
> +        gicv3_its_init(hw_its);
> +
>       spin_unlock(&gicv3.lock);
>
>       return res;
> diff --git a/xen/include/asm-arm/gic_v3_its.h
> b/xen/include/asm-arm/gic_v3_its.h
> index a66b6be..ed44bdb 100644
> --- a/xen/include/asm-arm/gic_v3_its.h
> +++ b/xen/include/asm-arm/gic_v3_its.h
> @@ -18,6 +18,53 @@
>   #ifndef __ASM_ARM_ITS_H__
>   #define __ASM_ARM_ITS_H__
>
> +#define GITS_CTLR                       0x000
> +#define GITS_IIDR                       0x004
> +#define GITS_TYPER                      0x008
> +#define GITS_CBASER                     0x080
> +#define GITS_CWRITER                    0x088
> +#define GITS_CREADR                     0x090
> +#define GITS_BASER_NR_REGS              8
> +#define GITS_BASER0                     0x100
> +#define GITS_BASER1                     0x108
> +#define GITS_BASER2                     0x110
> +#define GITS_BASER3                     0x118
> +#define GITS_BASER4                     0x120
> +#define GITS_BASER5                     0x128
> +#define GITS_BASER6                     0x130
> +#define GITS_BASER7                     0x138
> +
> +/* Register bits */
> +#define GITS_VALID_BIT                  BIT_ULL(63)
> +
> +#define GITS_CTLR_QUIESCENT             BIT(31)
> +#define GITS_CTLR_ENABLE                BIT(0)
> +
> +#define GITS_IIDR_VALUE                 0x34c
> +
> +#define GITS_BASER_INDIRECT             BIT_ULL(62)
> +#define GITS_BASER_INNER_CACHEABILITY_SHIFT        59
> +#define GITS_BASER_TYPE_SHIFT           56
> +#define GITS_BASER_TYPE_MASK            (7ULL << GITS_BASER_TYPE_SHIFT)
> +#define GITS_BASER_OUTER_CACHEABILITY_SHIFT        53
> +#define GITS_BASER_TYPE_NONE            0UL
> +#define GITS_BASER_TYPE_DEVICE          1UL
> +#define GITS_BASER_TYPE_VCPU            2UL
> +#define GITS_BASER_TYPE_CPU             3UL
> +#define GITS_BASER_TYPE_COLLECTION      4UL
> +#define GITS_BASER_TYPE_RESERVED5       5UL
> +#define GITS_BASER_TYPE_RESERVED6       6UL
> +#define GITS_BASER_TYPE_RESERVED7       7UL
> +#define GITS_BASER_ENTRY_SIZE_SHIFT     48
> +#define GITS_BASER_SHAREABILITY_SHIFT   10
> +#define GITS_BASER_PAGE_SIZE_SHIFT      8
> +#define GITS_BASER_RO_MASK              (GITS_BASER_TYPE_MASK | \
> +                                        (31UL <<
> GITS_BASER_ENTRY_SIZE_SHIFT) |\
> +                                        GITS_BASER_INDIRECT)
> +#define GITS_BASER_SHAREABILITY_MASK   (0x3ULL <<
> GITS_BASER_SHAREABILITY_SHIFT)
> +#define GITS_BASER_OUTER_CACHEABILITY_MASK   (0x7ULL <<
> GITS_BASER_OUTER_CACHEABILITY_SHIFT)
> +#define GITS_BASER_INNER_CACHEABILITY_MASK   (0x7ULL <<
> GITS_BASER_INNER_CACHEABILITY_SHIFT)
> +
>   #ifndef __ASSEMBLY__
>   #include <xen/device_tree.h>
>
> @@ -27,6 +74,7 @@ struct host_its {
>       const struct dt_device_node *dt_node;
>       paddr_t addr;
>       paddr_t size;
> +    void __iomem *its_base;
>   };
>
>   extern struct list_head host_its_list;
> @@ -42,8 +90,9 @@ void gicv3_its_dt_init(const struct dt_device_node *node);
>   uint64_t gicv3_lpi_get_proptable(void);
>   uint64_t gicv3_lpi_allocate_pendtable(void);
>
> -/* Initialize the host structures for LPIs. */
> +/* Initialize the host structures for LPIs and the host ITSes. */
>   int gicv3_lpi_init_host_lpis(unsigned int nr_lpis);
> +int gicv3_its_init(struct host_its *hw_its);
>
>   #else
>
> @@ -62,6 +111,10 @@ static inline int gicv3_lpi_init_host_lpis(unsigned int
> nr_lpis)
>   {
>       return 0;
>   }
> +static inline int gicv3_its_init(struct host_its *hw_its)
> +{
> +    return 0;
> +}
>   #endif /* CONFIG_HAS_ITS */
>
>   #endif /* __ASSEMBLY__ */
Shanker Donthineni Feb. 16, 2017, 7:03 p.m. UTC | #7
Hi Andre,


On 01/30/2017 12:31 PM, Andre Przywara wrote:
> Each ITS maps a pair of a DeviceID (usually the PCI b/d/f triplet) and
> an EventID (the MSI payload or interrupt ID) to a pair of LPI number
> and collection ID, which points to the target CPU.
> This mapping is stored in the device and collection tables, which software
> has to provide for the ITS to use.
> Allocate the required memory and hand it the ITS.
> The maximum number of devices is limited to a compile-time constant
> exposed in Kconfig.
>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>   xen/arch/arm/Kconfig             |  14 +++++
>   xen/arch/arm/gic-v3-its.c        | 129
> +++++++++++++++++++++++++++++++++++++++
>   xen/arch/arm/gic-v3.c            |   5 ++
>   xen/include/asm-arm/gic_v3_its.h |  55 ++++++++++++++++-
>   4 files changed, 202 insertions(+), 1 deletion(-)
>
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index 71734a1..81bc233 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -64,6 +64,20 @@ config MAX_PHYS_LPI_BITS
>             This can be overriden on the command line with the max_lpi_bits
>             parameter.
>
> +config MAX_PHYS_ITS_DEVICE_BITS
> +        depends on HAS_ITS
> +        int "Number of device bits the ITS supports"
> +        range 1 32
> +        default "10"
> +        help
> +          Specifies the maximum number of devices which want to use the
> ITS.
> +          Xen needs to allocates memory for the whole range very early.
> +          The allocation scheme may be sparse, so a much larger number must
> +          be supported to cover devices with a high bus number or those on
> +          separate bus segments.
> +          This can be overriden on the command line with the
> max_its_device_bits
> +          parameter.
> +
>   endmenu
>
>   menu "ARM errata workaround via the alternative framework"
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index ff0f571..c31fef6 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -20,9 +20,138 @@
>   #include <xen/lib.h>
>   #include <xen/device_tree.h>
>   #include <xen/libfdt/libfdt.h>
> +#include <xen/mm.h>
> +#include <xen/sizes.h>
>   #include <asm/gic.h>
>   #include <asm/gic_v3_defs.h>
>   #include <asm/gic_v3_its.h>
> +#include <asm/io.h>
> +
> +#define BASER_ATTR_MASK                                           \
> +        ((0x3UL << GITS_BASER_SHAREABILITY_SHIFT)               | \
> +         (0x7UL << GITS_BASER_OUTER_CACHEABILITY_SHIFT)         | \
> +         (0x7UL << GITS_BASER_INNER_CACHEABILITY_SHIFT))
> +#define BASER_RO_MASK   (GENMASK(58, 56) | GENMASK(52, 48))
> +
> +static uint64_t encode_phys_addr(paddr_t addr, int page_bits)
> +{
> +    uint64_t ret;
> +
> +    if ( page_bits < 16 )
> +        return (uint64_t)addr & GENMASK(47, page_bits);
> +
> +    ret = addr & GENMASK(47, 16);
> +    return ret | (addr & GENMASK(51, 48)) >> (48 - 12);
> +}
> +
> +#define PAGE_BITS(sz) ((sz) * 2 + PAGE_SHIFT)
> +
> +static int its_map_baser(void __iomem *basereg, uint64_t regc, int
> nr_items)
> +{
> +    uint64_t attr, reg;
> +    int entry_size = ((regc >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1;
> +    int pagesz = 0, order, table_size;
> +    void *buffer = NULL;
> +
> +    attr  = GIC_BASER_InnerShareable << GITS_BASER_SHAREABILITY_SHIFT;
> +    attr |= GIC_BASER_CACHE_SameAsInner <<
> GITS_BASER_OUTER_CACHEABILITY_SHIFT;
> +    attr |= GIC_BASER_CACHE_RaWaWb << GITS_BASER_INNER_CACHEABILITY_SHIFT;
> +
> +    /*
> +     * Setup the BASE register with the attributes that we like. Then read
> +     * it back and see what sticks (page size, cacheability and
> shareability
> +     * attributes), retrying if necessary.
> +     */
> +    while ( 1 )
> +    {
> +        table_size = ROUNDUP(nr_items * entry_size,
> BIT(PAGE_BITS(pagesz)));
> +        order = get_order_from_bytes(table_size);
> +
> +        if ( !buffer )
> +            buffer = alloc_xenheap_pages(order, 0);
> +        if ( !buffer )
> +            return -ENOMEM;
> +
> +        reg  = attr;
> +        reg |= (pagesz << GITS_BASER_PAGE_SIZE_SHIFT);
> +        reg |= table_size >> PAGE_BITS(pagesz);
> +        reg |= regc & BASER_RO_MASK;
> +        reg |= GITS_VALID_BIT;
> +        reg |= encode_phys_addr(virt_to_maddr(buffer), PAGE_BITS(pagesz));
> +
> +        writeq_relaxed(reg, basereg);
> +        regc = readl_relaxed(basereg);
expecting regc = readq_relaxed(baserq)?

> +
> +        /* The host didn't like our attributes, just use what it returned.
> */
> +        if ( (regc & BASER_ATTR_MASK) != attr )
> +        {
> +            /* If we can't map it shareable, drop cacheability as well. */
> +            if ( (regc & GITS_BASER_SHAREABILITY_MASK) ==
> GIC_BASER_NonShareable )
> +            {
> +                regc &= ~GITS_BASER_INNER_CACHEABILITY_MASK;
> +                attr = regc & BASER_ATTR_MASK;
> +                continue;
> +            }
> +            attr = regc & BASER_ATTR_MASK;
> +        }
> +
> +        /* If the host accepted our page size, we are done. */
> +        if ( (regc & (3UL << GITS_BASER_PAGE_SIZE_SHIFT)) == pagesz )
Invalid check, should be 'if ( ((regc >> GITS_BASER_PAGE_SIZE_SHIFT) & 
0x3) == pagesz)'
> +            return 0;
> +
> +        /* None of the page sizes was accepted, give up */
> +        if ( pagesz >= 2 )
> +            break;
> +
> +        free_xenheap_pages(buffer, order);
> +        buffer = NULL;
> +
> +        pagesz++;
> +    }
> +
> +    if ( buffer )
> +        free_xenheap_pages(buffer, order);
> +
> +    return -EINVAL;
> +}
> +
> +static unsigned int max_its_device_bits = CONFIG_MAX_PHYS_ITS_DEVICE_BITS;
> +integer_param("max_its_device_bits", max_its_device_bits);
> +
> +int gicv3_its_init(struct host_its *hw_its)
> +{
> +    uint64_t reg;
> +    int i;
> +
> +    hw_its->its_base = ioremap_nocache(hw_its->addr, hw_its->size);
> +    if ( !hw_its->its_base )
> +        return -ENOMEM;
> +
> +    for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
> +    {
> +        void __iomem *basereg = hw_its->its_base + GITS_BASER0 + i * 8;
> +        int type;
> +
> +        reg = readq_relaxed(basereg);
> +        type = (reg & GITS_BASER_TYPE_MASK) >> GITS_BASER_TYPE_SHIFT;
> +        switch ( type )
> +        {
> +        case GITS_BASER_TYPE_NONE:
> +            continue;
> +        case GITS_BASER_TYPE_DEVICE:
> +            /* TODO: find some better way of limiting the number of devices
> */
> +            its_map_baser(basereg, reg, BIT(max_its_device_bits));
> +            break;
> +        case GITS_BASER_TYPE_COLLECTION:
> +            its_map_baser(basereg, reg, NR_CPUS);
> +            break;
> +        default:
> +            continue;
> +        }
> +    }
> +
> +    return 0;
> +}
>
>   /* Scan the DT for any ITS nodes and create a list of host ITSes out of it.
> */
>   void gicv3_its_dt_init(const struct dt_device_node *node)
> diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
> index fcb86c8..440c079 100644
> --- a/xen/arch/arm/gic-v3.c
> +++ b/xen/arch/arm/gic-v3.c
> @@ -29,6 +29,7 @@
>   #include <xen/irq.h>
>   #include <xen/iocap.h>
>   #include <xen/sched.h>
> +#include <xen/err.h>
>   #include <xen/errno.h>
>   #include <xen/delay.h>
>   #include <xen/device_tree.h>
> @@ -1563,6 +1564,7 @@ static int __init gicv3_init(void)
>   {
>       int res, i;
>       uint32_t reg;
> +    struct host_its *hw_its;
>
>       if ( !cpu_has_gicv3 )
>       {
> @@ -1618,6 +1620,9 @@ static int __init gicv3_init(void)
>       res = gicv3_cpu_init();
>       gicv3_hyp_init();
>
> +    list_for_each_entry(hw_its, &host_its_list, entry)
> +        gicv3_its_init(hw_its);
> +
>       spin_unlock(&gicv3.lock);
>
>       return res;
> diff --git a/xen/include/asm-arm/gic_v3_its.h
> b/xen/include/asm-arm/gic_v3_its.h
> index a66b6be..ed44bdb 100644
> --- a/xen/include/asm-arm/gic_v3_its.h
> +++ b/xen/include/asm-arm/gic_v3_its.h
> @@ -18,6 +18,53 @@
>   #ifndef __ASM_ARM_ITS_H__
>   #define __ASM_ARM_ITS_H__
>
> +#define GITS_CTLR                       0x000
> +#define GITS_IIDR                       0x004
> +#define GITS_TYPER                      0x008
> +#define GITS_CBASER                     0x080
> +#define GITS_CWRITER                    0x088
> +#define GITS_CREADR                     0x090
> +#define GITS_BASER_NR_REGS              8
> +#define GITS_BASER0                     0x100
> +#define GITS_BASER1                     0x108
> +#define GITS_BASER2                     0x110
> +#define GITS_BASER3                     0x118
> +#define GITS_BASER4                     0x120
> +#define GITS_BASER5                     0x128
> +#define GITS_BASER6                     0x130
> +#define GITS_BASER7                     0x138
> +
> +/* Register bits */
> +#define GITS_VALID_BIT                  BIT_ULL(63)
> +
> +#define GITS_CTLR_QUIESCENT             BIT(31)
> +#define GITS_CTLR_ENABLE                BIT(0)
> +
> +#define GITS_IIDR_VALUE                 0x34c
> +
> +#define GITS_BASER_INDIRECT             BIT_ULL(62)
> +#define GITS_BASER_INNER_CACHEABILITY_SHIFT        59
> +#define GITS_BASER_TYPE_SHIFT           56
> +#define GITS_BASER_TYPE_MASK            (7ULL << GITS_BASER_TYPE_SHIFT)
> +#define GITS_BASER_OUTER_CACHEABILITY_SHIFT        53
> +#define GITS_BASER_TYPE_NONE            0UL
> +#define GITS_BASER_TYPE_DEVICE          1UL
> +#define GITS_BASER_TYPE_VCPU            2UL
> +#define GITS_BASER_TYPE_CPU             3UL
> +#define GITS_BASER_TYPE_COLLECTION      4UL
> +#define GITS_BASER_TYPE_RESERVED5       5UL
> +#define GITS_BASER_TYPE_RESERVED6       6UL
> +#define GITS_BASER_TYPE_RESERVED7       7UL
> +#define GITS_BASER_ENTRY_SIZE_SHIFT     48
> +#define GITS_BASER_SHAREABILITY_SHIFT   10
> +#define GITS_BASER_PAGE_SIZE_SHIFT      8
> +#define GITS_BASER_RO_MASK              (GITS_BASER_TYPE_MASK | \
> +                                        (31UL <<
> GITS_BASER_ENTRY_SIZE_SHIFT) |\
> +                                        GITS_BASER_INDIRECT)
> +#define GITS_BASER_SHAREABILITY_MASK   (0x3ULL <<
> GITS_BASER_SHAREABILITY_SHIFT)
> +#define GITS_BASER_OUTER_CACHEABILITY_MASK   (0x7ULL <<
> GITS_BASER_OUTER_CACHEABILITY_SHIFT)
> +#define GITS_BASER_INNER_CACHEABILITY_MASK   (0x7ULL <<
> GITS_BASER_INNER_CACHEABILITY_SHIFT)
> +
>   #ifndef __ASSEMBLY__
>   #include <xen/device_tree.h>
>
> @@ -27,6 +74,7 @@ struct host_its {
>       const struct dt_device_node *dt_node;
>       paddr_t addr;
>       paddr_t size;
> +    void __iomem *its_base;
>   };
>
>   extern struct list_head host_its_list;
> @@ -42,8 +90,9 @@ void gicv3_its_dt_init(const struct dt_device_node *node);
>   uint64_t gicv3_lpi_get_proptable(void);
>   uint64_t gicv3_lpi_allocate_pendtable(void);
>
> -/* Initialize the host structures for LPIs. */
> +/* Initialize the host structures for LPIs and the host ITSes. */
>   int gicv3_lpi_init_host_lpis(unsigned int nr_lpis);
> +int gicv3_its_init(struct host_its *hw_its);
>
>   #else
>
> @@ -62,6 +111,10 @@ static inline int gicv3_lpi_init_host_lpis(unsigned int
> nr_lpis)
>   {
>       return 0;
>   }
> +static inline int gicv3_its_init(struct host_its *hw_its)
> +{
> +    return 0;
> +}
>   #endif /* CONFIG_HAS_ITS */
>
>   #endif /* __ASSEMBLY__ */
Shanker Donthineni Feb. 24, 2017, 7:29 p.m. UTC | #8
Hi Andre


On 02/16/2017 01:03 PM, Shanker Donthineni wrote:
> Hi Andre,
>
>
> On 01/30/2017 12:31 PM, Andre Przywara wrote:
>> Each ITS maps a pair of a DeviceID (usually the PCI b/d/f triplet) and
>> an EventID (the MSI payload or interrupt ID) to a pair of LPI number
>> and collection ID, which points to the target CPU.
>> This mapping is stored in the device and collection tables, which 
>> software
>> has to provide for the ITS to use.
>> Allocate the required memory and hand it the ITS.
>> The maximum number of devices is limited to a compile-time constant
>> exposed in Kconfig.
>>
>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>> ---
>>   xen/arch/arm/Kconfig             |  14 +++++
>>   xen/arch/arm/gic-v3-its.c        | 129
>> +++++++++++++++++++++++++++++++++++++++
>>   xen/arch/arm/gic-v3.c            |   5 ++
>>   xen/include/asm-arm/gic_v3_its.h |  55 ++++++++++++++++-
>>   4 files changed, 202 insertions(+), 1 deletion(-)
>>
>> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
>> index 71734a1..81bc233 100644
>> --- a/xen/arch/arm/Kconfig
>> +++ b/xen/arch/arm/Kconfig
>> @@ -64,6 +64,20 @@ config MAX_PHYS_LPI_BITS
>>             This can be overriden on the command line with the 
>> max_lpi_bits
>>             parameter.
>>
>> +config MAX_PHYS_ITS_DEVICE_BITS
>> +        depends on HAS_ITS
>> +        int "Number of device bits the ITS supports"
>> +        range 1 32
>> +        default "10"
>> +        help
>> +          Specifies the maximum number of devices which want to use the
>> ITS.
>> +          Xen needs to allocates memory for the whole range very early.
>> +          The allocation scheme may be sparse, so a much larger 
>> number must
>> +          be supported to cover devices with a high bus number or 
>> those on
>> +          separate bus segments.
>> +          This can be overriden on the command line with the
>> max_its_device_bits
>> +          parameter.
>> +
>>   endmenu
>>
>>   menu "ARM errata workaround via the alternative framework"
>> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
>> index ff0f571..c31fef6 100644
>> --- a/xen/arch/arm/gic-v3-its.c
>> +++ b/xen/arch/arm/gic-v3-its.c
>> @@ -20,9 +20,138 @@
>>   #include <xen/lib.h>
>>   #include <xen/device_tree.h>
>>   #include <xen/libfdt/libfdt.h>
>> +#include <xen/mm.h>
>> +#include <xen/sizes.h>
>>   #include <asm/gic.h>
>>   #include <asm/gic_v3_defs.h>
>>   #include <asm/gic_v3_its.h>
>> +#include <asm/io.h>
>> +
>> +#define BASER_ATTR_MASK                                           \
>> +        ((0x3UL << GITS_BASER_SHAREABILITY_SHIFT)               | \
>> +         (0x7UL << GITS_BASER_OUTER_CACHEABILITY_SHIFT)         | \
>> +         (0x7UL << GITS_BASER_INNER_CACHEABILITY_SHIFT))
>> +#define BASER_RO_MASK   (GENMASK(58, 56) | GENMASK(52, 48))
>> +
>> +static uint64_t encode_phys_addr(paddr_t addr, int page_bits)
>> +{
>> +    uint64_t ret;
>> +
>> +    if ( page_bits < 16 )
>> +        return (uint64_t)addr & GENMASK(47, page_bits);
>> +
>> +    ret = addr & GENMASK(47, 16);
>> +    return ret | (addr & GENMASK(51, 48)) >> (48 - 12);
>> +}
>> +
>> +#define PAGE_BITS(sz) ((sz) * 2 + PAGE_SHIFT)
>> +
>> +static int its_map_baser(void __iomem *basereg, uint64_t regc, int
>> nr_items)
>> +{
>> +    uint64_t attr, reg;
>> +    int entry_size = ((regc >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) 
>> + 1;
>> +    int pagesz = 0, order, table_size;

Please try ITS page sizes in the order 64K, 16K and 4K to cover more ITS 
devices using a flat table. Similar to Linux ITS driver.

>> +    void *buffer = NULL;
>> +
>> +    attr  = GIC_BASER_InnerShareable << GITS_BASER_SHAREABILITY_SHIFT;
>> +    attr |= GIC_BASER_CACHE_SameAsInner <<
>> GITS_BASER_OUTER_CACHEABILITY_SHIFT;
>> +    attr |= GIC_BASER_CACHE_RaWaWb << 
>> GITS_BASER_INNER_CACHEABILITY_SHIFT;
>> +
>> +    /*
>> +     * Setup the BASE register with the attributes that we like. 
>> Then read
>> +     * it back and see what sticks (page size, cacheability and
>> shareability
>> +     * attributes), retrying if necessary.
>> +     */
>> +    while ( 1 )
>> +    {
>> +        table_size = ROUNDUP(nr_items * entry_size,
>> BIT(PAGE_BITS(pagesz)));
>> +        order = get_order_from_bytes(table_size);
>> +

Limit to 256 ITS pages, ITS spec doesn't support more than 256 ITS pages.

        /* Maximum of 256 ITS pages are allowed */
        if ( (table_size >> PAGE_BITS(pagesz)) > GITS_BASER_PAGES_MAX )
                table_size = BIT(PAGE_BITS(pagesz)) * GITS_BASER_PAGES_MAX;

>> +        if ( !buffer )
>> +            buffer = alloc_xenheap_pages(order, 0);
>> +        if ( !buffer )
>> +            return -ENOMEM;
>> +

Please zero memory memset(buffer, 0x00, order << PAGE_SHIFT)
Andre Przywara Feb. 27, 2017, 10:23 a.m. UTC | #9
Hi Shanker,

thanks for having a look.

On 24/02/17 19:29, Shanker Donthineni wrote:
> Hi Andre
>
>
> On 02/16/2017 01:03 PM, Shanker Donthineni wrote:
>> Hi Andre,
>>
>>
>> On 01/30/2017 12:31 PM, Andre Przywara wrote:
>>> Each ITS maps a pair of a DeviceID (usually the PCI b/d/f triplet) and
>>> an EventID (the MSI payload or interrupt ID) to a pair of LPI number
>>> and collection ID, which points to the target CPU.
>>> This mapping is stored in the device and collection tables, which
>>> software
>>> has to provide for the ITS to use.
>>> Allocate the required memory and hand it the ITS.
>>> The maximum number of devices is limited to a compile-time constant
>>> exposed in Kconfig.
>>>
>>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>>> ---
>>>   xen/arch/arm/Kconfig             |  14 +++++
>>>   xen/arch/arm/gic-v3-its.c        | 129
>>> +++++++++++++++++++++++++++++++++++++++
>>>   xen/arch/arm/gic-v3.c            |   5 ++
>>>   xen/include/asm-arm/gic_v3_its.h |  55 ++++++++++++++++-
>>>   4 files changed, 202 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
>>> index 71734a1..81bc233 100644
>>> --- a/xen/arch/arm/Kconfig
>>> +++ b/xen/arch/arm/Kconfig
>>> @@ -64,6 +64,20 @@ config MAX_PHYS_LPI_BITS
>>>             This can be overriden on the command line with the
>>> max_lpi_bits
>>>             parameter.
>>>
>>> +config MAX_PHYS_ITS_DEVICE_BITS
>>> +        depends on HAS_ITS
>>> +        int "Number of device bits the ITS supports"
>>> +        range 1 32
>>> +        default "10"
>>> +        help
>>> +          Specifies the maximum number of devices which want to use the
>>> ITS.
>>> +          Xen needs to allocates memory for the whole range very early.
>>> +          The allocation scheme may be sparse, so a much larger
>>> number must
>>> +          be supported to cover devices with a high bus number or
>>> those on
>>> +          separate bus segments.
>>> +          This can be overriden on the command line with the
>>> max_its_device_bits
>>> +          parameter.
>>> +
>>>   endmenu
>>>
>>>   menu "ARM errata workaround via the alternative framework"
>>> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
>>> index ff0f571..c31fef6 100644
>>> --- a/xen/arch/arm/gic-v3-its.c
>>> +++ b/xen/arch/arm/gic-v3-its.c
>>> @@ -20,9 +20,138 @@
>>>   #include <xen/lib.h>
>>>   #include <xen/device_tree.h>
>>>   #include <xen/libfdt/libfdt.h>
>>> +#include <xen/mm.h>
>>> +#include <xen/sizes.h>
>>>   #include <asm/gic.h>
>>>   #include <asm/gic_v3_defs.h>
>>>   #include <asm/gic_v3_its.h>
>>> +#include <asm/io.h>
>>> +
>>> +#define BASER_ATTR_MASK                                           \
>>> +        ((0x3UL << GITS_BASER_SHAREABILITY_SHIFT)               | \
>>> +         (0x7UL << GITS_BASER_OUTER_CACHEABILITY_SHIFT)         | \
>>> +         (0x7UL << GITS_BASER_INNER_CACHEABILITY_SHIFT))
>>> +#define BASER_RO_MASK   (GENMASK(58, 56) | GENMASK(52, 48))
>>> +
>>> +static uint64_t encode_phys_addr(paddr_t addr, int page_bits)
>>> +{
>>> +    uint64_t ret;
>>> +
>>> +    if ( page_bits < 16 )
>>> +        return (uint64_t)addr & GENMASK(47, page_bits);
>>> +
>>> +    ret = addr & GENMASK(47, 16);
>>> +    return ret | (addr & GENMASK(51, 48)) >> (48 - 12);
>>> +}
>>> +
>>> +#define PAGE_BITS(sz) ((sz) * 2 + PAGE_SHIFT)
>>> +
>>> +static int its_map_baser(void __iomem *basereg, uint64_t regc, int
>>> nr_items)
>>> +{
>>> +    uint64_t attr, reg;
>>> +    int entry_size = ((regc >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f)
>>> + 1;
>>> +    int pagesz = 0, order, table_size;
>
> Please try ITS page sizes in the order 64K, 16K and 4K to cover more ITS
> devices using a flat table. Similar to Linux ITS driver.
>
>>> +    void *buffer = NULL;
>>> +
>>> +    attr  = GIC_BASER_InnerShareable << GITS_BASER_SHAREABILITY_SHIFT;
>>> +    attr |= GIC_BASER_CACHE_SameAsInner <<
>>> GITS_BASER_OUTER_CACHEABILITY_SHIFT;
>>> +    attr |= GIC_BASER_CACHE_RaWaWb <<
>>> GITS_BASER_INNER_CACHEABILITY_SHIFT;
>>> +
>>> +    /*
>>> +     * Setup the BASE register with the attributes that we like.
>>> Then read
>>> +     * it back and see what sticks (page size, cacheability and
>>> shareability
>>> +     * attributes), retrying if necessary.
>>> +     */
>>> +    while ( 1 )
>>> +    {
>>> +        table_size = ROUNDUP(nr_items * entry_size,
>>> BIT(PAGE_BITS(pagesz)));
>>> +        order = get_order_from_bytes(table_size);
>>> +
>
> Limit to 256 ITS pages, ITS spec doesn't support more than 256 ITS pages.
>
>        /* Maximum of 256 ITS pages are allowed */
>        if ( (table_size >> PAGE_BITS(pagesz)) > GITS_BASER_PAGES_MAX )
>                table_size = BIT(PAGE_BITS(pagesz)) * GITS_BASER_PAGES_MAX;
>
>>> +        if ( !buffer )
>>> +            buffer = alloc_xenheap_pages(order, 0);
>>> +        if ( !buffer )
>>> +            return -ENOMEM;
>>> +
>
> Please zero memory memset(buffer, 0x00, order << PAGE_SHIFT)

All three comments make sense, thanks for pointing these out.
I will incorporate those changes into the next post.

Cheers,
Andre.
IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
Andre Przywara March 23, 2017, 6:06 p.m. UTC | #10
Hi,

On 06/02/17 17:43, Julien Grall wrote:
> Hi,
> 
> On 30/01/17 18:31, Andre Przywara wrote:
>> +int gicv3_its_init(struct host_its *hw_its)
>> +{
>> +    uint64_t reg;
>> +    int i;
>> +
>> +    hw_its->its_base = ioremap_nocache(hw_its->addr, hw_its->size);
>> +    if ( !hw_its->its_base )
>> +        return -ENOMEM;
>> +
>> +    for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
>> +    {
>> +        void __iomem *basereg = hw_its->its_base + GITS_BASER0 + i * 8;
>> +        int type;
>> +
>> +        reg = readq_relaxed(basereg);
>> +        type = (reg & GITS_BASER_TYPE_MASK) >> GITS_BASER_TYPE_SHIFT;
>> +        switch ( type )
>> +        {
>> +        case GITS_BASER_TYPE_NONE:
>> +            continue;
>> +        case GITS_BASER_TYPE_DEVICE:
>> +            /* TODO: find some better way of limiting the number of
>> devices */
>> +            its_map_baser(basereg, reg, BIT(max_its_device_bits));
>> +            break;
>> +        case GITS_BASER_TYPE_COLLECTION:
>> +            its_map_baser(basereg, reg, NR_CPUS);
> 
> And I forgot to mention about the collection. Same remark as for the
> device collection, NR_CPUS is the maximum size.

NR_CPUS is 128, entry size for each collection is probably around 8 or
16 bytes, if at all. This gives me half a page, worst case.
The granularity of the table memory handed to the ITS is (64K|16K|4K),
so as we only hand over whole pages to the ITS, I don't see how we can
save memory here.
Beside, we have other memory issues to worry about than this single 64K
allocated at boot time.

So if you don't mind, I'd just keep it as it is. I am happy to revisit
this once NR_CPUS gets significantly increased.

Cheers,
Andre.
Julien Grall March 23, 2017, 6:08 p.m. UTC | #11
On 23/03/17 18:06, Andre Przywara wrote:
> Hi,

Hi Andre,

> On 06/02/17 17:43, Julien Grall wrote:
>> Hi,
>>
>> On 30/01/17 18:31, Andre Przywara wrote:
>>> +int gicv3_its_init(struct host_its *hw_its)
>>> +{
>>> +    uint64_t reg;
>>> +    int i;
>>> +
>>> +    hw_its->its_base = ioremap_nocache(hw_its->addr, hw_its->size);
>>> +    if ( !hw_its->its_base )
>>> +        return -ENOMEM;
>>> +
>>> +    for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
>>> +    {
>>> +        void __iomem *basereg = hw_its->its_base + GITS_BASER0 + i * 8;
>>> +        int type;
>>> +
>>> +        reg = readq_relaxed(basereg);
>>> +        type = (reg & GITS_BASER_TYPE_MASK) >> GITS_BASER_TYPE_SHIFT;
>>> +        switch ( type )
>>> +        {
>>> +        case GITS_BASER_TYPE_NONE:
>>> +            continue;
>>> +        case GITS_BASER_TYPE_DEVICE:
>>> +            /* TODO: find some better way of limiting the number of
>>> devices */
>>> +            its_map_baser(basereg, reg, BIT(max_its_device_bits));
>>> +            break;
>>> +        case GITS_BASER_TYPE_COLLECTION:
>>> +            its_map_baser(basereg, reg, NR_CPUS);
>>
>> And I forgot to mention about the collection. Same remark as for the
>> device collection, NR_CPUS is the maximum size.
>
> NR_CPUS is 128, entry size for each collection is probably around 8 or
> 16 bytes, if at all. This gives me half a page, worst case.
> The granularity of the table memory handed to the ITS is (64K|16K|4K),
> so as we only hand over whole pages to the ITS, I don't see how we can
> save memory here.
> Beside, we have other memory issues to worry about than this single 64K
> allocated at boot time.
>
> So if you don't mind, I'd just keep it as it is. I am happy to revisit
> this once NR_CPUS gets significantly increased.

Replacing NR_CPUS by nr_cpu_ids would have addressed my comment and 
requiring less keystrokes than writing this e-mail.

Cheers,
diff mbox

Patch

diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index 71734a1..81bc233 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -64,6 +64,20 @@  config MAX_PHYS_LPI_BITS
           This can be overriden on the command line with the max_lpi_bits
           parameter.
 
+config MAX_PHYS_ITS_DEVICE_BITS
+        depends on HAS_ITS
+        int "Number of device bits the ITS supports"
+        range 1 32
+        default "10"
+        help
+          Specifies the maximum number of devices which want to use the ITS.
+          Xen needs to allocates memory for the whole range very early.
+          The allocation scheme may be sparse, so a much larger number must
+          be supported to cover devices with a high bus number or those on
+          separate bus segments.
+          This can be overriden on the command line with the max_its_device_bits
+          parameter.
+
 endmenu
 
 menu "ARM errata workaround via the alternative framework"
diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index ff0f571..c31fef6 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -20,9 +20,138 @@ 
 #include <xen/lib.h>
 #include <xen/device_tree.h>
 #include <xen/libfdt/libfdt.h>
+#include <xen/mm.h>
+#include <xen/sizes.h>
 #include <asm/gic.h>
 #include <asm/gic_v3_defs.h>
 #include <asm/gic_v3_its.h>
+#include <asm/io.h>
+
+#define BASER_ATTR_MASK                                           \
+        ((0x3UL << GITS_BASER_SHAREABILITY_SHIFT)               | \
+         (0x7UL << GITS_BASER_OUTER_CACHEABILITY_SHIFT)         | \
+         (0x7UL << GITS_BASER_INNER_CACHEABILITY_SHIFT))
+#define BASER_RO_MASK   (GENMASK(58, 56) | GENMASK(52, 48))
+
+static uint64_t encode_phys_addr(paddr_t addr, int page_bits)
+{
+    uint64_t ret;
+
+    if ( page_bits < 16 )
+        return (uint64_t)addr & GENMASK(47, page_bits);
+
+    ret = addr & GENMASK(47, 16);
+    return ret | (addr & GENMASK(51, 48)) >> (48 - 12);
+}
+
+#define PAGE_BITS(sz) ((sz) * 2 + PAGE_SHIFT)
+
+static int its_map_baser(void __iomem *basereg, uint64_t regc, int nr_items)
+{
+    uint64_t attr, reg;
+    int entry_size = ((regc >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1;
+    int pagesz = 0, order, table_size;
+    void *buffer = NULL;
+
+    attr  = GIC_BASER_InnerShareable << GITS_BASER_SHAREABILITY_SHIFT;
+    attr |= GIC_BASER_CACHE_SameAsInner << GITS_BASER_OUTER_CACHEABILITY_SHIFT;
+    attr |= GIC_BASER_CACHE_RaWaWb << GITS_BASER_INNER_CACHEABILITY_SHIFT;
+
+    /*
+     * Setup the BASE register with the attributes that we like. Then read
+     * it back and see what sticks (page size, cacheability and shareability
+     * attributes), retrying if necessary.
+     */
+    while ( 1 )
+    {
+        table_size = ROUNDUP(nr_items * entry_size, BIT(PAGE_BITS(pagesz)));
+        order = get_order_from_bytes(table_size);
+
+        if ( !buffer )
+            buffer = alloc_xenheap_pages(order, 0);
+        if ( !buffer )
+            return -ENOMEM;
+
+        reg  = attr;
+        reg |= (pagesz << GITS_BASER_PAGE_SIZE_SHIFT);
+        reg |= table_size >> PAGE_BITS(pagesz);
+        reg |= regc & BASER_RO_MASK;
+        reg |= GITS_VALID_BIT;
+        reg |= encode_phys_addr(virt_to_maddr(buffer), PAGE_BITS(pagesz));
+
+        writeq_relaxed(reg, basereg);
+        regc = readl_relaxed(basereg);
+
+        /* The host didn't like our attributes, just use what it returned. */
+        if ( (regc & BASER_ATTR_MASK) != attr )
+        {
+            /* If we can't map it shareable, drop cacheability as well. */
+            if ( (regc & GITS_BASER_SHAREABILITY_MASK) == GIC_BASER_NonShareable )
+            {
+                regc &= ~GITS_BASER_INNER_CACHEABILITY_MASK;
+                attr = regc & BASER_ATTR_MASK;
+                continue;
+            }
+            attr = regc & BASER_ATTR_MASK;
+        }
+
+        /* If the host accepted our page size, we are done. */
+        if ( (regc & (3UL << GITS_BASER_PAGE_SIZE_SHIFT)) == pagesz )
+            return 0;
+
+        /* None of the page sizes was accepted, give up */
+        if ( pagesz >= 2 )
+            break;
+
+        free_xenheap_pages(buffer, order);
+        buffer = NULL;
+
+        pagesz++;
+    }
+
+    if ( buffer )
+        free_xenheap_pages(buffer, order);
+
+    return -EINVAL;
+}
+
+static unsigned int max_its_device_bits = CONFIG_MAX_PHYS_ITS_DEVICE_BITS;
+integer_param("max_its_device_bits", max_its_device_bits);
+
+int gicv3_its_init(struct host_its *hw_its)
+{
+    uint64_t reg;
+    int i;
+
+    hw_its->its_base = ioremap_nocache(hw_its->addr, hw_its->size);
+    if ( !hw_its->its_base )
+        return -ENOMEM;
+
+    for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
+    {
+        void __iomem *basereg = hw_its->its_base + GITS_BASER0 + i * 8;
+        int type;
+
+        reg = readq_relaxed(basereg);
+        type = (reg & GITS_BASER_TYPE_MASK) >> GITS_BASER_TYPE_SHIFT;
+        switch ( type )
+        {
+        case GITS_BASER_TYPE_NONE:
+            continue;
+        case GITS_BASER_TYPE_DEVICE:
+            /* TODO: find some better way of limiting the number of devices */
+            its_map_baser(basereg, reg, BIT(max_its_device_bits));
+            break;
+        case GITS_BASER_TYPE_COLLECTION:
+            its_map_baser(basereg, reg, NR_CPUS);
+            break;
+        default:
+            continue;
+        }
+    }
+
+    return 0;
+}
 
 /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
 void gicv3_its_dt_init(const struct dt_device_node *node)
diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
index fcb86c8..440c079 100644
--- a/xen/arch/arm/gic-v3.c
+++ b/xen/arch/arm/gic-v3.c
@@ -29,6 +29,7 @@ 
 #include <xen/irq.h>
 #include <xen/iocap.h>
 #include <xen/sched.h>
+#include <xen/err.h>
 #include <xen/errno.h>
 #include <xen/delay.h>
 #include <xen/device_tree.h>
@@ -1563,6 +1564,7 @@  static int __init gicv3_init(void)
 {
     int res, i;
     uint32_t reg;
+    struct host_its *hw_its;
 
     if ( !cpu_has_gicv3 )
     {
@@ -1618,6 +1620,9 @@  static int __init gicv3_init(void)
     res = gicv3_cpu_init();
     gicv3_hyp_init();
 
+    list_for_each_entry(hw_its, &host_its_list, entry)
+        gicv3_its_init(hw_its);
+
     spin_unlock(&gicv3.lock);
 
     return res;
diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
index a66b6be..ed44bdb 100644
--- a/xen/include/asm-arm/gic_v3_its.h
+++ b/xen/include/asm-arm/gic_v3_its.h
@@ -18,6 +18,53 @@ 
 #ifndef __ASM_ARM_ITS_H__
 #define __ASM_ARM_ITS_H__
 
+#define GITS_CTLR                       0x000
+#define GITS_IIDR                       0x004
+#define GITS_TYPER                      0x008
+#define GITS_CBASER                     0x080
+#define GITS_CWRITER                    0x088
+#define GITS_CREADR                     0x090
+#define GITS_BASER_NR_REGS              8
+#define GITS_BASER0                     0x100
+#define GITS_BASER1                     0x108
+#define GITS_BASER2                     0x110
+#define GITS_BASER3                     0x118
+#define GITS_BASER4                     0x120
+#define GITS_BASER5                     0x128
+#define GITS_BASER6                     0x130
+#define GITS_BASER7                     0x138
+
+/* Register bits */
+#define GITS_VALID_BIT                  BIT_ULL(63)
+
+#define GITS_CTLR_QUIESCENT             BIT(31)
+#define GITS_CTLR_ENABLE                BIT(0)
+
+#define GITS_IIDR_VALUE                 0x34c
+
+#define GITS_BASER_INDIRECT             BIT_ULL(62)
+#define GITS_BASER_INNER_CACHEABILITY_SHIFT        59
+#define GITS_BASER_TYPE_SHIFT           56
+#define GITS_BASER_TYPE_MASK            (7ULL << GITS_BASER_TYPE_SHIFT)
+#define GITS_BASER_OUTER_CACHEABILITY_SHIFT        53
+#define GITS_BASER_TYPE_NONE            0UL
+#define GITS_BASER_TYPE_DEVICE          1UL
+#define GITS_BASER_TYPE_VCPU            2UL
+#define GITS_BASER_TYPE_CPU             3UL
+#define GITS_BASER_TYPE_COLLECTION      4UL
+#define GITS_BASER_TYPE_RESERVED5       5UL
+#define GITS_BASER_TYPE_RESERVED6       6UL
+#define GITS_BASER_TYPE_RESERVED7       7UL
+#define GITS_BASER_ENTRY_SIZE_SHIFT     48
+#define GITS_BASER_SHAREABILITY_SHIFT   10
+#define GITS_BASER_PAGE_SIZE_SHIFT      8
+#define GITS_BASER_RO_MASK              (GITS_BASER_TYPE_MASK | \
+                                        (31UL << GITS_BASER_ENTRY_SIZE_SHIFT) |\
+                                        GITS_BASER_INDIRECT)
+#define GITS_BASER_SHAREABILITY_MASK   (0x3ULL << GITS_BASER_SHAREABILITY_SHIFT)
+#define GITS_BASER_OUTER_CACHEABILITY_MASK   (0x7ULL << GITS_BASER_OUTER_CACHEABILITY_SHIFT)
+#define GITS_BASER_INNER_CACHEABILITY_MASK   (0x7ULL << GITS_BASER_INNER_CACHEABILITY_SHIFT)
+
 #ifndef __ASSEMBLY__
 #include <xen/device_tree.h>
 
@@ -27,6 +74,7 @@  struct host_its {
     const struct dt_device_node *dt_node;
     paddr_t addr;
     paddr_t size;
+    void __iomem *its_base;
 };
 
 extern struct list_head host_its_list;
@@ -42,8 +90,9 @@  void gicv3_its_dt_init(const struct dt_device_node *node);
 uint64_t gicv3_lpi_get_proptable(void);
 uint64_t gicv3_lpi_allocate_pendtable(void);
 
-/* Initialize the host structures for LPIs. */
+/* Initialize the host structures for LPIs and the host ITSes. */
 int gicv3_lpi_init_host_lpis(unsigned int nr_lpis);
+int gicv3_its_init(struct host_its *hw_its);
 
 #else
 
@@ -62,6 +111,10 @@  static inline int gicv3_lpi_init_host_lpis(unsigned int nr_lpis)
 {
     return 0;
 }
+static inline int gicv3_its_init(struct host_its *hw_its)
+{
+    return 0;
+}
 #endif /* CONFIG_HAS_ITS */
 
 #endif /* __ASSEMBLY__ */