diff mbox

[v2,05/27] ARM: GICv3 ITS: introduce ITS command handling

Message ID 20170316112030.20419-6-andre.przywara@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andre Przywara March 16, 2017, 11:20 a.m. UTC
To be able to easily send commands to the ITS, create the respective
wrapper functions, which take care of the ring buffer.
The first two commands we implement provide methods to map a collection
to a redistributor (aka host core) and to flush the command queue (SYNC).
Start using these commands for mapping one collection to each host CPU.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 xen/arch/arm/gic-v3-its.c         | 181 +++++++++++++++++++++++++++++++++++++-
 xen/arch/arm/gic-v3-lpi.c         |  22 +++++
 xen/arch/arm/gic-v3.c             |  19 +++-
 xen/include/asm-arm/gic_v3_defs.h |   2 +
 xen/include/asm-arm/gic_v3_its.h  |  38 ++++++++
 5 files changed, 260 insertions(+), 2 deletions(-)

Comments

Shanker Donthineni March 16, 2017, 3:05 p.m. UTC | #1
Hi Andre,


On 03/16/2017 06:20 AM, Andre Przywara wrote:
> To be able to easily send commands to the ITS, create the respective
> wrapper functions, which take care of the ring buffer.
> The first two commands we implement provide methods to map a collection
> to a redistributor (aka host core) and to flush the command queue (SYNC).
> Start using these commands for mapping one collection to each host CPU.
>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  xen/arch/arm/gic-v3-its.c         | 181 +++++++++++++++++++++++++++++++++++++-
>  xen/arch/arm/gic-v3-lpi.c         |  22 +++++
>  xen/arch/arm/gic-v3.c             |  19 +++-
>  xen/include/asm-arm/gic_v3_defs.h |   2 +
>  xen/include/asm-arm/gic_v3_its.h  |  38 ++++++++
>  5 files changed, 260 insertions(+), 2 deletions(-)
>
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index e5601ed..5c11b0d 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -19,11 +19,14 @@
>   */
>  
>  #include <xen/lib.h>
> +#include <xen/delay.h>
>  #include <xen/mm.h>
>  #include <xen/sizes.h>
> +#include <asm/gic.h>
>  #include <asm/gic_v3_defs.h>
>  #include <asm/gic_v3_its.h>
>  #include <asm/io.h>
> +#include <asm/page.h>
>  
>  #define ITS_CMD_QUEUE_SZ                SZ_64K
>  
> @@ -34,6 +37,145 @@ bool gicv3_its_host_has_its(void)
>      return !list_empty(&host_its_list);
>  }
>  
> +#define BUFPTR_MASK                     GENMASK(19, 5)
> +static int its_send_command(struct host_its *hw_its, const void *its_cmd)
> +{
> +    s_time_t deadline = NOW() + MILLISECS(1);
> +    uint64_t readp, writep;
> +    int ret = -EBUSY;
> +
> +    /* No ITS commands from an interrupt handler (at the moment). */
> +    ASSERT(!in_irq());
> +
> +    spin_lock(&hw_its->cmd_lock);
> +
> +    do {
> +        readp = readq_relaxed(hw_its->its_base + GITS_CREADR) & BUFPTR_MASK;
> +        writep = readq_relaxed(hw_its->its_base + GITS_CWRITER) & BUFPTR_MASK;
> +
> +        if ( ((writep + ITS_CMD_SIZE) % ITS_CMD_QUEUE_SZ) != readp )
> +        {
> +            ret = 0;
> +            break;
> +        }
> +
> +        /*
> +         * If the command queue is full, wait for a bit in the hope it drains
> +         * before giving up.
> +         */
> +        spin_unlock(&hw_its->cmd_lock);
> +        cpu_relax();
> +        udelay(1);
> +        spin_lock(&hw_its->cmd_lock);
> +    } while ( NOW() <= deadline );
> +
> +    if ( ret )
> +    {
> +        spin_unlock(&hw_its->cmd_lock);
> +        printk(XENLOG_WARNING "ITS: command queue full.\n");
> +        return ret;
> +    }
> +
> +    memcpy(hw_its->cmd_buf + writep, its_cmd, ITS_CMD_SIZE);
> +    if ( hw_its->flags & HOST_ITS_FLUSH_CMD_QUEUE )
> +        clean_and_invalidate_dcache_va_range(hw_its->cmd_buf + writep,
> +                                             ITS_CMD_SIZE);
> +    else
> +        dsb(ishst);
> +
> +    writep = (writep + ITS_CMD_SIZE) % ITS_CMD_QUEUE_SZ;
> +    writeq_relaxed(writep & BUFPTR_MASK, hw_its->its_base + GITS_CWRITER);
> +
> +    spin_unlock(&hw_its->cmd_lock);
> +
> +    return 0;
> +}
> +
> +/* Wait for an ITS to finish processing all commands. */
> +static int gicv3_its_wait_commands(struct host_its *hw_its)
> +{
> +    s_time_t deadline = NOW() + MILLISECS(100);
> +    uint64_t readp, writep;
> +
> +    do {
> +        spin_lock(&hw_its->cmd_lock);
> +        readp = readq_relaxed(hw_its->its_base + GITS_CREADR) & BUFPTR_MASK;
> +        writep = readq_relaxed(hw_its->its_base + GITS_CWRITER) & BUFPTR_MASK;
> +        spin_unlock(&hw_its->cmd_lock);
> +
> +        if ( readp == writep )
> +            return 0;
> +
> +        cpu_relax();
> +        udelay(1);
> +    } while ( NOW() <= deadline );
> +
> +    return -ETIMEDOUT;
> +}
> +
> +static uint64_t encode_rdbase(struct host_its *hw_its, unsigned int cpu,
> +                              uint64_t reg)
> +{
> +    reg &= ~GENMASK(51, 16);
> +
> +    reg |= gicv3_get_redist_address(cpu, hw_its->flags & HOST_ITS_USES_PTA);
> +
> +    return reg;
> +}
> +
> +static int its_send_cmd_sync(struct host_its *its, unsigned int cpu)
> +{
> +    uint64_t cmd[4];
> +
> +    cmd[0] = GITS_CMD_SYNC;
> +    cmd[1] = 0x00;
> +    cmd[2] = encode_rdbase(its, cpu, 0x0);
> +    cmd[3] = 0x00;
> +
> +    return its_send_command(its, cmd);
> +}
> +
> +static int its_send_cmd_mapc(struct host_its *its, uint32_t collection_id,
> +                             unsigned int cpu)
> +{
> +    uint64_t cmd[4];
> +
> +    cmd[0] = GITS_CMD_MAPC;
> +    cmd[1] = 0x00;
> +    cmd[2] = encode_rdbase(its, cpu, (collection_id & GENMASK(15, 0)));
> +    cmd[2] |= GITS_VALID_BIT;
> +    cmd[3] = 0x00;
> +
> +    return its_send_command(its, cmd);
> +}
> +
> +/* Set up the (1:1) collection mapping for the given host CPU. */
> +int gicv3_its_setup_collection(unsigned int cpu)
> +{
> +    struct host_its *its;
> +    int ret;
> +
> +    list_for_each_entry(its, &host_its_list, entry)
> +    {
> +        if ( !its->cmd_buf )
> +            continue;
> +
> +        ret = its_send_cmd_mapc(its, cpu, cpu);
> +        if ( ret )
> +            return ret;
> +
> +        ret = its_send_cmd_sync(its, cpu);
> +        if ( ret )
> +            return ret;
> +
> +        ret = gicv3_its_wait_commands(its);
> +        if ( ret )
> +            return ret;
> +    }
> +
> +    return 0;
> +}
> +
>  #define BASER_ATTR_MASK                                           \
>          ((0x3UL << GITS_BASER_SHAREABILITY_SHIFT)               | \
>           (0x7UL << GITS_BASER_OUTER_CACHEABILITY_SHIFT)         | \
> @@ -184,22 +326,59 @@ retry:
>      return -EINVAL;
>  }
>  
> +/*
> + * Before an ITS gets initialized, it should be in a quiescent state, where
> + * all outstanding commands and transactions have finished.
> + * So if the ITS is already enabled, turn it off and wait for all outstanding
> + * operations to get processed by polling the QUIESCENT bit.
> + */
> +static int gicv3_disable_its(struct host_its *hw_its)
> +{
> +    uint32_t reg;
> +    s_time_t deadline = NOW() + MILLISECS(100);
> +
> +    reg = readl_relaxed(hw_its->its_base + GITS_CTLR);
> +    if ( (reg & GITS_CTLR_QUIESCENT) && !(reg & GITS_CTLR_ENABLE) )

nit: I prefer changing to 'if ( !(reg & GITS_CTLR_ENABLE) && (reg & GITS_CTLR_QUIESCENT) ) ' because bit GITS_CTLR_QUIESCENT is not valid if ITS hardware is in enabled state.

>  #endif
Andre Przywara March 16, 2017, 3:18 p.m. UTC | #2
Hi Shanker,

On 16/03/17 15:05, Shanker Donthineni wrote:
> Hi Andre,
> 
> 
> On 03/16/2017 06:20 AM, Andre Przywara wrote:
>> To be able to easily send commands to the ITS, create the respective
>> wrapper functions, which take care of the ring buffer.
>> The first two commands we implement provide methods to map a collection
>> to a redistributor (aka host core) and to flush the command queue (SYNC).
>> Start using these commands for mapping one collection to each host CPU.
>>
>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>> ---

...

>> @@ -184,22 +326,59 @@ retry:
>>      return -EINVAL;
>>  }
>>  
>> +/*
>> + * Before an ITS gets initialized, it should be in a quiescent state, where
>> + * all outstanding commands and transactions have finished.
>> + * So if the ITS is already enabled, turn it off and wait for all outstanding
>> + * operations to get processed by polling the QUIESCENT bit.
>> + */
>> +static int gicv3_disable_its(struct host_its *hw_its)
>> +{
>> +    uint32_t reg;
>> +    s_time_t deadline = NOW() + MILLISECS(100);
>> +
>> +    reg = readl_relaxed(hw_its->its_base + GITS_CTLR);
>> +    if ( (reg & GITS_CTLR_QUIESCENT) && !(reg & GITS_CTLR_ENABLE) )
> 
> nit: I prefer changing to 'if ( !(reg & GITS_CTLR_ENABLE) && (reg & GITS_CTLR_QUIESCENT) ) ' because bit GITS_CTLR_QUIESCENT is not valid if ITS hardware is in enabled state.

Sure, makes sense. I will change this.

Thanks for having a look!

Cheers,
Andre.
Stefano Stabellini March 22, 2017, 12:02 a.m. UTC | #3
On Thu, 16 Mar 2017, Andre Przywara wrote:
> To be able to easily send commands to the ITS, create the respective
> wrapper functions, which take care of the ring buffer.
> The first two commands we implement provide methods to map a collection
> to a redistributor (aka host core) and to flush the command queue (SYNC).
> Start using these commands for mapping one collection to each host CPU.
> 
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  xen/arch/arm/gic-v3-its.c         | 181 +++++++++++++++++++++++++++++++++++++-
>  xen/arch/arm/gic-v3-lpi.c         |  22 +++++
>  xen/arch/arm/gic-v3.c             |  19 +++-
>  xen/include/asm-arm/gic_v3_defs.h |   2 +
>  xen/include/asm-arm/gic_v3_its.h  |  38 ++++++++
>  5 files changed, 260 insertions(+), 2 deletions(-)
> 
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index e5601ed..5c11b0d 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -19,11 +19,14 @@
>   */
>  
>  #include <xen/lib.h>
> +#include <xen/delay.h>
>  #include <xen/mm.h>
>  #include <xen/sizes.h>
> +#include <asm/gic.h>
>  #include <asm/gic_v3_defs.h>
>  #include <asm/gic_v3_its.h>
>  #include <asm/io.h>
> +#include <asm/page.h>
>  
>  #define ITS_CMD_QUEUE_SZ                SZ_64K
>  
> @@ -34,6 +37,145 @@ bool gicv3_its_host_has_its(void)
>      return !list_empty(&host_its_list);
>  }
>  
> +#define BUFPTR_MASK                     GENMASK(19, 5)
> +static int its_send_command(struct host_its *hw_its, const void *its_cmd)
> +{
> +    s_time_t deadline = NOW() + MILLISECS(1);
> +    uint64_t readp, writep;
> +    int ret = -EBUSY;
> +
> +    /* No ITS commands from an interrupt handler (at the moment). */
> +    ASSERT(!in_irq());
> +
> +    spin_lock(&hw_its->cmd_lock);
> +
> +    do {
> +        readp = readq_relaxed(hw_its->its_base + GITS_CREADR) & BUFPTR_MASK;
> +        writep = readq_relaxed(hw_its->its_base + GITS_CWRITER) & BUFPTR_MASK;
> +
> +        if ( ((writep + ITS_CMD_SIZE) % ITS_CMD_QUEUE_SZ) != readp )
> +        {
> +            ret = 0;
> +            break;
> +        }
> +
> +        /*
> +         * If the command queue is full, wait for a bit in the hope it drains
> +         * before giving up.
> +         */
> +        spin_unlock(&hw_its->cmd_lock);
> +        cpu_relax();
> +        udelay(1);
> +        spin_lock(&hw_its->cmd_lock);
> +    } while ( NOW() <= deadline );
> +
> +    if ( ret )
> +    {
> +        spin_unlock(&hw_its->cmd_lock);
> +        printk(XENLOG_WARNING "ITS: command queue full.\n");
> +        return ret;
> +    }
> +
> +    memcpy(hw_its->cmd_buf + writep, its_cmd, ITS_CMD_SIZE);
> +    if ( hw_its->flags & HOST_ITS_FLUSH_CMD_QUEUE )
> +        clean_and_invalidate_dcache_va_range(hw_its->cmd_buf + writep,
> +                                             ITS_CMD_SIZE);
> +    else
> +        dsb(ishst);
> +
> +    writep = (writep + ITS_CMD_SIZE) % ITS_CMD_QUEUE_SZ;
> +    writeq_relaxed(writep & BUFPTR_MASK, hw_its->its_base + GITS_CWRITER);
> +
> +    spin_unlock(&hw_its->cmd_lock);
> +
> +    return 0;
> +}
> +
> +/* Wait for an ITS to finish processing all commands. */
> +static int gicv3_its_wait_commands(struct host_its *hw_its)
> +{
> +    s_time_t deadline = NOW() + MILLISECS(100);
> +    uint64_t readp, writep;
> +
> +    do {
> +        spin_lock(&hw_its->cmd_lock);
> +        readp = readq_relaxed(hw_its->its_base + GITS_CREADR) & BUFPTR_MASK;
> +        writep = readq_relaxed(hw_its->its_base + GITS_CWRITER) & BUFPTR_MASK;
> +        spin_unlock(&hw_its->cmd_lock);
> +
> +        if ( readp == writep )
> +            return 0;
> +
> +        cpu_relax();
> +        udelay(1);
> +    } while ( NOW() <= deadline );
> +
> +    return -ETIMEDOUT;
> +}
> +
> +static uint64_t encode_rdbase(struct host_its *hw_its, unsigned int cpu,
> +                              uint64_t reg)
> +{
> +    reg &= ~GENMASK(51, 16);
> +
> +    reg |= gicv3_get_redist_address(cpu, hw_its->flags & HOST_ITS_USES_PTA);
> +
> +    return reg;
> +}
> +
> +static int its_send_cmd_sync(struct host_its *its, unsigned int cpu)
> +{
> +    uint64_t cmd[4];
> +
> +    cmd[0] = GITS_CMD_SYNC;
> +    cmd[1] = 0x00;
> +    cmd[2] = encode_rdbase(its, cpu, 0x0);
> +    cmd[3] = 0x00;
> +
> +    return its_send_command(its, cmd);
> +}
> +
> +static int its_send_cmd_mapc(struct host_its *its, uint32_t collection_id,
> +                             unsigned int cpu)
> +{
> +    uint64_t cmd[4];
> +
> +    cmd[0] = GITS_CMD_MAPC;
> +    cmd[1] = 0x00;
> +    cmd[2] = encode_rdbase(its, cpu, (collection_id & GENMASK(15, 0)));
> +    cmd[2] |= GITS_VALID_BIT;
> +    cmd[3] = 0x00;
> +
> +    return its_send_command(its, cmd);
> +}
> +
> +/* Set up the (1:1) collection mapping for the given host CPU. */
> +int gicv3_its_setup_collection(unsigned int cpu)
> +{
> +    struct host_its *its;
> +    int ret;
> +
> +    list_for_each_entry(its, &host_its_list, entry)
> +    {
> +        if ( !its->cmd_buf )
> +            continue;
> +
> +        ret = its_send_cmd_mapc(its, cpu, cpu);
> +        if ( ret )
> +            return ret;
> +
> +        ret = its_send_cmd_sync(its, cpu);
> +        if ( ret )
> +            return ret;
> +
> +        ret = gicv3_its_wait_commands(its);
> +        if ( ret )
> +            return ret;
> +    }
> +
> +    return 0;
> +}
> +
>  #define BASER_ATTR_MASK                                           \
>          ((0x3UL << GITS_BASER_SHAREABILITY_SHIFT)               | \
>           (0x7UL << GITS_BASER_OUTER_CACHEABILITY_SHIFT)         | \
> @@ -184,22 +326,59 @@ retry:
>      return -EINVAL;
>  }
>  
> +/*
> + * Before an ITS gets initialized, it should be in a quiescent state, where
> + * all outstanding commands and transactions have finished.
> + * So if the ITS is already enabled, turn it off and wait for all outstanding
> + * operations to get processed by polling the QUIESCENT bit.
> + */
> +static int gicv3_disable_its(struct host_its *hw_its)
> +{
> +    uint32_t reg;
> +    s_time_t deadline = NOW() + MILLISECS(100);
> +
> +    reg = readl_relaxed(hw_its->its_base + GITS_CTLR);
> +    if ( (reg & GITS_CTLR_QUIESCENT) && !(reg & GITS_CTLR_ENABLE) )
> +        return 0;
> +
> +    writel_relaxed(reg & ~GITS_CTLR_ENABLE, hw_its->its_base + GITS_CTLR);
> +
> +    do {
> +        reg = readl_relaxed(hw_its->its_base + GITS_CTLR);
> +        if ( reg & GITS_CTLR_QUIESCENT )
> +            return 0;
> +
> +        cpu_relax();
> +        udelay(1);
> +    } while ( NOW() <= deadline );
> +
> +    dprintk(XENLOG_ERR, "ITS not quiescent.\n");
> +
> +    return -ETIMEDOUT;
> +}
> +
>  static unsigned int max_its_device_bits = CONFIG_MAX_PHYS_ITS_DEVICE_BITS;
>  integer_param("max_its_device_bits", max_its_device_bits);
>  
>  static int gicv3_its_init_single_its(struct host_its *hw_its)
>  {
>      uint64_t reg;
> -    int i;
> +    int i ,ret;
>      unsigned int devid_bits;
>  
>      hw_its->its_base = ioremap_nocache(hw_its->addr, hw_its->size);
>      if ( !hw_its->its_base )
>          return -ENOMEM;
>  
> +    ret = gicv3_disable_its(hw_its);
> +    if ( ret )
> +        return ret;
> +
>      reg = readq_relaxed(hw_its->its_base + GITS_TYPER);
>      devid_bits = GITS_TYPER_DEVICE_ID_BITS(reg);
>      devid_bits = min(devid_bits, max_its_device_bits);
> +    if ( reg & GITS_TYPER_PTA )
> +        hw_its->flags |= HOST_ITS_USES_PTA;
>  
>      for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
>      {
> diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
> index 4f8414b..51d7425 100644
> --- a/xen/arch/arm/gic-v3-lpi.c
> +++ b/xen/arch/arm/gic-v3-lpi.c
> @@ -42,6 +42,8 @@ static struct {
>  } lpi_data;
>  
>  struct lpi_redist_data {
> +    paddr_t             redist_addr;
> +    unsigned int        redist_id;
>      void                *pending_table;
>  };
>  
> @@ -49,6 +51,26 @@ static DEFINE_PER_CPU(struct lpi_redist_data, lpi_redist);
>  
>  #define MAX_PHYS_LPIS   (lpi_data.nr_host_lpis - LPI_OFFSET)
>  
> +/* Stores this redistributor's physical address and ID in a per-CPU variable */
> +void gicv3_set_redist_address(paddr_t address, unsigned int redist_id)
> +{
> +    this_cpu(lpi_redist).redist_addr = address;
> +    this_cpu(lpi_redist).redist_id = redist_id;
> +}
> +
> +/*
> + * Returns a redistributor's ID (either as an address or as an ID).
> + * This must be (and is) called only after it has been setup by the above
> + * function.
> + */
> +uint64_t gicv3_get_redist_address(unsigned int cpu, bool use_pta)
> +{
> +    if ( use_pta )
> +        return per_cpu(lpi_redist, cpu).redist_addr & GENMASK(51, 16);
> +    else
> +        return per_cpu(lpi_redist, cpu).redist_id << 16;
> +}
> +
>  static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
>  {
>      uint64_t val;
> diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
> index cc1e219..38dafe7 100644
> --- a/xen/arch/arm/gic-v3.c
> +++ b/xen/arch/arm/gic-v3.c
> @@ -666,7 +666,21 @@ static int __init gicv3_populate_rdist(void)
>  
>                  if ( typer & GICR_TYPER_PLPIS )
>                  {
> -                    int ret;
> +                    paddr_t rdist_addr;
> +                    int procnum, ret;
> +
> +                    /*
> +                     * The ITS refers to redistributors either by their physical
> +                     * address or by their ID. Determine those two values and
> +                     * let the ITS code store them in per host CPU variables to
> +                     * later be able to address those redistributors.
> +                     */
> +                    rdist_addr = gicv3.rdist_regions[i].base;
> +                    rdist_addr += ptr - gicv3.rdist_regions[i].map_base;
> +                    procnum = (typer & GICR_TYPER_PROC_NUM_MASK);
> +                    procnum >>= GICR_TYPER_PROC_NUM_SHIFT;
> +
> +                    gicv3_set_redist_address(rdist_addr, procnum);
>  
>                      ret = gicv3_lpi_init_rdist(ptr);
>                      if ( ret && ret != -ENODEV )
> @@ -715,6 +729,9 @@ static int gicv3_cpu_init(void)
>      if ( gicv3_enable_redist() )
>          return -ENODEV;
>  
> +    if ( gicv3_its_host_has_its() )
> +        gicv3_its_setup_collection(smp_processor_id());

Check for return errors?


>      /* Set priority on PPI and SGI interrupts */
>      priority = (GIC_PRI_IPI << 24 | GIC_PRI_IPI << 16 | GIC_PRI_IPI << 8 |
>                  GIC_PRI_IPI);
> diff --git a/xen/include/asm-arm/gic_v3_defs.h b/xen/include/asm-arm/gic_v3_defs.h
> index b307322..878bae2 100644
> --- a/xen/include/asm-arm/gic_v3_defs.h
> +++ b/xen/include/asm-arm/gic_v3_defs.h
> @@ -101,6 +101,8 @@
>  #define GICR_TYPER_PLPIS             (1U << 0)
>  #define GICR_TYPER_VLPIS             (1U << 1)
>  #define GICR_TYPER_LAST              (1U << 4)
> +#define GICR_TYPER_PROC_NUM_SHIFT    8
> +#define GICR_TYPER_PROC_NUM_MASK     (0xffff << GICR_TYPER_PROC_NUM_SHIFT)
>  
>  /* For specifying the inner cacheability type only */
>  #define GIC_BASER_CACHE_nCnB         0ULL
> diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
> index d5facf0..8b493fb 100644
> --- a/xen/include/asm-arm/gic_v3_its.h
> +++ b/xen/include/asm-arm/gic_v3_its.h
> @@ -42,10 +42,12 @@
>  #define GITS_CTLR_QUIESCENT             BIT(31)
>  #define GITS_CTLR_ENABLE                BIT(0)
>  
> +#define GITS_TYPER_PTA                  BIT_ULL(19)
>  #define GITS_TYPER_DEVIDS_SHIFT         13
>  #define GITS_TYPER_DEVIDS_MASK          (0x1fUL << GITS_TYPER_DEVIDS_SHIFT)
>  #define GITS_TYPER_DEVICE_ID_BITS(r)    (((r & GITS_TYPER_DEVIDS_MASK) >> \
>                                                 GITS_TYPER_DEVIDS_SHIFT) + 1)
> +#define GITS_TYPER_IDBITS_SHIFT         8
>  
>  #define GITS_IIDR_VALUE                 0x34c
>  
> @@ -76,9 +78,26 @@
>  
>  #define GITS_CBASER_SIZE_MASK           0xff
>  
> +/* ITS command definitions */
> +#define ITS_CMD_SIZE                    32
> +
> +#define GITS_CMD_MOVI                   0x01
> +#define GITS_CMD_INT                    0x03
> +#define GITS_CMD_CLEAR                  0x04
> +#define GITS_CMD_SYNC                   0x05
> +#define GITS_CMD_MAPD                   0x08
> +#define GITS_CMD_MAPC                   0x09
> +#define GITS_CMD_MAPTI                  0x0a
> +#define GITS_CMD_MAPI                   0x0b
> +#define GITS_CMD_INV                    0x0c
> +#define GITS_CMD_INVALL                 0x0d
> +#define GITS_CMD_MOVALL                 0x0e
> +#define GITS_CMD_DISCARD                0x0f
> +
>  #include <xen/device_tree.h>
>  
>  #define HOST_ITS_FLUSH_CMD_QUEUE        (1U << 0)
> +#define HOST_ITS_USES_PTA               (1U << 1)
>  
>  /* data structure for each hardware ITS */
>  struct host_its {
> @@ -87,6 +106,7 @@ struct host_its {
>      paddr_t addr;
>      paddr_t size;
>      void __iomem *its_base;
> +    spinlock_t cmd_lock;
>      void *cmd_buf;
>      unsigned int flags;
>  };
> @@ -107,6 +127,13 @@ int gicv3_lpi_init_rdist(void __iomem * rdist_base);
>  int gicv3_lpi_init_host_lpis(unsigned int nr_lpis);
>  int gicv3_its_init(void);
>  
> +/* Store the physical address and ID for each redistributor as read from DT. */
> +void gicv3_set_redist_address(paddr_t address, unsigned int redist_id);
> +uint64_t gicv3_get_redist_address(unsigned int cpu, bool use_pta);
> +
> +/* Map a collection for this host CPU to each host ITS. */
> +int gicv3_its_setup_collection(unsigned int cpu);
> +
>  #else
>  
>  static LIST_HEAD(host_its_list);
> @@ -134,6 +161,17 @@ static inline int gicv3_its_init(void)
>  {
>      return 0;
>  }
> +
> +static inline void gicv3_set_redist_address(paddr_t address,
> +                                            unsigned int redist_id)
> +{
> +}
> +
> +static inline int gicv3_its_setup_collection(unsigned int cpu)
> +{
> +    return 0;
> +}
> +
>  #endif /* CONFIG_HAS_ITS */
>  
>  #endif
> -- 
> 2.9.0
>
Julien Grall March 22, 2017, 3:59 p.m. UTC | #4
Hi Andre,

On 16/03/17 11:20, Andre Przywara wrote:
> To be able to easily send commands to the ITS, create the respective
> wrapper functions, which take care of the ring buffer.
> The first two commands we implement provide methods to map a collection
> to a redistributor (aka host core) and to flush the command queue (SYNC).
> Start using these commands for mapping one collection to each host CPU.
>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  xen/arch/arm/gic-v3-its.c         | 181 +++++++++++++++++++++++++++++++++++++-
>  xen/arch/arm/gic-v3-lpi.c         |  22 +++++
>  xen/arch/arm/gic-v3.c             |  19 +++-
>  xen/include/asm-arm/gic_v3_defs.h |   2 +
>  xen/include/asm-arm/gic_v3_its.h  |  38 ++++++++
>  5 files changed, 260 insertions(+), 2 deletions(-)
>
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index e5601ed..5c11b0d 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -19,11 +19,14 @@
>   */
>
>  #include <xen/lib.h>
> +#include <xen/delay.h>
>  #include <xen/mm.h>
>  #include <xen/sizes.h>
> +#include <asm/gic.h>
>  #include <asm/gic_v3_defs.h>
>  #include <asm/gic_v3_its.h>
>  #include <asm/io.h>
> +#include <asm/page.h>
>
>  #define ITS_CMD_QUEUE_SZ                SZ_64K
>
> @@ -34,6 +37,145 @@ bool gicv3_its_host_has_its(void)
>      return !list_empty(&host_its_list);
>  }
>
> +#define BUFPTR_MASK                     GENMASK(19, 5)
> +static int its_send_command(struct host_its *hw_its, const void *its_cmd)
> +{
> +    s_time_t deadline = NOW() + MILLISECS(1);

It would be nice to explain where does this value comes from.

> +    uint64_t readp, writep;
> +    int ret = -EBUSY;
> +
> +    /* No ITS commands from an interrupt handler (at the moment). */
> +    ASSERT(!in_irq());
> +
> +    spin_lock(&hw_its->cmd_lock);
> +
> +    do {
> +        readp = readq_relaxed(hw_its->its_base + GITS_CREADR) & BUFPTR_MASK;
> +        writep = readq_relaxed(hw_its->its_base + GITS_CWRITER) & BUFPTR_MASK;
> +
> +        if ( ((writep + ITS_CMD_SIZE) % ITS_CMD_QUEUE_SZ) != readp )
> +        {
> +            ret = 0;
> +            break;
> +        }
> +
> +        /*
> +         * If the command queue is full, wait for a bit in the hope it drains
> +         * before giving up.
> +         */
> +        spin_unlock(&hw_its->cmd_lock);
> +        cpu_relax();
> +        udelay(1);
> +        spin_lock(&hw_its->cmd_lock);
> +    } while ( NOW() <= deadline );
> +
> +    if ( ret )
> +    {
> +        spin_unlock(&hw_its->cmd_lock);
> +        printk(XENLOG_WARNING "ITS: command queue full.\n");

This function could be called from a domain. So please ratelimit the 
message (see printk_ratelimit).

> +        return ret;
> +    }
> +
> +    memcpy(hw_its->cmd_buf + writep, its_cmd, ITS_CMD_SIZE);
> +    if ( hw_its->flags & HOST_ITS_FLUSH_CMD_QUEUE )
> +        clean_and_invalidate_dcache_va_range(hw_its->cmd_buf + writep,
> +                                             ITS_CMD_SIZE);
> +    else
> +        dsb(ishst);
> +
> +    writep = (writep + ITS_CMD_SIZE) % ITS_CMD_QUEUE_SZ;
> +    writeq_relaxed(writep & BUFPTR_MASK, hw_its->its_base + GITS_CWRITER);
> +
> +    spin_unlock(&hw_its->cmd_lock);
> +
> +    return 0;
> +}
> +
> +/* Wait for an ITS to finish processing all commands. */
> +static int gicv3_its_wait_commands(struct host_its *hw_its)
> +{
> +    s_time_t deadline = NOW() + MILLISECS(100);

Same remark as above. Why 1ms and 100ms here?

> +    uint64_t readp, writep;
> +
> +    do {
> +        spin_lock(&hw_its->cmd_lock);
> +        readp = readq_relaxed(hw_its->its_base + GITS_CREADR) & BUFPTR_MASK;
> +        writep = readq_relaxed(hw_its->its_base + GITS_CWRITER) & BUFPTR_MASK;
> +        spin_unlock(&hw_its->cmd_lock);
> +
> +        if ( readp == writep )
> +            return 0;
> +
> +        cpu_relax();
> +        udelay(1);
> +    } while ( NOW() <= deadline );
> +
> +    return -ETIMEDOUT;
> +}

[...]

> +static int its_send_cmd_mapc(struct host_its *its, uint32_t collection_id,
> +                             unsigned int cpu)
> +{
> +    uint64_t cmd[4];
> +
> +    cmd[0] = GITS_CMD_MAPC;
> +    cmd[1] = 0x00;
> +    cmd[2] = encode_rdbase(its, cpu, (collection_id & GENMASK(15, 0)));

I requested to drop the mask here and I didn't see any answer explaining 
why you would not do it. So this should be dropped unless you give me a 
reason to keep it.

> +    cmd[2] |= GITS_VALID_BIT;
> +    cmd[3] = 0x00;
> +
> +    return its_send_command(its, cmd);
> +}
> +
> +/* Set up the (1:1) collection mapping for the given host CPU. */
> +int gicv3_its_setup_collection(unsigned int cpu)
> +{
> +    struct host_its *its;
> +    int ret;
> +
> +    list_for_each_entry(its, &host_its_list, entry)
> +    {
> +        if ( !its->cmd_buf )
> +            continue;

Why this check?

[...]

> +/*
> + * Before an ITS gets initialized, it should be in a quiescent state, where
> + * all outstanding commands and transactions have finished.
> + * So if the ITS is already enabled, turn it off and wait for all outstanding
> + * operations to get processed by polling the QUIESCENT bit.
> + */
> +static int gicv3_disable_its(struct host_its *hw_its)
> +{
> +    uint32_t reg;
> +    s_time_t deadline = NOW() + MILLISECS(100);

Why 100ms?

[...]

>  static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
>  {
>      uint64_t val;
> diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
> index cc1e219..38dafe7 100644
> --- a/xen/arch/arm/gic-v3.c
> +++ b/xen/arch/arm/gic-v3.c
> @@ -666,7 +666,21 @@ static int __init gicv3_populate_rdist(void)
>
>                  if ( typer & GICR_TYPER_PLPIS )
>                  {
> -                    int ret;
> +                    paddr_t rdist_addr;
> +                    int procnum, ret;

As mentioned in v1, procnum should be unsigned. If you disagree, please 
explain.

> +
> +                    /*
> +                     * The ITS refers to redistributors either by their physical
> +                     * address or by their ID. Determine those two values and
> +                     * let the ITS code store them in per host CPU variables to
> +                     * later be able to address those redistributors.
> +                     */

Again, this comment does not look useful and is misleading as the code 
to get/set the redistributor information is living in gic-v3-lpi.c and 
not gic-v3-its.c.

[...]

> diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
> index d5facf0..8b493fb 100644
> --- a/xen/include/asm-arm/gic_v3_its.h
> +++ b/xen/include/asm-arm/gic_v3_its.h

[...]

>  #include <xen/device_tree.h>
>
>  #define HOST_ITS_FLUSH_CMD_QUEUE        (1U << 0)
> +#define HOST_ITS_USES_PTA               (1U << 1)
>
>  /* data structure for each hardware ITS */
>  struct host_its {
> @@ -87,6 +106,7 @@ struct host_its {
>      paddr_t addr;
>      paddr_t size;
>      void __iomem *its_base;
> +    spinlock_t cmd_lock;

Again, initialization, clean-up of a field should be done in the same 
that added the field. Otherwise, this is a call to miss a bit of the 
code and makes more difficult for the reviewer.

So please initialize cmd_lock in this patch and patch #6.

>      void *cmd_buf;
>      unsigned int flags;
>  };
> @@ -107,6 +127,13 @@ int gicv3_lpi_init_rdist(void __iomem * rdist_base);
>  int gicv3_lpi_init_host_lpis(unsigned int nr_lpis);
>  int gicv3_its_init(void);
>
> +/* Store the physical address and ID for each redistributor as read from DT. */
> +void gicv3_set_redist_address(paddr_t address, unsigned int redist_id);
> +uint64_t gicv3_get_redist_address(unsigned int cpu, bool use_pta);
> +
> +/* Map a collection for this host CPU to each host ITS. */
> +int gicv3_its_setup_collection(unsigned int cpu);
> +
>  #else
>
>  static LIST_HEAD(host_its_list);
> @@ -134,6 +161,17 @@ static inline int gicv3_its_init(void)
>  {
>      return 0;
>  }
> +
> +static inline void gicv3_set_redist_address(paddr_t address,
> +                                            unsigned int redist_id)
> +{
> +}
> +
> +static inline int gicv3_its_setup_collection(unsigned int cpu)
> +{

This function should never be called as it is gated by the presence of 
ITS. I would add a BUG() with a comment to ensure this is the case.

> +    return 0;
> +}
> +
>  #endif /* CONFIG_HAS_ITS */
>
>  #endif
>

Cheers
Andre Przywara April 3, 2017, 10:58 a.m. UTC | #5
Hi,

On 22/03/17 15:59, Julien Grall wrote:
> Hi Andre,
> 
> On 16/03/17 11:20, Andre Przywara wrote:
>> To be able to easily send commands to the ITS, create the respective
>> wrapper functions, which take care of the ring buffer.
>> The first two commands we implement provide methods to map a collection
>> to a redistributor (aka host core) and to flush the command queue (SYNC).
>> Start using these commands for mapping one collection to each host CPU.
>>
>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>> ---
>>  xen/arch/arm/gic-v3-its.c         | 181
>> +++++++++++++++++++++++++++++++++++++-
>>  xen/arch/arm/gic-v3-lpi.c         |  22 +++++
>>  xen/arch/arm/gic-v3.c             |  19 +++-
>>  xen/include/asm-arm/gic_v3_defs.h |   2 +
>>  xen/include/asm-arm/gic_v3_its.h  |  38 ++++++++
>>  5 files changed, 260 insertions(+), 2 deletions(-)
>>
>> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
>> index e5601ed..5c11b0d 100644
>> --- a/xen/arch/arm/gic-v3-its.c
>> +++ b/xen/arch/arm/gic-v3-its.c
>> @@ -19,11 +19,14 @@
>>   */
>>
>>  #include <xen/lib.h>
>> +#include <xen/delay.h>
>>  #include <xen/mm.h>
>>  #include <xen/sizes.h>
>> +#include <asm/gic.h>
>>  #include <asm/gic_v3_defs.h>
>>  #include <asm/gic_v3_its.h>
>>  #include <asm/io.h>
>> +#include <asm/page.h>
>>
>>  #define ITS_CMD_QUEUE_SZ                SZ_64K
>>
>> @@ -34,6 +37,145 @@ bool gicv3_its_host_has_its(void)
>>      return !list_empty(&host_its_list);
>>  }
>>
>> +#define BUFPTR_MASK                     GENMASK(19, 5)
>> +static int its_send_command(struct host_its *hw_its, const void
>> *its_cmd)
>> +{
>> +    s_time_t deadline = NOW() + MILLISECS(1);
> 
> It would be nice to explain where does this value comes from.

In lack of any real data on this, one millisecond is just a guess.
In v3 I added some comment stating that this is a "short" period to wait
for. We don't want to stall for a long time for a single command, but
also don't want to give up too easily. The command queue is handled by
the ITS, which is hardware, so the only thing it does is handling
commands. If we are unlucky enough to hit a full command queue due to a
sudden spike in host commands, chances are that we get a free slot very
soon afterwards. If not, we are screwed anyway.

>> +    uint64_t readp, writep;
>> +    int ret = -EBUSY;
>> +
>> +    /* No ITS commands from an interrupt handler (at the moment). */
>> +    ASSERT(!in_irq());
>> +
>> +    spin_lock(&hw_its->cmd_lock);
>> +
>> +    do {
>> +        readp = readq_relaxed(hw_its->its_base + GITS_CREADR) &
>> BUFPTR_MASK;
>> +        writep = readq_relaxed(hw_its->its_base + GITS_CWRITER) &
>> BUFPTR_MASK;
>> +
>> +        if ( ((writep + ITS_CMD_SIZE) % ITS_CMD_QUEUE_SZ) != readp )
>> +        {
>> +            ret = 0;
>> +            break;
>> +        }
>> +
>> +        /*
>> +         * If the command queue is full, wait for a bit in the hope
>> it drains
>> +         * before giving up.
>> +         */
>> +        spin_unlock(&hw_its->cmd_lock);
>> +        cpu_relax();
>> +        udelay(1);
>> +        spin_lock(&hw_its->cmd_lock);
>> +    } while ( NOW() <= deadline );
>> +
>> +    if ( ret )
>> +    {
>> +        spin_unlock(&hw_its->cmd_lock);
>> +        printk(XENLOG_WARNING "ITS: command queue full.\n");
> 
> This function could be called from a domain. So please ratelimit the
> message (see printk_ratelimit).

Fixed in v4.

> 
>> +        return ret;
>> +    }
>> +
>> +    memcpy(hw_its->cmd_buf + writep, its_cmd, ITS_CMD_SIZE);
>> +    if ( hw_its->flags & HOST_ITS_FLUSH_CMD_QUEUE )
>> +        clean_and_invalidate_dcache_va_range(hw_its->cmd_buf + writep,
>> +                                             ITS_CMD_SIZE);
>> +    else
>> +        dsb(ishst);
>> +
>> +    writep = (writep + ITS_CMD_SIZE) % ITS_CMD_QUEUE_SZ;
>> +    writeq_relaxed(writep & BUFPTR_MASK, hw_its->its_base +
>> GITS_CWRITER);
>> +
>> +    spin_unlock(&hw_its->cmd_lock);
>> +
>> +    return 0;
>> +}
>> +
>> +/* Wait for an ITS to finish processing all commands. */
>> +static int gicv3_its_wait_commands(struct host_its *hw_its)
>> +{
>> +    s_time_t deadline = NOW() + MILLISECS(100);
> 
> Same remark as above. Why 1ms and 100ms here?

I made this period longer, because it covers multiple commands and we
are somewhat expected to wait here. Still it should be shorter than the
Linux timeout, which is one second.

>> +    uint64_t readp, writep;
>> +
>> +    do {
>> +        spin_lock(&hw_its->cmd_lock);
>> +        readp = readq_relaxed(hw_its->its_base + GITS_CREADR) &
>> BUFPTR_MASK;
>> +        writep = readq_relaxed(hw_its->its_base + GITS_CWRITER) &
>> BUFPTR_MASK;
>> +        spin_unlock(&hw_its->cmd_lock);
>> +
>> +        if ( readp == writep )
>> +            return 0;
>> +
>> +        cpu_relax();
>> +        udelay(1);
>> +    } while ( NOW() <= deadline );
>> +
>> +    return -ETIMEDOUT;
>> +}
> 
> [...]
> 
>> +static int its_send_cmd_mapc(struct host_its *its, uint32_t
>> collection_id,
>> +                             unsigned int cpu)
>> +{
>> +    uint64_t cmd[4];
>> +
>> +    cmd[0] = GITS_CMD_MAPC;
>> +    cmd[1] = 0x00;
>> +    cmd[2] = encode_rdbase(its, cpu, (collection_id & GENMASK(15, 0)));
> 
> I requested to drop the mask here and I didn't see any answer explaining
> why you would not do it. So this should be dropped unless you give me a
> reason to keep it.

Fixed in v3.

> 
>> +    cmd[2] |= GITS_VALID_BIT;
>> +    cmd[3] = 0x00;
>> +
>> +    return its_send_command(its, cmd);
>> +}
>> +
>> +/* Set up the (1:1) collection mapping for the given host CPU. */
>> +int gicv3_its_setup_collection(unsigned int cpu)
>> +{
>> +    struct host_its *its;
>> +    int ret;
>> +
>> +    list_for_each_entry(its, &host_its_list, entry)
>> +    {
>> +        if ( !its->cmd_buf )
>> +            continue;
> 
> Why this check?

Indeed a leftover from the older code. Given the current code flow, we
shouldn't get here without the cmd_buf being initialized. Still I liked
the idea of having some check here in case the code flow changes.
Will drop this is v4.

> [...]
> 
>> +/*
>> + * Before an ITS gets initialized, it should be in a quiescent state,
>> where
>> + * all outstanding commands and transactions have finished.
>> + * So if the ITS is already enabled, turn it off and wait for all
>> outstanding
>> + * operations to get processed by polling the QUIESCENT bit.
>> + */
>> +static int gicv3_disable_its(struct host_its *hw_its)
>> +{
>> +    uint32_t reg;
>> +    s_time_t deadline = NOW() + MILLISECS(100);
> 
> Why 100ms?

Same rationale as above: We are dealing with multiple commands here,
also are expected to take a longer time.

>>  static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
>>  {
>>      uint64_t val;
>> diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
>> index cc1e219..38dafe7 100644
>> --- a/xen/arch/arm/gic-v3.c
>> +++ b/xen/arch/arm/gic-v3.c
>> @@ -666,7 +666,21 @@ static int __init gicv3_populate_rdist(void)
>>
>>                  if ( typer & GICR_TYPER_PLPIS )
>>                  {
>> -                    int ret;
>> +                    paddr_t rdist_addr;
>> +                    int procnum, ret;
> 
> As mentioned in v1, procnum should be unsigned. If you disagree, please
> explain.

Fixed in v4.

>> +
>> +                    /*
>> +                     * The ITS refers to redistributors either by
>> their physical
>> +                     * address or by their ID. Determine those two
>> values and
>> +                     * let the ITS code store them in per host CPU
>> variables to
>> +                     * later be able to address those redistributors.
>> +                     */
> 
> Again, this comment does not look useful and is misleading as the code
> to get/set the redistributor information is living in gic-v3-lpi.c and
> not gic-v3-its.c.

But we need to prepare those values here (and only here) and it does
make a lot of sense to explain this, since the ability to deal with both
ways of representing a redistributor is not obvious.
And this is one of the connections between the ITS and the (GICv3)
redistributors, where the line between them is not easy to draw.
So in v4 I now reworded the comment to say that we hand this over to the
ITS (instead of spoiling ITS implementation details).
Hope you are OK with this.

> 
> [...]
> 
>> diff --git a/xen/include/asm-arm/gic_v3_its.h
>> b/xen/include/asm-arm/gic_v3_its.h
>> index d5facf0..8b493fb 100644
>> --- a/xen/include/asm-arm/gic_v3_its.h
>> +++ b/xen/include/asm-arm/gic_v3_its.h
> 
> [...]
> 
>>  #include <xen/device_tree.h>
>>
>>  #define HOST_ITS_FLUSH_CMD_QUEUE        (1U << 0)
>> +#define HOST_ITS_USES_PTA               (1U << 1)
>>
>>  /* data structure for each hardware ITS */
>>  struct host_its {
>> @@ -87,6 +106,7 @@ struct host_its {
>>      paddr_t addr;
>>      paddr_t size;
>>      void __iomem *its_base;
>> +    spinlock_t cmd_lock;
> 
> Again, initialization, clean-up of a field should be done in the same
> that added the field. Otherwise, this is a call to miss a bit of the
> code and makes more difficult for the reviewer.
> 
> So please initialize cmd_lock in this patch and patch #6.

Fixed in v4.

> 
>>      void *cmd_buf;
>>      unsigned int flags;
>>  };
>> @@ -107,6 +127,13 @@ int gicv3_lpi_init_rdist(void __iomem * rdist_base);
>>  int gicv3_lpi_init_host_lpis(unsigned int nr_lpis);
>>  int gicv3_its_init(void);
>>
>> +/* Store the physical address and ID for each redistributor as read
>> from DT. */
>> +void gicv3_set_redist_address(paddr_t address, unsigned int redist_id);
>> +uint64_t gicv3_get_redist_address(unsigned int cpu, bool use_pta);
>> +
>> +/* Map a collection for this host CPU to each host ITS. */
>> +int gicv3_its_setup_collection(unsigned int cpu);
>> +
>>  #else
>>
>>  static LIST_HEAD(host_its_list);
>> @@ -134,6 +161,17 @@ static inline int gicv3_its_init(void)
>>  {
>>      return 0;
>>  }
>> +
>> +static inline void gicv3_set_redist_address(paddr_t address,
>> +                                            unsigned int redist_id)
>> +{
>> +}
>> +
>> +static inline int gicv3_its_setup_collection(unsigned int cpu)
>> +{
> 
> This function should never be called as it is gated by the presence of
> ITS. I would add a BUG() with a comment to ensure this is the case.

Right, fixed that in v4.

Cheers,
Andre.

> 
>> +    return 0;
>> +}
>> +
>>  #endif /* CONFIG_HAS_ITS */
>>
>>  #endif
>>
> 
> Cheers
>
Julien Grall April 3, 2017, 11:23 a.m. UTC | #6
Hi Andre,

On 03/04/17 11:58, Andre Przywara wrote:
>>> +#define BUFPTR_MASK                     GENMASK(19, 5)
>>> +static int its_send_command(struct host_its *hw_its, const void
>>> *its_cmd)
>>> +{
>>> +    s_time_t deadline = NOW() + MILLISECS(1);
>>
>> It would be nice to explain where does this value comes from.
>
> In lack of any real data on this, one millisecond is just a guess.
> In v3 I added some comment stating that this is a "short" period to wait
> for. We don't want to stall for a long time for a single command, but
> also don't want to give up too easily. The command queue is handled by
> the ITS, which is hardware, so the only thing it does is handling
> commands. If we are unlucky enough to hit a full command queue due to a
> sudden spike in host commands, chances are that we get a free slot very
> soon afterwards. If not, we are screwed anyway.

It is not the first time I am saying that. If it is a guess, then this 
should be spelled out. A reader cannot know and we would have a question 
asking why "1 ms in Xen and 1 sec in Linux"...

>>> +/* Wait for an ITS to finish processing all commands. */
>>> +static int gicv3_its_wait_commands(struct host_its *hw_its)
>>> +{
>>> +    s_time_t deadline = NOW() + MILLISECS(100);
>>
>> Same remark as above. Why 1ms and 100ms here?
>
> I made this period longer, because it covers multiple commands and we
> are somewhat expected to wait here. Still it should be shorter than the
> Linux timeout, which is one second.

See above.

[...]

>>
>>> +/*
>>> + * Before an ITS gets initialized, it should be in a quiescent state,
>>> where
>>> + * all outstanding commands and transactions have finished.
>>> + * So if the ITS is already enabled, turn it off and wait for all
>>> outstanding
>>> + * operations to get processed by polling the QUIESCENT bit.
>>> + */
>>> +static int gicv3_disable_its(struct host_its *hw_its)
>>> +{
>>> +    uint32_t reg;
>>> +    s_time_t deadline = NOW() + MILLISECS(100);
>>
>> Why 100ms?
>
> Same rationale as above: We are dealing with multiple commands here,
> also are expected to take a longer time.

See above.

[...]

>>> +
>>> +                    /*
>>> +                     * The ITS refers to redistributors either by
>>> their physical
>>> +                     * address or by their ID. Determine those two
>>> values and
>>> +                     * let the ITS code store them in per host CPU
>>> variables to
>>> +                     * later be able to address those redistributors.
>>> +                     */
>>
>> Again, this comment does not look useful and is misleading as the code
>> to get/set the redistributor information is living in gic-v3-lpi.c and
>> not gic-v3-its.c.
>
> But we need to prepare those values here (and only here) and it does
> make a lot of sense to explain this, since the ability to deal with both
> ways of representing a redistributor is not obvious.
> And this is one of the connections between the ITS and the (GICv3)
> redistributors, where the line between them is not easy to draw.
> So in v4 I now reworded the comment to say that we hand this over to the
> ITS (instead of spoiling ITS implementation details).
> Hope you are OK with this.

Well, it makes sense to initialize the redistributor address in here and 
not in another place.

This kind of comments is more useful on the place where you initialize 
the ITS to explain why it is done there and not before.

Cheers,
diff mbox

Patch

diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index e5601ed..5c11b0d 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -19,11 +19,14 @@ 
  */
 
 #include <xen/lib.h>
+#include <xen/delay.h>
 #include <xen/mm.h>
 #include <xen/sizes.h>
+#include <asm/gic.h>
 #include <asm/gic_v3_defs.h>
 #include <asm/gic_v3_its.h>
 #include <asm/io.h>
+#include <asm/page.h>
 
 #define ITS_CMD_QUEUE_SZ                SZ_64K
 
@@ -34,6 +37,145 @@  bool gicv3_its_host_has_its(void)
     return !list_empty(&host_its_list);
 }
 
+#define BUFPTR_MASK                     GENMASK(19, 5)
+static int its_send_command(struct host_its *hw_its, const void *its_cmd)
+{
+    s_time_t deadline = NOW() + MILLISECS(1);
+    uint64_t readp, writep;
+    int ret = -EBUSY;
+
+    /* No ITS commands from an interrupt handler (at the moment). */
+    ASSERT(!in_irq());
+
+    spin_lock(&hw_its->cmd_lock);
+
+    do {
+        readp = readq_relaxed(hw_its->its_base + GITS_CREADR) & BUFPTR_MASK;
+        writep = readq_relaxed(hw_its->its_base + GITS_CWRITER) & BUFPTR_MASK;
+
+        if ( ((writep + ITS_CMD_SIZE) % ITS_CMD_QUEUE_SZ) != readp )
+        {
+            ret = 0;
+            break;
+        }
+
+        /*
+         * If the command queue is full, wait for a bit in the hope it drains
+         * before giving up.
+         */
+        spin_unlock(&hw_its->cmd_lock);
+        cpu_relax();
+        udelay(1);
+        spin_lock(&hw_its->cmd_lock);
+    } while ( NOW() <= deadline );
+
+    if ( ret )
+    {
+        spin_unlock(&hw_its->cmd_lock);
+        printk(XENLOG_WARNING "ITS: command queue full.\n");
+        return ret;
+    }
+
+    memcpy(hw_its->cmd_buf + writep, its_cmd, ITS_CMD_SIZE);
+    if ( hw_its->flags & HOST_ITS_FLUSH_CMD_QUEUE )
+        clean_and_invalidate_dcache_va_range(hw_its->cmd_buf + writep,
+                                             ITS_CMD_SIZE);
+    else
+        dsb(ishst);
+
+    writep = (writep + ITS_CMD_SIZE) % ITS_CMD_QUEUE_SZ;
+    writeq_relaxed(writep & BUFPTR_MASK, hw_its->its_base + GITS_CWRITER);
+
+    spin_unlock(&hw_its->cmd_lock);
+
+    return 0;
+}
+
+/* Wait for an ITS to finish processing all commands. */
+static int gicv3_its_wait_commands(struct host_its *hw_its)
+{
+    s_time_t deadline = NOW() + MILLISECS(100);
+    uint64_t readp, writep;
+
+    do {
+        spin_lock(&hw_its->cmd_lock);
+        readp = readq_relaxed(hw_its->its_base + GITS_CREADR) & BUFPTR_MASK;
+        writep = readq_relaxed(hw_its->its_base + GITS_CWRITER) & BUFPTR_MASK;
+        spin_unlock(&hw_its->cmd_lock);
+
+        if ( readp == writep )
+            return 0;
+
+        cpu_relax();
+        udelay(1);
+    } while ( NOW() <= deadline );
+
+    return -ETIMEDOUT;
+}
+
+static uint64_t encode_rdbase(struct host_its *hw_its, unsigned int cpu,
+                              uint64_t reg)
+{
+    reg &= ~GENMASK(51, 16);
+
+    reg |= gicv3_get_redist_address(cpu, hw_its->flags & HOST_ITS_USES_PTA);
+
+    return reg;
+}
+
+static int its_send_cmd_sync(struct host_its *its, unsigned int cpu)
+{
+    uint64_t cmd[4];
+
+    cmd[0] = GITS_CMD_SYNC;
+    cmd[1] = 0x00;
+    cmd[2] = encode_rdbase(its, cpu, 0x0);
+    cmd[3] = 0x00;
+
+    return its_send_command(its, cmd);
+}
+
+static int its_send_cmd_mapc(struct host_its *its, uint32_t collection_id,
+                             unsigned int cpu)
+{
+    uint64_t cmd[4];
+
+    cmd[0] = GITS_CMD_MAPC;
+    cmd[1] = 0x00;
+    cmd[2] = encode_rdbase(its, cpu, (collection_id & GENMASK(15, 0)));
+    cmd[2] |= GITS_VALID_BIT;
+    cmd[3] = 0x00;
+
+    return its_send_command(its, cmd);
+}
+
+/* Set up the (1:1) collection mapping for the given host CPU. */
+int gicv3_its_setup_collection(unsigned int cpu)
+{
+    struct host_its *its;
+    int ret;
+
+    list_for_each_entry(its, &host_its_list, entry)
+    {
+        if ( !its->cmd_buf )
+            continue;
+
+        ret = its_send_cmd_mapc(its, cpu, cpu);
+        if ( ret )
+            return ret;
+
+        ret = its_send_cmd_sync(its, cpu);
+        if ( ret )
+            return ret;
+
+        ret = gicv3_its_wait_commands(its);
+        if ( ret )
+            return ret;
+    }
+
+    return 0;
+}
+
 #define BASER_ATTR_MASK                                           \
         ((0x3UL << GITS_BASER_SHAREABILITY_SHIFT)               | \
          (0x7UL << GITS_BASER_OUTER_CACHEABILITY_SHIFT)         | \
@@ -184,22 +326,59 @@  retry:
     return -EINVAL;
 }
 
+/*
+ * Before an ITS gets initialized, it should be in a quiescent state, where
+ * all outstanding commands and transactions have finished.
+ * So if the ITS is already enabled, turn it off and wait for all outstanding
+ * operations to get processed by polling the QUIESCENT bit.
+ */
+static int gicv3_disable_its(struct host_its *hw_its)
+{
+    uint32_t reg;
+    s_time_t deadline = NOW() + MILLISECS(100);
+
+    reg = readl_relaxed(hw_its->its_base + GITS_CTLR);
+    if ( (reg & GITS_CTLR_QUIESCENT) && !(reg & GITS_CTLR_ENABLE) )
+        return 0;
+
+    writel_relaxed(reg & ~GITS_CTLR_ENABLE, hw_its->its_base + GITS_CTLR);
+
+    do {
+        reg = readl_relaxed(hw_its->its_base + GITS_CTLR);
+        if ( reg & GITS_CTLR_QUIESCENT )
+            return 0;
+
+        cpu_relax();
+        udelay(1);
+    } while ( NOW() <= deadline );
+
+    dprintk(XENLOG_ERR, "ITS not quiescent.\n");
+
+    return -ETIMEDOUT;
+}
+
 static unsigned int max_its_device_bits = CONFIG_MAX_PHYS_ITS_DEVICE_BITS;
 integer_param("max_its_device_bits", max_its_device_bits);
 
 static int gicv3_its_init_single_its(struct host_its *hw_its)
 {
     uint64_t reg;
-    int i;
+    int i ,ret;
     unsigned int devid_bits;
 
     hw_its->its_base = ioremap_nocache(hw_its->addr, hw_its->size);
     if ( !hw_its->its_base )
         return -ENOMEM;
 
+    ret = gicv3_disable_its(hw_its);
+    if ( ret )
+        return ret;
+
     reg = readq_relaxed(hw_its->its_base + GITS_TYPER);
     devid_bits = GITS_TYPER_DEVICE_ID_BITS(reg);
     devid_bits = min(devid_bits, max_its_device_bits);
+    if ( reg & GITS_TYPER_PTA )
+        hw_its->flags |= HOST_ITS_USES_PTA;
 
     for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
     {
diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
index 4f8414b..51d7425 100644
--- a/xen/arch/arm/gic-v3-lpi.c
+++ b/xen/arch/arm/gic-v3-lpi.c
@@ -42,6 +42,8 @@  static struct {
 } lpi_data;
 
 struct lpi_redist_data {
+    paddr_t             redist_addr;
+    unsigned int        redist_id;
     void                *pending_table;
 };
 
@@ -49,6 +51,26 @@  static DEFINE_PER_CPU(struct lpi_redist_data, lpi_redist);
 
 #define MAX_PHYS_LPIS   (lpi_data.nr_host_lpis - LPI_OFFSET)
 
+/* Stores this redistributor's physical address and ID in a per-CPU variable */
+void gicv3_set_redist_address(paddr_t address, unsigned int redist_id)
+{
+    this_cpu(lpi_redist).redist_addr = address;
+    this_cpu(lpi_redist).redist_id = redist_id;
+}
+
+/*
+ * Returns a redistributor's ID (either as an address or as an ID).
+ * This must be (and is) called only after it has been setup by the above
+ * function.
+ */
+uint64_t gicv3_get_redist_address(unsigned int cpu, bool use_pta)
+{
+    if ( use_pta )
+        return per_cpu(lpi_redist, cpu).redist_addr & GENMASK(51, 16);
+    else
+        return per_cpu(lpi_redist, cpu).redist_id << 16;
+}
+
 static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
 {
     uint64_t val;
diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
index cc1e219..38dafe7 100644
--- a/xen/arch/arm/gic-v3.c
+++ b/xen/arch/arm/gic-v3.c
@@ -666,7 +666,21 @@  static int __init gicv3_populate_rdist(void)
 
                 if ( typer & GICR_TYPER_PLPIS )
                 {
-                    int ret;
+                    paddr_t rdist_addr;
+                    int procnum, ret;
+
+                    /*
+                     * The ITS refers to redistributors either by their physical
+                     * address or by their ID. Determine those two values and
+                     * let the ITS code store them in per host CPU variables to
+                     * later be able to address those redistributors.
+                     */
+                    rdist_addr = gicv3.rdist_regions[i].base;
+                    rdist_addr += ptr - gicv3.rdist_regions[i].map_base;
+                    procnum = (typer & GICR_TYPER_PROC_NUM_MASK);
+                    procnum >>= GICR_TYPER_PROC_NUM_SHIFT;
+
+                    gicv3_set_redist_address(rdist_addr, procnum);
 
                     ret = gicv3_lpi_init_rdist(ptr);
                     if ( ret && ret != -ENODEV )
@@ -715,6 +729,9 @@  static int gicv3_cpu_init(void)
     if ( gicv3_enable_redist() )
         return -ENODEV;
 
+    if ( gicv3_its_host_has_its() )
+        gicv3_its_setup_collection(smp_processor_id());
+
     /* Set priority on PPI and SGI interrupts */
     priority = (GIC_PRI_IPI << 24 | GIC_PRI_IPI << 16 | GIC_PRI_IPI << 8 |
                 GIC_PRI_IPI);
diff --git a/xen/include/asm-arm/gic_v3_defs.h b/xen/include/asm-arm/gic_v3_defs.h
index b307322..878bae2 100644
--- a/xen/include/asm-arm/gic_v3_defs.h
+++ b/xen/include/asm-arm/gic_v3_defs.h
@@ -101,6 +101,8 @@ 
 #define GICR_TYPER_PLPIS             (1U << 0)
 #define GICR_TYPER_VLPIS             (1U << 1)
 #define GICR_TYPER_LAST              (1U << 4)
+#define GICR_TYPER_PROC_NUM_SHIFT    8
+#define GICR_TYPER_PROC_NUM_MASK     (0xffff << GICR_TYPER_PROC_NUM_SHIFT)
 
 /* For specifying the inner cacheability type only */
 #define GIC_BASER_CACHE_nCnB         0ULL
diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
index d5facf0..8b493fb 100644
--- a/xen/include/asm-arm/gic_v3_its.h
+++ b/xen/include/asm-arm/gic_v3_its.h
@@ -42,10 +42,12 @@ 
 #define GITS_CTLR_QUIESCENT             BIT(31)
 #define GITS_CTLR_ENABLE                BIT(0)
 
+#define GITS_TYPER_PTA                  BIT_ULL(19)
 #define GITS_TYPER_DEVIDS_SHIFT         13
 #define GITS_TYPER_DEVIDS_MASK          (0x1fUL << GITS_TYPER_DEVIDS_SHIFT)
 #define GITS_TYPER_DEVICE_ID_BITS(r)    (((r & GITS_TYPER_DEVIDS_MASK) >> \
                                                GITS_TYPER_DEVIDS_SHIFT) + 1)
+#define GITS_TYPER_IDBITS_SHIFT         8
 
 #define GITS_IIDR_VALUE                 0x34c
 
@@ -76,9 +78,26 @@ 
 
 #define GITS_CBASER_SIZE_MASK           0xff
 
+/* ITS command definitions */
+#define ITS_CMD_SIZE                    32
+
+#define GITS_CMD_MOVI                   0x01
+#define GITS_CMD_INT                    0x03
+#define GITS_CMD_CLEAR                  0x04
+#define GITS_CMD_SYNC                   0x05
+#define GITS_CMD_MAPD                   0x08
+#define GITS_CMD_MAPC                   0x09
+#define GITS_CMD_MAPTI                  0x0a
+#define GITS_CMD_MAPI                   0x0b
+#define GITS_CMD_INV                    0x0c
+#define GITS_CMD_INVALL                 0x0d
+#define GITS_CMD_MOVALL                 0x0e
+#define GITS_CMD_DISCARD                0x0f
+
 #include <xen/device_tree.h>
 
 #define HOST_ITS_FLUSH_CMD_QUEUE        (1U << 0)
+#define HOST_ITS_USES_PTA               (1U << 1)
 
 /* data structure for each hardware ITS */
 struct host_its {
@@ -87,6 +106,7 @@  struct host_its {
     paddr_t addr;
     paddr_t size;
     void __iomem *its_base;
+    spinlock_t cmd_lock;
     void *cmd_buf;
     unsigned int flags;
 };
@@ -107,6 +127,13 @@  int gicv3_lpi_init_rdist(void __iomem * rdist_base);
 int gicv3_lpi_init_host_lpis(unsigned int nr_lpis);
 int gicv3_its_init(void);
 
+/* Store the physical address and ID for each redistributor as read from DT. */
+void gicv3_set_redist_address(paddr_t address, unsigned int redist_id);
+uint64_t gicv3_get_redist_address(unsigned int cpu, bool use_pta);
+
+/* Map a collection for this host CPU to each host ITS. */
+int gicv3_its_setup_collection(unsigned int cpu);
+
 #else
 
 static LIST_HEAD(host_its_list);
@@ -134,6 +161,17 @@  static inline int gicv3_its_init(void)
 {
     return 0;
 }
+
+static inline void gicv3_set_redist_address(paddr_t address,
+                                            unsigned int redist_id)
+{
+}
+
+static inline int gicv3_its_setup_collection(unsigned int cpu)
+{
+    return 0;
+}
+
 #endif /* CONFIG_HAS_ITS */
 
 #endif