diff mbox

[RFC,08/24] ARM: GICv3: introduce separate pending_irq structs for LPIs

Message ID 20160928182457.12433-9-andre.przywara@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andre Przywara Sept. 28, 2016, 6:24 p.m. UTC
For the same reason that allocating a struct irq_desc for each
possible LPI is not an option, having a struct pending_irq for each LPI
is also not feasible. However we actually only need those when an
interrupt is on a vCPU (or is about to be injected).
Maintain a list of those structs that we can use for the lifecycle of
a guest LPI. We allocate new entries if necessary, however reuse
pre-owned entries whenever possible.
Teach the existing VGIC functions to find the right pointer when being
given a virtual LPI number.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 xen/arch/arm/gic.c            |  3 +++
 xen/arch/arm/vgic-v3.c        |  2 ++
 xen/arch/arm/vgic.c           | 56 ++++++++++++++++++++++++++++++++++++++++---
 xen/include/asm-arm/domain.h  |  1 +
 xen/include/asm-arm/gic-its.h | 10 ++++++++
 xen/include/asm-arm/vgic.h    |  9 +++++++
 6 files changed, 78 insertions(+), 3 deletions(-)

Comments

Vijay Kilari Oct. 24, 2016, 3:31 p.m. UTC | #1
On Wed, Sep 28, 2016 at 11:54 PM, Andre Przywara <andre.przywara@arm.com> wrote:
> For the same reason that allocating a struct irq_desc for each
> possible LPI is not an option, having a struct pending_irq for each LPI
> is also not feasible. However we actually only need those when an
> interrupt is on a vCPU (or is about to be injected).
> Maintain a list of those structs that we can use for the lifecycle of
> a guest LPI. We allocate new entries if necessary, however reuse
> pre-owned entries whenever possible.
> Teach the existing VGIC functions to find the right pointer when being
> given a virtual LPI number.
>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  xen/arch/arm/gic.c            |  3 +++
>  xen/arch/arm/vgic-v3.c        |  2 ++
>  xen/arch/arm/vgic.c           | 56 ++++++++++++++++++++++++++++++++++++++++---
>  xen/include/asm-arm/domain.h  |  1 +
>  xen/include/asm-arm/gic-its.h | 10 ++++++++
>  xen/include/asm-arm/vgic.h    |  9 +++++++
>  6 files changed, 78 insertions(+), 3 deletions(-)
>
> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> index 63c744a..ebe4035 100644
> --- a/xen/arch/arm/gic.c
> +++ b/xen/arch/arm/gic.c
> @@ -506,6 +506,9 @@ static void gic_update_one_lr(struct vcpu *v, int i)
>                  struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
>                  irq_set_affinity(p->desc, cpumask_of(v_target->processor));
>              }
> +            /* If this was an LPI, mark this struct as available again. */
> +            if ( p->irq >= 8192 )
 Can define something like is_lpi(irq) and use it everywhere
> +                p->irq = 0;
>          }
>      }
>  }
> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
> index ec038a3..e9b6490 100644
> --- a/xen/arch/arm/vgic-v3.c
> +++ b/xen/arch/arm/vgic-v3.c
> @@ -1388,6 +1388,8 @@ static int vgic_v3_vcpu_init(struct vcpu *v)
>      if ( v->vcpu_id == last_cpu || (v->vcpu_id == (d->max_vcpus - 1)) )
>          v->arch.vgic.flags |= VGIC_V3_RDIST_LAST;
>
> +    INIT_LIST_HEAD(&v->arch.vgic.pending_lpi_list);
> +
>      return 0;
>  }
>
> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> index 0965119..b961551 100644
> --- a/xen/arch/arm/vgic.c
> +++ b/xen/arch/arm/vgic.c
> @@ -31,6 +31,8 @@
>  #include <asm/mmio.h>
>  #include <asm/gic.h>
>  #include <asm/vgic.h>
> +#include <asm/gic_v3_defs.h>
> +#include <asm/gic-its.h>
>
>  static inline struct vgic_irq_rank *vgic_get_rank(struct vcpu *v, int rank)
>  {
> @@ -61,7 +63,7 @@ struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int irq)
>      return vgic_get_rank(v, rank);
>  }
>
> -static void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
> +void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
>  {
>      INIT_LIST_HEAD(&p->inflight);
>      INIT_LIST_HEAD(&p->lr_queue);
> @@ -244,10 +246,14 @@ struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq)
>
>  static int vgic_get_virq_priority(struct vcpu *v, unsigned int virq)
>  {
> -    struct vgic_irq_rank *rank = vgic_rank_irq(v, virq);
> +    struct vgic_irq_rank *rank;
>      unsigned long flags;
>      int priority;
>
> +    if ( virq >= 8192 )
> +        return gicv3_lpi_get_priority(v->domain, virq);
> +
> +    rank = vgic_rank_irq(v, virq);
>      vgic_lock_rank(v, rank, flags);
>      priority = rank->priority[virq & INTERRUPT_RANK_MASK];
>      vgic_unlock_rank(v, rank, flags);
> @@ -446,13 +452,55 @@ int vgic_to_sgi(struct vcpu *v, register_t sgir, enum gic_sgi_mode irqmode, int
>      return 1;
>  }
>
> +/*
> + * Holding struct pending_irq's for each possible virtual LPI in each domain
> + * requires too much Xen memory, also a malicious guest could potentially
> + * spam Xen with LPI map requests. We cannot cover those with (guest allocated)
> + * ITS memory, so we use a dynamic scheme of allocating struct pending_irq's
> + * on demand.
> + */
> +struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int lpi,
> +                                   bool allocate)
> +{
> +    struct lpi_pending_irq *lpi_irq, *empty = NULL;
> +
> +    /* TODO: locking! */
> +    list_for_each_entry(lpi_irq, &v->arch.vgic.pending_lpi_list, entry)
> +    {
> +        if ( lpi_irq->pirq.irq == lpi )
> +            return &lpi_irq->pirq;
> +
> +        if ( lpi_irq->pirq.irq == 0 && !empty )
> +            empty = lpi_irq;
> +    }
   With this approach of allocating pending_irq on demand, if the entry is
at the nth position in pending_lpi_list, then it iterates for a long time
to find the pending_irq entry.
This will increase the LPI injection time for the domain.

Why can't we use btree?

> +
> +    if ( !allocate )
> +        return NULL;
> +
> +    if ( !empty )
> +    {
> +        empty = xzalloc(struct lpi_pending_irq);
> +        vgic_init_pending_irq(&empty->pirq, lpi);
> +        list_add_tail(&empty->entry, &v->arch.vgic.pending_lpi_list);
> +    } else
> +    {
> +        empty->pirq.status = 0;
> +        empty->pirq.irq = lpi;
> +    }
> +
> +    return &empty->pirq;
> +}
> +
>  struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq)
>  {
>      struct pending_irq *n;
> +
>      /* Pending irqs allocation strategy: the first vgic.nr_spis irqs
>       * are used for SPIs; the rests are used for per cpu irqs */
>      if ( irq < 32 )
>          n = &v->arch.vgic.pending_irqs[irq];
> +    else if ( irq >= 8192 )
> +        n = lpi_to_pending(v, irq, true);
>      else
>          n = &v->domain->arch.vgic.pending_irqs[irq - 32];
>      return n;
> @@ -480,7 +528,7 @@ void vgic_clear_pending_irqs(struct vcpu *v)
>  void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq)
>  {
>      uint8_t priority;
> -    struct pending_irq *iter, *n = irq_to_pending(v, virq);
> +    struct pending_irq *iter, *n;
>      unsigned long flags;
>      bool_t running;
>
> @@ -488,6 +536,8 @@ void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq)
>
>      spin_lock_irqsave(&v->arch.vgic.lock, flags);
>
> +    n = irq_to_pending(v, virq);
> +
>      /* vcpu offline */
>      if ( test_bit(_VPF_down, &v->pause_flags) )
>      {
> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> index 9452fcd..ae8a9de 100644
> --- a/xen/include/asm-arm/domain.h
> +++ b/xen/include/asm-arm/domain.h
> @@ -249,6 +249,7 @@ struct arch_vcpu
>          paddr_t rdist_base;
>  #define VGIC_V3_RDIST_LAST  (1 << 0)        /* last vCPU of the rdist */
>          uint8_t flags;
> +        struct list_head pending_lpi_list;
>      } vgic;
>
>      /* Timer registers  */
> diff --git a/xen/include/asm-arm/gic-its.h b/xen/include/asm-arm/gic-its.h
> index 4e9841a..1f881c0 100644
> --- a/xen/include/asm-arm/gic-its.h
> +++ b/xen/include/asm-arm/gic-its.h
> @@ -136,6 +136,12 @@ int gicv3_lpi_allocate_host_lpi(struct host_its *its,
>  int gicv3_lpi_drop_host_lpi(struct host_its *its,
>                              uint32_t devid, uint32_t eventid,
>                              uint32_t host_lpi);
> +
> +static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
> +{
> +    return GIC_PRI_IRQ;
   Why is the LPI priority fixed? Can't we use the domain-set LPI priority?

> +}
> +
>  #else
>
>  static inline void gicv3_its_dt_init(const struct dt_device_node *node)
> @@ -175,6 +181,10 @@ static inline int gicv3_lpi_drop_host_lpi(struct host_its *its,
>  {
>      return 0;
>  }
> +static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
> +{
> +    return GIC_PRI_IRQ;
> +}
>
>  #endif /* CONFIG_HAS_ITS */
>
> diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
> index 300f461..4e29ba6 100644
> --- a/xen/include/asm-arm/vgic.h
> +++ b/xen/include/asm-arm/vgic.h
> @@ -83,6 +83,12 @@ struct pending_irq
>      struct list_head lr_queue;
>  };
>
> +struct lpi_pending_irq
> +{
> +    struct list_head entry;
> +    struct pending_irq pirq;
> +};
> +
>  #define NR_INTERRUPT_PER_RANK   32
>  #define INTERRUPT_RANK_MASK (NR_INTERRUPT_PER_RANK - 1)
>
> @@ -296,8 +302,11 @@ extern struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq);
>  extern void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq);
>  extern void vgic_vcpu_inject_spi(struct domain *d, unsigned int virq);
>  extern void vgic_clear_pending_irqs(struct vcpu *v);
> +extern void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq);
>  extern struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq);
>  extern struct pending_irq *spi_to_pending(struct domain *d, unsigned int irq);
> +extern struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int irq,
> +                                          bool allocate);
>  extern struct vgic_irq_rank *vgic_rank_offset(struct vcpu *v, int b, int n, int s);
>  extern struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int irq);
>  extern int vgic_emulate(struct cpu_user_regs *regs, union hsr hsr);
> --
> 2.9.0
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> https://lists.xen.org/xen-devel
Stefano Stabellini Oct. 28, 2016, 1:04 a.m. UTC | #2
On Wed, 28 Sep 2016, Andre Przywara wrote:
> For the same reason that allocating a struct irq_desc for each
> possible LPI is not an option, having a struct pending_irq for each LPI
> is also not feasible. However we actually only need those when an
> interrupt is on a vCPU (or is about to be injected).
> Maintain a list of those structs that we can use for the lifecycle of
> a guest LPI. We allocate new entries if necessary, however reuse
> pre-owned entries whenever possible.
> Teach the existing VGIC functions to find the right pointer when being
> given a virtual LPI number.
> 
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  xen/arch/arm/gic.c            |  3 +++
>  xen/arch/arm/vgic-v3.c        |  2 ++
>  xen/arch/arm/vgic.c           | 56 ++++++++++++++++++++++++++++++++++++++++---
>  xen/include/asm-arm/domain.h  |  1 +
>  xen/include/asm-arm/gic-its.h | 10 ++++++++
>  xen/include/asm-arm/vgic.h    |  9 +++++++
>  6 files changed, 78 insertions(+), 3 deletions(-)
> 
> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> index 63c744a..ebe4035 100644
> --- a/xen/arch/arm/gic.c
> +++ b/xen/arch/arm/gic.c
> @@ -506,6 +506,9 @@ static void gic_update_one_lr(struct vcpu *v, int i)
>                  struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
>                  irq_set_affinity(p->desc, cpumask_of(v_target->processor));
>              }
> +            /* If this was an LPI, mark this struct as available again. */
> +            if ( p->irq >= 8192 )
> +                p->irq = 0;

I believe that 0 is a valid irq number; we need to come up with a
different invalid_irq value, and we should #define it. We could also
consider checking whether the irq is inflight (linked to the inflight list)
instead of using irq == 0 to determine whether it is reusable.


>          }
>      }
>  }
> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
> index ec038a3..e9b6490 100644
> --- a/xen/arch/arm/vgic-v3.c
> +++ b/xen/arch/arm/vgic-v3.c
> @@ -1388,6 +1388,8 @@ static int vgic_v3_vcpu_init(struct vcpu *v)
>      if ( v->vcpu_id == last_cpu || (v->vcpu_id == (d->max_vcpus - 1)) )
>          v->arch.vgic.flags |= VGIC_V3_RDIST_LAST;
>  
> +    INIT_LIST_HEAD(&v->arch.vgic.pending_lpi_list);
> +
>      return 0;
>  }
>  
> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> index 0965119..b961551 100644
> --- a/xen/arch/arm/vgic.c
> +++ b/xen/arch/arm/vgic.c
> @@ -31,6 +31,8 @@
>  #include <asm/mmio.h>
>  #include <asm/gic.h>
>  #include <asm/vgic.h>
> +#include <asm/gic_v3_defs.h>
> +#include <asm/gic-its.h>
>  
>  static inline struct vgic_irq_rank *vgic_get_rank(struct vcpu *v, int rank)
>  {
> @@ -61,7 +63,7 @@ struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int irq)
>      return vgic_get_rank(v, rank);
>  }
>  
> -static void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
> +void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
>  {
>      INIT_LIST_HEAD(&p->inflight);
>      INIT_LIST_HEAD(&p->lr_queue);
> @@ -244,10 +246,14 @@ struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq)
>  
>  static int vgic_get_virq_priority(struct vcpu *v, unsigned int virq)
>  {
> -    struct vgic_irq_rank *rank = vgic_rank_irq(v, virq);
> +    struct vgic_irq_rank *rank;
>      unsigned long flags;
>      int priority;
>  
> +    if ( virq >= 8192 )

Please introduce a convenience static inline function such as:

  bool is_lpi(unsigned int irq)


> +        return gicv3_lpi_get_priority(v->domain, virq);
> +
> +    rank = vgic_rank_irq(v, virq);
>      vgic_lock_rank(v, rank, flags);
>      priority = rank->priority[virq & INTERRUPT_RANK_MASK];
>      vgic_unlock_rank(v, rank, flags);
> @@ -446,13 +452,55 @@ int vgic_to_sgi(struct vcpu *v, register_t sgir, enum gic_sgi_mode irqmode, int
>      return 1;
>  }
>  
> +/*
> + * Holding struct pending_irq's for each possible virtual LPI in each domain
> + * requires too much Xen memory, also a malicious guest could potentially
> + * spam Xen with LPI map requests. We cannot cover those with (guest allocated)
> + * ITS memory, so we use a dynamic scheme of allocating struct pending_irq's
> + * on demand.
> + */
> +struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int lpi,
> +                                   bool allocate)
> +{
> +    struct lpi_pending_irq *lpi_irq, *empty = NULL;
> +
> +    /* TODO: locking! */

Yeah, this needs to be fixed in v1 :-)


> +    list_for_each_entry(lpi_irq, &v->arch.vgic.pending_lpi_list, entry)
> +    {
> +        if ( lpi_irq->pirq.irq == lpi )
> +            return &lpi_irq->pirq;
> +
> +        if ( lpi_irq->pirq.irq == 0 && !empty )
> +            empty = lpi_irq;
> +    }

This is another one of those cases where a list is too slow for the hot
path. The idea of allocating pending_irq struct on demand is good, but
storing them in a linked list would kill performance. Probably the best
thing we could do is an hashtable and we should preallocate the initial
array of elements. I don't know what the size of the initial array
should be, but we can start around 50, and change it in the future once
we do tests with real workloads. Of course the other key parameter is
the hash function, not sure which one is the right one, but ideally we
would never have to allocate new pending_irq struct for LPIs because the
preallocated set would suffice.

I could be convinced that a list is sufficient if we do some real
benchmarking and it turns out that lpi_to_pending always resolve in less
than ~5 steps.


> +    if ( !allocate )
> +        return NULL;
> +
> +    if ( !empty )
> +    {
> +        empty = xzalloc(struct lpi_pending_irq);
> +        vgic_init_pending_irq(&empty->pirq, lpi);
> +        list_add_tail(&empty->entry, &v->arch.vgic.pending_lpi_list);
> +    } else
> +    {
> +        empty->pirq.status = 0;
> +        empty->pirq.irq = lpi;
> +    }
> +
> +    return &empty->pirq;
> +}
> +
>  struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq)
>  {
>      struct pending_irq *n;
> +

spurious change


>      /* Pending irqs allocation strategy: the first vgic.nr_spis irqs
>       * are used for SPIs; the rests are used for per cpu irqs */
>      if ( irq < 32 )
>          n = &v->arch.vgic.pending_irqs[irq];
> +    else if ( irq >= 8192 )

Use the new static inline


> +        n = lpi_to_pending(v, irq, true);
>      else
>          n = &v->domain->arch.vgic.pending_irqs[irq - 32];
>      return n;
> @@ -480,7 +528,7 @@ void vgic_clear_pending_irqs(struct vcpu *v)
>  void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq)
>  {
>      uint8_t priority;
> -    struct pending_irq *iter, *n = irq_to_pending(v, virq);
> +    struct pending_irq *iter, *n;
>      unsigned long flags;
>      bool_t running;
>  
> @@ -488,6 +536,8 @@ void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq)
>  
>      spin_lock_irqsave(&v->arch.vgic.lock, flags);
>  
> +    n = irq_to_pending(v, virq);

Why this change?


>      /* vcpu offline */
>      if ( test_bit(_VPF_down, &v->pause_flags) )
>      {
> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> index 9452fcd..ae8a9de 100644
> --- a/xen/include/asm-arm/domain.h
> +++ b/xen/include/asm-arm/domain.h
> @@ -249,6 +249,7 @@ struct arch_vcpu
>          paddr_t rdist_base;
>  #define VGIC_V3_RDIST_LAST  (1 << 0)        /* last vCPU of the rdist */
>          uint8_t flags;
> +        struct list_head pending_lpi_list;
>      } vgic;
>  
>      /* Timer registers  */
> diff --git a/xen/include/asm-arm/gic-its.h b/xen/include/asm-arm/gic-its.h
> index 4e9841a..1f881c0 100644
> --- a/xen/include/asm-arm/gic-its.h
> +++ b/xen/include/asm-arm/gic-its.h
> @@ -136,6 +136,12 @@ int gicv3_lpi_allocate_host_lpi(struct host_its *its,
>  int gicv3_lpi_drop_host_lpi(struct host_its *its,
>                              uint32_t devid, uint32_t eventid,
>                              uint32_t host_lpi);
> +
> +static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
> +{
> +    return GIC_PRI_IRQ;
> +}

Does it mean that we don't allow changes to LPI priorities?


>  #else
>  
>  static inline void gicv3_its_dt_init(const struct dt_device_node *node)
> @@ -175,6 +181,10 @@ static inline int gicv3_lpi_drop_host_lpi(struct host_its *its,
>  {
>      return 0;
>  }
> +static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
> +{
> +    return GIC_PRI_IRQ;
> +}
>  
>  #endif /* CONFIG_HAS_ITS */
>  
> diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
> index 300f461..4e29ba6 100644
> --- a/xen/include/asm-arm/vgic.h
> +++ b/xen/include/asm-arm/vgic.h
> @@ -83,6 +83,12 @@ struct pending_irq
>      struct list_head lr_queue;
>  };
>  
> +struct lpi_pending_irq
> +{
> +    struct list_head entry;
> +    struct pending_irq pirq;
> +};
> +
>  #define NR_INTERRUPT_PER_RANK   32
>  #define INTERRUPT_RANK_MASK (NR_INTERRUPT_PER_RANK - 1)
>  
> @@ -296,8 +302,11 @@ extern struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq);
>  extern void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq);
>  extern void vgic_vcpu_inject_spi(struct domain *d, unsigned int virq);
>  extern void vgic_clear_pending_irqs(struct vcpu *v);
> +extern void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq);
>  extern struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq);
>  extern struct pending_irq *spi_to_pending(struct domain *d, unsigned int irq);
> +extern struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int irq,
> +                                          bool allocate);
>  extern struct vgic_irq_rank *vgic_rank_offset(struct vcpu *v, int b, int n, int s);
>  extern struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int irq);
>  extern int vgic_emulate(struct cpu_user_regs *regs, union hsr hsr);
> -- 
> 2.9.0
>
Andre Przywara Nov. 3, 2016, 7:47 p.m. UTC | #3
Hi,

On 24/10/16 16:31, Vijay Kilari wrote:
> On Wed, Sep 28, 2016 at 11:54 PM, Andre Przywara <andre.przywara@arm.com> wrote:
>> For the same reason that allocating a struct irq_desc for each
>> possible LPI is not an option, having a struct pending_irq for each LPI
>> is also not feasible. However we actually only need those when an
>> interrupt is on a vCPU (or is about to be injected).
>> Maintain a list of those structs that we can use for the lifecycle of
>> a guest LPI. We allocate new entries if necessary, however reuse
>> pre-owned entries whenever possible.
>> Teach the existing VGIC functions to find the right pointer when being
>> given a virtual LPI number.
>>
>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>> ---
>>  xen/arch/arm/gic.c            |  3 +++
>>  xen/arch/arm/vgic-v3.c        |  2 ++
>>  xen/arch/arm/vgic.c           | 56 ++++++++++++++++++++++++++++++++++++++++---
>>  xen/include/asm-arm/domain.h  |  1 +
>>  xen/include/asm-arm/gic-its.h | 10 ++++++++
>>  xen/include/asm-arm/vgic.h    |  9 +++++++
>>  6 files changed, 78 insertions(+), 3 deletions(-)
>>
>> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
>> index 63c744a..ebe4035 100644
>> --- a/xen/arch/arm/gic.c
>> +++ b/xen/arch/arm/gic.c
>> @@ -506,6 +506,9 @@ static void gic_update_one_lr(struct vcpu *v, int i)
>>                  struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
>>                  irq_set_affinity(p->desc, cpumask_of(v_target->processor));
>>              }
>> +            /* If this was an LPI, mark this struct as available again. */
>> +            if ( p->irq >= 8192 )
>  Can define something like is_lpi(irq) and use it everywhere

Yes, that was on my list anyway.

>> +                p->irq = 0;
>>          }
>>      }
>>  }
>> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
>> index ec038a3..e9b6490 100644
>> --- a/xen/arch/arm/vgic-v3.c
>> +++ b/xen/arch/arm/vgic-v3.c
>> @@ -1388,6 +1388,8 @@ static int vgic_v3_vcpu_init(struct vcpu *v)
>>      if ( v->vcpu_id == last_cpu || (v->vcpu_id == (d->max_vcpus - 1)) )
>>          v->arch.vgic.flags |= VGIC_V3_RDIST_LAST;
>>
>> +    INIT_LIST_HEAD(&v->arch.vgic.pending_lpi_list);
>> +
>>      return 0;
>>  }
>>
>> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
>> index 0965119..b961551 100644
>> --- a/xen/arch/arm/vgic.c
>> +++ b/xen/arch/arm/vgic.c
>> @@ -31,6 +31,8 @@
>>  #include <asm/mmio.h>
>>  #include <asm/gic.h>
>>  #include <asm/vgic.h>
>> +#include <asm/gic_v3_defs.h>
>> +#include <asm/gic-its.h>
>>
>>  static inline struct vgic_irq_rank *vgic_get_rank(struct vcpu *v, int rank)
>>  {
>> @@ -61,7 +63,7 @@ struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int irq)
>>      return vgic_get_rank(v, rank);
>>  }
>>
>> -static void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
>> +void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
>>  {
>>      INIT_LIST_HEAD(&p->inflight);
>>      INIT_LIST_HEAD(&p->lr_queue);
>> @@ -244,10 +246,14 @@ struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq)
>>
>>  static int vgic_get_virq_priority(struct vcpu *v, unsigned int virq)
>>  {
>> -    struct vgic_irq_rank *rank = vgic_rank_irq(v, virq);
>> +    struct vgic_irq_rank *rank;
>>      unsigned long flags;
>>      int priority;
>>
>> +    if ( virq >= 8192 )
>> +        return gicv3_lpi_get_priority(v->domain, virq);
>> +
>> +    rank = vgic_rank_irq(v, virq);
>>      vgic_lock_rank(v, rank, flags);
>>      priority = rank->priority[virq & INTERRUPT_RANK_MASK];
>>      vgic_unlock_rank(v, rank, flags);
>> @@ -446,13 +452,55 @@ int vgic_to_sgi(struct vcpu *v, register_t sgir, enum gic_sgi_mode irqmode, int
>>      return 1;
>>  }
>>
>> +/*
>> + * Holding struct pending_irq's for each possible virtual LPI in each domain
>> + * requires too much Xen memory, also a malicious guest could potentially
>> + * spam Xen with LPI map requests. We cannot cover those with (guest allocated)
>> + * ITS memory, so we use a dynamic scheme of allocating struct pending_irq's
>> + * on demand.
>> + */
>> +struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int lpi,
>> +                                   bool allocate)
>> +{
>> +    struct lpi_pending_irq *lpi_irq, *empty = NULL;
>> +
>> +    /* TODO: locking! */
>> +    list_for_each_entry(lpi_irq, &v->arch.vgic.pending_lpi_list, entry)
>> +    {
>> +        if ( lpi_irq->pirq.irq == lpi )
>> +            return &lpi_irq->pirq;
>> +
>> +        if ( lpi_irq->pirq.irq == 0 && !empty )
>> +            empty = lpi_irq;
>> +    }
>    With this approach of allocating pending_irq on demand, if the entry is
> at the nth position in pending_lpi_list, then it iterates for a long time
> to find the pending_irq entry.
> This will increase the LPI injection time for the domain.
> 
> Why can't we use btree?

That's an optimization. You will find that the actual number of
interrupts on a VCPU at any given time is very low, especially if we
look at LPIs only. So my hunch is that it's either 0, 1 or 2, not more.
So for simplicity I'd keep it as a list for now. If we see issues, we
can amend this at any time.

>> +
>> +    if ( !allocate )
>> +        return NULL;
>> +
>> +    if ( !empty )
>> +    {
>> +        empty = xzalloc(struct lpi_pending_irq);
>> +        vgic_init_pending_irq(&empty->pirq, lpi);
>> +        list_add_tail(&empty->entry, &v->arch.vgic.pending_lpi_list);
>> +    } else
>> +    {
>> +        empty->pirq.status = 0;
>> +        empty->pirq.irq = lpi;
>> +    }
>> +
>> +    return &empty->pirq;
>> +}
>> +
>>  struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq)
>>  {
>>      struct pending_irq *n;
>> +
>>      /* Pending irqs allocation strategy: the first vgic.nr_spis irqs
>>       * are used for SPIs; the rests are used for per cpu irqs */
>>      if ( irq < 32 )
>>          n = &v->arch.vgic.pending_irqs[irq];
>> +    else if ( irq >= 8192 )
>> +        n = lpi_to_pending(v, irq, true);
>>      else
>>          n = &v->domain->arch.vgic.pending_irqs[irq - 32];
>>      return n;
>> @@ -480,7 +528,7 @@ void vgic_clear_pending_irqs(struct vcpu *v)
>>  void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq)
>>  {
>>      uint8_t priority;
>> -    struct pending_irq *iter, *n = irq_to_pending(v, virq);
>> +    struct pending_irq *iter, *n;
>>      unsigned long flags;
>>      bool_t running;
>>
>> @@ -488,6 +536,8 @@ void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq)
>>
>>      spin_lock_irqsave(&v->arch.vgic.lock, flags);
>>
>> +    n = irq_to_pending(v, virq);
>> +
>>      /* vcpu offline */
>>      if ( test_bit(_VPF_down, &v->pause_flags) )
>>      {
>> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
>> index 9452fcd..ae8a9de 100644
>> --- a/xen/include/asm-arm/domain.h
>> +++ b/xen/include/asm-arm/domain.h
>> @@ -249,6 +249,7 @@ struct arch_vcpu
>>          paddr_t rdist_base;
>>  #define VGIC_V3_RDIST_LAST  (1 << 0)        /* last vCPU of the rdist */
>>          uint8_t flags;
>> +        struct list_head pending_lpi_list;
>>      } vgic;
>>
>>      /* Timer registers  */
>> diff --git a/xen/include/asm-arm/gic-its.h b/xen/include/asm-arm/gic-its.h
>> index 4e9841a..1f881c0 100644
>> --- a/xen/include/asm-arm/gic-its.h
>> +++ b/xen/include/asm-arm/gic-its.h
>> @@ -136,6 +136,12 @@ int gicv3_lpi_allocate_host_lpi(struct host_its *its,
>>  int gicv3_lpi_drop_host_lpi(struct host_its *its,
>>                              uint32_t devid, uint32_t eventid,
>>                              uint32_t host_lpi);
>> +
>> +static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
>> +{
>> +    return GIC_PRI_IRQ;
>    Why is the LPI priority fixed? Can't we use the domain-set LPI priority?

Mmh, looks like a rebase artifact. It is fixed if the ITS isn't
configured, but should just be the prototype if not.
The final file has it right, so I guess this gets amended in some later
patch. Thanks for spotting this.

Cheers,
Andre.

>> +}
>> +
>>  #else
>>
>>  static inline void gicv3_its_dt_init(const struct dt_device_node *node)
>> @@ -175,6 +181,10 @@ static inline int gicv3_lpi_drop_host_lpi(struct host_its *its,
>>  {
>>      return 0;
>>  }
>> +static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
>> +{
>> +    return GIC_PRI_IRQ;
>> +}
>>
>>  #endif /* CONFIG_HAS_ITS */
>>
>> diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
>> index 300f461..4e29ba6 100644
>> --- a/xen/include/asm-arm/vgic.h
>> +++ b/xen/include/asm-arm/vgic.h
>> @@ -83,6 +83,12 @@ struct pending_irq
>>      struct list_head lr_queue;
>>  };
>>
>> +struct lpi_pending_irq
>> +{
>> +    struct list_head entry;
>> +    struct pending_irq pirq;
>> +};
>> +
>>  #define NR_INTERRUPT_PER_RANK   32
>>  #define INTERRUPT_RANK_MASK (NR_INTERRUPT_PER_RANK - 1)
>>
>> @@ -296,8 +302,11 @@ extern struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq);
>>  extern void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq);
>>  extern void vgic_vcpu_inject_spi(struct domain *d, unsigned int virq);
>>  extern void vgic_clear_pending_irqs(struct vcpu *v);
>> +extern void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq);
>>  extern struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq);
>>  extern struct pending_irq *spi_to_pending(struct domain *d, unsigned int irq);
>> +extern struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int irq,
>> +                                          bool allocate);
>>  extern struct vgic_irq_rank *vgic_rank_offset(struct vcpu *v, int b, int n, int s);
>>  extern struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int irq);
>>  extern int vgic_emulate(struct cpu_user_regs *regs, union hsr hsr);
>> --
>> 2.9.0
>>
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@lists.xen.org
>> https://lists.xen.org/xen-devel
>
Julien Grall Nov. 4, 2016, 3:46 p.m. UTC | #4
Hi Andre,

On 28/09/16 19:24, Andre Przywara wrote:
> +/*
> + * Holding struct pending_irq's for each possible virtual LPI in each domain
> + * requires too much Xen memory, also a malicious guest could potentially
> + * spam Xen with LPI map requests. We cannot cover those with (guest allocated)
> + * ITS memory, so we use a dynamic scheme of allocating struct pending_irq's
> + * on demand.
> + */
> +struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int lpi,
> +                                   bool allocate)
> +{
> +    struct lpi_pending_irq *lpi_irq, *empty = NULL;
> +
> +    /* TODO: locking! */
> +    list_for_each_entry(lpi_irq, &v->arch.vgic.pending_lpi_list, entry)
> +    {
> +        if ( lpi_irq->pirq.irq == lpi )
> +            return &lpi_irq->pirq;
> +
> +        if ( lpi_irq->pirq.irq == 0 && !empty )
> +            empty = lpi_irq;
> +    }
> +
> +    if ( !allocate )
> +        return NULL;
> +
> +    if ( !empty )
> +    {
> +        empty = xzalloc(struct lpi_pending_irq);

xzalloc can return NULL if we fail to allocate memory.

> +        vgic_init_pending_irq(&empty->pirq, lpi);
> +        list_add_tail(&empty->entry, &v->arch.vgic.pending_lpi_list);
> +    } else
> +    {
> +        empty->pirq.status = 0;
> +        empty->pirq.irq = lpi;
> +    }
> +
> +    return &empty->pirq;
> +}
> +

Regards,
Andre Przywara Jan. 12, 2017, 7:14 p.m. UTC | #5
Hi Stefano,

as just mentioned in my last reply, I missed that email last time. Sorry
for that.

Replying to the comments that still apply to the new drop ...

On 28/10/16 02:04, Stefano Stabellini wrote:
> On Wed, 28 Sep 2016, Andre Przywara wrote:
>> For the same reason that allocating a struct irq_desc for each
>> possible LPI is not an option, having a struct pending_irq for each LPI
>> is also not feasible. However we actually only need those when an
>> interrupt is on a vCPU (or is about to be injected).
>> Maintain a list of those structs that we can use for the lifecycle of
>> a guest LPI. We allocate new entries if necessary, however reuse
>> pre-owned entries whenever possible.
>> Teach the existing VGIC functions to find the right pointer when being
>> given a virtual LPI number.
>>
>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>> ---
>>  xen/arch/arm/gic.c            |  3 +++
>>  xen/arch/arm/vgic-v3.c        |  2 ++
>>  xen/arch/arm/vgic.c           | 56 ++++++++++++++++++++++++++++++++++++++++---
>>  xen/include/asm-arm/domain.h  |  1 +
>>  xen/include/asm-arm/gic-its.h | 10 ++++++++
>>  xen/include/asm-arm/vgic.h    |  9 +++++++
>>  6 files changed, 78 insertions(+), 3 deletions(-)
>>
>> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
>> index 63c744a..ebe4035 100644
>> --- a/xen/arch/arm/gic.c
>> +++ b/xen/arch/arm/gic.c
>> @@ -506,6 +506,9 @@ static void gic_update_one_lr(struct vcpu *v, int i)
>>                  struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
>>                  irq_set_affinity(p->desc, cpumask_of(v_target->processor));
>>              }
>> +            /* If this was an LPI, mark this struct as available again. */
>> +            if ( p->irq >= 8192 )
>> +                p->irq = 0;
> 
> I believe that 0 is a valid irq number, we need to come up with a
> different invalid_irq value, and we should #define it. We could also
> consider checking if the irq is inflight (linked to the inflight list)
> instead of using irq == 0 to understand if it is reusable.

But those pending_irqs here are used by LPIs only, where everything
below 8192 is invalid. So that seemed like an easy and straightforward
value to use. The other, statically allocated pending_irqs would never
read an IRQ number above 8192. When searching for an empty pending_irq
for a new LPI, we would never touch any of the statically allocated
structs, so this is safe, isn't it?

>>          }
>>      }
>>  }
>> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
>> index ec038a3..e9b6490 100644
>> --- a/xen/arch/arm/vgic-v3.c
>> +++ b/xen/arch/arm/vgic-v3.c
>> @@ -1388,6 +1388,8 @@ static int vgic_v3_vcpu_init(struct vcpu *v)
>>      if ( v->vcpu_id == last_cpu || (v->vcpu_id == (d->max_vcpus - 1)) )
>>          v->arch.vgic.flags |= VGIC_V3_RDIST_LAST;
>>  
>> +    INIT_LIST_HEAD(&v->arch.vgic.pending_lpi_list);
>> +
>>      return 0;
>>  }
>>  
>> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
>> index 0965119..b961551 100644
>> --- a/xen/arch/arm/vgic.c
>> +++ b/xen/arch/arm/vgic.c
>> @@ -31,6 +31,8 @@
>>  #include <asm/mmio.h>
>>  #include <asm/gic.h>
>>  #include <asm/vgic.h>
>> +#include <asm/gic_v3_defs.h>
>> +#include <asm/gic-its.h>
>>  
>>  static inline struct vgic_irq_rank *vgic_get_rank(struct vcpu *v, int rank)
>>  {
>> @@ -61,7 +63,7 @@ struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int irq)
>>      return vgic_get_rank(v, rank);
>>  }
>>  
>> -static void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
>> +void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
>>  {
>>      INIT_LIST_HEAD(&p->inflight);
>>      INIT_LIST_HEAD(&p->lr_queue);
>> @@ -244,10 +246,14 @@ struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq)
>>  
>>  static int vgic_get_virq_priority(struct vcpu *v, unsigned int virq)
>>  {
>> -    struct vgic_irq_rank *rank = vgic_rank_irq(v, virq);
>> +    struct vgic_irq_rank *rank;
>>      unsigned long flags;
>>      int priority;
>>  
>> +    if ( virq >= 8192 )
> 
> Please introduce a convenience static inline function such as:
> 
>   bool is_lpi(unsigned int irq)

Sure.

>> +        return gicv3_lpi_get_priority(v->domain, virq);
>> +
>> +    rank = vgic_rank_irq(v, virq);
>>      vgic_lock_rank(v, rank, flags);
>>      priority = rank->priority[virq & INTERRUPT_RANK_MASK];
>>      vgic_unlock_rank(v, rank, flags);
>> @@ -446,13 +452,55 @@ int vgic_to_sgi(struct vcpu *v, register_t sgir, enum gic_sgi_mode irqmode, int
>>      return 1;
>>  }
>>  
>> +/*
>> + * Holding struct pending_irq's for each possible virtual LPI in each domain
>> + * requires too much Xen memory, also a malicious guest could potentially
>> + * spam Xen with LPI map requests. We cannot cover those with (guest allocated)
>> + * ITS memory, so we use a dynamic scheme of allocating struct pending_irq's
>> + * on demand.
>> + */
>> +struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int lpi,
>> +                                   bool allocate)
>> +{
>> +    struct lpi_pending_irq *lpi_irq, *empty = NULL;
>> +
>> +    /* TODO: locking! */
> 
> Yeah, this needs to be fixed in v1 :-)

I fixed that in the RFC v2 post.

> 
>> +    list_for_each_entry(lpi_irq, &v->arch.vgic.pending_lpi_list, entry)
>> +    {
>> +        if ( lpi_irq->pirq.irq == lpi )
>> +            return &lpi_irq->pirq;
>> +
>> +        if ( lpi_irq->pirq.irq == 0 && !empty )
>> +            empty = lpi_irq;
>> +    }
> 
> This is another one of those cases where a list is too slow for the hot
> path. The idea of allocating pending_irq struct on demand is good, but
> storing them in a linked list would kill performance. Probably the best
> thing we could do is an hashtable and we should preallocate the initial
> array of elements. I don't know what the size of the initial array
> should be, but we can start around 50, and change it in the future once
> we do tests with real workloads. Of course the other key parameter is
> the hash function, not sure which one is the right one, but ideally we
> would never have to allocate new pending_irq struct for LPIs because the
> preallocated set would suffice.

As I mentioned in the last post, I expect this number to be really low
(less than 5). Let's face it: If you have multiple interrupts pending
for a significant amount of time you won't make any actual progress in
the guest, because it's busy with handling interrupts.
So my picture of LPI handling is:
1) A device triggers an MSI, so the host receives the LPI. Ideally this
will be handled by the pCPU where the right VCPU is running atm, so it
will exit to EL2. Xen will handle the LPI by assigning one struct
pending_irq to it and will inject it into the guest.
2) The VCPU gets to run again and calls the interrupt handler, because
the (virtual) LPI is pending.
3) The (Linux) IRQ handler reads the ICC_IAR register to learn the IRQ
number, and will get the virtual LPI number.
=> At this point the LPI is done when it comes to the VGIC. The LR state
will be set to 0 (neither pending or active). This is independent of the
EOI the handler will execute soon (or later).
4) On the next exit the VGIC code will discover that the IRQ is done
(LR.state == 0) and will discard the struct pending_irq (set the LPI
number to 0 to make it available to the next LPI).

Even if there would be multiple LPIs pending at the same time (because
the guest had interrupts disabled, for instance), I believe they can be
all handled without exiting. Upon EOIing (priority-dropping, really) the
first LPI, the next virtual LPI would fire, calling the interrupt
handler again, and so on. Unless the kernel decides to do something that
exits (even accessing the hardware normally wouldn't, I believe), we can
clear all pending LPIs in one go.

So I have a hard time to imagine how we can really have many LPIs
pending and thus struct pending_irqs allocated.
Note that this may differ from SPIs, for instance, because the IRQ life
cycle is more complex there (extending till the EOI).

Does that make some sense? Or am I missing something here?

> I could be convinced that a list is sufficient if we do some real
> benchmarking and it turns out that lpi_to_pending always resolve in less
> than ~5 steps.

I can try to do this once I get it running on some silicon ...

>> +    if ( !allocate )
>> +        return NULL;
>> +
>> +    if ( !empty )
>> +    {
>> +        empty = xzalloc(struct lpi_pending_irq);
>> +        vgic_init_pending_irq(&empty->pirq, lpi);
>> +        list_add_tail(&empty->entry, &v->arch.vgic.pending_lpi_list);
>> +    } else
>> +    {
>> +        empty->pirq.status = 0;
>> +        empty->pirq.irq = lpi;
>> +    }
>> +
>> +    return &empty->pirq;
>> +}
>> +
>>  struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq)
>>  {
>>      struct pending_irq *n;
>> +
> 
> spurious change
> 
> 
>>      /* Pending irqs allocation strategy: the first vgic.nr_spis irqs
>>       * are used for SPIs; the rests are used for per cpu irqs */
>>      if ( irq < 32 )
>>          n = &v->arch.vgic.pending_irqs[irq];
>> +    else if ( irq >= 8192 )
> 
> Use the new static inline
> 
> 
>> +        n = lpi_to_pending(v, irq, true);
>>      else
>>          n = &v->domain->arch.vgic.pending_irqs[irq - 32];
>>      return n;
>> @@ -480,7 +528,7 @@ void vgic_clear_pending_irqs(struct vcpu *v)
>>  void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq)
>>  {
>>      uint8_t priority;
>> -    struct pending_irq *iter, *n = irq_to_pending(v, virq);
>> +    struct pending_irq *iter, *n;
>>      unsigned long flags;
>>      bool_t running;
>>  
>> @@ -488,6 +536,8 @@ void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq)
>>  
>>      spin_lock_irqsave(&v->arch.vgic.lock, flags);
>>  
>> +    n = irq_to_pending(v, virq);
> 
> Why this change?

Because we now need to hold the lock before calling irq_to_pending(),
which now may call lpi_to_pending().

>>      /* vcpu offline */
>>      if ( test_bit(_VPF_down, &v->pause_flags) )
>>      {
>> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
>> index 9452fcd..ae8a9de 100644
>> --- a/xen/include/asm-arm/domain.h
>> +++ b/xen/include/asm-arm/domain.h
>> @@ -249,6 +249,7 @@ struct arch_vcpu
>>          paddr_t rdist_base;
>>  #define VGIC_V3_RDIST_LAST  (1 << 0)        /* last vCPU of the rdist */
>>          uint8_t flags;
>> +        struct list_head pending_lpi_list;
>>      } vgic;
>>  
>>      /* Timer registers  */
>> diff --git a/xen/include/asm-arm/gic-its.h b/xen/include/asm-arm/gic-its.h
>> index 4e9841a..1f881c0 100644
>> --- a/xen/include/asm-arm/gic-its.h
>> +++ b/xen/include/asm-arm/gic-its.h
>> @@ -136,6 +136,12 @@ int gicv3_lpi_allocate_host_lpi(struct host_its *its,
>>  int gicv3_lpi_drop_host_lpi(struct host_its *its,
>>                              uint32_t devid, uint32_t eventid,
>>                              uint32_t host_lpi);
>> +
>> +static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
>> +{
>> +    return GIC_PRI_IRQ;
>> +}
> 
> Does it mean that we don't allow changes to LPI priorities?

This is placeholder code for now, until we learn about the virtual
property table in patch 11/24 (where this function gets amended).
The new code drop gets away without this function here entirely.

Cheers,
Andre.

>>  #else
>>  
>>  static inline void gicv3_its_dt_init(const struct dt_device_node *node)
>> @@ -175,6 +181,10 @@ static inline int gicv3_lpi_drop_host_lpi(struct host_its *its,
>>  {
>>      return 0;
>>  }
>> +static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
>> +{
>> +    return GIC_PRI_IRQ;
>> +}
>>  
>>  #endif /* CONFIG_HAS_ITS */
>>  
>> diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
>> index 300f461..4e29ba6 100644
>> --- a/xen/include/asm-arm/vgic.h
>> +++ b/xen/include/asm-arm/vgic.h
>> @@ -83,6 +83,12 @@ struct pending_irq
>>      struct list_head lr_queue;
>>  };
>>  
>> +struct lpi_pending_irq
>> +{
>> +    struct list_head entry;
>> +    struct pending_irq pirq;
>> +};
>> +
>>  #define NR_INTERRUPT_PER_RANK   32
>>  #define INTERRUPT_RANK_MASK (NR_INTERRUPT_PER_RANK - 1)
>>  
>> @@ -296,8 +302,11 @@ extern struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq);
>>  extern void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq);
>>  extern void vgic_vcpu_inject_spi(struct domain *d, unsigned int virq);
>>  extern void vgic_clear_pending_irqs(struct vcpu *v);
>> +extern void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq);
>>  extern struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq);
>>  extern struct pending_irq *spi_to_pending(struct domain *d, unsigned int irq);
>> +extern struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int irq,
>> +                                          bool allocate);
>>  extern struct vgic_irq_rank *vgic_rank_offset(struct vcpu *v, int b, int n, int s);
>>  extern struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int irq);
>>  extern int vgic_emulate(struct cpu_user_regs *regs, union hsr hsr);
>> -- 
>> 2.9.0
>>
>
Stefano Stabellini Jan. 13, 2017, 7:37 p.m. UTC | #6
On Thu, 12 Jan 2017, Andre Przywara wrote:
> Hi Stefano,
> 
> as just mentioned in my last reply, I missed that email last time. Sorry
> for that.
> 
> Replying to the comments that still apply to the new drop ...
> 
> On 28/10/16 02:04, Stefano Stabellini wrote:
> > On Wed, 28 Sep 2016, Andre Przywara wrote:
> >> For the same reason that allocating a struct irq_desc for each
> >> possible LPI is not an option, having a struct pending_irq for each LPI
> >> is also not feasible. However we actually only need those when an
> >> interrupt is on a vCPU (or is about to be injected).
> >> Maintain a list of those structs that we can use for the lifecycle of
> >> a guest LPI. We allocate new entries if necessary, however reuse
> >> pre-owned entries whenever possible.
> >> Teach the existing VGIC functions to find the right pointer when being
> >> given a virtual LPI number.
> >>
> >> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> >> ---
> >>  xen/arch/arm/gic.c            |  3 +++
> >>  xen/arch/arm/vgic-v3.c        |  2 ++
> >>  xen/arch/arm/vgic.c           | 56 ++++++++++++++++++++++++++++++++++++++++---
> >>  xen/include/asm-arm/domain.h  |  1 +
> >>  xen/include/asm-arm/gic-its.h | 10 ++++++++
> >>  xen/include/asm-arm/vgic.h    |  9 +++++++
> >>  6 files changed, 78 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> >> index 63c744a..ebe4035 100644
> >> --- a/xen/arch/arm/gic.c
> >> +++ b/xen/arch/arm/gic.c
> >> @@ -506,6 +506,9 @@ static void gic_update_one_lr(struct vcpu *v, int i)
> >>                  struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
> >>                  irq_set_affinity(p->desc, cpumask_of(v_target->processor));
> >>              }
> >> +            /* If this was an LPI, mark this struct as available again. */
> >> +            if ( p->irq >= 8192 )
> >> +                p->irq = 0;
> > 
> > I believe that 0 is a valid irq number, we need to come up with a
> > different invalid_irq value, and we should #define it. We could also
> > consider checking if the irq is inflight (linked to the inflight list)
> > instead of using irq == 0 to understand if it is reusable.
> 
> But those pending_irqs here are used by LPIs only, where everything
> below 8192 is invalid. So that seemed like an easy and straightforward
> value to use. The other, statically allocated pending_irqs would never
> read an IRQ number above 8192. When searching for an empty pending_irq
> for a new LPI, we would never touch any of the statically allocated
> structs, so this is safe, isn't it?

I think you are right. Still, please #define it.


> >>          }
> >>      }
> >>  }
> >> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
> >> index ec038a3..e9b6490 100644
> >> --- a/xen/arch/arm/vgic-v3.c
> >> +++ b/xen/arch/arm/vgic-v3.c
> >> @@ -1388,6 +1388,8 @@ static int vgic_v3_vcpu_init(struct vcpu *v)
> >>      if ( v->vcpu_id == last_cpu || (v->vcpu_id == (d->max_vcpus - 1)) )
> >>          v->arch.vgic.flags |= VGIC_V3_RDIST_LAST;
> >>  
> >> +    INIT_LIST_HEAD(&v->arch.vgic.pending_lpi_list);
> >> +
> >>      return 0;
> >>  }
> >>  
> >> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> >> index 0965119..b961551 100644
> >> --- a/xen/arch/arm/vgic.c
> >> +++ b/xen/arch/arm/vgic.c
> >> @@ -31,6 +31,8 @@
> >>  #include <asm/mmio.h>
> >>  #include <asm/gic.h>
> >>  #include <asm/vgic.h>
> >> +#include <asm/gic_v3_defs.h>
> >> +#include <asm/gic-its.h>
> >>  
> >>  static inline struct vgic_irq_rank *vgic_get_rank(struct vcpu *v, int rank)
> >>  {
> >> @@ -61,7 +63,7 @@ struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int irq)
> >>      return vgic_get_rank(v, rank);
> >>  }
> >>  
> >> -static void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
> >> +void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
> >>  {
> >>      INIT_LIST_HEAD(&p->inflight);
> >>      INIT_LIST_HEAD(&p->lr_queue);
> >> @@ -244,10 +246,14 @@ struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq)
> >>  
> >>  static int vgic_get_virq_priority(struct vcpu *v, unsigned int virq)
> >>  {
> >> -    struct vgic_irq_rank *rank = vgic_rank_irq(v, virq);
> >> +    struct vgic_irq_rank *rank;
> >>      unsigned long flags;
> >>      int priority;
> >>  
> >> +    if ( virq >= 8192 )
> > 
> > Please introduce a convenience static inline function such as:
> > 
> >   bool is_lpi(unsigned int irq)
> 
> Sure.
> 
> >> +        return gicv3_lpi_get_priority(v->domain, virq);
> >> +
> >> +    rank = vgic_rank_irq(v, virq);
> >>      vgic_lock_rank(v, rank, flags);
> >>      priority = rank->priority[virq & INTERRUPT_RANK_MASK];
> >>      vgic_unlock_rank(v, rank, flags);
> >> @@ -446,13 +452,55 @@ int vgic_to_sgi(struct vcpu *v, register_t sgir, enum gic_sgi_mode irqmode, int
> >>      return 1;
> >>  }
> >>  
> >> +/*
> >> + * Holding struct pending_irq's for each possible virtual LPI in each domain
> >> + * requires too much Xen memory, also a malicious guest could potentially
> >> + * spam Xen with LPI map requests. We cannot cover those with (guest allocated)
> >> + * ITS memory, so we use a dynamic scheme of allocating struct pending_irq's
> >> + * on demand.
> >> + */
> >> +struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int lpi,
> >> +                                   bool allocate)
> >> +{
> >> +    struct lpi_pending_irq *lpi_irq, *empty = NULL;
> >> +
> >> +    /* TODO: locking! */
> > 
> > Yeah, this needs to be fixed in v1 :-)
> 
> I fixed that in the RFC v2 post.
> 
> > 
> >> +    list_for_each_entry(lpi_irq, &v->arch.vgic.pending_lpi_list, entry)
> >> +    {
> >> +        if ( lpi_irq->pirq.irq == lpi )
> >> +            return &lpi_irq->pirq;
> >> +
> >> +        if ( lpi_irq->pirq.irq == 0 && !empty )
> >> +            empty = lpi_irq;
> >> +    }
> > 
> > This is another one of those cases where a list is too slow for the hot
> > path. The idea of allocating pending_irq struct on demand is good, but
> > storing them in a linked list would kill performance. Probably the best
> > thing we could do is an hashtable and we should preallocate the initial
> > array of elements. I don't know what the size of the initial array
> > should be, but we can start around 50, and change it in the future once
> > we do tests with real workloads. Of course the other key parameter is
> > the hash function, not sure which one is the right one, but ideally we
> > would never have to allocate new pending_irq struct for LPIs because the
> > preallocated set would suffice.
> 
> As I mentioned in the last post, I expect this number to be really low
> (less than 5). 

Are you able to check this assumption in a real scenario? If not you,
somebody else?


> Let's face it: If you have multiple interrupts pending
> for a significant amount of time you won't make any actual progress in
> the guest, because it's busy with handling interrupts.
> So my picture of LPI handling is:
> 1) A device triggers an MSI, so the host receives the LPI. Ideally this
> will be handled by the pCPU where the right VCPU is running atm, so it
> will exit to EL2. Xen will handle the LPI by assigning one struct
> pending_irq to it and will inject it into the guest.
> 2) The VCPU gets to run again and calls the interrupt handler, because
> the (virtual) LPI is pending.
> 3) The (Linux) IRQ handler reads the ICC_IAR register to learn the IRQ
> number, and will get the virtual LPI number.
> => At this point the LPI is done when it comes to the VGIC. The LR state
> will be set to 0 (neither pending or active). This is independent of the
> EOI the handler will execute soon (or later).
> 4) On the next exit the VGIC code will discover that the IRQ is done
> (LR.state == 0) and will discard the struct pending_irq (set the LPI
> number to 0 to make it available to the next LPI).

I am following


> Even if there would be multiple LPIs pending at the same time (because
> the guest had interrupts disabled, for instance), I believe they can be
> all handled without exiting. Upon EOIing (priority-dropping, really) the
> first LPI, the next virtual LPI would fire, calling the interrupt
> handler again, and so no. Unless the kernel decides to do something that
> exits (even accessing the hardware normally wouldn't, I believe), we can
> clear all pending LPIs in one go.
> 
> So I have a hard time to imagine how we can really have many LPIs
> pending and thus struct pending_irqs allocated.
> Note that this may differ from SPIs, for instance, because the IRQ life
> cycle is more complex there (extending till the EOI).
> 
> Does that make some sense? Or am I missing something here?

In my tests with much smaller platforms than the ones existing today, I
could easily have 2-3 interrupts pending at the same time without much
load and without any SR-IOV NICs or any other fancy PCIE hardware. It
would be nice to test on Cavium ThunderX for example. It's also easy to
switch to rbtrees.


> > I could be convinced that a list is sufficient if we do some real
> > benchmarking and it turns out that lpi_to_pending always resolve in less
> > than ~5 steps.
> 
> I can try to do this once I get it running on some silicon ...
> 
> >> +    if ( !allocate )
> >> +        return NULL;
> >> +
> >> +    if ( !empty )
> >> +    {
> >> +        empty = xzalloc(struct lpi_pending_irq);
> >> +        vgic_init_pending_irq(&empty->pirq, lpi);
> >> +        list_add_tail(&empty->entry, &v->arch.vgic.pending_lpi_list);
> >> +    } else
> >> +    {
> >> +        empty->pirq.status = 0;
> >> +        empty->pirq.irq = lpi;
> >> +    }
> >> +
> >> +    return &empty->pirq;
> >> +}
> >> +
> >>  struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq)
> >>  {
> >>      struct pending_irq *n;
> >> +
> > 
> > spurious change
> > 
> > 
> >>      /* Pending irqs allocation strategy: the first vgic.nr_spis irqs
> >>       * are used for SPIs; the rests are used for per cpu irqs */
> >>      if ( irq < 32 )
> >>          n = &v->arch.vgic.pending_irqs[irq];
> >> +    else if ( irq >= 8192 )
> > 
> > Use the new static inline
> > 
> > 
> >> +        n = lpi_to_pending(v, irq, true);
> >>      else
> >>          n = &v->domain->arch.vgic.pending_irqs[irq - 32];
> >>      return n;
> >> @@ -480,7 +528,7 @@ void vgic_clear_pending_irqs(struct vcpu *v)
> >>  void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq)
> >>  {
> >>      uint8_t priority;
> >> -    struct pending_irq *iter, *n = irq_to_pending(v, virq);
> >> +    struct pending_irq *iter, *n;
> >>      unsigned long flags;
> >>      bool_t running;
> >>  
> >> @@ -488,6 +536,8 @@ void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq)
> >>  
> >>      spin_lock_irqsave(&v->arch.vgic.lock, flags);
> >>  
> >> +    n = irq_to_pending(v, virq);
> > 
> > Why this change?
> 
> Because we now need to hold the lock before calling irq_to_pending(),
> which now may call lpi_to_pending().
> 
> >>      /* vcpu offline */
> >>      if ( test_bit(_VPF_down, &v->pause_flags) )
> >>      {
> >> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> >> index 9452fcd..ae8a9de 100644
> >> --- a/xen/include/asm-arm/domain.h
> >> +++ b/xen/include/asm-arm/domain.h
> >> @@ -249,6 +249,7 @@ struct arch_vcpu
> >>          paddr_t rdist_base;
> >>  #define VGIC_V3_RDIST_LAST  (1 << 0)        /* last vCPU of the rdist */
> >>          uint8_t flags;
> >> +        struct list_head pending_lpi_list;
> >>      } vgic;
> >>  
> >>      /* Timer registers  */
> >> diff --git a/xen/include/asm-arm/gic-its.h b/xen/include/asm-arm/gic-its.h
> >> index 4e9841a..1f881c0 100644
> >> --- a/xen/include/asm-arm/gic-its.h
> >> +++ b/xen/include/asm-arm/gic-its.h
> >> @@ -136,6 +136,12 @@ int gicv3_lpi_allocate_host_lpi(struct host_its *its,
> >>  int gicv3_lpi_drop_host_lpi(struct host_its *its,
> >>                              uint32_t devid, uint32_t eventid,
> >>                              uint32_t host_lpi);
> >> +
> >> +static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
> >> +{
> >> +    return GIC_PRI_IRQ;
> >> +}
> > 
> > Does it mean that we don't allow changes to LPI priorities?
> 
> This is placeholder code for now, until we learn about the virtual
> property table in patch 11/24 (where this function gets amended).
> The new code drop gets away without this function here entirely.
> 
> Cheers,
> Andre.
> 
> >>  #else
> >>  
> >>  static inline void gicv3_its_dt_init(const struct dt_device_node *node)
> >> @@ -175,6 +181,10 @@ static inline int gicv3_lpi_drop_host_lpi(struct host_its *its,
> >>  {
> >>      return 0;
> >>  }
> >> +static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
> >> +{
> >> +    return GIC_PRI_IRQ;
> >> +}
> >>  
> >>  #endif /* CONFIG_HAS_ITS */
> >>  
> >> diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
> >> index 300f461..4e29ba6 100644
> >> --- a/xen/include/asm-arm/vgic.h
> >> +++ b/xen/include/asm-arm/vgic.h
> >> @@ -83,6 +83,12 @@ struct pending_irq
> >>      struct list_head lr_queue;
> >>  };
> >>  
> >> +struct lpi_pending_irq
> >> +{
> >> +    struct list_head entry;
> >> +    struct pending_irq pirq;
> >> +};
> >> +
> >>  #define NR_INTERRUPT_PER_RANK   32
> >>  #define INTERRUPT_RANK_MASK (NR_INTERRUPT_PER_RANK - 1)
> >>  
> >> @@ -296,8 +302,11 @@ extern struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq);
> >>  extern void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq);
> >>  extern void vgic_vcpu_inject_spi(struct domain *d, unsigned int virq);
> >>  extern void vgic_clear_pending_irqs(struct vcpu *v);
> >> +extern void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq);
> >>  extern struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq);
> >>  extern struct pending_irq *spi_to_pending(struct domain *d, unsigned int irq);
> >> +extern struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int irq,
> >> +                                          bool allocate);
> >>  extern struct vgic_irq_rank *vgic_rank_offset(struct vcpu *v, int b, int n, int s);
> >>  extern struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int irq);
> >>  extern int vgic_emulate(struct cpu_user_regs *regs, union hsr hsr);
> >> -- 
> >> 2.9.0
> >>
> > 
>
Andre Przywara Jan. 16, 2017, 9:44 a.m. UTC | #7
On 13/01/17 19:37, Stefano Stabellini wrote:
> On Thu, 12 Jan 2017, Andre Przywara wrote:

Hi Stefano,

...

>>>> +    list_for_each_entry(lpi_irq, &v->arch.vgic.pending_lpi_list, entry)
>>>> +    {
>>>> +        if ( lpi_irq->pirq.irq == lpi )
>>>> +            return &lpi_irq->pirq;
>>>> +
>>>> +        if ( lpi_irq->pirq.irq == 0 && !empty )
>>>> +            empty = lpi_irq;
>>>> +    }
>>>
>>> This is another one of those cases where a list is too slow for the hot
>>> path. The idea of allocating pending_irq struct on demand is good, but
>>> storing them in a linked list would kill performance. Probably the best
>>> thing we could do is an hashtable and we should preallocate the initial
>>> array of elements. I don't know what the size of the initial array
>>> should be, but we can start around 50, and change it in the future once
>>> we do tests with real workloads. Of course the other key parameter is
>>> the hash function, not sure which one is the right one, but ideally we
>>> would never have to allocate new pending_irq struct for LPIs because the
>>> preallocated set would suffice.
>>
>> As I mentioned in the last post, I expect this number to be really low
>> (less than 5). 
> 
> Are you able to check this assumption in a real scenario? If not you,
> somebody else?
> 
> 
>> Let's face it: If you have multiple interrupts pending
>> for a significant amount of time you won't make any actual progress in
>> the guest, because it's busy with handling interrupts.
>> So my picture of LPI handling is:
>> 1) A device triggers an MSI, so the host receives the LPI. Ideally this
>> will be handled by the pCPU where the right VCPU is running atm, so it
>> will exit to EL2. Xen will handle the LPI by assigning one struct
>> pending_irq to it and will inject it into the guest.
>> 2) The VCPU gets to run again and calls the interrupt handler, because
>> the (virtual) LPI is pending.
>> 3) The (Linux) IRQ handler reads the ICC_IAR register to learn the IRQ
>> number, and will get the virtual LPI number.
>> => At this point the LPI is done when it comes to the VGIC. The LR state
>> will be set to 0 (neither pending or active). This is independent of the
>> EOI the handler will execute soon (or later).
>> 4) On the next exit the VGIC code will discover that the IRQ is done
>> (LR.state == 0) and will discard the struct pending_irq (set the LPI
>> number to 0 to make it available to the next LPI).
> 
> I am following
> 
> 
>> Even if there would be multiple LPIs pending at the same time (because
>> the guest had interrupts disabled, for instance), I believe they can be
>> all handled without exiting. Upon EOIing (priority-dropping, really) the
>> first LPI, the next virtual LPI would fire, calling the interrupt
>> handler again, and so no. Unless the kernel decides to do something that
>> exits (even accessing the hardware normally wouldn't, I believe), we can
>> clear all pending LPIs in one go.
>>
>> So I have a hard time to imagine how we can really have many LPIs
>> pending and thus struct pending_irqs allocated.
>> Note that this may differ from SPIs, for instance, because the IRQ life
>> cycle is more complex there (extending till the EOI).
>>
>> Does that make some sense? Or am I missing something here?
> 
> In my tests with much smaller platforms than the ones existing today, I
> could easily have 2-3 interrupts pending at the same time without much
> load and without any SR-IOV NICs or any other fancy PCIE hardware.

The difference to LPIs is that SPIs can be level triggered (eventually
requiring a driver to delete the interrupt condition in the device),
also require an explicit deactivation to finish off the IRQ state machine.
Both these things will lead to an IRQ to stay much longer in the LRs
than one would expect for an always edge triggered LPI lacking an active
state.
Also the timer IRQ is a PPI and thus a frequent visitor in the LRs.

> It would be nice to test on Cavium ThunderX for example.

Yes, I agree that there is quite some guessing involved, so proving this
sounds like a worthwhile task.

> It's also easy to switch to rbtrees.

On Friday I looked at rbtrees in Xen, which thankfully seem to be the
same as in Linux. So I converted the its_devices list over.

But in this case here I don't believe that rbtrees are the best data
structure, since we frequently need to look up entries, but also need to
find new, empty ones (when an LPI has fired).
And for allocating new LPIs we don't need a certain slot; just any free one
would do. We probably want to avoid actually malloc-ing pending_irq
structures for that.

So a hash table with open addressing sounds like a better fit here:
- With a clever hash function (taking, for instance, Linux's LPI allocation
scheme into account) we get very quick lookup times for already assigned
LPIs.
- Assigning an LPI would use the same hash function, probably finding an
unused pending_irq, which we then could easily allocate.

I will try to write something along those lines.

Cheers,
Andre.
Stefano Stabellini Jan. 16, 2017, 7:16 p.m. UTC | #8
On Mon, 16 Jan 2017, André Przywara wrote:
> On 13/01/17 19:37, Stefano Stabellini wrote:
> > On Thu, 12 Jan 2017, Andre Przywara wrote:
> 
> Hi Stefano,
> 
> ...
> 
> >>>> +    list_for_each_entry(lpi_irq, &v->arch.vgic.pending_lpi_list, entry)
> >>>> +    {
> >>>> +        if ( lpi_irq->pirq.irq == lpi )
> >>>> +            return &lpi_irq->pirq;
> >>>> +
> >>>> +        if ( lpi_irq->pirq.irq == 0 && !empty )
> >>>> +            empty = lpi_irq;
> >>>> +    }
> >>>
> >>> This is another one of those cases where a list is too slow for the hot
> >>> path. The idea of allocating pending_irq struct on demand is good, but
> >>> storing them in a linked list would kill performance. Probably the best
> >>> thing we could do is an hashtable and we should preallocate the initial
> >>> array of elements. I don't know what the size of the initial array
> >>> should be, but we can start around 50, and change it in the future once
> >>> we do tests with real workloads. Of course the other key parameter is
> >>> the hash function, not sure which one is the right one, but ideally we
> >>> would never have to allocate new pending_irq struct for LPIs because the
> >>> preallocated set would suffice.
> >>
> >> As I mentioned in the last post, I expect this number to be really low
> >> (less than 5). 
> > 
> > Are you able to check this assumption in a real scenario? If not you,
> > somebody else?
> > 
> > 
> >> Let's face it: If you have multiple interrupts pending
> >> for a significant amount of time you won't make any actual progress in
> >> the guest, because it's busy with handling interrupts.
> >> So my picture of LPI handling is:
> >> 1) A device triggers an MSI, so the host receives the LPI. Ideally this
> >> will be handled by the pCPU where the right VCPU is running atm, so it
> >> will exit to EL2. Xen will handle the LPI by assigning one struct
> >> pending_irq to it and will inject it into the guest.
> >> 2) The VCPU gets to run again and calls the interrupt handler, because
> >> the (virtual) LPI is pending.
> >> 3) The (Linux) IRQ handler reads the ICC_IAR register to learn the IRQ
> >> number, and will get the virtual LPI number.
> >> => At this point the LPI is done when it comes to the VGIC. The LR state
> >> will be set to 0 (neither pending or active). This is independent of the
> >> EOI the handler will execute soon (or later).
> >> 4) On the next exit the VGIC code will discover that the IRQ is done
> >> (LR.state == 0) and will discard the struct pending_irq (set the LPI
> >> number to 0 to make it available to the next LPI).
> > 
> > I am following
> > 
> > 
> >> Even if there would be multiple LPIs pending at the same time (because
> >> the guest had interrupts disabled, for instance), I believe they can be
> >> all handled without exiting. Upon EOIing (priority-dropping, really) the
> >> first LPI, the next virtual LPI would fire, calling the interrupt
> >> handler again, and so no. Unless the kernel decides to do something that
> >> exits (even accessing the hardware normally wouldn't, I believe), we can
> >> clear all pending LPIs in one go.
> >>
> >> So I have a hard time to imagine how we can really have many LPIs
> >> pending and thus struct pending_irqs allocated.
> >> Note that this may differ from SPIs, for instance, because the IRQ life
> >> cycle is more complex there (extending till the EOI).
> >>
> >> Does that make some sense? Or am I missing something here?
> > 
> > In my tests with much smaller platforms than the ones existing today, I
> > could easily have 2-3 interrupts pending at the same time without much
> > load and without any SR-IOV NICs or any other fancy PCIE hardware.
> 
> The difference to LPIs is that SPIs can be level triggered (eventually
> requiring a driver to delete the interrupt condition in the device),
> also require an explicit deactivation to finish off the IRQ state machine.
> Both these things will lead to an IRQ to stay much longer in the LRs
> than one would expect for an always edge triggered LPI lacking an active
> state.
> Also the timer IRQ is a PPI and thus a frequent visitor in the LRs.
> 
> > It would be nice to test on Cavium ThunderX for example.
> 
> Yes, I agree that there is quite some guessing involved, so proving this
> sounds like a worthwhile task.
> 
> > It's also easy to switch to rbtrees.
> 
> On Friday I looked at rbtrees in Xen, which thankfully seem to be the
> same as in Linux. So I converted the its_devices list over.
> 
> But in this case here I don't believe that rbtrees are the best data
> structure, since we frequently need to look up entries, but also need to
> find new, empty ones (when an LPI has fired).
> And for allocating new LPIs we don't need a certain slot, just any free
> would do. We probably want to avoid actually malloc-ing pending_irq
> structures for that.
> 
> So a hash table with open addressing sounds like a better fit here:
> - With a clever hash function (taken for instance Linux' LPI allocation
> scheme into account) we get very quick lookup times for already assigned
> LPIs.
> - Assigning an LPI would use the same hash function, probably finding an
> unused pending_irq, which we then could easily allocate.
> 
> I will try to write something along those lines.

Thanks Andre, indeed it sounds like a better option.
diff mbox

Patch

diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index 63c744a..ebe4035 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -506,6 +506,9 @@  static void gic_update_one_lr(struct vcpu *v, int i)
                 struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
                 irq_set_affinity(p->desc, cpumask_of(v_target->processor));
             }
+            /* If this was an LPI, mark this struct as available again. */
+            if ( p->irq >= 8192 )
+                p->irq = 0;
         }
     }
 }
diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
index ec038a3..e9b6490 100644
--- a/xen/arch/arm/vgic-v3.c
+++ b/xen/arch/arm/vgic-v3.c
@@ -1388,6 +1388,8 @@  static int vgic_v3_vcpu_init(struct vcpu *v)
     if ( v->vcpu_id == last_cpu || (v->vcpu_id == (d->max_vcpus - 1)) )
         v->arch.vgic.flags |= VGIC_V3_RDIST_LAST;
 
+    INIT_LIST_HEAD(&v->arch.vgic.pending_lpi_list);
+
     return 0;
 }
 
diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index 0965119..b961551 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -31,6 +31,8 @@ 
 #include <asm/mmio.h>
 #include <asm/gic.h>
 #include <asm/vgic.h>
+#include <asm/gic_v3_defs.h>
+#include <asm/gic-its.h>
 
 static inline struct vgic_irq_rank *vgic_get_rank(struct vcpu *v, int rank)
 {
@@ -61,7 +63,7 @@  struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int irq)
     return vgic_get_rank(v, rank);
 }
 
-static void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
+void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq)
 {
     INIT_LIST_HEAD(&p->inflight);
     INIT_LIST_HEAD(&p->lr_queue);
@@ -244,10 +246,14 @@  struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq)
 
 static int vgic_get_virq_priority(struct vcpu *v, unsigned int virq)
 {
-    struct vgic_irq_rank *rank = vgic_rank_irq(v, virq);
+    struct vgic_irq_rank *rank;
     unsigned long flags;
     int priority;
 
+    if ( virq >= 8192 )
+        return gicv3_lpi_get_priority(v->domain, virq);
+
+    rank = vgic_rank_irq(v, virq);
     vgic_lock_rank(v, rank, flags);
     priority = rank->priority[virq & INTERRUPT_RANK_MASK];
     vgic_unlock_rank(v, rank, flags);
@@ -446,13 +452,55 @@  int vgic_to_sgi(struct vcpu *v, register_t sgir, enum gic_sgi_mode irqmode, int
     return 1;
 }
 
+/*
+ * Holding struct pending_irq's for each possible virtual LPI in each domain
+ * requires too much Xen memory, also a malicious guest could potentially
+ * spam Xen with LPI map requests. We cannot cover those with (guest allocated)
+ * ITS memory, so we use a dynamic scheme of allocating struct pending_irq's
+ * on demand.
+ */
+/*
+ * Find the struct pending_irq belonging to a given virtual LPI on a VCPU,
+ * optionally allocating one if the LPI is not yet on the list.
+ * Unused entries (pirq.irq == 0) are reused before new memory is allocated.
+ * Returns NULL if @allocate is false and the LPI was not found, or if
+ * allocating a new entry failed.
+ */
+struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int lpi,
+                                   bool allocate)
+{
+    struct lpi_pending_irq *lpi_irq, *empty = NULL;
+
+    /* TODO: locking! */
+    list_for_each_entry(lpi_irq, &v->arch.vgic.pending_lpi_list, entry)
+    {
+        if ( lpi_irq->pirq.irq == lpi )
+            return &lpi_irq->pirq;
+
+        /* Remember the first reusable (unassigned) entry we pass. */
+        if ( lpi_irq->pirq.irq == 0 && !empty )
+            empty = lpi_irq;
+    }
+
+    if ( !allocate )
+        return NULL;
+
+    if ( !empty )
+    {
+        empty = xzalloc(struct lpi_pending_irq);
+        if ( !empty )
+            return NULL;
+        vgic_init_pending_irq(&empty->pirq, lpi);
+        list_add_tail(&empty->entry, &v->arch.vgic.pending_lpi_list);
+    }
+    else
+    {
+        /* Reuse a pre-owned entry: reset its state and assign the new LPI. */
+        empty->pirq.status = 0;
+        empty->pirq.irq = lpi;
+    }
+
+    return &empty->pirq;
+}
+
 struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq)
 {
     struct pending_irq *n;
+
     /* Pending irqs allocation strategy: the first vgic.nr_spis irqs
      * are used for SPIs; the rests are used for per cpu irqs */
     if ( irq < 32 )
         n = &v->arch.vgic.pending_irqs[irq];
+    else if ( irq >= 8192 )
+        n = lpi_to_pending(v, irq, true);
     else
         n = &v->domain->arch.vgic.pending_irqs[irq - 32];
     return n;
@@ -480,7 +528,7 @@  void vgic_clear_pending_irqs(struct vcpu *v)
 void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq)
 {
     uint8_t priority;
-    struct pending_irq *iter, *n = irq_to_pending(v, virq);
+    struct pending_irq *iter, *n;
     unsigned long flags;
     bool_t running;
 
@@ -488,6 +536,8 @@  void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq)
 
     spin_lock_irqsave(&v->arch.vgic.lock, flags);
 
+    n = irq_to_pending(v, virq);
+
     /* vcpu offline */
     if ( test_bit(_VPF_down, &v->pause_flags) )
     {
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 9452fcd..ae8a9de 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -249,6 +249,7 @@  struct arch_vcpu
         paddr_t rdist_base;
 #define VGIC_V3_RDIST_LAST  (1 << 0)        /* last vCPU of the rdist */
         uint8_t flags;
+        struct list_head pending_lpi_list;
     } vgic;
 
     /* Timer registers  */
diff --git a/xen/include/asm-arm/gic-its.h b/xen/include/asm-arm/gic-its.h
index 4e9841a..1f881c0 100644
--- a/xen/include/asm-arm/gic-its.h
+++ b/xen/include/asm-arm/gic-its.h
@@ -136,6 +136,12 @@  int gicv3_lpi_allocate_host_lpi(struct host_its *its,
 int gicv3_lpi_drop_host_lpi(struct host_its *its,
                             uint32_t devid, uint32_t eventid,
                             uint32_t host_lpi);
+
+/*
+ * Return the priority of a given virtual LPI.
+ * No per-LPI priority is stored anywhere here, so every LPI gets the
+ * default interrupt priority; @d and @lpi are currently ignored.
+ */
+static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
+{
+    return GIC_PRI_IRQ;
+}
+
 #else
 
 static inline void gicv3_its_dt_init(const struct dt_device_node *node)
@@ -175,6 +181,10 @@  static inline int gicv3_lpi_drop_host_lpi(struct host_its *its,
 {
     return 0;
 }
+/*
+ * Stub for builds without ITS support (!CONFIG_HAS_ITS): report the
+ * default interrupt priority for any LPI; @d and @lpi are ignored.
+ */
+static inline int gicv3_lpi_get_priority(struct domain *d, uint32_t lpi)
+{
+    return GIC_PRI_IRQ;
+}
 
 #endif /* CONFIG_HAS_ITS */
 
diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
index 300f461..4e29ba6 100644
--- a/xen/include/asm-arm/vgic.h
+++ b/xen/include/asm-arm/vgic.h
@@ -83,6 +83,12 @@  struct pending_irq
     struct list_head lr_queue;
 };
 
+struct lpi_pending_irq
+{
+    struct list_head entry;
+    struct pending_irq pirq;
+};
+
 #define NR_INTERRUPT_PER_RANK   32
 #define INTERRUPT_RANK_MASK (NR_INTERRUPT_PER_RANK - 1)
 
@@ -296,8 +302,11 @@  extern struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq);
 extern void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int virq);
 extern void vgic_vcpu_inject_spi(struct domain *d, unsigned int virq);
 extern void vgic_clear_pending_irqs(struct vcpu *v);
+extern void vgic_init_pending_irq(struct pending_irq *p, unsigned int virq);
 extern struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq);
 extern struct pending_irq *spi_to_pending(struct domain *d, unsigned int irq);
+extern struct pending_irq *lpi_to_pending(struct vcpu *v, unsigned int irq,
+                                          bool allocate);
 extern struct vgic_irq_rank *vgic_rank_offset(struct vcpu *v, int b, int n, int s);
 extern struct vgic_irq_rank *vgic_rank_irq(struct vcpu *v, unsigned int irq);
 extern int vgic_emulate(struct cpu_user_regs *regs, union hsr hsr);