diff mbox

[v11,01/34] ARM: vGIC: avoid rank lock when reading priority

Message ID 20170609174141.5068-2-andre.przywara@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andre Przywara June 9, 2017, 5:41 p.m. UTC
When reading the priority value of a virtual interrupt, we were taking
the respective rank lock so far.
However for forwarded interrupts (Dom0 only so far) this may lead to a
deadlock with the following call chain:
- MMIO access to change the IRQ affinity, calling the ITARGETSR handler
- this handler takes the appropriate rank lock and calls vgic_store_itargetsr()
- vgic_store_itargetsr() will eventually call vgic_migrate_irq()
- if this IRQ is already in-flight, it will remove it from the old
  VCPU and inject it into the new one, by calling vgic_vcpu_inject_irq()
- vgic_vcpu_inject_irq will call vgic_get_virq_priority()
- vgic_get_virq_priority() tries to take the rank lock - again!
It seems like this code path has never been exercised before.

Fix this by avoiding taking the lock in vgic_get_virq_priority() (like we
do in vgic_get_target_vcpu()).
Actually we are just reading one byte, and priority changes while
interrupts are handled are a benign race that can happen on real hardware
too. So it is safe to just prevent the compiler from reading from the
struct more than once.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 xen/arch/arm/vgic-v2.c | 13 ++++++++-----
 xen/arch/arm/vgic-v3.c | 11 +++++++----
 xen/arch/arm/vgic.c    |  8 +-------
 3 files changed, 16 insertions(+), 16 deletions(-)

Comments

Julien Grall June 12, 2017, 2:49 p.m. UTC | #1
Hi Andre,

On 09/06/17 18:41, Andre Przywara wrote:
> When reading the priority value of a virtual interrupt, we were taking
> the respective rank lock so far.
> However for forwarded interrupts (Dom0 only so far) this may lead to a
> deadlock with the following call chain:
> - MMIO access to change the IRQ affinity, calling the ITARGETSR handler
> - this handler takes the appropriate rank lock and calls vgic_store_itargetsr()
> - vgic_store_itargetsr() will eventually call vgic_migrate_irq()
> - if this IRQ is already in-flight, it will remove it from the old
>   VCPU and inject it into the new one, by calling vgic_vcpu_inject_irq()
> - vgic_vcpu_inject_irq will call vgic_get_virq_priority()
> - vgic_get_virq_priority() tries to take the rank lock - again!
> It seems like this code path has never been exercised before.
>
> Fix this by avoiding taking the lock in vgic_get_virq_priority() (like we
> do in vgic_get_target_vcpu()).
> Actually we are just reading one byte, and priority changes while
> interrupts are handled are a benign race that can happen on real hardware
> too. So it is safe to just prevent the compiler from reading from the
> struct more than once.
>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  xen/arch/arm/vgic-v2.c | 13 ++++++++-----
>  xen/arch/arm/vgic-v3.c | 11 +++++++----
>  xen/arch/arm/vgic.c    |  8 +-------
>  3 files changed, 16 insertions(+), 16 deletions(-)
>
> diff --git a/xen/arch/arm/vgic-v2.c b/xen/arch/arm/vgic-v2.c
> index dc9f95b..5370020 100644
> --- a/xen/arch/arm/vgic-v2.c
> +++ b/xen/arch/arm/vgic-v2.c
> @@ -258,9 +258,9 @@ static int vgic_v2_distr_mmio_read(struct vcpu *v, mmio_info_t *info,
>          if ( rank == NULL ) goto read_as_zero;
>
>          vgic_lock_rank(v, rank, flags);
> -        ipriorityr = rank->ipriorityr[REG_RANK_INDEX(8,
> -                                                     gicd_reg - GICD_IPRIORITYR,
> -                                                     DABT_WORD)];
> +        ipriorityr = ACCESS_ONCE(rank->ipriorityr[REG_RANK_INDEX(8,
> +                                     gicd_reg - GICD_IPRIORITYR,
> +                                     DABT_WORD)]);

The indentation is a bit odd. Can you introduce a temporary variable here?

>          vgic_unlock_rank(v, rank, flags);
>          *r = vgic_reg32_extract(ipriorityr, info);
>
> @@ -499,7 +499,7 @@ static int vgic_v2_distr_mmio_write(struct vcpu *v, mmio_info_t *info,
>
>      case VRANGE32(GICD_IPRIORITYR, GICD_IPRIORITYRN):
>      {
> -        uint32_t *ipriorityr;
> +        uint32_t *ipriorityr, priority;
>
>          if ( dabt.size != DABT_BYTE && dabt.size != DABT_WORD ) goto bad_width;
>          rank = vgic_rank_offset(v, 8, gicd_reg - GICD_IPRIORITYR, DABT_WORD);
> @@ -508,7 +508,10 @@ static int vgic_v2_distr_mmio_write(struct vcpu *v, mmio_info_t *info,
>          ipriorityr = &rank->ipriorityr[REG_RANK_INDEX(8,
>                                                        gicd_reg - GICD_IPRIORITYR,
>                                                        DABT_WORD)];
> -        vgic_reg32_update(ipriorityr, r, info);
> +        priority = ACCESS_ONCE(*ipriorityr);
> +        vgic_reg32_update(&priority, r, info);
> +        ACCESS_ONCE(*ipriorityr) = priority;

This is a bit odd to read because of the dereferencing. I admit that I 
would prefer if you use read_atomic/write_atomic which are easier to 
understand (though the naming is confusing).

Let see what Stefano thinks here.

> +
>          vgic_unlock_rank(v, rank, flags);
>          return 1;
>      }
> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
> index d10757a..8abc069 100644
> --- a/xen/arch/arm/vgic-v3.c
> +++ b/xen/arch/arm/vgic-v3.c
> @@ -521,8 +521,9 @@ static int __vgic_v3_distr_common_mmio_read(const char *name, struct vcpu *v,
>          if ( rank == NULL ) goto read_as_zero;
>
>          vgic_lock_rank(v, rank, flags);
> -        ipriorityr = rank->ipriorityr[REG_RANK_INDEX(8, reg - GICD_IPRIORITYR,
> -                                                     DABT_WORD)];
> +        ipriorityr = ACCESS_ONCE(rank->ipriorityr[REG_RANK_INDEX(8,
> +                                     reg - GICD_IPRIORITYR,
> +                                     DABT_WORD)]);

Ditto.


>          vgic_unlock_rank(v, rank, flags);
>
>          *r = vgic_reg32_extract(ipriorityr, info);
> @@ -630,7 +631,7 @@ static int __vgic_v3_distr_common_mmio_write(const char *name, struct vcpu *v,
>
>      case VRANGE32(GICD_IPRIORITYR, GICD_IPRIORITYRN):
>      {
> -        uint32_t *ipriorityr;
> +        uint32_t *ipriorityr, priority;
>
>          if ( dabt.size != DABT_BYTE && dabt.size != DABT_WORD ) goto bad_width;
>          rank = vgic_rank_offset(v, 8, reg - GICD_IPRIORITYR, DABT_WORD);
> @@ -638,7 +639,9 @@ static int __vgic_v3_distr_common_mmio_write(const char *name, struct vcpu *v,
>          vgic_lock_rank(v, rank, flags);
>          ipriorityr = &rank->ipriorityr[REG_RANK_INDEX(8, reg - GICD_IPRIORITYR,
>                                                        DABT_WORD)];
> -        vgic_reg32_update(ipriorityr, r, info);
> +        priority = ACCESS_ONCE(*ipriorityr);
> +        vgic_reg32_update(&priority, r, info);
> +        ACCESS_ONCE(*ipriorityr) = priority;

Ditto.

>          vgic_unlock_rank(v, rank, flags);
>          return 1;
>      }
> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> index 83569b0..18fe420 100644
> --- a/xen/arch/arm/vgic.c
> +++ b/xen/arch/arm/vgic.c
> @@ -227,14 +227,8 @@ struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq)
>  static int vgic_get_virq_priority(struct vcpu *v, unsigned int virq)
>  {
>      struct vgic_irq_rank *rank = vgic_rank_irq(v, virq);
> -    unsigned long flags;
> -    int priority;
> -
> -    vgic_lock_rank(v, rank, flags);
> -    priority = rank->priority[virq & INTERRUPT_RANK_MASK];
> -    vgic_unlock_rank(v, rank, flags);
>
> -    return priority;
> +    return ACCESS_ONCE(rank->priority[virq & INTERRUPT_RANK_MASK]);
>  }
>
>  bool vgic_migrate_irq(struct vcpu *old, struct vcpu *new, unsigned int irq)
>

Cheers,
Stefano Stabellini June 12, 2017, 10:34 p.m. UTC | #2
On Mon, 12 Jun 2017, Julien Grall wrote:
> Hi Andre,
> 
> On 09/06/17 18:41, Andre Przywara wrote:
> > When reading the priority value of a virtual interrupt, we were taking
> > the respective rank lock so far.
> > However for forwarded interrupts (Dom0 only so far) this may lead to a
> > deadlock with the following call chain:
> > - MMIO access to change the IRQ affinity, calling the ITARGETSR handler
> > - this handler takes the appropriate rank lock and calls
> > vgic_store_itargetsr()
> > - vgic_store_itargetsr() will eventually call vgic_migrate_irq()
> > - if this IRQ is already in-flight, it will remove it from the old
> >   VCPU and inject it into the new one, by calling vgic_vcpu_inject_irq()
> > - vgic_vcpu_inject_irq will call vgic_get_virq_priority()
> > - vgic_get_virq_priority() tries to take the rank lock - again!
> > It seems like this code path has never been exercised before.
> > 
> > Fix this by avoiding taking the lock in vgic_get_virq_priority() (like we
> > do in vgic_get_target_vcpu()).
> > Actually we are just reading one byte, and priority changes while
> > interrupts are handled are a benign race that can happen on real hardware
> > too. So it is safe to just prevent the compiler from reading from the
> > struct more than once.
> > 
> > Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> > ---
> >  xen/arch/arm/vgic-v2.c | 13 ++++++++-----
> >  xen/arch/arm/vgic-v3.c | 11 +++++++----
> >  xen/arch/arm/vgic.c    |  8 +-------
> >  3 files changed, 16 insertions(+), 16 deletions(-)
> > 
> > diff --git a/xen/arch/arm/vgic-v2.c b/xen/arch/arm/vgic-v2.c
> > index dc9f95b..5370020 100644
> > --- a/xen/arch/arm/vgic-v2.c
> > +++ b/xen/arch/arm/vgic-v2.c
> > @@ -258,9 +258,9 @@ static int vgic_v2_distr_mmio_read(struct vcpu *v,
> > mmio_info_t *info,
> >          if ( rank == NULL ) goto read_as_zero;
> > 
> >          vgic_lock_rank(v, rank, flags);
> > -        ipriorityr = rank->ipriorityr[REG_RANK_INDEX(8,
> > -                                                     gicd_reg -
> > GICD_IPRIORITYR,
> > -                                                     DABT_WORD)];
> > +        ipriorityr = ACCESS_ONCE(rank->ipriorityr[REG_RANK_INDEX(8,
> > +                                     gicd_reg - GICD_IPRIORITYR,
> > +                                     DABT_WORD)]);
> 
> The indentation is a bit odd. Can you introduce a temporary variable here?
> 
> >          vgic_unlock_rank(v, rank, flags);
> >          *r = vgic_reg32_extract(ipriorityr, info);
> > 
> > @@ -499,7 +499,7 @@ static int vgic_v2_distr_mmio_write(struct vcpu *v,
> > mmio_info_t *info,
> > 
> >      case VRANGE32(GICD_IPRIORITYR, GICD_IPRIORITYRN):
> >      {
> > -        uint32_t *ipriorityr;
> > +        uint32_t *ipriorityr, priority;
> > 
> >          if ( dabt.size != DABT_BYTE && dabt.size != DABT_WORD ) goto
> > bad_width;
> >          rank = vgic_rank_offset(v, 8, gicd_reg - GICD_IPRIORITYR,
> > DABT_WORD);
> > @@ -508,7 +508,10 @@ static int vgic_v2_distr_mmio_write(struct vcpu *v,
> > mmio_info_t *info,
> >          ipriorityr = &rank->ipriorityr[REG_RANK_INDEX(8,
> >                                                        gicd_reg -
> > GICD_IPRIORITYR,
> >                                                        DABT_WORD)];
> > -        vgic_reg32_update(ipriorityr, r, info);
> > +        priority = ACCESS_ONCE(*ipriorityr);
> > +        vgic_reg32_update(&priority, r, info);
> > +        ACCESS_ONCE(*ipriorityr) = priority;
> 
> This is a bit odd to read because of the dereferencing. I admit that I would
> prefer if you use read_atomic/write_atomic which are easier to understand
> (though the naming is confusing).
> 
> Let see what Stefano thinks here.

I also prefer *_atomic, especially given what Jan wrote about
ACCESS_ONCE:

  Plus ACCESS_ONCE() doesn't enforce a single instruction to be used in
  the resulting assembly).



> > +
> >          vgic_unlock_rank(v, rank, flags);
> >          return 1;
> >      }
> > diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
> > index d10757a..8abc069 100644
> > --- a/xen/arch/arm/vgic-v3.c
> > +++ b/xen/arch/arm/vgic-v3.c
> > @@ -521,8 +521,9 @@ static int __vgic_v3_distr_common_mmio_read(const char
> > *name, struct vcpu *v,
> >          if ( rank == NULL ) goto read_as_zero;
> > 
> >          vgic_lock_rank(v, rank, flags);
> > -        ipriorityr = rank->ipriorityr[REG_RANK_INDEX(8, reg -
> > GICD_IPRIORITYR,
> > -                                                     DABT_WORD)];
> > +        ipriorityr = ACCESS_ONCE(rank->ipriorityr[REG_RANK_INDEX(8,
> > +                                     reg - GICD_IPRIORITYR,
> > +                                     DABT_WORD)]);
> 
> Ditto.
> 
> 
> >          vgic_unlock_rank(v, rank, flags);
> > 
> >          *r = vgic_reg32_extract(ipriorityr, info);
> > @@ -630,7 +631,7 @@ static int __vgic_v3_distr_common_mmio_write(const char
> > *name, struct vcpu *v,
> > 
> >      case VRANGE32(GICD_IPRIORITYR, GICD_IPRIORITYRN):
> >      {
> > -        uint32_t *ipriorityr;
> > +        uint32_t *ipriorityr, priority;
> > 
> >          if ( dabt.size != DABT_BYTE && dabt.size != DABT_WORD ) goto
> > bad_width;
> >          rank = vgic_rank_offset(v, 8, reg - GICD_IPRIORITYR, DABT_WORD);
> > @@ -638,7 +639,9 @@ static int __vgic_v3_distr_common_mmio_write(const char
> > *name, struct vcpu *v,
> >          vgic_lock_rank(v, rank, flags);
> >          ipriorityr = &rank->ipriorityr[REG_RANK_INDEX(8, reg -
> > GICD_IPRIORITYR,
> >                                                        DABT_WORD)];
> > -        vgic_reg32_update(ipriorityr, r, info);
> > +        priority = ACCESS_ONCE(*ipriorityr);
> > +        vgic_reg32_update(&priority, r, info);
> > +        ACCESS_ONCE(*ipriorityr) = priority;
> 
> Ditto.
> 
> >          vgic_unlock_rank(v, rank, flags);
> >          return 1;
> >      }
> > diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> > index 83569b0..18fe420 100644
> > --- a/xen/arch/arm/vgic.c
> > +++ b/xen/arch/arm/vgic.c
> > @@ -227,14 +227,8 @@ struct vcpu *vgic_get_target_vcpu(struct vcpu *v,
> > unsigned int virq)
> >  static int vgic_get_virq_priority(struct vcpu *v, unsigned int virq)
> >  {
> >      struct vgic_irq_rank *rank = vgic_rank_irq(v, virq);
> > -    unsigned long flags;
> > -    int priority;
> > -
> > -    vgic_lock_rank(v, rank, flags);
> > -    priority = rank->priority[virq & INTERRUPT_RANK_MASK];
> > -    vgic_unlock_rank(v, rank, flags);
> > 
> > -    return priority;
> > +    return ACCESS_ONCE(rank->priority[virq & INTERRUPT_RANK_MASK]);
> >  }
> > 
> >  bool vgic_migrate_irq(struct vcpu *old, struct vcpu *new, unsigned int irq)
> > 
> 
> Cheers,
> 
> -- 
> Julien Grall
>
Julien Grall June 13, 2017, 9:01 a.m. UTC | #3
On 12/06/2017 23:34, Stefano Stabellini wrote:
> On Mon, 12 Jun 2017, Julien Grall wrote:
>> Hi Andre,
>>
>> On 09/06/17 18:41, Andre Przywara wrote:
>>> When reading the priority value of a virtual interrupt, we were taking
>>> the respective rank lock so far.
>>> However for forwarded interrupts (Dom0 only so far) this may lead to a
>>> deadlock with the following call chain:
>>> - MMIO access to change the IRQ affinity, calling the ITARGETSR handler
>>> - this handler takes the appropriate rank lock and calls
>>> vgic_store_itargetsr()
>>> - vgic_store_itargetsr() will eventually call vgic_migrate_irq()
>>> - if this IRQ is already in-flight, it will remove it from the old
>>>   VCPU and inject it into the new one, by calling vgic_vcpu_inject_irq()
>>> - vgic_vcpu_inject_irq will call vgic_get_virq_priority()
>>> - vgic_get_virq_priority() tries to take the rank lock - again!
>>> It seems like this code path has never been exercised before.
>>>
>>> Fix this by avoiding taking the lock in vgic_get_virq_priority() (like we
>>> do in vgic_get_target_vcpu()).
>>> Actually we are just reading one byte, and priority changes while
>>> interrupts are handled are a benign race that can happen on real hardware
>>> too. So it is safe to just prevent the compiler from reading from the
>>> struct more than once.
>>>
>>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>>> ---
>>>  xen/arch/arm/vgic-v2.c | 13 ++++++++-----
>>>  xen/arch/arm/vgic-v3.c | 11 +++++++----
>>>  xen/arch/arm/vgic.c    |  8 +-------
>>>  3 files changed, 16 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/xen/arch/arm/vgic-v2.c b/xen/arch/arm/vgic-v2.c
>>> index dc9f95b..5370020 100644
>>> --- a/xen/arch/arm/vgic-v2.c
>>> +++ b/xen/arch/arm/vgic-v2.c
>>> @@ -258,9 +258,9 @@ static int vgic_v2_distr_mmio_read(struct vcpu *v,
>>> mmio_info_t *info,
>>>          if ( rank == NULL ) goto read_as_zero;
>>>
>>>          vgic_lock_rank(v, rank, flags);
>>> -        ipriorityr = rank->ipriorityr[REG_RANK_INDEX(8,
>>> -                                                     gicd_reg -
>>> GICD_IPRIORITYR,
>>> -                                                     DABT_WORD)];
>>> +        ipriorityr = ACCESS_ONCE(rank->ipriorityr[REG_RANK_INDEX(8,
>>> +                                     gicd_reg - GICD_IPRIORITYR,
>>> +                                     DABT_WORD)]);
>>
>> The indentation is a bit odd. Can you introduce a temporary variable here?
>>
>>>          vgic_unlock_rank(v, rank, flags);
>>>          *r = vgic_reg32_extract(ipriorityr, info);
>>>
>>> @@ -499,7 +499,7 @@ static int vgic_v2_distr_mmio_write(struct vcpu *v,
>>> mmio_info_t *info,
>>>
>>>      case VRANGE32(GICD_IPRIORITYR, GICD_IPRIORITYRN):
>>>      {
>>> -        uint32_t *ipriorityr;
>>> +        uint32_t *ipriorityr, priority;
>>>
>>>          if ( dabt.size != DABT_BYTE && dabt.size != DABT_WORD ) goto
>>> bad_width;
>>>          rank = vgic_rank_offset(v, 8, gicd_reg - GICD_IPRIORITYR,
>>> DABT_WORD);
>>> @@ -508,7 +508,10 @@ static int vgic_v2_distr_mmio_write(struct vcpu *v,
>>> mmio_info_t *info,
>>>          ipriorityr = &rank->ipriorityr[REG_RANK_INDEX(8,
>>>                                                        gicd_reg -
>>> GICD_IPRIORITYR,
>>>                                                        DABT_WORD)];
>>> -        vgic_reg32_update(ipriorityr, r, info);
>>> +        priority = ACCESS_ONCE(*ipriorityr);
>>> +        vgic_reg32_update(&priority, r, info);
>>> +        ACCESS_ONCE(*ipriorityr) = priority;
>>
>> This is a bit odd to read because of the dereferencing. I admit that I would
>> prefer if you use read_atomic/write_atomic which are easier to understand
>> (though the naming is confusing).
>>
>> Let see what Stefano thinks here.
>
> I also prefer *_atomic, especially given what Jan wrote about
> ACCESS_ONCE:
>
>   Plus ACCESS_ONCE() doesn't enforce a single instruction to be used in
>   the resulting assembly).

I don't buy this argument. There are quite a few places we rely on 
assignment to be atomic (see PV protocols for instance). Furthermore 
implementation of atomic_read/atomic_write in Linux (both ARM and x86) 
is based on WRITE_ONCE/READ_ONCE, on Xen it is a simple assignment.

In any case, all those macros does not prevent re-ordering at the 
processor level nor read/write atomicity if the variable is misaligned.

Cheers,
Stefano Stabellini June 13, 2017, 10:19 p.m. UTC | #4
On Tue, 13 Jun 2017, Julien Grall wrote:
> On 12/06/2017 23:34, Stefano Stabellini wrote:
> > On Mon, 12 Jun 2017, Julien Grall wrote:
> > > Hi Andre,
> > > 
> > > On 09/06/17 18:41, Andre Przywara wrote:
> > > > When reading the priority value of a virtual interrupt, we were taking
> > > > the respective rank lock so far.
> > > > However for forwarded interrupts (Dom0 only so far) this may lead to a
> > > > deadlock with the following call chain:
> > > > - MMIO access to change the IRQ affinity, calling the ITARGETSR handler
> > > > - this handler takes the appropriate rank lock and calls
> > > > vgic_store_itargetsr()
> > > > - vgic_store_itargetsr() will eventually call vgic_migrate_irq()
> > > > - if this IRQ is already in-flight, it will remove it from the old
> > > >   VCPU and inject it into the new one, by calling vgic_vcpu_inject_irq()
> > > > - vgic_vcpu_inject_irq will call vgic_get_virq_priority()
> > > > - vgic_get_virq_priority() tries to take the rank lock - again!
> > > > It seems like this code path has never been exercised before.
> > > > 
> > > > Fix this by avoiding taking the lock in vgic_get_virq_priority() (like
> > > > we
> > > > do in vgic_get_target_vcpu()).
> > > > Actually we are just reading one byte, and priority changes while
> > > > interrupts are handled are a benign race that can happen on real
> > > > hardware
> > > > too. So it is safe to just prevent the compiler from reading from the
> > > > struct more than once.
> > > > 
> > > > Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> > > > ---
> > > >  xen/arch/arm/vgic-v2.c | 13 ++++++++-----
> > > >  xen/arch/arm/vgic-v3.c | 11 +++++++----
> > > >  xen/arch/arm/vgic.c    |  8 +-------
> > > >  3 files changed, 16 insertions(+), 16 deletions(-)
> > > > 
> > > > diff --git a/xen/arch/arm/vgic-v2.c b/xen/arch/arm/vgic-v2.c
> > > > index dc9f95b..5370020 100644
> > > > --- a/xen/arch/arm/vgic-v2.c
> > > > +++ b/xen/arch/arm/vgic-v2.c
> > > > @@ -258,9 +258,9 @@ static int vgic_v2_distr_mmio_read(struct vcpu *v,
> > > > mmio_info_t *info,
> > > >          if ( rank == NULL ) goto read_as_zero;
> > > > 
> > > >          vgic_lock_rank(v, rank, flags);
> > > > -        ipriorityr = rank->ipriorityr[REG_RANK_INDEX(8,
> > > > -                                                     gicd_reg -
> > > > GICD_IPRIORITYR,
> > > > -                                                     DABT_WORD)];
> > > > +        ipriorityr = ACCESS_ONCE(rank->ipriorityr[REG_RANK_INDEX(8,
> > > > +                                     gicd_reg - GICD_IPRIORITYR,
> > > > +                                     DABT_WORD)]);
> > > 
> > > The indentation is a bit odd. Can you introduce a temporary variable here?
> > > 
> > > >          vgic_unlock_rank(v, rank, flags);
> > > >          *r = vgic_reg32_extract(ipriorityr, info);
> > > > 
> > > > @@ -499,7 +499,7 @@ static int vgic_v2_distr_mmio_write(struct vcpu *v,
> > > > mmio_info_t *info,
> > > > 
> > > >      case VRANGE32(GICD_IPRIORITYR, GICD_IPRIORITYRN):
> > > >      {
> > > > -        uint32_t *ipriorityr;
> > > > +        uint32_t *ipriorityr, priority;
> > > > 
> > > >          if ( dabt.size != DABT_BYTE && dabt.size != DABT_WORD ) goto
> > > > bad_width;
> > > >          rank = vgic_rank_offset(v, 8, gicd_reg - GICD_IPRIORITYR,
> > > > DABT_WORD);
> > > > @@ -508,7 +508,10 @@ static int vgic_v2_distr_mmio_write(struct vcpu *v,
> > > > mmio_info_t *info,
> > > >          ipriorityr = &rank->ipriorityr[REG_RANK_INDEX(8,
> > > >                                                        gicd_reg -
> > > > GICD_IPRIORITYR,
> > > >                                                        DABT_WORD)];
> > > > -        vgic_reg32_update(ipriorityr, r, info);
> > > > +        priority = ACCESS_ONCE(*ipriorityr);
> > > > +        vgic_reg32_update(&priority, r, info);
> > > > +        ACCESS_ONCE(*ipriorityr) = priority;
> > > 
> > > This is a bit odd to read because of the dereferencing. I admit that I
> > > would
> > > prefer if you use read_atomic/write_atomic which are easier to understand
> > > (though the naming is confusing).
> > > 
> > > Let see what Stefano thinks here.
> > 
> > I also prefer *_atomic, especially given what Jan wrote about
> > ACCESS_ONCE:
> > 
> >   Plus ACCESS_ONCE() doesn't enforce a single instruction to be used in
> >   the resulting assembly).
> 
> I don't buy this argument. There are quite a few places we rely on assignment
> to be atomic (see PV protocols for instance).

I don't understand your explanation. There are no PV protocols under
xen/, they are implemented in other repositories. I grepped for ACCESS
under xen/include/public, in case you referred to the PV protocol
headers, but couldn't find anything interesting.


> Furthermore implementation of
> atomic_read/atomic_write in Linux (both ARM and x86) is based on
> WRITE_ONCE/READ_ONCE, on Xen it is a simple assignment.

I don't follow why you are referring to Linux constructs in this
discussion about Xen atomic functions.


> In any case, all those macros does not prevent re-ordering at the processor
> level nor read/write atomicity if the variable is misaligned.

My understanding is that the unwritten assumption in Xen is that
variables are always aligned. You are right about processor level
reordering, in fact when needed we have to have barriers.

I have read Andre's well written README.atomic, and he ends the
document stating the following:


> This makes read and write accesses to ints and longs (and their respective
> unsigned counter parts) naturally atomic.
> However it would be beneficial to use atomic primitives anyway to annotate
> the code as being concurrent and to prevent silent breakage when changing
> the code

with which I completely agree
Julien Grall June 14, 2017, 8:55 a.m. UTC | #5
On 06/13/2017 11:19 PM, Stefano Stabellini wrote:
> On Tue, 13 Jun 2017, Julien Grall wrote:
>> On 12/06/2017 23:34, Stefano Stabellini wrote:
>>> On Mon, 12 Jun 2017, Julien Grall wrote:
>>>> Hi Andre,
>>>>
>>>> On 09/06/17 18:41, Andre Przywara wrote:
>>>>> When reading the priority value of a virtual interrupt, we were taking
>>>>> the respective rank lock so far.
>>>>> However for forwarded interrupts (Dom0 only so far) this may lead to a
>>>>> deadlock with the following call chain:
>>>>> - MMIO access to change the IRQ affinity, calling the ITARGETSR handler
>>>>> - this handler takes the appropriate rank lock and calls
>>>>> vgic_store_itargetsr()
>>>>> - vgic_store_itargetsr() will eventually call vgic_migrate_irq()
>>>>> - if this IRQ is already in-flight, it will remove it from the old
>>>>>    VCPU and inject it into the new one, by calling vgic_vcpu_inject_irq()
>>>>> - vgic_vcpu_inject_irq will call vgic_get_virq_priority()
>>>>> - vgic_get_virq_priority() tries to take the rank lock - again!
>>>>> It seems like this code path has never been exercised before.
>>>>>
>>>>> Fix this by avoiding taking the lock in vgic_get_virq_priority() (like
>>>>> we
>>>>> do in vgic_get_target_vcpu()).
>>>>> Actually we are just reading one byte, and priority changes while
>>>>> interrupts are handled are a benign race that can happen on real
>>>>> hardware
>>>>> too. So it is safe to just prevent the compiler from reading from the
>>>>> struct more than once.
>>>>>
>>>>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>>>>> ---
>>>>>   xen/arch/arm/vgic-v2.c | 13 ++++++++-----
>>>>>   xen/arch/arm/vgic-v3.c | 11 +++++++----
>>>>>   xen/arch/arm/vgic.c    |  8 +-------
>>>>>   3 files changed, 16 insertions(+), 16 deletions(-)
>>>>>
>>>>> diff --git a/xen/arch/arm/vgic-v2.c b/xen/arch/arm/vgic-v2.c
>>>>> index dc9f95b..5370020 100644
>>>>> --- a/xen/arch/arm/vgic-v2.c
>>>>> +++ b/xen/arch/arm/vgic-v2.c
>>>>> @@ -258,9 +258,9 @@ static int vgic_v2_distr_mmio_read(struct vcpu *v,
>>>>> mmio_info_t *info,
>>>>>           if ( rank == NULL ) goto read_as_zero;
>>>>>
>>>>>           vgic_lock_rank(v, rank, flags);
>>>>> -        ipriorityr = rank->ipriorityr[REG_RANK_INDEX(8,
>>>>> -                                                     gicd_reg -
>>>>> GICD_IPRIORITYR,
>>>>> -                                                     DABT_WORD)];
>>>>> +        ipriorityr = ACCESS_ONCE(rank->ipriorityr[REG_RANK_INDEX(8,
>>>>> +                                     gicd_reg - GICD_IPRIORITYR,
>>>>> +                                     DABT_WORD)]);
>>>>
>>>> The indentation is a bit odd. Can you introduce a temporary variable here?
>>>>
>>>>>           vgic_unlock_rank(v, rank, flags);
>>>>>           *r = vgic_reg32_extract(ipriorityr, info);
>>>>>
>>>>> @@ -499,7 +499,7 @@ static int vgic_v2_distr_mmio_write(struct vcpu *v,
>>>>> mmio_info_t *info,
>>>>>
>>>>>       case VRANGE32(GICD_IPRIORITYR, GICD_IPRIORITYRN):
>>>>>       {
>>>>> -        uint32_t *ipriorityr;
>>>>> +        uint32_t *ipriorityr, priority;
>>>>>
>>>>>           if ( dabt.size != DABT_BYTE && dabt.size != DABT_WORD ) goto
>>>>> bad_width;
>>>>>           rank = vgic_rank_offset(v, 8, gicd_reg - GICD_IPRIORITYR,
>>>>> DABT_WORD);
>>>>> @@ -508,7 +508,10 @@ static int vgic_v2_distr_mmio_write(struct vcpu *v,
>>>>> mmio_info_t *info,
>>>>>           ipriorityr = &rank->ipriorityr[REG_RANK_INDEX(8,
>>>>>                                                         gicd_reg -
>>>>> GICD_IPRIORITYR,
>>>>>                                                         DABT_WORD)];
>>>>> -        vgic_reg32_update(ipriorityr, r, info);
>>>>> +        priority = ACCESS_ONCE(*ipriorityr);
>>>>> +        vgic_reg32_update(&priority, r, info);
>>>>> +        ACCESS_ONCE(*ipriorityr) = priority;
>>>>
>>>> This is a bit odd to read because of the dereferencing. I admit that I
>>>> would
>>>> prefer if you use read_atomic/write_atomic which are easier to understand
>>>> (though the naming is confusing).
>>>>
>>>> Let see what Stefano thinks here.
>>>
>>> I also prefer *_atomic, especially given what Jan wrote about
>>> ACCESS_ONCE:
>>>
>>>    Plus ACCESS_ONCE() doesn't enforce a single instruction to be used in
>>>    the resulting assembly).
>>
>> I don't buy this argument. There are quite a few places we rely on assignment
>> to be atomic (see PV protocols for instance).
> 
> I don't understand your explanation. There are no PV protocols under
> xen/, they are implemented in other repositories. I grepped for ACCESS
> under xen/include/public, in case you referred to the PV protocol
> headers, but couldn't find anything interesting.

Have a look at the pl011 emulation from Bhupinder. It will use plain '=' 
for updating the PV drivers. So can you explain why it is fine there and 
not here?

> 
> 
>> Furthermore implementation of
>> atomic_read/atomic_write in Linux (both ARM and x86) is based on
>> WRITE_ONCE/READ_ONCE, on Xen it is a simple assignment.
> 
> I don't follow why you are referring to Linux constructs in this
> discussion about Xen atomic functions.

My point here is Xen and Linux are very similar. Actually a lot of the 
atomic code is been taken from Linux (have a look to our 
atomic_read/atomic_write).

As the atomic code was added the code by you (likely from Linux), I 
don't understand why you don't complain about the atomic implementation 
but ACCESS_ONCE.

> 
> 
>> In any case, all those macros does not prevent re-ordering at the processor
>> level nor read/write atomicity if the variable is misaligned.
> 
> My understanding is that the unwritten assumption in Xen is that
> variables are always aligned. You are right about processor level
> reordering, in fact when needed we have to have barriers
> 
> I have read Andre's well written README.atomic, and he ends the
> document stating the following:
> 
> 
>> This makes read and write accesses to ints and longs (and their respective
>> unsigned counter parts) naturally atomic.
>> However it would be beneficial to use atomic primitives anyway to annotate
>> the code as being concurrent and to prevent silent breakage when changing
>> the code
> 
> with which I completely agree

Which means you are happy to use either ACCESS_ONCE or 
read_atomic/write_atomic as they in-fine exactly the same on the 
compiler we support.

Cheers,
Andre Przywara June 14, 2017, 11:22 a.m. UTC | #6
Hi,

On 14/06/17 09:55, Julien Grall wrote:
> 
> 
> On 06/13/2017 11:19 PM, Stefano Stabellini wrote:
>> On Tue, 13 Jun 2017, Julien Grall wrote:
>>> On 12/06/2017 23:34, Stefano Stabellini wrote:
>>>> On Mon, 12 Jun 2017, Julien Grall wrote:
>>>>> Hi Andre,
>>>>>
>>>>> On 09/06/17 18:41, Andre Przywara wrote:
>>>>>> When reading the priority value of a virtual interrupt, we were
>>>>>> taking
>>>>>> the respective rank lock so far.
>>>>>> However for forwarded interrupts (Dom0 only so far) this may lead
>>>>>> to a
>>>>>> deadlock with the following call chain:
>>>>>> - MMIO access to change the IRQ affinity, calling the ITARGETSR
>>>>>> handler
>>>>>> - this handler takes the appropriate rank lock and calls
>>>>>> vgic_store_itargetsr()
>>>>>> - vgic_store_itargetsr() will eventually call vgic_migrate_irq()
>>>>>> - if this IRQ is already in-flight, it will remove it from the old
>>>>>>    VCPU and inject it into the new one, by calling
>>>>>> vgic_vcpu_inject_irq()
>>>>>> - vgic_vcpu_inject_irq will call vgic_get_virq_priority()
>>>>>> - vgic_get_virq_priority() tries to take the rank lock - again!
>>>>>> It seems like this code path has never been exercised before.
>>>>>>
>>>>>> Fix this by avoiding taking the lock in vgic_get_virq_priority()
>>>>>> (like
>>>>>> we
>>>>>> do in vgic_get_target_vcpu()).
>>>>>> Actually we are just reading one byte, and priority changes while
>>>>>> interrupts are handled are a benign race that can happen on real
>>>>>> hardware
>>>>>> too. So it is safe to just prevent the compiler from reading from the
>>>>>> struct more than once.
>>>>>>
>>>>>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>>>>>> ---
>>>>>>   xen/arch/arm/vgic-v2.c | 13 ++++++++-----
>>>>>>   xen/arch/arm/vgic-v3.c | 11 +++++++----
>>>>>>   xen/arch/arm/vgic.c    |  8 +-------
>>>>>>   3 files changed, 16 insertions(+), 16 deletions(-)
>>>>>>
>>>>>> diff --git a/xen/arch/arm/vgic-v2.c b/xen/arch/arm/vgic-v2.c
>>>>>> index dc9f95b..5370020 100644
>>>>>> --- a/xen/arch/arm/vgic-v2.c
>>>>>> +++ b/xen/arch/arm/vgic-v2.c
>>>>>> @@ -258,9 +258,9 @@ static int vgic_v2_distr_mmio_read(struct vcpu
>>>>>> *v,
>>>>>> mmio_info_t *info,
>>>>>>           if ( rank == NULL ) goto read_as_zero;
>>>>>>
>>>>>>           vgic_lock_rank(v, rank, flags);
>>>>>> -        ipriorityr = rank->ipriorityr[REG_RANK_INDEX(8,
>>>>>> -                                                     gicd_reg -
>>>>>> GICD_IPRIORITYR,
>>>>>> -                                                     DABT_WORD)];
>>>>>> +        ipriorityr = ACCESS_ONCE(rank->ipriorityr[REG_RANK_INDEX(8,
>>>>>> +                                     gicd_reg - GICD_IPRIORITYR,
>>>>>> +                                     DABT_WORD)]);
>>>>>
>>>>> The indentation is a bit odd. Can you introduce a temporary
>>>>> variable here?
>>>>>
>>>>>>           vgic_unlock_rank(v, rank, flags);
>>>>>>           *r = vgic_reg32_extract(ipriorityr, info);
>>>>>>
>>>>>> @@ -499,7 +499,7 @@ static int vgic_v2_distr_mmio_write(struct
>>>>>> vcpu *v,
>>>>>> mmio_info_t *info,
>>>>>>
>>>>>>       case VRANGE32(GICD_IPRIORITYR, GICD_IPRIORITYRN):
>>>>>>       {
>>>>>> -        uint32_t *ipriorityr;
>>>>>> +        uint32_t *ipriorityr, priority;
>>>>>>
>>>>>>           if ( dabt.size != DABT_BYTE && dabt.size != DABT_WORD )
>>>>>> goto
>>>>>> bad_width;
>>>>>>           rank = vgic_rank_offset(v, 8, gicd_reg - GICD_IPRIORITYR,
>>>>>> DABT_WORD);
>>>>>> @@ -508,7 +508,10 @@ static int vgic_v2_distr_mmio_write(struct
>>>>>> vcpu *v,
>>>>>> mmio_info_t *info,
>>>>>>           ipriorityr = &rank->ipriorityr[REG_RANK_INDEX(8,
>>>>>>                                                         gicd_reg -
>>>>>> GICD_IPRIORITYR,
>>>>>>                                                         DABT_WORD)];
>>>>>> -        vgic_reg32_update(ipriorityr, r, info);
>>>>>> +        priority = ACCESS_ONCE(*ipriorityr);
>>>>>> +        vgic_reg32_update(&priority, r, info);
>>>>>> +        ACCESS_ONCE(*ipriorityr) = priority;
>>>>>
>>>>> This is a bit odd to read because of the dereferencing. I admit that I
>>>>> would
>>>>> prefer if you use read_atomic/write_atomic which are easier to
>>>>> understand
>>>>> (though the naming is confusing).
>>>>>
>>>>> Let see what Stefano thinks here.
>>>>
>>>> I also prefer *_atomic, especially given what Jan wrote about
>>>> ACCESS_ONCE:
>>>>
>>>>    Plus ACCESS_ONCE() doesn't enforce a single instruction to be
>>>> used in
>>>>    the resulting assembly).
>>>
>>> I don't buy this argument. There are quite a few places we rely on
>>> assignment
>>> to be atomic (see PV protocols for instance).
>>
>> I don't understand your explanation. There are no PV protocols under
>> xen/, they are implemented in other repositories. I grepped for ACCESS
>> under xen/include/public, in case you referred to the PV protocol
>> headers, but couldn't find anything interesting.
> 
> Have a look at the pl011 emulation from Bhupinder. It will use plain '='
> for updating the PV drivers. So can you explain why it is fine there and
> not here?
> 
>>
>>
>>> Furthermore implementation of
>>> atomic_read/atomic_write in Linux (both ARM and x86) is based on
>>> WRITE_ONCE/READ_ONCE, on Xen it is a simple assignment.
>>
>> I don't follow why you are referring to Linux constructs in this
>> discussion about Xen atomic functions.
> 
> My point here is Xen and Linux are very similar. Actually a lot of the
> atomic code is been taken from Linux (have a look to our
> atomic_read/atomic_write).
> 
> As the atomic code was added the code by you (likely from Linux), I
> don't understand why you don't complain about the atomic implementation
> but ACCESS_ONCE.
> 
>>
>>
>>> In any case, all those macros does not prevent re-ordering at the
>>> processor
>>> level nor read/write atomicity if the variable is misaligned.
>>
>> My understanding is that the unwritten assumption in Xen is that
>> variables are always aligned. You are right about processor level
>> reordering, in fact when needed we have to have barriers
>>
>> I have read Andre's well written README.atomic, and he ends the
>> document stating the following:
>>
>>
>>> This makes read and write accesses to ints and longs (and their
>>> respective
>>> unsigned counter parts) naturally atomic.
>>> However it would be beneficial to use atomic primitives anyway to
>>> annotate
>>> the code as being concurrent and to prevent silent breakage when
>>> changing
>>> the code
>>
>> with which I completely agree
> 
> Which means you are happy to use either ACCESS_ONCE or
> read_atomic/write_atomic as they in-fine exactly the same on the
> compiler we support.

So I think in this case it's not the same.
read_atomic() would prevent the compiler from breaking up the assignment
into multiple reads, which indeed does not seem necessary here (as it is
aligned, especially due to being a member of an array).
But we have to prevent the compiler from reading the variable *multiple
times* (with the value possibly changing in-between), so ACCESS_ONCE()
is needed, IMHO. It is beyond me to see through how the
vgic_reg32_extract/vgic_reg32_update macro expansion actually ends up,
but I reckon the compiler can break this up and inline it and would be
*allowed* to dereference the pointer several times.
Having said this, in practise it may not be relevant though, as the
compiler probably will only read it once anyway, because a read is
deemed comparably expensive and there is hardly any register pressure on
arm64. But for documenting that this is a lockless read outside of the
lock I'd rather have some annotation in the code.

As for the readability argument of "ACCESS_ONCE(*ptr) = foo;":
I agree, but was wondering why we don't have Linux' READ_ONCE/WRITE_ONCE
pairs in the first place. I see that the implementation of them is
nicely merged into one macro, but maybe we should have wrappers to
improve readability? Not sure that's something for this patch, though.
So can we postpone this for the rework? This will be touched anyway
because it's dealing with a rank.

Cheers,
Andre.
Julien Grall June 14, 2017, 11:27 a.m. UTC | #7
On 06/14/2017 12:22 PM, Andre Przywara wrote:
> As for the readability argument of "ACCESS_ONCE(*ptr) = foo;":
> I agree, but was wondering why we don't have Linux' READ_ONCE/WRITE_ONCE
> pairs in the first place. I see that the implementation of them is
> nicely merged into one macro, but maybe we should have wrappers to
> improve readability? Not sure that's something for this patch, though.
> So can we postpone this for the rework? This will be touched anyway
> because it's dealing with a rank.

I think nobody looked at importing WRITE_ONCE/READ_ONCE on Xen. Feel 
free to send a patch :).

Anyway, I am happy to postpone for the rework.

Cheers,
Stefano Stabellini June 14, 2017, 5:32 p.m. UTC | #8
On Wed, 14 Jun 2017, Julien Grall wrote:
> On 06/13/2017 11:19 PM, Stefano Stabellini wrote:
> > On Tue, 13 Jun 2017, Julien Grall wrote:
> > > On 12/06/2017 23:34, Stefano Stabellini wrote:
> > > > On Mon, 12 Jun 2017, Julien Grall wrote:
> > > > > Hi Andre,
> > > > > 
> > > > > On 09/06/17 18:41, Andre Przywara wrote:
> > > > > > When reading the priority value of a virtual interrupt, we were
> > > > > > taking
> > > > > > the respective rank lock so far.
> > > > > > However for forwarded interrupts (Dom0 only so far) this may lead to
> > > > > > a
> > > > > > deadlock with the following call chain:
> > > > > > - MMIO access to change the IRQ affinity, calling the ITARGETSR
> > > > > > handler
> > > > > > - this handler takes the appropriate rank lock and calls
> > > > > > vgic_store_itargetsr()
> > > > > > - vgic_store_itargetsr() will eventually call vgic_migrate_irq()
> > > > > > - if this IRQ is already in-flight, it will remove it from the old
> > > > > >    VCPU and inject it into the new one, by calling
> > > > > > vgic_vcpu_inject_irq()
> > > > > > - vgic_vcpu_inject_irq will call vgic_get_virq_priority()
> > > > > > - vgic_get_virq_priority() tries to take the rank lock - again!
> > > > > > It seems like this code path has never been exercised before.
> > > > > > 
> > > > > > Fix this by avoiding taking the lock in vgic_get_virq_priority()
> > > > > > (like
> > > > > > we
> > > > > > do in vgic_get_target_vcpu()).
> > > > > > Actually we are just reading one byte, and priority changes while
> > > > > > interrupts are handled are a benign race that can happen on real
> > > > > > hardware
> > > > > > too. So it is safe to just prevent the compiler from reading from
> > > > > > the
> > > > > > struct more than once.
> > > > > > 
> > > > > > Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> > > > > > ---
> > > > > >   xen/arch/arm/vgic-v2.c | 13 ++++++++-----
> > > > > >   xen/arch/arm/vgic-v3.c | 11 +++++++----
> > > > > >   xen/arch/arm/vgic.c    |  8 +-------
> > > > > >   3 files changed, 16 insertions(+), 16 deletions(-)
> > > > > > 
> > > > > > diff --git a/xen/arch/arm/vgic-v2.c b/xen/arch/arm/vgic-v2.c
> > > > > > index dc9f95b..5370020 100644
> > > > > > --- a/xen/arch/arm/vgic-v2.c
> > > > > > +++ b/xen/arch/arm/vgic-v2.c
> > > > > > @@ -258,9 +258,9 @@ static int vgic_v2_distr_mmio_read(struct vcpu
> > > > > > *v,
> > > > > > mmio_info_t *info,
> > > > > >           if ( rank == NULL ) goto read_as_zero;
> > > > > > 
> > > > > >           vgic_lock_rank(v, rank, flags);
> > > > > > -        ipriorityr = rank->ipriorityr[REG_RANK_INDEX(8,
> > > > > > -                                                     gicd_reg -
> > > > > > GICD_IPRIORITYR,
> > > > > > -                                                     DABT_WORD)];
> > > > > > +        ipriorityr = ACCESS_ONCE(rank->ipriorityr[REG_RANK_INDEX(8,
> > > > > > +                                     gicd_reg - GICD_IPRIORITYR,
> > > > > > +                                     DABT_WORD)]);
> > > > > 
> > > > > The indentation is a bit odd. Can you introduce a temporary variable
> > > > > here?
> > > > > 
> > > > > >           vgic_unlock_rank(v, rank, flags);
> > > > > >           *r = vgic_reg32_extract(ipriorityr, info);
> > > > > > 
> > > > > > @@ -499,7 +499,7 @@ static int vgic_v2_distr_mmio_write(struct vcpu
> > > > > > *v,
> > > > > > mmio_info_t *info,
> > > > > > 
> > > > > >       case VRANGE32(GICD_IPRIORITYR, GICD_IPRIORITYRN):
> > > > > >       {
> > > > > > -        uint32_t *ipriorityr;
> > > > > > +        uint32_t *ipriorityr, priority;
> > > > > > 
> > > > > >           if ( dabt.size != DABT_BYTE && dabt.size != DABT_WORD )
> > > > > > goto
> > > > > > bad_width;
> > > > > >           rank = vgic_rank_offset(v, 8, gicd_reg - GICD_IPRIORITYR,
> > > > > > DABT_WORD);
> > > > > > @@ -508,7 +508,10 @@ static int vgic_v2_distr_mmio_write(struct vcpu
> > > > > > *v,
> > > > > > mmio_info_t *info,
> > > > > >           ipriorityr = &rank->ipriorityr[REG_RANK_INDEX(8,
> > > > > >                                                         gicd_reg -
> > > > > > GICD_IPRIORITYR,
> > > > > >                                                         DABT_WORD)];
> > > > > > -        vgic_reg32_update(ipriorityr, r, info);
> > > > > > +        priority = ACCESS_ONCE(*ipriorityr);
> > > > > > +        vgic_reg32_update(&priority, r, info);
> > > > > > +        ACCESS_ONCE(*ipriorityr) = priority;
> > > > > 
> > > > > This is a bit odd to read because of the dereferencing. I admit that I
> > > > > would
> > > > > prefer if you use read_atomic/write_atomic which are easier to
> > > > > understand
> > > > > (though the naming is confusing).
> > > > > 
> > > > > Let see what Stefano thinks here.
> > > > 
> > > > I also prefer *_atomic, especially given what Jan wrote about
> > > > ACCESS_ONCE:
> > > > 
> > > >    Plus ACCESS_ONCE() doesn't enforce a single instruction to be used in
> > > >    the resulting assembly).
> > > 
> > > I don't buy this argument. There are quite a few places we rely on
> > > assignment
> > > to be atomic (see PV protocols for instance).
> > 
> > I don't understand your explanation. There are no PV protocols under
> > xen/, they are implemented in other repositories. I grepped for ACCESS
> > under xen/include/public, in case you referred to the PV protocol
> > headers, but couldn't find anything interesting.
> 
> Have a look at the pl011 emulation from Bhupinder. It will use plain '=' for
> updating the PV drivers. So can you explain why it is fine there and not here?

Bhupinder's series is the first PV driver in the Xen codebase. It is
easy to forget that coding should/might have to be different compared to
Linux, which is the codebase I usually work with for PV drivers.

It is true that updating the indexes should be done atomically,
otherwise the other end might end up reading a wrong index value.


> > > Furthermore implementation of
> > > atomic_read/atomic_write in Linux (both ARM and x86) is based on
> > > WRITE_ONCE/READ_ONCE, on Xen it is a simple assignment.
> > 
> > I don't follow why you are referring to Linux constructs in this
> > discussion about Xen atomic functions.
> 
> My point here is Xen and Linux are very similar. Actually a lot of the atomic
> code is been taken from Linux (have a look to our atomic_read/atomic_write).
> 
> As the atomic code was added the code by you (likely from Linux), I don't
> understand why you don't complain about the atomic implementation but
> ACCESS_ONCE.

Linux and Xen are free to make different assumptions regarding
compilers.

FWIW I am not really complaining about either atomics or ACCESS_ONCE, I
just don't see the advantage of using ACCESS_ONCE over read/write_atomic
(see below).


> > > In any case, all those macros does not prevent re-ordering at the
> > > processor
> > > level nor read/write atomicity if the variable is misaligned.
> > 
> > My understanding is that the unwritten assumption in Xen is that
> > variables are always aligned. You are right about processor level
> > reordering, in fact when needed we have to have barriers
> > 
> > I have read Andre's well written README.atomic, and he ends the
> > document stating the following:
> > 
> > 
> > > This makes read and write accesses to ints and longs (and their respective
> > > unsigned counter parts) naturally atomic.
> > > However it would be beneficial to use atomic primitives anyway to annotate
> > > the code as being concurrent and to prevent silent breakage when changing
> > > the code
> > 
> > with which I completely agree
> 
> Which means you are happy to use either ACCESS_ONCE or
> read_atomic/write_atomic as they in-fine exactly the same on the compiler we
> support.

I do understand that both of them will produce the same output,
therefore, both work for this use-case.

I don't understand why anybody would prefer ACCESS_ONCE over
read/write_atomic, given that with ACCESS_ONCE as a contributor/reviewer
you additionally need to remember to check whether the argument is a
native data type. Basically, I see ACCESS_ONCE as "more work" for me.
Why do you think that ACCESS_ONCE is "better"?

Regarding the "compiler with support": do we state clearly in any docs
or website what are the compilers we support? I think this would be the
right opportunity to do it.
Julien Grall June 14, 2017, 5:44 p.m. UTC | #9
Hi Stefano,

On 06/14/2017 06:32 PM, Stefano Stabellini wrote:
> On Wed, 14 Jun 2017, Julien Grall wrote:
>>> I don't understand your explanation. There are no PV protocols under
>>> xen/, they are implemented in other repositories. I grepped for ACCESS
>>> under xen/include/public, in case you referred to the PV protocol
>>> headers, but couldn't find anything interesting.
>>
>> Have a look at the pl011 emulation from Bhupinder. It will use plain '=' for
>> updating the PV drivers. So can you explain why it is fine there and not here?
> 
> Bhupinder's series is the first PV driver in the Xen codebase. It is
> easy to forget that coding should/might have to be different compared to
> Linux, which is the codebase I usually work with for PV drivers.
> 
> It is true that updating the indexes should be done atomically,
> otherwise the other end might end up reading a wrong index value.
> 
> 
>>>> Furthermore implementation of
>>>> atomic_read/atomic_write in Linux (both ARM and x86) is based on
>>>> WRITE_ONCE/READ_ONCE, on Xen it is a simple assignment.
>>>
>>> I don't follow why you are referring to Linux constructs in this
>>> discussion about Xen atomic functions.
>>
>> My point here is Xen and Linux are very similar. Actually a lot of the atomic
>> code is been taken from Linux (have a look to our atomic_read/atomic_write).
>>
>> As the atomic code was added the code by you (likely from Linux), I don't
>> understand why you don't complain about the atomic implementation but
>> ACCESS_ONCE.
> 
> Linux and Xen are free to make different assumptions regarding
> compilers.

It is not possible to say that Xen may have different assumptions when 
you import code from Linux without deep review (yes most of the code 
taken from Linux is as it is).

> 
> FWIW I am not really complaining about either atomics or ACCESS_ONCE, I
> just don't see the advantage of using ACCESS_ONCE over read/write_atomic
> (see below).
> 
> 
>>>> In any case, all those macros does not prevent re-ordering at the
>>>> processor
>>>> level nor read/write atomicity if the variable is misaligned.
>>>
>>> My understanding is that the unwritten assumption in Xen is that
>>> variables are always aligned. You are right about processor level
>>> reordering, in fact when needed we have to have barriers
>>>
>>> I have read Andre's well written README.atomic, and he ends the
>>> document stating the following:
>>>
>>>
>>>> This makes read and write accesses to ints and longs (and their respective
>>>> unsigned counter parts) naturally atomic.
>>>> However it would be beneficial to use atomic primitives anyway to annotate
>>>> the code as being concurrent and to prevent silent breakage when changing
>>>> the code
>>>
>>> with which I completely agree
>>
>> Which means you are happy to use either ACCESS_ONCE or
>> read_atomic/write_atomic as they in-fine exactly the same on the compiler we
>> support.
> 
> I do understand that both of them will produce the same output,
> therefore, both work for this use-case.
> 
> I don't understand why anybody would prefer ACCESS_ONCE over
> read/write_atomic, given that with ACCESS_ONCE as a contributor/reviewer
> you additionally need to remember to check whether the argument is a
> native data type. Basically, I see ACCESS_ONCE as "more work" for me.
> Why do you think that ACCESS_ONCE is "better"?

Have you looked at the implementation of ACCESS_ONCE? You don't have to 
check the data type when using ACCESS_ONCE. There are safety check to 
avoid misusing it.

What I want to avoid is this split mind we currently have about atomic.
They are either all safe or not.

As Andre suggested, we should probably import a lighter version of 
WRITE_ONCE/READ_ONCE. They are first easier to understand than 
read_atomic/write_atomic that could be confused with 
atomic_read/atomic_write (IIRC Jan agreed here).

The main goal is to avoid assembly code when it is deem not necessary.

> 
> Regarding the "compiler with support": do we state clearly in any docs
> or website what are the compilers we support? I think this would be the
> right opportunity to do it.

That's a discussion to have on the README.atomics patch.

Cheers,
Stefano Stabellini June 14, 2017, 6:15 p.m. UTC | #10
On Wed, 14 Jun 2017, Julien Grall wrote:
> > > > > In any case, all those macros does not prevent re-ordering at the
> > > > > processor
> > > > > level nor read/write atomicity if the variable is misaligned.
> > > > 
> > > > My understanding is that the unwritten assumption in Xen is that
> > > > variables are always aligned. You are right about processor level
> > > > reordering, in fact when needed we have to have barriers
> > > > 
> > > > I have read Andre's well written README.atomic, and he ends the
> > > > document stating the following:
> > > > 
> > > > 
> > > > > This makes read and write accesses to ints and longs (and their
> > > > > respective
> > > > > unsigned counter parts) naturally atomic.
> > > > > However it would be beneficial to use atomic primitives anyway to
> > > > > annotate
> > > > > the code as being concurrent and to prevent silent breakage when
> > > > > changing
> > > > > the code
> > > > 
> > > > with which I completely agree
> > > 
> > > Which means you are happy to use either ACCESS_ONCE or
> > > read_atomic/write_atomic as they in-fine exactly the same on the compiler
> > > we
> > > support.
> > 
> > I do understand that both of them will produce the same output,
> > therefore, both work for this use-case.
> > 
> > I don't understand why anybody would prefer ACCESS_ONCE over
> > read/write_atomic, given that with ACCESS_ONCE as a contributor/reviewer
> > you additionally need to remember to check whether the argument is a
> > native data type. Basically, I see ACCESS_ONCE as "more work" for me.
> > Why do you think that ACCESS_ONCE is "better"?
> 
> Have you looked at the implementation of ACCESS_ONCE? You don't have to check
> the data type when using ACCESS_ONCE. There are safety check to avoid misusing
> it.

It checks for a scalar type, not for native data type. They are not
always the same thing but I think they are on arm.


> What I want to avoid is this split mind we currently have about atomic.
> They are either all safe or not.

What split mind? Do you mean ACCESS_ONCE vs. read/write_atomic? So far,
there are no instances of ACCESS_ONCE in xen/arch/arm.


> As Andre suggested, we should probably import a lighter version of
> WRITE_ONCE/READ_ONCE. They are first easier to understand than
> read_atomic/write_atomic that could be confused with atomic_read/atomic_write
> (IIRC Jan agreed here).
> 
> The main goal is to avoid assembly code when it is deem not necessary.

All right, this is one reason. Sorry if I seem unnecessarily contrarian,
but this is the first time I read a reason for this recent push for using
ACCESS_ONCE. You wrote that you preferred the read/write_atomic
functions yourself on Monday.
Julien Grall June 14, 2017, 6:32 p.m. UTC | #11
Hi Stefano,

On 06/14/2017 07:15 PM, Stefano Stabellini wrote:
> On Wed, 14 Jun 2017, Julien Grall wrote:
>>>>>> In any case, all those macros does not prevent re-ordering at the
>>>>>> processor
>>>>>> level nor read/write atomicity if the variable is misaligned.
>>>>>
>>>>> My understanding is that the unwritten assumption in Xen is that
>>>>> variables are always aligned. You are right about processor level
>>>>> reordering, in fact when needed we have to have barriers
>>>>>
>>>>> I have read Andre's well written README.atomic, and he ends the
>>>>> document stating the following:
>>>>>
>>>>>
>>>>>> This makes read and write accesses to ints and longs (and their
>>>>>> respective
>>>>>> unsigned counter parts) naturally atomic.
>>>>>> However it would be beneficial to use atomic primitives anyway to
>>>>>> annotate
>>>>>> the code as being concurrent and to prevent silent breakage when
>>>>>> changing
>>>>>> the code
>>>>>
>>>>> with which I completely agree
>>>>
>>>> Which means you are happy to use either ACCESS_ONCE or
>>>> read_atomic/write_atomic as they in-fine exactly the same on the compiler
>>>> we
>>>> support.
>>>
>>> I do understand that both of them will produce the same output,
>>> therefore, both work for this use-case.
>>>
>>> I don't understand why anybody would prefer ACCESS_ONCE over
>>> read/write_atomic, given that with ACCESS_ONCE as a contributor/reviewer
>>> you additionally need to remember to check whether the argument is a
>>> native data type. Basically, I see ACCESS_ONCE as "more work" for me.
>>> Why do you think that ACCESS_ONCE is "better"?
>>
>> Have you looked at the implementation of ACCESS_ONCE? You don't have to check
>> the data type when using ACCESS_ONCE. There are safety check to avoid misusing
>> it.
> 
> It checks for a scalar type, not for native data type. They are not
> always the same thing but I think they are on arm.
That's the goal of specification (such as AAPCS).

>> What I want to avoid is this split mind we currently have about atomic.
>> They are either all safe or not.
> 
> What split mind? Do you mean ACCESS_ONCE vs. read/write_atomic? So far,
> there are no instances of ACCESS_ONCE in xen/arch/arm.

No. I mean there are places with exact same construct but sometimes 
consider we consider safe, sometimes not. We should have a clear common 
answer rather than arguing differently every time.

>> As Andre suggested, we should probably import a lighter version of
>> WRITE_ONCE/READ_ONCE. They are first easier to understand than
>> read_atomic/write_atomic that could be confused with atomic_read/atomic_write
>> (IIRC Jan agreed here).
>>
>> The main goal is to avoid assembly code when it is deem not necessary.
> 
> All right, this is one reason. Sorry if I seem unnecessarily contrarian,
> but this is the first time I read a reason for this recent push for using
> ACCESS_ONCE. You wrote that you preferred the read/write_atomic
> functions yourself on Monday.

I preferred {read,write}_atomic because the prototype is nicer to use 
than ACCESS_ONCE. In the ideal we should introduce WRITE_ONCE/READ_ONCE 
improving the naming and also having a nice prototype.
diff mbox

Patch

diff --git a/xen/arch/arm/vgic-v2.c b/xen/arch/arm/vgic-v2.c
index dc9f95b..5370020 100644
--- a/xen/arch/arm/vgic-v2.c
+++ b/xen/arch/arm/vgic-v2.c
@@ -258,9 +258,9 @@  static int vgic_v2_distr_mmio_read(struct vcpu *v, mmio_info_t *info,
         if ( rank == NULL ) goto read_as_zero;
 
         vgic_lock_rank(v, rank, flags);
-        ipriorityr = rank->ipriorityr[REG_RANK_INDEX(8,
-                                                     gicd_reg - GICD_IPRIORITYR,
-                                                     DABT_WORD)];
+        ipriorityr = ACCESS_ONCE(rank->ipriorityr[REG_RANK_INDEX(8,
+                                     gicd_reg - GICD_IPRIORITYR,
+                                     DABT_WORD)]);
         vgic_unlock_rank(v, rank, flags);
         *r = vgic_reg32_extract(ipriorityr, info);
 
@@ -499,7 +499,7 @@  static int vgic_v2_distr_mmio_write(struct vcpu *v, mmio_info_t *info,
 
     case VRANGE32(GICD_IPRIORITYR, GICD_IPRIORITYRN):
     {
-        uint32_t *ipriorityr;
+        uint32_t *ipriorityr, priority;
 
         if ( dabt.size != DABT_BYTE && dabt.size != DABT_WORD ) goto bad_width;
         rank = vgic_rank_offset(v, 8, gicd_reg - GICD_IPRIORITYR, DABT_WORD);
@@ -508,7 +508,10 @@  static int vgic_v2_distr_mmio_write(struct vcpu *v, mmio_info_t *info,
         ipriorityr = &rank->ipriorityr[REG_RANK_INDEX(8,
                                                       gicd_reg - GICD_IPRIORITYR,
                                                       DABT_WORD)];
-        vgic_reg32_update(ipriorityr, r, info);
+        priority = ACCESS_ONCE(*ipriorityr);
+        vgic_reg32_update(&priority, r, info);
+        ACCESS_ONCE(*ipriorityr) = priority;
+
         vgic_unlock_rank(v, rank, flags);
         return 1;
     }
diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
index d10757a..8abc069 100644
--- a/xen/arch/arm/vgic-v3.c
+++ b/xen/arch/arm/vgic-v3.c
@@ -521,8 +521,9 @@  static int __vgic_v3_distr_common_mmio_read(const char *name, struct vcpu *v,
         if ( rank == NULL ) goto read_as_zero;
 
         vgic_lock_rank(v, rank, flags);
-        ipriorityr = rank->ipriorityr[REG_RANK_INDEX(8, reg - GICD_IPRIORITYR,
-                                                     DABT_WORD)];
+        ipriorityr = ACCESS_ONCE(rank->ipriorityr[REG_RANK_INDEX(8,
+                                     reg - GICD_IPRIORITYR,
+                                     DABT_WORD)]);
         vgic_unlock_rank(v, rank, flags);
 
         *r = vgic_reg32_extract(ipriorityr, info);
@@ -630,7 +631,7 @@  static int __vgic_v3_distr_common_mmio_write(const char *name, struct vcpu *v,
 
     case VRANGE32(GICD_IPRIORITYR, GICD_IPRIORITYRN):
     {
-        uint32_t *ipriorityr;
+        uint32_t *ipriorityr, priority;
 
         if ( dabt.size != DABT_BYTE && dabt.size != DABT_WORD ) goto bad_width;
         rank = vgic_rank_offset(v, 8, reg - GICD_IPRIORITYR, DABT_WORD);
@@ -638,7 +639,9 @@  static int __vgic_v3_distr_common_mmio_write(const char *name, struct vcpu *v,
         vgic_lock_rank(v, rank, flags);
         ipriorityr = &rank->ipriorityr[REG_RANK_INDEX(8, reg - GICD_IPRIORITYR,
                                                       DABT_WORD)];
-        vgic_reg32_update(ipriorityr, r, info);
+        priority = ACCESS_ONCE(*ipriorityr);
+        vgic_reg32_update(&priority, r, info);
+        ACCESS_ONCE(*ipriorityr) = priority;
         vgic_unlock_rank(v, rank, flags);
         return 1;
     }
diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index 83569b0..18fe420 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -227,14 +227,8 @@  struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq)
 static int vgic_get_virq_priority(struct vcpu *v, unsigned int virq)
 {
     struct vgic_irq_rank *rank = vgic_rank_irq(v, virq);
-    unsigned long flags;
-    int priority;
-
-    vgic_lock_rank(v, rank, flags);
-    priority = rank->priority[virq & INTERRUPT_RANK_MASK];
-    vgic_unlock_rank(v, rank, flags);
 
-    return priority;
+    return ACCESS_ONCE(rank->priority[virq & INTERRUPT_RANK_MASK]);
 }
 
 bool vgic_migrate_irq(struct vcpu *old, struct vcpu *new, unsigned int irq)