[11/12] evtchn: convert vIRQ lock to an r/w one

Message ID 6e529147-2a76-bc28-ac16-21fc9a2c8f03@suse.com (mailing list archive)
State Superseded
Series evtchn: recent XSAs follow-on

Commit Message

Jan Beulich Sept. 28, 2020, 11:02 a.m. UTC
There's no need to serialize all sending of vIRQ-s; all that's needed
is serialization against the closing of the respective event channels
(by means of a barrier). To facilitate the conversion, introduce a new
rw_barrier().

Signed-off-by: Jan Beulich <jbeulich@suse.com>

Comments

Julien Grall Sept. 29, 2020, 1:03 p.m. UTC | #1
Hi Jan,

On 28/09/2020 12:02, Jan Beulich wrote:
> There's no need to serialize all sending of vIRQ-s; all that's needed
> is serialization against the closing of the respective event channels
> (by means of a barrier). To facilitate the conversion, introduce a new
> rw_barrier().

Looking at the code below, all the spin_lock() calls have been replaced by 
read_lock_*() ones. This is a bit surprising.

> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -160,7 +160,7 @@ struct vcpu *vcpu_create(struct domain *
>       v->vcpu_id = vcpu_id;
>       v->dirty_cpu = VCPU_CPU_CLEAN;
>   
> -    spin_lock_init(&v->virq_lock);
> +    rwlock_init(&v->virq_lock);
>   
>       tasklet_init(&v->continue_hypercall_tasklet, NULL, NULL);
>   
> --- a/xen/common/event_channel.c
> +++ b/xen/common/event_channel.c
> @@ -640,7 +640,7 @@ int evtchn_close(struct domain *d1, int
>               if ( v->virq_to_evtchn[chn1->u.virq] != port1 )
>                   continue;
>               v->virq_to_evtchn[chn1->u.virq] = 0;
> -            spin_barrier(&v->virq_lock);
> +            rw_barrier(&v->virq_lock);
>           }
>           break;
>   
> @@ -794,7 +794,7 @@ void send_guest_vcpu_virq(struct vcpu *v
>   
>       ASSERT(!virq_is_global(virq));
>   
> -    spin_lock_irqsave(&v->virq_lock, flags);
> +    read_lock_irqsave(&v->virq_lock, flags);
>   
>       port = v->virq_to_evtchn[virq];
>       if ( unlikely(port == 0) )
> @@ -807,7 +807,7 @@ void send_guest_vcpu_virq(struct vcpu *v
>       spin_unlock(&chn->lock);
>   
>    out:
> -    spin_unlock_irqrestore(&v->virq_lock, flags);
> +    read_unlock_irqrestore(&v->virq_lock, flags);
>   }
>   
>   void send_guest_global_virq(struct domain *d, uint32_t virq)
> @@ -826,7 +826,7 @@ void send_guest_global_virq(struct domai
>       if ( unlikely(v == NULL) )
>           return;
>   
> -    spin_lock_irqsave(&v->virq_lock, flags);
> +    read_lock_irqsave(&v->virq_lock, flags);
>   
>       port = v->virq_to_evtchn[virq];
>       if ( unlikely(port == 0) )
> @@ -838,7 +838,7 @@ void send_guest_global_virq(struct domai
>       spin_unlock(&chn->lock);
>   
>    out:
> -    spin_unlock_irqrestore(&v->virq_lock, flags);
> +    read_unlock_irqrestore(&v->virq_lock, flags);
>   }
>   
>   void send_guest_pirq(struct domain *d, const struct pirq *pirq)
> --- a/xen/common/spinlock.c
> +++ b/xen/common/spinlock.c
> @@ -2,7 +2,7 @@
>   #include <xen/irq.h>
>   #include <xen/smp.h>
>   #include <xen/time.h>
> -#include <xen/spinlock.h>
> +#include <xen/rwlock.h>

I would prefer if we keep including <xen/spinlock.h>, as the fact that 
<xen/rwlock.h> includes it is merely an implementation detail.

>   #include <xen/guest_access.h>
>   #include <xen/preempt.h>
>   #include <public/sysctl.h>
> @@ -334,6 +334,12 @@ void _spin_unlock_recursive(spinlock_t *
>       }
>   }
>   
> +void _rw_barrier(rwlock_t *lock)
> +{
> +    check_barrier(&lock->lock.debug);
> +    do { smp_mb(); } while ( _rw_is_locked(lock) );
> +}

Why do you need to call smp_mb() on each loop iteration? Would it not be 
sufficient to write something similar to spin_barrier()? I.e.:

smp_mb();
while ( _rw_is_locked(lock) )
   cpu_relax();
smp_mb();

But I wonder if there is a risk, with either implementation, of 
_rw_is_locked() always returning true, so that the loop would never end.

Let's say we receive an interrupt; by the time it has been handled, the 
read lock may have been taken again.

spin_barrier() seems to handle this situation fine because it just waits 
for the head to change. I don't think we can do the same here...
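
For reference, the ticket-based spin_barrier() waits along these lines 
(a sketch from memory, not the verbatim code):

     smp_mb();
     sample = observe_lock(&lock->tickets);    /* snapshot head/tail */
     if ( sample.head != sample.tail )         /* locked when sampled */
         while ( observe_head(&lock->tickets) == sample.head )
             cpu_relax();                      /* wait for head to advance */
     smp_mb();

Once the head has moved past the sampled value, every holder that 
predates the barrier must have dropped the lock, no matter how many new 
holders come and go. An rwlock has no ticket head we could sample that way.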

I am thinking that it may be easier to hold the write lock when doing 
the update.
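
Something along these lines (only a sketch, not a tested patch):

     /* In evtchn_close(): serialize against the sending paths directly. */
     write_lock_irqsave(&v->virq_lock, flags);
     v->virq_to_evtchn[chn1->u.virq] = 0;
     write_unlock_irqrestore(&v->virq_lock, flags);

The senders would keep their read_lock_irqsave(), and the barrier (and 
the termination question above) would go away entirely.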

Cheers,
Jan Beulich Sept. 29, 2020, 1:37 p.m. UTC | #2
On 29.09.2020 15:03, Julien Grall wrote:
> On 28/09/2020 12:02, Jan Beulich wrote:
>> There's no need to serialize all sending of vIRQ-s; all that's needed
>> is serialization against the closing of the respective event channels
>> (by means of a barrier). To facilitate the conversion, introduce a new
>> rw_barrier().
> 
> Looking at the code below, all the spin_lock() calls have been replaced by 
> read_lock_*() ones. This is a bit surprising.

It is, I agree, but that's what it ends up being. It is my understanding
that the lock really only exists for the purpose of the barrier in
evtchn_close().

>> --- a/xen/common/spinlock.c
>> +++ b/xen/common/spinlock.c
>> @@ -2,7 +2,7 @@
>>   #include <xen/irq.h>
>>   #include <xen/smp.h>
>>   #include <xen/time.h>
>> -#include <xen/spinlock.h>
>> +#include <xen/rwlock.h>
> 
> I would prefer if we keep including <xen/spinlock.h>, as the fact that 
> <xen/rwlock.h> includes it is merely an implementation detail.

Can do, sure.

>> @@ -334,6 +334,12 @@ void _spin_unlock_recursive(spinlock_t *
>>       }
>>   }
>>   
>> +void _rw_barrier(rwlock_t *lock)
>> +{
>> +    check_barrier(&lock->lock.debug);
>> +    do { smp_mb(); } while ( _rw_is_locked(lock) );
>> +}
> 
> Why do you need to call smp_mb() on each loop iteration? Would it not be 
> sufficient to write something similar to spin_barrier()? I.e.:
> 
> smp_mb();
> while ( _rw_is_locked(lock) )
>    cpu_relax();
> smp_mb();

Yes, this looks to be possible. Both for this and the question
below it may be relevant to know that this patch pre-dates the
conversion to ticket locks by quite a bit. Hence the construct
above resembles what _spin_barrier() looked like at the time.

> But I wonder if there is a risk, with either implementation, of 
> _rw_is_locked() always returning true, so that the loop would never end.
> 
> Let's say we receive an interrupt; by the time it has been handled, the 
> read lock may have been taken again.

With non-ticket locks I would say there was the same issue. But
yes, ...

> spin_barrier() seems to handle this situation fine because it just waits 
> for the head to change. I don't think we can do the same here...
> 
> I am thinking that it may be easier to hold the write lock when doing 
> the update.

... perhaps this is indeed better. I have to admit that I never
fully understood the benefit of using spin_barrier() in this code
(as opposed to the use in evtchn_destroy()).

Jan
Julien Grall Sept. 29, 2020, 5:18 p.m. UTC | #3
Hi Jan,

On 29/09/2020 14:37, Jan Beulich wrote:
> On 29.09.2020 15:03, Julien Grall wrote:
>> On 28/09/2020 12:02, Jan Beulich wrote:
>>> There's no need to serialize all sending of vIRQ-s; all that's needed
>>> is serialization against the closing of the respective event channels
>>> (by means of a barrier). To facilitate the conversion, introduce a new
>>> rw_barrier().
>>
>> Looking at the code below, all the spin_lock() calls have been replaced by
>> read_lock_*() ones. This is a bit surprising.
> 
> It is, I agree, but that's what it ends up being. It is my understanding
> that the lock really only exists for the purpose of the barrier in
> evtchn_close().
> 
>>> --- a/xen/common/spinlock.c
>>> +++ b/xen/common/spinlock.c
>>> @@ -2,7 +2,7 @@
>>>    #include <xen/irq.h>
>>>    #include <xen/smp.h>
>>>    #include <xen/time.h>
>>> -#include <xen/spinlock.h>
>>> +#include <xen/rwlock.h>
>>
>> I would prefer if we keep including <xen/spinlock.h>, as the fact that
>> <xen/rwlock.h> includes it is merely an implementation detail.
> 
> Can do, sure.
> 
>>> @@ -334,6 +334,12 @@ void _spin_unlock_recursive(spinlock_t *
>>>        }
>>>    }
>>>    
>>> +void _rw_barrier(rwlock_t *lock)
>>> +{
>>> +    check_barrier(&lock->lock.debug);
>>> +    do { smp_mb(); } while ( _rw_is_locked(lock) );
>>> +}
>>
>> Why do you need to call smp_mb() on each loop iteration? Would it not be
>> sufficient to write something similar to spin_barrier()? I.e.:
>>
>> smp_mb();
>> while ( _rw_is_locked(lock) )
>>     cpu_relax();
>> smp_mb();
> 
> Yes, this looks to be possible. Both for this and the question
> below it may be relevant to know that this patch pre-dates the
> conversion to ticket locks by quite a bit. Hence the construct
> above resembles what _spin_barrier() looked like at the time.
> 
>> But I wonder if there is a risk, with either implementation, of
>> _rw_is_locked() always returning true, so that the loop would never end.
>>
>> Let's say we receive an interrupt; by the time it has been handled, the
>> read lock may have been taken again.
> 
> With non-ticket locks I would say there was the same issue. But
> yes, ...

Most likely yes.

> 
>> spin_barrier() seems to handle this situation fine because it just waits
>> for the head to change. I don't think we can do the same here...
>>
>> I am thinking that it may be easier to hold the write lock when doing
>> the update.
> 
> ... perhaps this is indeed better. I have to admit that I never
> fully understood the benefit of using spin_barrier() in this code
> (as opposed to the use in evtchn_destroy()).

I am not entirely sure either. It looks like it is an attempt to make 
v->virq_to_evtchn[X] visible without holding a lock.

Any holder of the lock after spin_barrier() has completed will read 0 
and not try to use the lock.

But the update of v->virq_to_evtchn[X] should have used either 
ACCESS_ONCE() or write_atomic().
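
I.e. something like (sketch):

     /* Writer side, in evtchn_close(): */
     write_atomic(&v->virq_to_evtchn[chn1->u.virq], 0);

     /* Reader side, in send_guest_{vcpu,global}_virq(): */
     port = read_atomic(&v->virq_to_evtchn[virq]);

so the compiler can neither tear nor re-load the accesses.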

Cheers,
Jan Beulich Sept. 30, 2020, 6:26 a.m. UTC | #4
On 29.09.2020 19:18, Julien Grall wrote:
> On 29/09/2020 14:37, Jan Beulich wrote:
>> On 29.09.2020 15:03, Julien Grall wrote:
>>> I am thinking that it may be easier to hold the write lock when doing
>>> the update.
>>
>> ... perhaps this is indeed better. I have to admit that I never
>> fully understood the benefit of using spin_barrier() in this code
>> (as opposed to the use in evtchn_destroy()).
> 
> I am not entirely sure either. It looks like it is an attempt to make 
> v->virq_to_evtchn[X] visible without holding a lock.
> 
> Any holder of the lock after spin_barrier() has completed will read 0 
> and not try to use the lock.

I'm not sure I follow: A holder of the lock is obviously already
making use of the lock. Or are you talking of two different locks
here (recall that before XSA-343 there was just one lock involved
in sending)?

> But the update of v->virq_to_evtchn[X] should have used either 
> ACCESS_ONCE() or write_atomic().

Of course, like in so many other places in the code base.

Jan
Paul Durrant Sept. 30, 2020, 7:58 a.m. UTC | #5
> -----Original Message-----
> From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of Jan Beulich
> Sent: 28 September 2020 12:02
> To: xen-devel@lists.xenproject.org
> Subject: [PATCH 11/12] evtchn: convert vIRQ lock to an r/w one
> 
> There's no need to serialize all sending of vIRQ-s; all that's needed
> is serialization against the closing of the respective event channels
> (by means of a barrier). To facilitate the conversion, introduce a new
> rw_barrier().
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -160,7 +160,7 @@ struct vcpu *vcpu_create(struct domain *
>      v->vcpu_id = vcpu_id;
>      v->dirty_cpu = VCPU_CPU_CLEAN;
> 
> -    spin_lock_init(&v->virq_lock);
> +    rwlock_init(&v->virq_lock);
> 
>      tasklet_init(&v->continue_hypercall_tasklet, NULL, NULL);
> 
> --- a/xen/common/event_channel.c
> +++ b/xen/common/event_channel.c
> @@ -640,7 +640,7 @@ int evtchn_close(struct domain *d1, int
>              if ( v->virq_to_evtchn[chn1->u.virq] != port1 )
>                  continue;
>              v->virq_to_evtchn[chn1->u.virq] = 0;
> -            spin_barrier(&v->virq_lock);
> +            rw_barrier(&v->virq_lock);
>          }
>          break;
> 
> @@ -794,7 +794,7 @@ void send_guest_vcpu_virq(struct vcpu *v
> 
>      ASSERT(!virq_is_global(virq));
> 
> -    spin_lock_irqsave(&v->virq_lock, flags);
> +    read_lock_irqsave(&v->virq_lock, flags);
> 
>      port = v->virq_to_evtchn[virq];
>      if ( unlikely(port == 0) )
> @@ -807,7 +807,7 @@ void send_guest_vcpu_virq(struct vcpu *v
>      spin_unlock(&chn->lock);
> 
>   out:
> -    spin_unlock_irqrestore(&v->virq_lock, flags);
> +    read_unlock_irqrestore(&v->virq_lock, flags);
>  }
> 
>  void send_guest_global_virq(struct domain *d, uint32_t virq)
> @@ -826,7 +826,7 @@ void send_guest_global_virq(struct domai
>      if ( unlikely(v == NULL) )
>          return;
> 
> -    spin_lock_irqsave(&v->virq_lock, flags);
> +    read_lock_irqsave(&v->virq_lock, flags);
> 
>      port = v->virq_to_evtchn[virq];
>      if ( unlikely(port == 0) )
> @@ -838,7 +838,7 @@ void send_guest_global_virq(struct domai
>      spin_unlock(&chn->lock);
> 
>   out:
> -    spin_unlock_irqrestore(&v->virq_lock, flags);
> +    read_unlock_irqrestore(&v->virq_lock, flags);
>  }
> 
>  void send_guest_pirq(struct domain *d, const struct pirq *pirq)
> --- a/xen/common/spinlock.c
> +++ b/xen/common/spinlock.c
> @@ -2,7 +2,7 @@
>  #include <xen/irq.h>
>  #include <xen/smp.h>
>  #include <xen/time.h>
> -#include <xen/spinlock.h>
> +#include <xen/rwlock.h>
>  #include <xen/guest_access.h>
>  #include <xen/preempt.h>
>  #include <public/sysctl.h>
> @@ -334,6 +334,12 @@ void _spin_unlock_recursive(spinlock_t *
>      }
>  }
> 
> +void _rw_barrier(rwlock_t *lock)
> +{
> +    check_barrier(&lock->lock.debug);
> +    do { smp_mb(); } while ( _rw_is_locked(lock) );
> +}

Should you not have a cpu_relax() somewhere in here?

TBH though, the fact this lock is never taken as a writer makes me
wonder whether there needs to be a lock at all.

  Paul

> +
>  #ifdef CONFIG_DEBUG_LOCK_PROFILE
> 
>  struct lock_profile_anc {
> --- a/xen/include/xen/rwlock.h
> +++ b/xen/include/xen/rwlock.h
> @@ -237,6 +237,8 @@ static inline int _rw_is_write_locked(rw
>      return (atomic_read(&lock->cnts) & _QW_WMASK) == _QW_LOCKED;
>  }
> 
> +void _rw_barrier(rwlock_t *lock);
> +
>  #define read_lock(l)                  _read_lock(l)
>  #define read_lock_irq(l)              _read_lock_irq(l)
>  #define read_lock_irqsave(l, f)                                 \
> @@ -266,6 +268,7 @@ static inline int _rw_is_write_locked(rw
>  #define rw_is_locked(l)               _rw_is_locked(l)
>  #define rw_is_write_locked(l)         _rw_is_write_locked(l)
> 
> +#define rw_barrier(l)                 _rw_barrier(l)
> 
>  typedef struct percpu_rwlock percpu_rwlock_t;
> 
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -235,7 +235,7 @@ struct vcpu
> 
>      /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
>      evtchn_port_t    virq_to_evtchn[NR_VIRQS];
> -    spinlock_t       virq_lock;
> +    rwlock_t         virq_lock;
> 
>      /* Tasklet for continue_hypercall_on_cpu(). */
>      struct tasklet   continue_hypercall_tasklet;
>
Jan Beulich Sept. 30, 2020, 8:37 a.m. UTC | #6
On 30.09.2020 09:58, Paul Durrant wrote:
>> From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of Jan Beulich
>> Sent: 28 September 2020 12:02
>>
>> @@ -334,6 +334,12 @@ void _spin_unlock_recursive(spinlock_t *
>>      }
>>  }
>>
>> +void _rw_barrier(rwlock_t *lock)
>> +{
>> +    check_barrier(&lock->lock.debug);
>> +    do { smp_mb(); } while ( _rw_is_locked(lock) );
>> +}
> 
> Should you not have a cpu_relax() somewhere in here?
> 
> TBH though, the fact this lock is never taken as a writer makes me
> wonder whether there needs to be a lock at all.

For both of these, see the discussion Julien and I already had. The
construct will now move to the subsequent patch anyway, and change
as per Julien's request.

Jan
Paul Durrant Sept. 30, 2020, 8:52 a.m. UTC | #7
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: 30 September 2020 09:37
> To: paul@xen.org
> Subject: Re: [PATCH 11/12] evtchn: convert vIRQ lock to an r/w one
> 
> On 30.09.2020 09:58, Paul Durrant wrote:
> >> From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of Jan Beulich
> >> Sent: 28 September 2020 12:02
> >>
> >> @@ -334,6 +334,12 @@ void _spin_unlock_recursive(spinlock_t *
> >>      }
> >>  }
> >>
> >> +void _rw_barrier(rwlock_t *lock)
> >> +{
> >> +    check_barrier(&lock->lock.debug);
> >> +    do { smp_mb(); } while ( _rw_is_locked(lock) );
> >> +}
> >
> > Should you not have a cpu_relax() somewhere in here?
> >
> > TBH though, the fact this lock is never taken as a writer makes me
> > wonder whether there needs to be a lock at all.
> 
> For both of these - see the discussion Julien and I also had. The
> construct will now move to the subsequent patch anyway, and change
> as per Julien's request.
> 

OK.

Looking again, given that both send_guest_vcpu_virq() and
send_guest_global_virq() (rightly) hold the evtchn lock before
calling evtchn_port_set_pending() I think you could do away with
the virq lock by adding checks in those functions to verify
evtchn->state == ECS_VIRQ and u.virq == virq after having
acquired the channel lock but before calling
evtchn_port_set_pending().
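
I.e. something like this in the two senders (sketch, untested):

     chn = evtchn_from_port(d, port);
     spin_lock(&chn->lock);
     /* Re-check, under the channel lock, that the port is still our vIRQ. */
     if ( chn->state == ECS_VIRQ && chn->u.virq == virq )
         evtchn_port_set_pending(d, v->vcpu_id, chn);
     spin_unlock(&chn->lock);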

  Paul

> Jan
Julien Grall Sept. 30, 2020, 9:09 a.m. UTC | #8
Hi Jan,

On 30/09/2020 07:26, Jan Beulich wrote:
> On 29.09.2020 19:18, Julien Grall wrote:
>> On 29/09/2020 14:37, Jan Beulich wrote:
>>> On 29.09.2020 15:03, Julien Grall wrote:
>>>> I am thinking that it may be easier to hold the write lock when doing
>>>> the update.
>>>
>>> ... perhaps this is indeed better. I have to admit that I never
>>> fully understood the benefit of using spin_barrier() in this code
>>> (as opposed to the use in evtchn_destroy()).
>>
>> I am not entirely sure either. It looks like it is an attempt to make
>> v->virq_to_evtchn[X] visible without holding a lock.
>>
>> Any holder of the lock after spin_barrier() has completed will read 0
>> and not try to use the lock.
> 
> I'm not sure I follow: A holder of the lock is obviously already
> making use of the lock.

My point is that the barrier is meant to split the holders of the lock into 
two categories:
    - Anyone acquiring the lock before the spin_barrier() completed may 
have seen the port either open or closed.
    - Anyone acquiring the lock after the spin_barrier() completed will 
see a closed port.

> Or are you talking of two different locks
> here (recall that before XSA-343 there was just one lock involved
> in sending)?
> 
>> But the update of v->virq_to_evtchn[X] should have used either
>> ACCESS_ONCE() or write_atomic().
> 
> Of course, like in so many other places in the code base.

This is known. What I meant is that if we are going to continue to use a 
spin_barrier() (or rw_barrier()), then we should also switch to using 
ACCESS_ONCE()/write_atomic().

Anyway, I think we discussed acquiring the write lock instead, so it 
should not be a concern.

Cheers,
Jan Beulich Sept. 30, 2020, 10:16 a.m. UTC | #9
On 30.09.2020 10:52, Paul Durrant wrote:
> Looking again, given that both send_guest_vcpu_virq() and
> send_guest_global_virq() (rightly) hold the evtchn lock before
> calling evtchn_port_set_pending() I think you could do away with
> the virq lock by adding checks in those functions to verify
> evtchn->state == ECS_VIRQ and u.virq == virq after having
> acquired the channel lock but before calling
> evtchn_port_set_pending().

I don't think so: The adjustment of v->virq_to_evtchn[] in
evtchn_close() would then happen with just the domain's event
lock held, which the sending paths don't use at all. The per-
channel lock gets acquired in evtchn_close() a bit later only
(and this lock can't possibly protect per-vCPU state).

In fact I'm now getting puzzled by evtchn_bind_virq() updating
this array with (just) the per-domain lock held. Since it's
the last thing in the function, there's technically no strict
need for acquiring the vIRQ lock, but holding the event lock
definitely doesn't help. All that looks to be needed is the
barrier implied by write_unlock().
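
For context, the update in question is the tail of evtchn_bind_virq(), 
roughly (quoting from memory, so the exact shape may differ):

     spin_lock(&d->event_lock);
     ...
     v->virq_to_evtchn[virq] = bind->port = port;

  out:
     spin_unlock(&d->event_lock);
     return rc;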

Jan
Julien Grall Oct. 1, 2020, 4:21 p.m. UTC | #10
Hi Jan,

On 30/09/2020 11:16, Jan Beulich wrote:
> On 30.09.2020 10:52, Paul Durrant wrote:
>> Looking again, given that both send_guest_vcpu_virq() and
>> send_guest_global_virq() (rightly) hold the evtchn lock before
>> calling evtchn_port_set_pending() I think you could do away with
>> the virq lock by adding checks in those functions to verify
>> evtchn->state == ECS_VIRQ and u.virq == virq after having
>> acquired the channel lock but before calling
>> evtchn_port_set_pending().
> 
> I don't think so: The adjustment of v->virq_to_evtchn[] in
> evtchn_close() would then happen with just the domain's event
> lock held, which the sending paths don't use at all. The per-
> channel lock gets acquired in evtchn_close() a bit later only
> (and this lock can't possibly protect per-vCPU state).
> 
> In fact I'm now getting puzzled by evtchn_bind_virq() updating
> this array with (just) the per-domain lock held. Since it's
> the last thing in the function, there's technically no strict
> need for acquiring the vIRQ lock,

Well, we at least need to prevent the compiler from tearing the store/load. 
If we don't use a lock, then we should use ACCESS_ONCE() or 
{read,write}_atomic() for all the uses.

> but holding the event lock
> definitely doesn't help.

It helps because spin_unlock() and write_unlock() use the same barrier 
(arch_lock_release_barrier()). So ...

> All that looks to be needed is the
> barrier implied by write_unlock().

No barrier should be necessary. I would suggest adding a 
comment explaining why, though.

Cheers,
Jan Beulich Oct. 2, 2020, 6:12 a.m. UTC | #11
On 01.10.2020 18:21, Julien Grall wrote:
> On 30/09/2020 11:16, Jan Beulich wrote:
>> On 30.09.2020 10:52, Paul Durrant wrote:
>>> Looking again, given that both send_guest_vcpu_virq() and
>>> send_guest_global_virq() (rightly) hold the evtchn lock before
>>> calling evtchn_port_set_pending() I think you could do away with
>>> the virq lock by adding checks in those functions to verify
>>> evtchn->state == ECS_VIRQ and u.virq == virq after having
>>> acquired the channel lock but before calling
>>> evtchn_port_set_pending().
>>
>> I don't think so: The adjustment of v->virq_to_evtchn[] in
>> evtchn_close() would then happen with just the domain's event
>> lock held, which the sending paths don't use at all. The per-
>> channel lock gets acquired in evtchn_close() a bit later only
>> (and this lock can't possibly protect per-vCPU state).
>>
>> In fact I'm now getting puzzled by evtchn_bind_virq() updating
>> this array with (just) the per-domain lock held. Since it's
>> the last thing in the function, there's technically no strict
>> need for acquiring the vIRQ lock,
> 
> Well, we at least need to prevent the compiler to tear the store/load. 
> If we don't use a lock, then we would should use ACCESS_ONCE() or 
> {read,write}_atomic() for all the usage.
> 
>> but holding the event lock
>> definitely doesn't help.
> 
> It helps because spin_unlock() and write_unlock() use the same barrier 
> (arch_lock_release_barrier()). So ...

I'm having trouble making this part of your reply fit ...

>> All that looks to be needed is the
>> barrier implied from write_unlock().
> 
> No barrier should be necessary. Although, I would suggest to add a 
> comment explaining it.

... this. If we moved the update of v->virq_to_evtchn[] out of the
locked region (as the lock doesn't protect anything anymore at that
point), I think a barrier would need adding, such that the sending
paths will observe the update by the time evtchn_bind_virq()
returns (and hence sending of a respective vIRQ event can
legitimately be expected to actually work). Or did you possibly
just misunderstand what I wrote before? By putting in question the
utility of holding the event lock, I implied the write could be
moved out of the locked region ...
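
I.e., purely to illustrate (hypothetical, not a proposal):

     spin_unlock(&d->event_lock);

     v->virq_to_evtchn[virq] = bind->port = port;
     smp_wmb(); /* publish the binding before the function returns */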

Jan
Julien Grall Oct. 2, 2020, 8:43 a.m. UTC | #12
Hi Jan,

On 02/10/2020 07:12, Jan Beulich wrote:
> On 01.10.2020 18:21, Julien Grall wrote:
>> On 30/09/2020 11:16, Jan Beulich wrote:
>>> On 30.09.2020 10:52, Paul Durrant wrote:
>>>> Looking again, given that both send_guest_vcpu_virq() and
>>>> send_guest_global_virq() (rightly) hold the evtchn lock before
>>>> calling evtchn_port_set_pending() I think you could do away with
>>>> the virq lock by adding checks in those functions to verify
>>>> evtchn->state == ECS_VIRQ and u.virq == virq after having
>>>> acquired the channel lock but before calling
>>>> evtchn_port_set_pending().
>>>
>>> I don't think so: The adjustment of v->virq_to_evtchn[] in
>>> evtchn_close() would then happen with just the domain's event
>>> lock held, which the sending paths don't use at all. The per-
>>> channel lock gets acquired in evtchn_close() a bit later only
>>> (and this lock can't possibly protect per-vCPU state).
>>>
>>> In fact I'm now getting puzzled by evtchn_bind_virq() updating
>>> this array with (just) the per-domain lock held. Since it's
>>> the last thing in the function, there's technically no strict
>>> need for acquiring the vIRQ lock,
>>
>> Well, we at least need to prevent the compiler from tearing the store/load.
>> If we don't use a lock, then we should use ACCESS_ONCE() or
>> {read,write}_atomic() for all the uses.
>>
>>> but holding the event lock
>>> definitely doesn't help.
>>
>> It helps because spin_unlock() and write_unlock() use the same barrier
>> (arch_lock_release_barrier()). So ...
> 
> I'm having trouble making this part of your reply fit ...
> 
>>> All that looks to be needed is the
>>> barrier implied by write_unlock().
>>
>> No barrier should be necessary. I would suggest adding a
>> comment explaining why, though.
> 
> ... this. If we moved the update of v->virq_to_evtchn[] out of the
> locked region (as the lock doesn't protect anything anymore at that
> point), I think a barrier would need adding, such that the sending
> paths will observe the update by the time evtchn_bind_virq()
> returns (and hence sending of a respective vIRQ event can
> legitimately be expected to actually work). Or did you possibly
> just misunderstand what I wrote before? By putting in question the
> utility of holding the event lock, I implied the write could be
> moved out of the locked region ...

We are probably talking past each other... My point was that if we leave 
the write where it currently is, then we don't need an extra barrier 
because the spin_unlock() already contains the barrier we want.

Hence the suggestion to add a comment so a reader doesn't spend time 
wondering how this is safe...
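
E.g. (a sketch of the suggested comment):

     /*
      * No explicit barrier is needed here: the release semantics of the
      * subsequent spin_unlock(&d->event_lock) make this store visible
      * to the sending paths.
      */
     v->virq_to_evtchn[virq] = bind->port = port;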

Cheers,

Patch

--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -160,7 +160,7 @@  struct vcpu *vcpu_create(struct domain *
     v->vcpu_id = vcpu_id;
     v->dirty_cpu = VCPU_CPU_CLEAN;
 
-    spin_lock_init(&v->virq_lock);
+    rwlock_init(&v->virq_lock);
 
     tasklet_init(&v->continue_hypercall_tasklet, NULL, NULL);
 
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -640,7 +640,7 @@  int evtchn_close(struct domain *d1, int
             if ( v->virq_to_evtchn[chn1->u.virq] != port1 )
                 continue;
             v->virq_to_evtchn[chn1->u.virq] = 0;
-            spin_barrier(&v->virq_lock);
+            rw_barrier(&v->virq_lock);
         }
         break;
 
@@ -794,7 +794,7 @@  void send_guest_vcpu_virq(struct vcpu *v
 
     ASSERT(!virq_is_global(virq));
 
-    spin_lock_irqsave(&v->virq_lock, flags);
+    read_lock_irqsave(&v->virq_lock, flags);
 
     port = v->virq_to_evtchn[virq];
     if ( unlikely(port == 0) )
@@ -807,7 +807,7 @@  void send_guest_vcpu_virq(struct vcpu *v
     spin_unlock(&chn->lock);
 
  out:
-    spin_unlock_irqrestore(&v->virq_lock, flags);
+    read_unlock_irqrestore(&v->virq_lock, flags);
 }
 
 void send_guest_global_virq(struct domain *d, uint32_t virq)
@@ -826,7 +826,7 @@  void send_guest_global_virq(struct domai
     if ( unlikely(v == NULL) )
         return;
 
-    spin_lock_irqsave(&v->virq_lock, flags);
+    read_lock_irqsave(&v->virq_lock, flags);
 
     port = v->virq_to_evtchn[virq];
     if ( unlikely(port == 0) )
@@ -838,7 +838,7 @@  void send_guest_global_virq(struct domai
     spin_unlock(&chn->lock);
 
  out:
-    spin_unlock_irqrestore(&v->virq_lock, flags);
+    read_unlock_irqrestore(&v->virq_lock, flags);
 }
 
 void send_guest_pirq(struct domain *d, const struct pirq *pirq)
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -2,7 +2,7 @@ 
 #include <xen/irq.h>
 #include <xen/smp.h>
 #include <xen/time.h>
-#include <xen/spinlock.h>
+#include <xen/rwlock.h>
 #include <xen/guest_access.h>
 #include <xen/preempt.h>
 #include <public/sysctl.h>
@@ -334,6 +334,12 @@  void _spin_unlock_recursive(spinlock_t *
     }
 }
 
+void _rw_barrier(rwlock_t *lock)
+{
+    check_barrier(&lock->lock.debug);
+    do { smp_mb(); } while ( _rw_is_locked(lock) );
+}
+
 #ifdef CONFIG_DEBUG_LOCK_PROFILE
 
 struct lock_profile_anc {
--- a/xen/include/xen/rwlock.h
+++ b/xen/include/xen/rwlock.h
@@ -237,6 +237,8 @@  static inline int _rw_is_write_locked(rw
     return (atomic_read(&lock->cnts) & _QW_WMASK) == _QW_LOCKED;
 }
 
+void _rw_barrier(rwlock_t *lock);
+
 #define read_lock(l)                  _read_lock(l)
 #define read_lock_irq(l)              _read_lock_irq(l)
 #define read_lock_irqsave(l, f)                                 \
@@ -266,6 +268,7 @@  static inline int _rw_is_write_locked(rw
 #define rw_is_locked(l)               _rw_is_locked(l)
 #define rw_is_write_locked(l)         _rw_is_write_locked(l)
 
+#define rw_barrier(l)                 _rw_barrier(l)
 
 typedef struct percpu_rwlock percpu_rwlock_t;
 
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -235,7 +235,7 @@  struct vcpu
 
     /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
     evtchn_port_t    virq_to_evtchn[NR_VIRQS];
-    spinlock_t       virq_lock;
+    rwlock_t         virq_lock;
 
     /* Tasklet for continue_hypercall_on_cpu(). */
     struct tasklet   continue_hypercall_tasklet;