[v2,2/2] xen/rwlock: add check_lock() handling to rwlocks

Message ID 20201102131239.14134-3-jgross@suse.com (mailing list archive)
State Superseded
Series xen/locking: fix and enhance lock debugging

Commit Message

Jürgen Groß Nov. 2, 2020, 1:12 p.m. UTC
Checking whether a lock is used consistently with regard to interrupts
being on or off is beneficial for rwlocks, too.

So add check_lock() calls to rwlock functions. For this purpose make
check_lock() globally accessible.
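
For illustration, the consistency check boils down to something like the
following sketch (the struct, its field names and check_lock_sketch() are
simplified stand-ins, not the actual union lock_debug / check_lock() code
in xen/common/spinlock.c):

    /* Simplified illustration only, not the real implementation. */
    struct lock_debug_sketch {
        bool seen;       /* Has the lock been acquired before? */
        bool irq_safe;   /* Was it acquired with interrupts disabled? */
    };

    static void check_lock_sketch(struct lock_debug_sketch *debug, bool try)
    {
        bool irq_safe = !local_irq_is_enabled();

        /* A failed trylock cannot deadlock, so trying with IRQs off is fine. */
        if ( try && irq_safe )
            return;

        if ( !debug->seen )
        {
            /* First acquisition: remember the interrupt state. */
            debug->seen = true;
            debug->irq_safe = irq_safe;
        }
        else
            /* Mixing IRQs-on and IRQs-off users of one lock is a bug. */
            BUG_ON(debug->irq_safe != irq_safe);
    }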

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- call check_lock() unconditionally in try_lock variants (Jan Beulich)
---
 xen/common/spinlock.c      |  3 +--
 xen/include/xen/rwlock.h   | 11 +++++++++++
 xen/include/xen/spinlock.h |  2 ++
 3 files changed, 14 insertions(+), 2 deletions(-)

Comments

Jan Beulich Nov. 3, 2020, 9:02 a.m. UTC | #1
On 02.11.2020 14:12, Juergen Gross wrote:
> --- a/xen/include/xen/rwlock.h
> +++ b/xen/include/xen/rwlock.h
> @@ -56,6 +56,7 @@ static inline int _read_trylock(rwlock_t *lock)
>      u32 cnts;
>  
>      preempt_disable();
> +    check_lock(&lock->lock.debug, true);
>      cnts = atomic_read(&lock->cnts);
>      if ( likely(_can_read_lock(cnts)) )
>      {

I'm sorry for being picky, but this still isn't matching
_spin_trylock(). Perhaps the difference is really benign, but
there the check sits ahead of preempt_disable(). (It has a
clear reason to be this way there, but without a clear reason
for things to be differently here, I think matching ordering
may help, at least to avoid questions.)
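
For reference, the shape being compared against is roughly the following
simplified sketch (the ticket handling is folded into a hypothetical
spin_try_acquire() helper, so this is not the actual _spin_trylock() code):

    int _spin_trylock_sketch(spinlock_t *lock)
    {
        /* The consistency check runs unconditionally, before anything else. */
        check_lock(&lock->debug, true);

        /* spin_try_acquire() stands in for the real ticket cmpxchg logic. */
        if ( !spin_try_acquire(lock) )
            return 0;

        /* Preemption is only disabled once the lock has actually been taken. */
        preempt_disable();
        return 1;
    }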

> @@ -66,6 +67,7 @@ static inline int _read_trylock(rwlock_t *lock)
>           */
>          if ( likely(_can_read_lock(cnts)) )
>              return 1;
> +
>          atomic_sub(_QR_BIAS, &lock->cnts);
>      }
>      preempt_enable();

Stray change?

> @@ -87,7 +89,10 @@ static inline void _read_lock(rwlock_t *lock)
>       * arch_lock_acquire_barrier().
>       */
>      if ( likely(_can_read_lock(cnts)) )
> +    {
> +        check_lock(&lock->lock.debug, false);
>          return;
> +    }
>  
>      /* The slowpath will decrement the reader count, if necessary. */
>      queue_read_lock_slowpath(lock);
> @@ -162,7 +167,10 @@ static inline void _write_lock(rwlock_t *lock)
>       * arch_lock_acquire_barrier().
>       */
>      if ( atomic_cmpxchg(&lock->cnts, 0, _write_lock_val()) == 0 )
> +    {
> +        check_lock(&lock->lock.debug, false);
>          return;
> +    }
>  
>      queue_write_lock_slowpath(lock);
>      /*

Maybe also for these two, but likely more importantly for ...

> @@ -328,6 +337,8 @@ static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
>          /* Drop the read lock because we don't need it anymore. */
>          read_unlock(&percpu_rwlock->rwlock);
>      }
> +    else
> +        check_lock(&percpu_rwlock->rwlock.lock.debug, false);
>  }

... this one a brief comment may be warranted to clarify why
the call sits here rather than at the top?

Jan
Jürgen Groß Nov. 3, 2020, 9:22 a.m. UTC | #2
On 03.11.20 10:02, Jan Beulich wrote:
> On 02.11.2020 14:12, Juergen Gross wrote:
>> --- a/xen/include/xen/rwlock.h
>> +++ b/xen/include/xen/rwlock.h
>> @@ -56,6 +56,7 @@ static inline int _read_trylock(rwlock_t *lock)
>>       u32 cnts;
>>   
>>       preempt_disable();
>> +    check_lock(&lock->lock.debug, true);
>>       cnts = atomic_read(&lock->cnts);
>>       if ( likely(_can_read_lock(cnts)) )
>>       {
> 
> I'm sorry for being picky, but this still isn't matching
> _spin_trylock(). Perhaps the difference is really benign, but
> there the check sits ahead of preempt_disable(). (It has a
> clear reason to be this way there, but without a clear reason
> for things to be differently here, I think matching ordering
> may help, at least to avoid questions.)

I think this is more an optimization opportunity: I'd rather move the
preempt_disable() into the first if clause, as there is no need to
disable preemption in case the first read of the lock already leads
to failure acquiring the lock.

If you want I can prepend a patch doing that optimization.
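
Roughly, the idea would be something like this (just a sketch of the
rearrangement, with the reference-taking step in the middle reconstructed
from the existing function, not the actual patch):

    static inline int _read_trylock(rwlock_t *lock)
    {
        u32 cnts;

        check_lock(&lock->lock.debug, true);
        cnts = atomic_read(&lock->cnts);
        if ( likely(_can_read_lock(cnts)) )
        {
            /* Only disable preemption once the quick check has passed. */
            preempt_disable();

            /* Take the reader reference as the existing code does. */
            cnts = (u32)atomic_add_return(_QR_BIAS, &lock->cnts);
            if ( likely(_can_read_lock(cnts)) )
                return 1;

            atomic_sub(_QR_BIAS, &lock->cnts);
            preempt_enable();
        }
        return 0;
    }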

> 
>> @@ -66,6 +67,7 @@ static inline int _read_trylock(rwlock_t *lock)
>>            */
>>           if ( likely(_can_read_lock(cnts)) )
>>               return 1;
>> +
>>           atomic_sub(_QR_BIAS, &lock->cnts);
>>       }
>>       preempt_enable();
> 
> Stray change?

Oh yes, a leftover from the old positioning of check_lock().

> 
>> @@ -87,7 +89,10 @@ static inline void _read_lock(rwlock_t *lock)
>>        * arch_lock_acquire_barrier().
>>        */
>>       if ( likely(_can_read_lock(cnts)) )
>> +    {
>> +        check_lock(&lock->lock.debug, false);
>>           return;
>> +    }
>>   
>>       /* The slowpath will decrement the reader count, if necessary. */
>>       queue_read_lock_slowpath(lock);
>> @@ -162,7 +167,10 @@ static inline void _write_lock(rwlock_t *lock)
>>        * arch_lock_acquire_barrier().
>>        */
>>       if ( atomic_cmpxchg(&lock->cnts, 0, _write_lock_val()) == 0 )
>> +    {
>> +        check_lock(&lock->lock.debug, false);
>>           return;
>> +    }
>>   
>>       queue_write_lock_slowpath(lock);
>>       /*
> 
> Maybe also for these two, but likely more importantly for ...
> 
>> @@ -328,6 +337,8 @@ static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
>>           /* Drop the read lock because we don't need it anymore. */
>>           read_unlock(&percpu_rwlock->rwlock);
>>       }
>> +    else
>> +        check_lock(&percpu_rwlock->rwlock.lock.debug, false);
>>   }
> 
> ... this one a brief comment may be warranted to clarify why
> the call sits here rather than at the top?

Okay.
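
For illustration, something along these lines (only a sketch, assuming the
rationale is that the contended branch already gets the check implicitly
via read_lock()):

    else
    {
        /*
         * The fast, per-CPU path never touches the rwlock itself, so do the
         * consistency check explicitly here; the contended branch above gets
         * it implicitly via read_lock().
         */
        check_lock(&percpu_rwlock->rwlock.lock.debug, false);
    }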


Juergen
Jan Beulich Nov. 3, 2020, 10:04 a.m. UTC | #3
On 03.11.2020 10:22, Jürgen Groß wrote:
> On 03.11.20 10:02, Jan Beulich wrote:
>> On 02.11.2020 14:12, Juergen Gross wrote:
>>> --- a/xen/include/xen/rwlock.h
>>> +++ b/xen/include/xen/rwlock.h
>>> @@ -56,6 +56,7 @@ static inline int _read_trylock(rwlock_t *lock)
>>>       u32 cnts;
>>>   
>>>       preempt_disable();
>>> +    check_lock(&lock->lock.debug, true);
>>>       cnts = atomic_read(&lock->cnts);
>>>       if ( likely(_can_read_lock(cnts)) )
>>>       {
>>
>> I'm sorry for being picky, but this still isn't matching
>> _spin_trylock(). Perhaps the difference is really benign, but
>> there the check sits ahead of preempt_disable(). (It has a
>> clear reason to be this way there, but without a clear reason
>> for things to be differently here, I think matching ordering
>> may help, at least to avoid questions.)
> 
> I think this is more an optimization opportunity: I'd rather move the
> preempt_disable() into the first if clause, as there is no need to
> disable preemption in case the first read of the lock already leads
> to failure acquiring the lock.
> 
> If you want I can prepend a patch doing that optimization.

I'd appreciate you doing so, yet I'd like to point out that
then the same question remains for _write_trylock(). Perhaps
a similar transformation is possible there, but it'll at
least be more code churn. Which of course isn't a reason not
to go this route there too.

This said - wouldn't what you suggest be wrong if we had
actual preemption in the hypervisor? Preemption hitting
between e.g. these two lines

    cnts = atomic_read(&lock->cnts);
    if ( likely(_can_read_lock(cnts)) )

would not yield the intended result, would it? (It wouldn't
affect correctness afaics, because the caller has to be
prepared anyway for the attempt to fail, but the number of
effectively false negatives would grow, as would the number
of cases where the slower path is taken for no reason.)

Question therefore is how much we care about keeping code
ready for "real" preemption, when we have ample other places
that would need changing first, before such could be enabled.

Jan
Jürgen Groß Nov. 3, 2020, 10:17 a.m. UTC | #4
On 03.11.20 11:04, Jan Beulich wrote:
> On 03.11.2020 10:22, Jürgen Groß wrote:
>> On 03.11.20 10:02, Jan Beulich wrote:
>>> On 02.11.2020 14:12, Juergen Gross wrote:
>>>> --- a/xen/include/xen/rwlock.h
>>>> +++ b/xen/include/xen/rwlock.h
>>>> @@ -56,6 +56,7 @@ static inline int _read_trylock(rwlock_t *lock)
>>>>        u32 cnts;
>>>>    
>>>>        preempt_disable();
>>>> +    check_lock(&lock->lock.debug, true);
>>>>        cnts = atomic_read(&lock->cnts);
>>>>        if ( likely(_can_read_lock(cnts)) )
>>>>        {
>>>
>>> I'm sorry for being picky, but this still isn't matching
>>> _spin_trylock(). Perhaps the difference is really benign, but
>>> there the check sits ahead of preempt_disable(). (It has a
>>> clear reason to be this way there, but without a clear reason
>>> for things to be differently here, I think matching ordering
>>> may help, at least to avoid questions.)
>>
>> I think this is more an optimization opportunity: I'd rather move the
>> preempt_disable() into the first if clause, as there is no need to
>> disable preemption in case the first read of the lock already leads
>> to failure acquiring the lock.
>>
>> If you want I can prepend a patch doing that optimization.
> 
> I'd appreciate you doing so, yet I'd like to point out that
> then the same question remains for _write_trylock(). Perhaps
> a similar transformation is possible there, but it'll at
> least be more code churn. Which of course isn't a reason not
> to go this route there too.

Shouldn't be very hard. It would just need to split the if clause
into two clauses.
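
Just as a sketch of the idea (the success path is elided, and this is not
the actual patch):

    static inline int _write_trylock(rwlock_t *lock)
    {
        u32 cnts;

        check_lock(&lock->lock.debug, true);
        cnts = atomic_read(&lock->cnts);
        if ( unlikely(cnts) )
            /* Lock already held: fail without disabling preemption. */
            return 0;

        preempt_disable();
        if ( unlikely(atomic_cmpxchg(&lock->cnts, 0, _write_lock_val()) != 0) )
        {
            preempt_enable();
            return 0;
        }

        /* ... success path as in the existing function ... */
        return 1;
    }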

> This said - wouldn't what you suggest be wrong if we had
> actual preemption in the hypervisor? Preemption hitting
> between e.g. these two lines
> 
>      cnts = atomic_read(&lock->cnts);
>      if ( likely(_can_read_lock(cnts)) )
> 
> would not yield the intended result, would it? (It wouldn't
> affect correctness afaics, because the caller has to be
> prepared anyway for the attempt to fail, but the number of
> effectively false negatives would grow, as would the number
> of cases where the slower path is taken for no reason.)

And this in turn would hit _spin_trylock() the same way.

IMO we should harmonize all the trylock variants in this regard:
either they have as small a preemption-disabled section as possible,
or they include the initial test of whether success is possible at
all inside this section.

> Question therefore is how much we care about keeping code
> ready for "real" preemption, when we have ample other places
> that would need changing first, before such could be enabled.

Yes. And depending on the answer regarding the route to go (wide or
narrow no-preemption section), either the rwlock or the spinlock trylock
variants should be adapted.


Juergen
Julien Grall Nov. 3, 2020, 10:25 a.m. UTC | #5
Hi Jan,

On 03/11/2020 10:04, Jan Beulich wrote:
> On 03.11.2020 10:22, Jürgen Groß wrote:
>> On 03.11.20 10:02, Jan Beulich wrote:
>>> On 02.11.2020 14:12, Juergen Gross wrote:
> Question therefore is how much we care about keeping code
> ready for "real" preemption, when we have ample other places
> that would need changing first, before such could be enabled

The question we should ask ourselves is whether we think one would want
to use preemption in Xen.

Some of the emulation in Xen on Arm (e.g. ITS, SMMUv3, set/way) would 
have been easier to implement if the code were preemptible.

I also hear from time to time stakeholders asking for preemptible spin
locks (this is useful for RT).

Therefore, I think there is value in keeping the code as preempt-ready
as possible.

Cheers,
Jan Beulich Nov. 3, 2020, 10:30 a.m. UTC | #6
On 03.11.2020 11:17, Jürgen Groß wrote:
> On 03.11.20 11:04, Jan Beulich wrote:
>> This said - wouldn't what you suggest be wrong if we had
>> actual preemption in the hypervisor? Preemption hitting
>> between e.g. these two lines
>>
>>      cnts = atomic_read(&lock->cnts);
>>      if ( likely(_can_read_lock(cnts)) )
>>
>> would not yield the intended result, would it? (It wouldn't
>> affect correctness afaics, because the caller has to be
>> prepared anyway for the attempt to fail, but the number of
>> effectively false negatives would grow, as would the number
>> of cases where the slower path is taken for no reason.)
> 
> And this in turn would hit _spin_trylock() the same way.

True.

> IMO we should harmonize all the trylock variants in this regard:
> either they have as small a preemption-disabled section as possible,
> or they include the initial test of whether success is possible at
> all inside this section.
> 
>> Question therefore is how much we care about keeping code
>> ready for "real" preemption, when we have ample other places
>> that would need changing first, before such could be enabled.
> 
> Yes. And depending on the answer regarding the route to go (wide or
> narrow no-preemption section), either the rwlock or the spinlock
> trylock variants should be adapted.

Well, personally I'd slightly prefer the adjustment you suggested,
but Julien's subsequent reply points in the other direction.

Jan

Patch

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index b4aaf6bce6..405322c6b8 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -13,7 +13,7 @@ 
 
 static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);
 
-static void check_lock(union lock_debug *debug, bool try)
+void check_lock(union lock_debug *debug, bool try)
 {
     bool irq_safe = !local_irq_is_enabled();
 
@@ -108,7 +108,6 @@  void spin_debug_disable(void)
 
 #else /* CONFIG_DEBUG_LOCKS */
 
-#define check_lock(l, t) ((void)0)
 #define check_barrier(l) ((void)0)
 #define got_lock(l) ((void)0)
 #define rel_lock(l) ((void)0)
diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h
index 427664037a..94496a0f53 100644
--- a/xen/include/xen/rwlock.h
+++ b/xen/include/xen/rwlock.h
@@ -56,6 +56,7 @@  static inline int _read_trylock(rwlock_t *lock)
     u32 cnts;
 
     preempt_disable();
+    check_lock(&lock->lock.debug, true);
     cnts = atomic_read(&lock->cnts);
     if ( likely(_can_read_lock(cnts)) )
     {
@@ -66,6 +67,7 @@  static inline int _read_trylock(rwlock_t *lock)
          */
         if ( likely(_can_read_lock(cnts)) )
             return 1;
+
         atomic_sub(_QR_BIAS, &lock->cnts);
     }
     preempt_enable();
@@ -87,7 +89,10 @@  static inline void _read_lock(rwlock_t *lock)
      * arch_lock_acquire_barrier().
      */
     if ( likely(_can_read_lock(cnts)) )
+    {
+        check_lock(&lock->lock.debug, false);
         return;
+    }
 
     /* The slowpath will decrement the reader count, if necessary. */
     queue_read_lock_slowpath(lock);
@@ -162,7 +167,10 @@  static inline void _write_lock(rwlock_t *lock)
      * arch_lock_acquire_barrier().
      */
     if ( atomic_cmpxchg(&lock->cnts, 0, _write_lock_val()) == 0 )
+    {
+        check_lock(&lock->lock.debug, false);
         return;
+    }
 
     queue_write_lock_slowpath(lock);
     /*
@@ -197,6 +205,7 @@  static inline int _write_trylock(rwlock_t *lock)
     u32 cnts;
 
     preempt_disable();
+    check_lock(&lock->lock.debug, true);
     cnts = atomic_read(&lock->cnts);
     if ( unlikely(cnts) ||
          unlikely(atomic_cmpxchg(&lock->cnts, 0, _write_lock_val()) != 0) )
@@ -328,6 +337,8 @@  static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
         /* Drop the read lock because we don't need it anymore. */
         read_unlock(&percpu_rwlock->rwlock);
     }
+    else
+        check_lock(&percpu_rwlock->rwlock.lock.debug, false);
 }
 
 static inline void _percpu_read_unlock(percpu_rwlock_t **per_cpudata,
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index ca13b600a0..9fa4e600c1 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -21,11 +21,13 @@  union lock_debug {
     };
 };
 #define _LOCK_DEBUG { LOCK_DEBUG_INITVAL }
+void check_lock(union lock_debug *debug, bool try);
 void spin_debug_enable(void);
 void spin_debug_disable(void);
 #else
 union lock_debug { };
 #define _LOCK_DEBUG { }
+#define check_lock(l, t) ((void)0)
 #define spin_debug_enable() ((void)0)
 #define spin_debug_disable() ((void)0)
 #endif