diff mbox series

[v5,1/5] xen/spinlocks: in debug builds store cpu holding the lock

Message ID 20190912132813.6509-2-jgross@suse.com (mailing list archive)
State New, archived
Headers show
Series enhance lock debugging | expand

Commit Message

Jürgen Groß Sept. 12, 2019, 1:28 p.m. UTC
Add the cpu currently holding the lock to struct lock_debug. This makes
analysis of locking errors easier and it can be tested whether the
correct cpu is releasing a lock again.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- adjust types (Jan Beulich)
V4:
- add define for bitfield size to store cpu number (Jan Beulich)
- make padding field unnamed (Jan Beulich)
V5:
- add BUILD_BUG_ON()s to verify bitfields don't have size 0 (Jan Beulich)
---
 xen/common/spinlock.c      | 36 +++++++++++++++++++++++++++++-------
 xen/include/xen/spinlock.h | 29 ++++++++++++++++++++---------
 2 files changed, 49 insertions(+), 16 deletions(-)

Comments

Jan Beulich Sept. 12, 2019, 1:36 p.m. UTC | #1
On 12.09.2019 15:28, Juergen Gross wrote:
> @@ -267,6 +288,7 @@ int _spin_trylock_recursive(spinlock_t *lock)
>  
>      /* Don't allow overflow of recurse_cpu field. */
>      BUILD_BUG_ON(NR_CPUS > SPINLOCK_NO_CPU);
> +    BUILD_BUG_ON(SPINLOCK_RECURSE_BITS <= 0);

This is too weak: While I don't think we strictly need 15 levels of
recursion, I also don't think we'll get away with just 1. I think
this minimally needs to be "<= 1", perhaps better "<= 2". Other
thoughts (also by others) on the precise value to use here
appreciated. With this suitably taken care of (which can happen
while committing, but must not be forgotten)
Reviewed-by: Jan Beulich <jbeulich@suse.com>

Jan
Jürgen Groß Sept. 12, 2019, 1:42 p.m. UTC | #2
On 12.09.19 15:36, Jan Beulich wrote:
> On 12.09.2019 15:28, Juergen Gross wrote:
>> @@ -267,6 +288,7 @@ int _spin_trylock_recursive(spinlock_t *lock)
>>   
>>       /* Don't allow overflow of recurse_cpu field. */
>>       BUILD_BUG_ON(NR_CPUS > SPINLOCK_NO_CPU);
>> +    BUILD_BUG_ON(SPINLOCK_RECURSE_BITS <= 0);
> 
> This is too weak: While I don't think we strictly need 15 levels of
> recursion, I also don't think we'll get away with just 1. I think
> this minimally needs to be "<= 1", perhaps better "<= 2". Other
> thoughts (also by others) on the precise value to use here
> appreciated. With this suitably taken care of (which can happen
> while committing, but must not be forgotten)
> Reviewed-by: Jan Beulich <jbeulich@suse.com>

"2" should be no problem, as the other added

BUILD_BUG_ON(LOCK_DEBUG_PAD_BITS <= 0);

is implying that already.


Juergen
Jan Beulich Sept. 12, 2019, 2:30 p.m. UTC | #3
On 12.09.2019 15:42, Juergen Gross wrote:
> On 12.09.19 15:36, Jan Beulich wrote:
>> On 12.09.2019 15:28, Juergen Gross wrote:
>>> @@ -267,6 +288,7 @@ int _spin_trylock_recursive(spinlock_t *lock)
>>>   
>>>       /* Don't allow overflow of recurse_cpu field. */
>>>       BUILD_BUG_ON(NR_CPUS > SPINLOCK_NO_CPU);
>>> +    BUILD_BUG_ON(SPINLOCK_RECURSE_BITS <= 0);
>>
>> This is too weak: While I don't think we strictly need 15 levels of
>> recursion, I also don't think we'll get away with just 1. I think
>> this minimally needs to be "<= 1", perhaps better "<= 2". Other
>> thoughts (also by others) on the precise value to use here
>> appreciated. With this suitably taken care of (which can happen
>> while committing, but must not be forgotten)
>> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> 
> "2" should be no problem, as the other added
> 
> BUILD_BUG_ON(LOCK_DEBUG_PAD_BITS <= 0);
> 
> is implying that already.

That's not the point though - after your change has gone in,
the two bitfields may change independently. The question is what
recursion depth we think we minimally need to run the code as it
is right now. For example, I'm not sure how much nesting we need
for the PCI devices lock right now. (For others I think there's
not going to be deeper than two or three levels of nesting.)

Jan
diff mbox series

Patch

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 6bc52d70c0..6bb1709881 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -13,9 +13,11 @@ 
 
 static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);
 
-static void check_lock(struct lock_debug *debug)
+static void check_lock(union lock_debug *debug)
 {
-    int irq_safe = !local_irq_is_enabled();
+    bool irq_safe = !local_irq_is_enabled();
+
+    BUILD_BUG_ON(LOCK_DEBUG_PAD_BITS <= 0);
 
     if ( unlikely(atomic_read(&spin_debug) <= 0) )
         return;
@@ -43,18 +45,21 @@  static void check_lock(struct lock_debug *debug)
      */
     if ( unlikely(debug->irq_safe != irq_safe) )
     {
-        int seen = cmpxchg(&debug->irq_safe, -1, irq_safe);
+        union lock_debug seen, new = { 0 };
+
+        new.irq_safe = irq_safe;
+        seen.val = cmpxchg(&debug->val, LOCK_DEBUG_INITVAL, new.val);
 
-        if ( seen == !irq_safe )
+        if ( !seen.unseen && seen.irq_safe == !irq_safe )
         {
             printk("CHECKLOCK FAILURE: prev irqsafe: %d, curr irqsafe %d\n",
-                   seen, irq_safe);
+                   seen.irq_safe, irq_safe);
             BUG();
         }
     }
 }
 
-static void check_barrier(struct lock_debug *debug)
+static void check_barrier(union lock_debug *debug)
 {
     if ( unlikely(atomic_read(&spin_debug) <= 0) )
         return;
@@ -70,7 +75,18 @@  static void check_barrier(struct lock_debug *debug)
      * However, if we spin on an IRQ-unsafe lock with IRQs disabled then that
      * is clearly wrong, for the same reason outlined in check_lock() above.
      */
-    BUG_ON(!local_irq_is_enabled() && (debug->irq_safe == 0));
+    BUG_ON(!local_irq_is_enabled() && !debug->irq_safe);
+}
+
+static void got_lock(union lock_debug *debug)
+{
+    debug->cpu = smp_processor_id();
+}
+
+static void rel_lock(union lock_debug *debug)
+{
+    ASSERT(debug->cpu == smp_processor_id());
+    debug->cpu = SPINLOCK_NO_CPU;
 }
 
 void spin_debug_enable(void)
@@ -87,6 +103,8 @@  void spin_debug_disable(void)
 
 #define check_lock(l) ((void)0)
 #define check_barrier(l) ((void)0)
+#define got_lock(l) ((void)0)
+#define rel_lock(l) ((void)0)
 
 #endif
 
@@ -150,6 +168,7 @@  void inline _spin_lock_cb(spinlock_t *lock, void (*cb)(void *), void *data)
             cb(data);
         arch_lock_relax();
     }
+    got_lock(&lock->debug);
     LOCK_PROFILE_GOT;
     preempt_disable();
     arch_lock_acquire_barrier();
@@ -181,6 +200,7 @@  void _spin_unlock(spinlock_t *lock)
     arch_lock_release_barrier();
     preempt_enable();
     LOCK_PROFILE_REL;
+    rel_lock(&lock->debug);
     add_sized(&lock->tickets.head, 1);
     arch_lock_signal();
 }
@@ -224,6 +244,7 @@  int _spin_trylock(spinlock_t *lock)
     if ( cmpxchg(&lock->tickets.head_tail,
                  old.head_tail, new.head_tail) != old.head_tail )
         return 0;
+    got_lock(&lock->debug);
 #ifdef CONFIG_LOCK_PROFILE
     if (lock->profile)
         lock->profile->time_locked = NOW();
@@ -267,6 +288,7 @@  int _spin_trylock_recursive(spinlock_t *lock)
 
     /* Don't allow overflow of recurse_cpu field. */
     BUILD_BUG_ON(NR_CPUS > SPINLOCK_NO_CPU);
+    BUILD_BUG_ON(SPINLOCK_RECURSE_BITS <= 0);
 
     check_lock(&lock->debug);
 
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index 2c7415e23a..df37550f02 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -5,15 +5,25 @@ 
 #include <asm/spinlock.h>
 #include <asm/types.h>
 
+#define SPINLOCK_CPU_BITS  12
+
 #ifndef NDEBUG
-struct lock_debug {
-    s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */
+union lock_debug {
+    uint16_t val;
+#define LOCK_DEBUG_INITVAL 0xffff
+    struct {
+        uint16_t cpu:SPINLOCK_CPU_BITS;
+#define LOCK_DEBUG_PAD_BITS (14 - SPINLOCK_CPU_BITS)
+        uint16_t :LOCK_DEBUG_PAD_BITS;
+        bool irq_safe:1;
+        bool unseen:1;
+    };
 };
-#define _LOCK_DEBUG { -1 }
+#define _LOCK_DEBUG { LOCK_DEBUG_INITVAL }
 void spin_debug_enable(void);
 void spin_debug_disable(void);
 #else
-struct lock_debug { };
+union lock_debug { };
 #define _LOCK_DEBUG { }
 #define spin_debug_enable() ((void)0)
 #define spin_debug_disable() ((void)0)
@@ -138,11 +148,12 @@  typedef union {
 
 typedef struct spinlock {
     spinlock_tickets_t tickets;
-    u16 recurse_cpu:12;
-#define SPINLOCK_NO_CPU 0xfffu
-    u16 recurse_cnt:4;
-#define SPINLOCK_MAX_RECURSE 0xfu
-    struct lock_debug debug;
+    u16 recurse_cpu:SPINLOCK_CPU_BITS;
+#define SPINLOCK_NO_CPU        ((1u << SPINLOCK_CPU_BITS) - 1)
+#define SPINLOCK_RECURSE_BITS  (16 - SPINLOCK_CPU_BITS)
+    u16 recurse_cnt:SPINLOCK_RECURSE_BITS;
+#define SPINLOCK_MAX_RECURSE   ((1u << SPINLOCK_RECURSE_BITS) - 1)
+    union lock_debug debug;
 #ifdef CONFIG_LOCK_PROFILE
     struct lock_profile *profile;
 #endif