diff mbox series

[1/4] xen/spinlocks: in debug builds store cpu holding the lock

Message ID 20190807143119.8360-2-jgross@suse.com (mailing list archive)
State Superseded
Headers show
Series enhance lock debugging | expand

Commit Message

Jürgen Groß Aug. 7, 2019, 2:31 p.m. UTC
Add the cpu currently holding the lock to struct lock_debug. This makes
analysis of locking errors easier and it can be tested whether the
correct cpu is releasing a lock again.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 xen/common/spinlock.c      | 31 +++++++++++++++++++++++++------
 xen/include/xen/spinlock.h | 16 +++++++++++-----
 2 files changed, 36 insertions(+), 11 deletions(-)

Comments

Andrew Cooper Aug. 7, 2019, 5:05 p.m. UTC | #1
On 07/08/2019 15:31, Juergen Gross wrote:
> Add the cpu currently holding the lock to struct lock_debug. This makes
> analysis of locking errors easier and it can be tested whether the
> correct cpu is releasing a lock again.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>

Looking at the structure:

xen.git/xen$ pahole xen-syms -E -C spinlock
struct spinlock {
	/* typedef spinlock_tickets_t */ union {
		/* typedef u32 */ unsigned int       head_tail;                          /*           4 */
		struct {
			/* typedef u16 */ short unsigned int head;                       /*     0     2 */
			/* typedef u16 */ short unsigned int tail;                       /*     2     2 */
		};                                                                       /*           4 */
	} tickets; /*     0     4 */
	/* typedef u16 */ short unsigned int         recurse_cpu:12;                     /*     4: 4  2 */
	/* typedef u16 */ short unsigned int         recurse_cnt:4;                      /*     4: 0  2 */
	union lock_debug {
		short unsigned int val;                                                  /*           2 */
		struct {
			short unsigned int unused:1;                                     /*     6:15  2 */
			short unsigned int irq_safe:1;                                   /*     6:14  2 */
			short unsigned int pad:2;                                        /*     6:12  2 */
			short unsigned int cpu:12;                                       /*     6: 0  2 */
		};                                                                       /*           2 */
	} debug; /*     6     2 */

	/* size: 8, cachelines: 1, members: 4 */
	/* last cacheline: 8 bytes */
};


we now get two 12-bit CPU fields trying to fit into 4 bytes.  Is it
possible to reuse the recurse_cpu field for debugging as well?

~Andrew
Jürgen Groß Aug. 8, 2019, 4:34 a.m. UTC | #2
On 07.08.19 19:05, Andrew Cooper wrote:
> 
> On 07/08/2019 15:31, Juergen Gross wrote:
>> Add the cpu currently holding the lock to struct lock_debug. This makes
>> analysis of locking errors easier and it can be tested whether the
>> correct cpu is releasing a lock again.
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
> 
> Looking at the structure:
> 
> xen.git/xen$ pahole xen-syms -E -C spinlock
> struct spinlock {
> 	/* typedef spinlock_tickets_t */ union {
> 		/* typedef u32 */ unsigned int       head_tail;                          /*           4 */
> 		struct {
> 			/* typedef u16 */ short unsigned int head;                       /*     0     2 */
> 			/* typedef u16 */ short unsigned int tail;                       /*     2     2 */
> 		};                                                                       /*           4 */
> 	} tickets; /*     0     4 */
> 	/* typedef u16 */ short unsigned int         recurse_cpu:12;                     /*     4: 4  2 */
> 	/* typedef u16 */ short unsigned int         recurse_cnt:4;                      /*     4: 0  2 */
> 	union lock_debug {
> 		short unsigned int val;                                                  /*           2 */
> 		struct {
> 			short unsigned int unused:1;                                     /*     6:15  2 */
> 			short unsigned int irq_safe:1;                                   /*     6:14  2 */
> 			short unsigned int pad:2;                                        /*     6:12  2 */
> 			short unsigned int cpu:12;                                       /*     6: 0  2 */
> 		};                                                                       /*           2 */
> 	} debug; /*     6     2 */
> 
> 	/* size: 8, cachelines: 1, members: 4 */
> 	/* last cacheline: 8 bytes */
> };
> 
> 
> we now get two 12-bit CPU fields trying to fit into 4 bytes.  Is it
> possible to reuse the recurse_cpu field for debugging as well?

I don't think this would be a good idea for two reasons:

- Changing the way recurse_cpu is being used in debug builds might
   result in weird behavior in corer cases. That's not what debug
   additions are meant for.

- We might be able to save 2 bytes, which will then just be unused.
   So no memory saved, but complexity gained.


Juergen
Jan Beulich Aug. 8, 2019, 6:58 a.m. UTC | #3
On 07.08.2019 16:31, Juergen Gross wrote:
> --- a/xen/common/spinlock.c
> +++ b/xen/common/spinlock.c
> @@ -13,9 +13,9 @@
>   
>   static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);
>   
> -static void check_lock(struct lock_debug *debug)
> +static void check_lock(union lock_debug *debug)
>   {
> -    int irq_safe = !local_irq_is_enabled();
> +    unsigned short irq_safe = !local_irq_is_enabled();

bool? Seeing your heavy use of "unsigned short" I realize that
my CodingStyle change committed yesterday still wasn't precise
enough.

> @@ -43,18 +43,21 @@ static void check_lock(struct lock_debug *debug)
>      */
>      if ( unlikely(debug->irq_safe != irq_safe) )
>      {
> -        int seen = cmpxchg(&debug->irq_safe, -1, irq_safe);
> +        union lock_debug seen, new = { 0 };
>   
> -        if ( seen == !irq_safe )
> +        new.irq_safe = irq_safe;
> +        seen.val = cmpxchg(&debug->val, 0xffff, new.val);

This 0xffff should be connected (by way of at least a #define) to
the one used in _LOCK_DEBUG.

> +        if ( !seen.unused && seen.irq_safe == !irq_safe )

While "unused" makes sufficient sense here, ...

> --- a/xen/include/xen/spinlock.h
> +++ b/xen/include/xen/spinlock.h
> @@ -7,14 +7,20 @@
>   #include <xen/percpu.h>
>   
>   #ifndef NDEBUG
> -struct lock_debug {
> -    s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */
> +union lock_debug {
> +    unsigned short val;
> +    struct {
> +        unsigned short unused:1;

... it gives a rather misleading impression of this being an unused
field here. How about e.g. "unseen" instead?

> +        unsigned short irq_safe:1;
> +        unsigned short pad:2;
> +        unsigned short cpu:12;
> +    };
>   };

Do we have an implied assumption somewhere that unsigned short is
exactly 16 bits wide? I think "val" wants to be uint16_t here (as
you really mean "exactly 16 bits"), the two boolean fields want
to be bool, and the remaining two ones unsigned int.

Jan
Jürgen Groß Aug. 8, 2019, 7:51 a.m. UTC | #4
On 08.08.19 08:58, Jan Beulich wrote:
> On 07.08.2019 16:31, Juergen Gross wrote:
>> --- a/xen/common/spinlock.c
>> +++ b/xen/common/spinlock.c
>> @@ -13,9 +13,9 @@
>>    
>>    static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);
>>    
>> -static void check_lock(struct lock_debug *debug)
>> +static void check_lock(union lock_debug *debug)
>>    {
>> -    int irq_safe = !local_irq_is_enabled();
>> +    unsigned short irq_safe = !local_irq_is_enabled();
> 
> bool? Seeing your heavy use of "unsigned short" I realize that
> my CodingStyle change committed yesterday still wasn't precise
> enough.

I wanted to be consistent with the type used in the structure.
I can switch to bool if you like that better.

> 
>> @@ -43,18 +43,21 @@ static void check_lock(struct lock_debug *debug)
>>       */
>>       if ( unlikely(debug->irq_safe != irq_safe) )
>>       {
>> -        int seen = cmpxchg(&debug->irq_safe, -1, irq_safe);
>> +        union lock_debug seen, new = { 0 };
>>    
>> -        if ( seen == !irq_safe )
>> +        new.irq_safe = irq_safe;
>> +        seen.val = cmpxchg(&debug->val, 0xffff, new.val);
> 
> This 0xffff should be connected (by way of at least a #define) to
> the one used in _LOCK_DEBUG.

Okay.

> 
>> +        if ( !seen.unused && seen.irq_safe == !irq_safe )
> 
> While "unused" makes sufficient sense here, ...
> 
>> --- a/xen/include/xen/spinlock.h
>> +++ b/xen/include/xen/spinlock.h
>> @@ -7,14 +7,20 @@
>>    #include <xen/percpu.h>
>>    
>>    #ifndef NDEBUG
>> -struct lock_debug {
>> -    s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */
>> +union lock_debug {
>> +    unsigned short val;
>> +    struct {
>> +        unsigned short unused:1;
> 
> ... it gives a rather misleading impression of this being an unused
> field here. How about e.g. "unseen" instead?

Yes, that seems to be the better choice.

> 
>> +        unsigned short irq_safe:1;
>> +        unsigned short pad:2;
>> +        unsigned short cpu:12;
>> +    };
>>    };
> 
> Do we have an implied assumption somewhere that unsigned short is
> exactly 16 bits wide? I think "val" wants to be uint16_t here (as
> you really mean "exactly 16 bits"), the two boolean fields want
> to be bool, and the remaining two ones unsigned int.

But that would increase the size of the union to 4 bytes instead of 2.
So at least pad and cpu must be unsigned short or (better) uint16_t.


Juergen
Jan Beulich Aug. 8, 2019, 7:59 a.m. UTC | #5
On 08.08.2019 09:51, Juergen Gross wrote:
> On 08.08.19 08:58, Jan Beulich wrote:
>> On 07.08.2019 16:31, Juergen Gross wrote:
>>> +        unsigned short irq_safe:1;
>>> +        unsigned short pad:2;
>>> +        unsigned short cpu:12;
>>> +    };
>>>    };
>>
>> Do we have an implied assumption somewhere that unsigned short is
>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as
>> you really mean "exactly 16 bits"), the two boolean fields want
>> to be bool, and the remaining two ones unsigned int.
> 
> But that would increase the size of the union to 4 bytes instead of 2.
> So at least pad and cpu must be unsigned short or (better) uint16_t.

Oh, right.

Jan
Julien Grall Aug. 8, 2019, 10:28 a.m. UTC | #6
On 08/08/2019 08:51, Juergen Gross wrote:
> On 08.08.19 08:58, Jan Beulich wrote:
>> On 07.08.2019 16:31, Juergen Gross wrote:
>> Do we have an implied assumption somewhere that unsigned short is
>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as
>> you really mean "exactly 16 bits"), the two boolean fields want
>> to be bool, and the remaining two ones unsigned int.
> 
> But that would increase the size of the union to 4 bytes instead of 2.
> So at least pad and cpu must be unsigned short or (better) uint16_t.

How about bool irq_safe:1?

Cheers,
Jan Beulich Aug. 8, 2019, 11:52 a.m. UTC | #7
On 08.08.2019 12:28, Julien Grall wrote:
> On 08/08/2019 08:51, Juergen Gross wrote:
>> On 08.08.19 08:58, Jan Beulich wrote:
>>> On 07.08.2019 16:31, Juergen Gross wrote:
>>> Do we have an implied assumption somewhere that unsigned short is
>>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as
>>> you really mean "exactly 16 bits"), the two boolean fields want
>>> to be bool, and the remaining two ones unsigned int.
>>
>> But that would increase the size of the union to 4 bytes instead of 2.
>> So at least pad and cpu must be unsigned short or (better) uint16_t.
> 
> How about bool irq_safe:1?

That's what I had suggested, indeed. Jürgen's response was for
my "unsigned int" suggestion towards the two non-boolean fields.

Jan
Jürgen Groß Aug. 8, 2019, 11:53 a.m. UTC | #8
On 08.08.19 12:28, Julien Grall wrote:
> 
> 
> On 08/08/2019 08:51, Juergen Gross wrote:
>> On 08.08.19 08:58, Jan Beulich wrote:
>>> On 07.08.2019 16:31, Juergen Gross wrote:
>>> Do we have an implied assumption somewhere that unsigned short is
>>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as
>>> you really mean "exactly 16 bits"), the two boolean fields want
>>> to be bool, and the remaining two ones unsigned int.
>>
>> But that would increase the size of the union to 4 bytes instead of 2.
>> So at least pad and cpu must be unsigned short or (better) uint16_t.
> 
> How about bool irq_safe:1?

I didn't question that, but OTOH I'm not sure doing something like:

struct {
   bool     unseen:1;
   bool     irq_safe:1;
   uint16_t pad:2;
   uint16_t cpu:12;
}

is guaranteed to be 2 bytes long. I think pad will be still put into the
first byte, but the compiler might decide that the 4 bits left won't be
able to hold the next 12 bits so it could start a new uint16_t at offset
2.

Moving the bool types to the end of the struct would avoid that IMHO.


Juergen
Jan Beulich Aug. 8, 2019, 12:18 p.m. UTC | #9
On 08.08.2019 13:53, Juergen Gross wrote:
> On 08.08.19 12:28, Julien Grall wrote:
>> On 08/08/2019 08:51, Juergen Gross wrote:
>>> On 08.08.19 08:58, Jan Beulich wrote:
>>>> On 07.08.2019 16:31, Juergen Gross wrote:
>>>> Do we have an implied assumption somewhere that unsigned short is
>>>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as
>>>> you really mean "exactly 16 bits"), the two boolean fields want
>>>> to be bool, and the remaining two ones unsigned int.
>>>
>>> But that would increase the size of the union to 4 bytes instead of 2.
>>> So at least pad and cpu must be unsigned short or (better) uint16_t.
>>
>> How about bool irq_safe:1?
> 
> I didn't question that, but OTOH I'm not sure doing something like:
> 
> struct {
>    bool     unseen:1;
>    bool     irq_safe:1;
>    uint16_t pad:2;
>    uint16_t cpu:12;
> }
> 
> is guaranteed to be 2 bytes long. I think pad will be still put into the
> first byte, but the compiler might decide that the 4 bits left won't be
> able to hold the next 12 bits so it could start a new uint16_t at offset
> 2.
> 
> Moving the bool types to the end of the struct would avoid that IMHO.

Moving the two bool-s further down will also simplify extraction and
insertion of the "cpu" field.

Jan
Jürgen Groß Aug. 8, 2019, 12:49 p.m. UTC | #10
On 08.08.19 14:18, Jan Beulich wrote:
> On 08.08.2019 13:53, Juergen Gross wrote:
>> On 08.08.19 12:28, Julien Grall wrote:
>>> On 08/08/2019 08:51, Juergen Gross wrote:
>>>> On 08.08.19 08:58, Jan Beulich wrote:
>>>>> On 07.08.2019 16:31, Juergen Gross wrote:
>>>>> Do we have an implied assumption somewhere that unsigned short is
>>>>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as
>>>>> you really mean "exactly 16 bits"), the two boolean fields want
>>>>> to be bool, and the remaining two ones unsigned int.
>>>>
>>>> But that would increase the size of the union to 4 bytes instead of 2.
>>>> So at least pad and cpu must be unsigned short or (better) uint16_t.
>>>
>>> How about bool irq_safe:1?
>>
>> I didn't question that, but OTOH I'm not sure doing something like:
>>
>> struct {
>>     bool     unseen:1;
>>     bool     irq_safe:1;
>>     uint16_t pad:2;
>>     uint16_t cpu:12;
>> }
>>
>> is guaranteed to be 2 bytes long. I think pad will be still put into the
>> first byte, but the compiler might decide that the 4 bits left won't be
>> able to hold the next 12 bits so it could start a new uint16_t at offset
>> 2.
>>
>> Moving the bool types to the end of the struct would avoid that IMHO.
> 
> Moving the two bool-s further down will also simplify extraction and
> insertion of the "cpu" field.

Okay, lets reverse above struct.


Juergen
Andrew Cooper Aug. 8, 2019, 3:32 p.m. UTC | #11
On 08/08/2019 12:53, Juergen Gross wrote:
> On 08.08.19 12:28, Julien Grall wrote:
>>
>>
>> On 08/08/2019 08:51, Juergen Gross wrote:
>>> On 08.08.19 08:58, Jan Beulich wrote:
>>>> On 07.08.2019 16:31, Juergen Gross wrote:
>>>> Do we have an implied assumption somewhere that unsigned short is
>>>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as
>>>> you really mean "exactly 16 bits"), the two boolean fields want
>>>> to be bool, and the remaining two ones unsigned int.
>>>
>>> But that would increase the size of the union to 4 bytes instead of 2.
>>> So at least pad and cpu must be unsigned short or (better) uint16_t.
>>
>> How about bool irq_safe:1?
>
> I didn't question that, but OTOH I'm not sure doing something like:
>
> struct {
>   bool     unseen:1;
>   bool     irq_safe:1;
>   uint16_t pad:2;
>   uint16_t cpu:12;
> }
>
> is guaranteed to be 2 bytes long.

It will be on anything which is GCC-compatible.  All scalar bitfields
gets tightly packed even when they differ in base type.

~Andrew
diff mbox series

Patch

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 6bc52d70c0..4e681cc651 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -13,9 +13,9 @@ 
 
 static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);
 
-static void check_lock(struct lock_debug *debug)
+static void check_lock(union lock_debug *debug)
 {
-    int irq_safe = !local_irq_is_enabled();
+    unsigned short irq_safe = !local_irq_is_enabled();
 
     if ( unlikely(atomic_read(&spin_debug) <= 0) )
         return;
@@ -43,18 +43,21 @@  static void check_lock(struct lock_debug *debug)
      */
     if ( unlikely(debug->irq_safe != irq_safe) )
     {
-        int seen = cmpxchg(&debug->irq_safe, -1, irq_safe);
+        union lock_debug seen, new = { 0 };
 
-        if ( seen == !irq_safe )
+        new.irq_safe = irq_safe;
+        seen.val = cmpxchg(&debug->val, 0xffff, new.val);
+
+        if ( !seen.unused && seen.irq_safe == !irq_safe )
         {
             printk("CHECKLOCK FAILURE: prev irqsafe: %d, curr irqsafe %d\n",
-                   seen, irq_safe);
+                   seen.irq_safe, irq_safe);
             BUG();
         }
     }
 }
 
-static void check_barrier(struct lock_debug *debug)
+static void check_barrier(union lock_debug *debug)
 {
     if ( unlikely(atomic_read(&spin_debug) <= 0) )
         return;
@@ -73,6 +76,17 @@  static void check_barrier(struct lock_debug *debug)
     BUG_ON(!local_irq_is_enabled() && (debug->irq_safe == 0));
 }
 
+static void got_lock(union lock_debug *debug)
+{
+    debug->cpu = smp_processor_id();
+}
+
+static void rel_lock(union lock_debug *debug)
+{
+    ASSERT(debug->cpu == smp_processor_id());
+    debug->cpu = SPINLOCK_NO_CPU;
+}
+
 void spin_debug_enable(void)
 {
     atomic_inc(&spin_debug);
@@ -87,6 +101,8 @@  void spin_debug_disable(void)
 
 #define check_lock(l) ((void)0)
 #define check_barrier(l) ((void)0)
+#define got_lock(l) ((void)0)
+#define rel_lock(l) ((void)0)
 
 #endif
 
@@ -150,6 +166,7 @@  void inline _spin_lock_cb(spinlock_t *lock, void (*cb)(void *), void *data)
             cb(data);
         arch_lock_relax();
     }
+    got_lock(&lock->debug);
     LOCK_PROFILE_GOT;
     preempt_disable();
     arch_lock_acquire_barrier();
@@ -181,6 +198,7 @@  void _spin_unlock(spinlock_t *lock)
     arch_lock_release_barrier();
     preempt_enable();
     LOCK_PROFILE_REL;
+    rel_lock(&lock->debug);
     add_sized(&lock->tickets.head, 1);
     arch_lock_signal();
 }
@@ -224,6 +242,7 @@  int _spin_trylock(spinlock_t *lock)
     if ( cmpxchg(&lock->tickets.head_tail,
                  old.head_tail, new.head_tail) != old.head_tail )
         return 0;
+    got_lock(&lock->debug);
 #ifdef CONFIG_LOCK_PROFILE
     if (lock->profile)
         lock->profile->time_locked = NOW();
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index a811b73bf3..b4881ad433 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -7,14 +7,20 @@ 
 #include <xen/percpu.h>
 
 #ifndef NDEBUG
-struct lock_debug {
-    s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */
+union lock_debug {
+    unsigned short val;
+    struct {
+        unsigned short unused:1;
+        unsigned short irq_safe:1;
+        unsigned short pad:2;
+        unsigned short cpu:12;
+    };
 };
-#define _LOCK_DEBUG { -1 }
+#define _LOCK_DEBUG { 0xffff }
 void spin_debug_enable(void);
 void spin_debug_disable(void);
 #else
-struct lock_debug { };
+union lock_debug { };
 #define _LOCK_DEBUG { }
 #define spin_debug_enable() ((void)0)
 #define spin_debug_disable() ((void)0)
@@ -143,7 +149,7 @@  typedef struct spinlock {
 #define SPINLOCK_NO_CPU 0xfffu
     u16 recurse_cnt:4;
 #define SPINLOCK_MAX_RECURSE 0xfu
-    struct lock_debug debug;
+    union lock_debug debug;
 #ifdef CONFIG_LOCK_PROFILE
     struct lock_profile *profile;
 #endif