Message ID | 20190807143119.8360-2-jgross@suse.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | enhance lock debugging | expand |
On 07/08/2019 15:31, Juergen Gross wrote: > Add the cpu currently holding the lock to struct lock_debug. This makes > analysis of locking errors easier and it can be tested whether the > correct cpu is releasing a lock again. > > Signed-off-by: Juergen Gross <jgross@suse.com> Looking at the structure: xen.git/xen$ pahole xen-syms -E -C spinlock struct spinlock { /* typedef spinlock_tickets_t */ union { /* typedef u32 */ unsigned int head_tail; /* 4 */ struct { /* typedef u16 */ short unsigned int head; /* 0 2 */ /* typedef u16 */ short unsigned int tail; /* 2 2 */ }; /* 4 */ } tickets; /* 0 4 */ /* typedef u16 */ short unsigned int recurse_cpu:12; /* 4: 4 2 */ /* typedef u16 */ short unsigned int recurse_cnt:4; /* 4: 0 2 */ union lock_debug { short unsigned int val; /* 2 */ struct { short unsigned int unused:1; /* 6:15 2 */ short unsigned int irq_safe:1; /* 6:14 2 */ short unsigned int pad:2; /* 6:12 2 */ short unsigned int cpu:12; /* 6: 0 2 */ }; /* 2 */ } debug; /* 6 2 */ /* size: 8, cachelines: 1, members: 4 */ /* last cacheline: 8 bytes */ }; we now get two 12-bit CPU fields trying to fit into 4 bytes. Is it possible to reuse the recurse_cpu field for debugging as well? ~Andrew
On 07.08.19 19:05, Andrew Cooper wrote: > > On 07/08/2019 15:31, Juergen Gross wrote: >> Add the cpu currently holding the lock to struct lock_debug. This makes >> analysis of locking errors easier and it can be tested whether the >> correct cpu is releasing a lock again. >> >> Signed-off-by: Juergen Gross <jgross@suse.com> > > Looking at the structure: > > xen.git/xen$ pahole xen-syms -E -C spinlock > struct spinlock { > /* typedef spinlock_tickets_t */ union { > /* typedef u32 */ unsigned int head_tail; /* 4 */ > struct { > /* typedef u16 */ short unsigned int head; /* 0 2 */ > /* typedef u16 */ short unsigned int tail; /* 2 2 */ > }; /* 4 */ > } tickets; /* 0 4 */ > /* typedef u16 */ short unsigned int recurse_cpu:12; /* 4: 4 2 */ > /* typedef u16 */ short unsigned int recurse_cnt:4; /* 4: 0 2 */ > union lock_debug { > short unsigned int val; /* 2 */ > struct { > short unsigned int unused:1; /* 6:15 2 */ > short unsigned int irq_safe:1; /* 6:14 2 */ > short unsigned int pad:2; /* 6:12 2 */ > short unsigned int cpu:12; /* 6: 0 2 */ > }; /* 2 */ > } debug; /* 6 2 */ > > /* size: 8, cachelines: 1, members: 4 */ > /* last cacheline: 8 bytes */ > }; > > > we now get two 12-bit CPU fields trying to fit into 4 bytes. Is it > possible to reuse the recurse_cpu field for debugging as well? I don't think this would be a good idea for two reasons: - Changing the way recurse_cpu is being used in debug builds might result in weird behavior in corer cases. That's not what debug additions are meant for. - We might be able to save 2 bytes, which will then just be unused. So no memory saved, but complexity gained. Juergen
On 07.08.2019 16:31, Juergen Gross wrote: > --- a/xen/common/spinlock.c > +++ b/xen/common/spinlock.c > @@ -13,9 +13,9 @@ > > static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0); > > -static void check_lock(struct lock_debug *debug) > +static void check_lock(union lock_debug *debug) > { > - int irq_safe = !local_irq_is_enabled(); > + unsigned short irq_safe = !local_irq_is_enabled(); bool? Seeing your heavy use of "unsigned short" I realize that my CodingStyle change committed yesterday still wasn't precise enough. > @@ -43,18 +43,21 @@ static void check_lock(struct lock_debug *debug) > */ > if ( unlikely(debug->irq_safe != irq_safe) ) > { > - int seen = cmpxchg(&debug->irq_safe, -1, irq_safe); > + union lock_debug seen, new = { 0 }; > > - if ( seen == !irq_safe ) > + new.irq_safe = irq_safe; > + seen.val = cmpxchg(&debug->val, 0xffff, new.val); This 0xffff should be connected (by way of at least a #define) to the one used in _LOCK_DEBUG. > + if ( !seen.unused && seen.irq_safe == !irq_safe ) While "unused" makes sufficient sense here, ... > --- a/xen/include/xen/spinlock.h > +++ b/xen/include/xen/spinlock.h > @@ -7,14 +7,20 @@ > #include <xen/percpu.h> > > #ifndef NDEBUG > -struct lock_debug { > - s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */ > +union lock_debug { > + unsigned short val; > + struct { > + unsigned short unused:1; ... it gives a rather misleading impression of this being an unused field here. How about e.g. "unseen" instead? > + unsigned short irq_safe:1; > + unsigned short pad:2; > + unsigned short cpu:12; > + }; > }; Do we have an implied assumption somewhere that unsigned short is exactly 16 bits wide? I think "val" wants to be uint16_t here (as you really mean "exactly 16 bits"), the two boolean fields want to be bool, and the remaining two ones unsigned int. Jan
On 08.08.19 08:58, Jan Beulich wrote: > On 07.08.2019 16:31, Juergen Gross wrote: >> --- a/xen/common/spinlock.c >> +++ b/xen/common/spinlock.c >> @@ -13,9 +13,9 @@ >> >> static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0); >> >> -static void check_lock(struct lock_debug *debug) >> +static void check_lock(union lock_debug *debug) >> { >> - int irq_safe = !local_irq_is_enabled(); >> + unsigned short irq_safe = !local_irq_is_enabled(); > > bool? Seeing your heavy use of "unsigned short" I realize that > my CodingStyle change committed yesterday still wasn't precise > enough. I wanted to be consistent with the type used in the structure. I can switch to bool if you like that better. > >> @@ -43,18 +43,21 @@ static void check_lock(struct lock_debug *debug) >> */ >> if ( unlikely(debug->irq_safe != irq_safe) ) >> { >> - int seen = cmpxchg(&debug->irq_safe, -1, irq_safe); >> + union lock_debug seen, new = { 0 }; >> >> - if ( seen == !irq_safe ) >> + new.irq_safe = irq_safe; >> + seen.val = cmpxchg(&debug->val, 0xffff, new.val); > > This 0xffff should be connected (by way of at least a #define) to > the one used in _LOCK_DEBUG. Okay. > >> + if ( !seen.unused && seen.irq_safe == !irq_safe ) > > While "unused" makes sufficient sense here, ... > >> --- a/xen/include/xen/spinlock.h >> +++ b/xen/include/xen/spinlock.h >> @@ -7,14 +7,20 @@ >> #include <xen/percpu.h> >> >> #ifndef NDEBUG >> -struct lock_debug { >> - s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */ >> +union lock_debug { >> + unsigned short val; >> + struct { >> + unsigned short unused:1; > > ... it gives a rather misleading impression of this being an unused > field here. How about e.g. "unseen" instead? Yes, that seems to be the better choice. > >> + unsigned short irq_safe:1; >> + unsigned short pad:2; >> + unsigned short cpu:12; >> + }; >> }; > > Do we have an implied assumption somewhere that unsigned short is > exactly 16 bits wide? I think "val" wants to be uint16_t here (as > you really mean "exactly 16 bits"), the two boolean fields want > to be bool, and the remaining two ones unsigned int. But that would increase the size of the union to 4 bytes instead of 2. So at least pad and cpu must be unsigned short or (better) uint16_t. Juergen
On 08.08.2019 09:51, Juergen Gross wrote: > On 08.08.19 08:58, Jan Beulich wrote: >> On 07.08.2019 16:31, Juergen Gross wrote: >>> + unsigned short irq_safe:1; >>> + unsigned short pad:2; >>> + unsigned short cpu:12; >>> + }; >>> }; >> >> Do we have an implied assumption somewhere that unsigned short is >> exactly 16 bits wide? I think "val" wants to be uint16_t here (as >> you really mean "exactly 16 bits"), the two boolean fields want >> to be bool, and the remaining two ones unsigned int. > > But that would increase the size of the union to 4 bytes instead of 2. > So at least pad and cpu must be unsigned short or (better) uint16_t. Oh, right. Jan
On 08/08/2019 08:51, Juergen Gross wrote: > On 08.08.19 08:58, Jan Beulich wrote: >> On 07.08.2019 16:31, Juergen Gross wrote: >> Do we have an implied assumption somewhere that unsigned short is >> exactly 16 bits wide? I think "val" wants to be uint16_t here (as >> you really mean "exactly 16 bits"), the two boolean fields want >> to be bool, and the remaining two ones unsigned int. > > But that would increase the size of the union to 4 bytes instead of 2. > So at least pad and cpu must be unsigned short or (better) uint16_t. How about bool irq_safe:1? Cheers,
On 08.08.2019 12:28, Julien Grall wrote: > On 08/08/2019 08:51, Juergen Gross wrote: >> On 08.08.19 08:58, Jan Beulich wrote: >>> On 07.08.2019 16:31, Juergen Gross wrote: >>> Do we have an implied assumption somewhere that unsigned short is >>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as >>> you really mean "exactly 16 bits"), the two boolean fields want >>> to be bool, and the remaining two ones unsigned int. >> >> But that would increase the size of the union to 4 bytes instead of 2. >> So at least pad and cpu must be unsigned short or (better) uint16_t. > > How about bool irq_safe:1? That's what I had suggested, indeed. Jürgen's response was for my "unsigned int" suggestion towards the two non-boolean fields. Jan
On 08.08.19 12:28, Julien Grall wrote: > > > On 08/08/2019 08:51, Juergen Gross wrote: >> On 08.08.19 08:58, Jan Beulich wrote: >>> On 07.08.2019 16:31, Juergen Gross wrote: >>> Do we have an implied assumption somewhere that unsigned short is >>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as >>> you really mean "exactly 16 bits"), the two boolean fields want >>> to be bool, and the remaining two ones unsigned int. >> >> But that would increase the size of the union to 4 bytes instead of 2. >> So at least pad and cpu must be unsigned short or (better) uint16_t. > > How about bool irq_safe:1? I didn't question that, but OTOH I'm not sure doing something like: struct { bool unseen:1; bool irq_safe:1; uint16_t pad:2; uint16_t cpu:12; } is guaranteed to be 2 bytes long. I think pad will be still put into the first byte, but the compiler might decide that the 4 bits left won't be able to hold the next 12 bits so it could start a new uint16_t at offset 2. Moving the bool types to the end of the struct would avoid that IMHO. Juergen
On 08.08.2019 13:53, Juergen Gross wrote: > On 08.08.19 12:28, Julien Grall wrote: >> On 08/08/2019 08:51, Juergen Gross wrote: >>> On 08.08.19 08:58, Jan Beulich wrote: >>>> On 07.08.2019 16:31, Juergen Gross wrote: >>>> Do we have an implied assumption somewhere that unsigned short is >>>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as >>>> you really mean "exactly 16 bits"), the two boolean fields want >>>> to be bool, and the remaining two ones unsigned int. >>> >>> But that would increase the size of the union to 4 bytes instead of 2. >>> So at least pad and cpu must be unsigned short or (better) uint16_t. >> >> How about bool irq_safe:1? > > I didn't question that, but OTOH I'm not sure doing something like: > > struct { > bool unseen:1; > bool irq_safe:1; > uint16_t pad:2; > uint16_t cpu:12; > } > > is guaranteed to be 2 bytes long. I think pad will be still put into the > first byte, but the compiler might decide that the 4 bits left won't be > able to hold the next 12 bits so it could start a new uint16_t at offset > 2. > > Moving the bool types to the end of the struct would avoid that IMHO. Moving the two bool-s further down will also simplify extraction and insertion of the "cpu" field. Jan
On 08.08.19 14:18, Jan Beulich wrote: > On 08.08.2019 13:53, Juergen Gross wrote: >> On 08.08.19 12:28, Julien Grall wrote: >>> On 08/08/2019 08:51, Juergen Gross wrote: >>>> On 08.08.19 08:58, Jan Beulich wrote: >>>>> On 07.08.2019 16:31, Juergen Gross wrote: >>>>> Do we have an implied assumption somewhere that unsigned short is >>>>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as >>>>> you really mean "exactly 16 bits"), the two boolean fields want >>>>> to be bool, and the remaining two ones unsigned int. >>>> >>>> But that would increase the size of the union to 4 bytes instead of 2. >>>> So at least pad and cpu must be unsigned short or (better) uint16_t. >>> >>> How about bool irq_safe:1? >> >> I didn't question that, but OTOH I'm not sure doing something like: >> >> struct { >> bool unseen:1; >> bool irq_safe:1; >> uint16_t pad:2; >> uint16_t cpu:12; >> } >> >> is guaranteed to be 2 bytes long. I think pad will be still put into the >> first byte, but the compiler might decide that the 4 bits left won't be >> able to hold the next 12 bits so it could start a new uint16_t at offset >> 2. >> >> Moving the bool types to the end of the struct would avoid that IMHO. > > Moving the two bool-s further down will also simplify extraction and > insertion of the "cpu" field. Okay, lets reverse above struct. Juergen
On 08/08/2019 12:53, Juergen Gross wrote: > On 08.08.19 12:28, Julien Grall wrote: >> >> >> On 08/08/2019 08:51, Juergen Gross wrote: >>> On 08.08.19 08:58, Jan Beulich wrote: >>>> On 07.08.2019 16:31, Juergen Gross wrote: >>>> Do we have an implied assumption somewhere that unsigned short is >>>> exactly 16 bits wide? I think "val" wants to be uint16_t here (as >>>> you really mean "exactly 16 bits"), the two boolean fields want >>>> to be bool, and the remaining two ones unsigned int. >>> >>> But that would increase the size of the union to 4 bytes instead of 2. >>> So at least pad and cpu must be unsigned short or (better) uint16_t. >> >> How about bool irq_safe:1? > > I didn't question that, but OTOH I'm not sure doing something like: > > struct { > bool unseen:1; > bool irq_safe:1; > uint16_t pad:2; > uint16_t cpu:12; > } > > is guaranteed to be 2 bytes long. It will be on anything which is GCC-compatible. All scalar bitfields gets tightly packed even when they differ in base type. ~Andrew
diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c index 6bc52d70c0..4e681cc651 100644 --- a/xen/common/spinlock.c +++ b/xen/common/spinlock.c @@ -13,9 +13,9 @@ static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0); -static void check_lock(struct lock_debug *debug) +static void check_lock(union lock_debug *debug) { - int irq_safe = !local_irq_is_enabled(); + unsigned short irq_safe = !local_irq_is_enabled(); if ( unlikely(atomic_read(&spin_debug) <= 0) ) return; @@ -43,18 +43,21 @@ static void check_lock(struct lock_debug *debug) */ if ( unlikely(debug->irq_safe != irq_safe) ) { - int seen = cmpxchg(&debug->irq_safe, -1, irq_safe); + union lock_debug seen, new = { 0 }; - if ( seen == !irq_safe ) + new.irq_safe = irq_safe; + seen.val = cmpxchg(&debug->val, 0xffff, new.val); + + if ( !seen.unused && seen.irq_safe == !irq_safe ) { printk("CHECKLOCK FAILURE: prev irqsafe: %d, curr irqsafe %d\n", - seen, irq_safe); + seen.irq_safe, irq_safe); BUG(); } } } -static void check_barrier(struct lock_debug *debug) +static void check_barrier(union lock_debug *debug) { if ( unlikely(atomic_read(&spin_debug) <= 0) ) return; @@ -73,6 +76,17 @@ static void check_barrier(struct lock_debug *debug) BUG_ON(!local_irq_is_enabled() && (debug->irq_safe == 0)); } +static void got_lock(union lock_debug *debug) +{ + debug->cpu = smp_processor_id(); +} + +static void rel_lock(union lock_debug *debug) +{ + ASSERT(debug->cpu == smp_processor_id()); + debug->cpu = SPINLOCK_NO_CPU; +} + void spin_debug_enable(void) { atomic_inc(&spin_debug); @@ -87,6 +101,8 @@ void spin_debug_disable(void) #define check_lock(l) ((void)0) #define check_barrier(l) ((void)0) +#define got_lock(l) ((void)0) +#define rel_lock(l) ((void)0) #endif @@ -150,6 +166,7 @@ void inline _spin_lock_cb(spinlock_t *lock, void (*cb)(void *), void *data) cb(data); arch_lock_relax(); } + got_lock(&lock->debug); LOCK_PROFILE_GOT; preempt_disable(); arch_lock_acquire_barrier(); @@ -181,6 +198,7 @@ void _spin_unlock(spinlock_t *lock) arch_lock_release_barrier(); preempt_enable(); LOCK_PROFILE_REL; + rel_lock(&lock->debug); add_sized(&lock->tickets.head, 1); arch_lock_signal(); } @@ -224,6 +242,7 @@ int _spin_trylock(spinlock_t *lock) if ( cmpxchg(&lock->tickets.head_tail, old.head_tail, new.head_tail) != old.head_tail ) return 0; + got_lock(&lock->debug); #ifdef CONFIG_LOCK_PROFILE if (lock->profile) lock->profile->time_locked = NOW(); diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h index a811b73bf3..b4881ad433 100644 --- a/xen/include/xen/spinlock.h +++ b/xen/include/xen/spinlock.h @@ -7,14 +7,20 @@ #include <xen/percpu.h> #ifndef NDEBUG -struct lock_debug { - s16 irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */ +union lock_debug { + unsigned short val; + struct { + unsigned short unused:1; + unsigned short irq_safe:1; + unsigned short pad:2; + unsigned short cpu:12; + }; }; -#define _LOCK_DEBUG { -1 } +#define _LOCK_DEBUG { 0xffff } void spin_debug_enable(void); void spin_debug_disable(void); #else -struct lock_debug { }; +union lock_debug { }; #define _LOCK_DEBUG { } #define spin_debug_enable() ((void)0) #define spin_debug_disable() ((void)0) @@ -143,7 +149,7 @@ typedef struct spinlock { #define SPINLOCK_NO_CPU 0xfffu u16 recurse_cnt:4; #define SPINLOCK_MAX_RECURSE 0xfu - struct lock_debug debug; + union lock_debug debug; #ifdef CONFIG_LOCK_PROFILE struct lock_profile *profile; #endif
Add the cpu currently holding the lock to struct lock_debug. This makes analysis of locking errors easier and it can be tested whether the correct cpu is releasing a lock again. Signed-off-by: Juergen Gross <jgross@suse.com> --- xen/common/spinlock.c | 31 +++++++++++++++++++++++++------ xen/include/xen/spinlock.h | 16 +++++++++++----- 2 files changed, 36 insertions(+), 11 deletions(-)