
xen/locks: add dynamic lock recursion checks

Message ID: 20220908074619.18221-1-jgross@suse.com
State: New, archived
Series: xen/locks: add dynamic lock recursion checks

Commit Message

Juergen Gross Sept. 8, 2022, 7:46 a.m. UTC
Add checking of lock recursion to the hypervisor. This is done by using
a percpu data array to store the address of each taken lock. Any
attempt to take a lock twice (with the exception of recursive
spinlocks) will result in a crash. This is especially meant for
detecting attempts to take a rwlock multiple times as a reader, which
results in a deadlock only if another CPU tries to acquire the lock as
a writer in between.
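
As an illustration (not part of the patch; the rwlock name is made up),
the interleaving that turns such a nested read_lock() into a deadlock
looks roughly like this:

    /*
     * Illustration only, with a hypothetical rwlock_t r:
     *
     *   CPU0                      CPU1
     *   read_lock(&r);                              reader count is now 1
     *                             write_lock(&r);   blocks, stops new readers
     *   read_lock(&r);                              queues behind the waiting
     *                                               writer
     *
     * CPU0 now waits for the writer to finish, while the writer waits for
     * CPU0's first read lock to be dropped: deadlock.  With the checks
     * added here, the second read_lock() on CPU0 crashes right away, so
     * the bug is caught even when no writer happens to interleave.
     */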

The additional checks are not performance neutral, so they are active
only with CONFIG_DEBUG_LOCKS enabled, which by default is the case only
in debug builds. The size of the percpu data array can be selected via
a boot parameter.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
Further possible enhancements:
- record address or file/line of locker, too (a rough sketch follows below)
- add sanity check when exiting hypervisor or going to idle that all locks
  have been released
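
A rough sketch of the first item (purely hypothetical, not implemented by
this patch; the struct and macro names are made up):

    /* Hypothetical extension: remember where each lock was taken. */
    struct lock_trace {
        const union lock_debug *debug;
        const char *file;      /* __FILE__ of the caller taking the lock */
        unsigned int line;     /* __LINE__ of the caller taking the lock */
    };

    /*
     * lock_enter() would gain file/line parameters, wrapped in a macro so
     * that existing call sites stay unchanged:
     *
     *   #define lock_enter(d) _lock_enter(d, __FILE__, __LINE__)
     *
     * and the "CHECKLOCK FAILURE" message could then report the original
     * lock site in addition to the lock address.
     */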

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 docs/misc/xen-command-line.pandoc |  12 +++
 xen/common/rwlock.c               |   6 ++
 xen/common/spinlock.c             | 145 ++++++++++++++++++++++++++++++
 xen/include/xen/rwlock.h          |  20 +++++
 xen/include/xen/spinlock.h        |   4 +
 5 files changed, 187 insertions(+)

Comments

Jan Beulich Sept. 8, 2022, 1:08 p.m. UTC | #1
On 08.09.2022 09:46, Juergen Gross wrote:
> Add checking of lock recursion to the hypervisor. This is done by using
> a percpu data array to store the address of each taken lock. Any
> attempt to take a lock twice (with the exception of recursive
> spinlocks) will result in a crash. This is especially meant for
> detecting attempts to take a rwlock multiple times as a reader, which
> results in a deadlock only if another CPU tries to acquire the lock as
> a writer in between.
> 
> The additional checks are not performance neutral, so they are active
> only with CONFIG_DEBUG_LOCKS enabled, which by default is the case only
> in debug builds. The size of the percpu data array can be selected via
> a boot parameter.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
> Further possible enhancements:
> - record address or file/line of locker, too
> - add sanity check when exiting hypervisor or going to idle that all locks
>   have been released
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>

But left in the queue for 4.18, for the avoidance of doubt.

Jan

Patch

diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
index 9a79385a37..7d228ad5a1 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -1631,6 +1631,18 @@  This option is intended for debugging purposes only.  Enable MSR_DEBUGCTL.LBR
 in hypervisor context to be able to dump the Last Interrupt/Exception To/From
 record with other registers.
 
+### lock-depth-size
+> `= <integer>`
+
+> Default: `lock-depth-size=64`
+
+Specifies the maximum number of nested locks tested for illegal recursions.
+Higher nesting levels still work, but recursion testing is omitted for those
+levels. In case an illegal recursion is detected the system will crash
+immediately. Specifying `0` will disable all testing of illegal lock nesting.
+
+This option is available for hypervisors built with CONFIG_DEBUG_LOCKS only.
+
 ### loglvl
 > `= <level>[/<rate-limited level>]` where level is `none | error | warning | info | debug | all`
 
diff --git a/xen/common/rwlock.c b/xen/common/rwlock.c
index aa15529bbe..18224a4bb5 100644
--- a/xen/common/rwlock.c
+++ b/xen/common/rwlock.c
@@ -54,6 +54,8 @@  void queue_read_lock_slowpath(rwlock_t *lock)
      * Signal the next one in queue to become queue head.
      */
     spin_unlock(&lock->lock);
+
+    lock_enter(&lock->lock.debug);
 }
 
 /*
@@ -100,6 +102,8 @@  void queue_write_lock_slowpath(rwlock_t *lock)
     }
  unlock:
     spin_unlock(&lock->lock);
+
+    lock_enter(&lock->lock.debug);
 }
 
 
@@ -146,4 +150,6 @@  void _percpu_write_lock(percpu_rwlock_t **per_cpudata,
         /* Give the coherency fabric a break. */
         cpu_relax();
     };
+
+    lock_enter(&percpu_rwlock->rwlock.lock.debug);
 }
diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 62c83aaa6a..84996c3fbc 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -1,5 +1,8 @@ 
+#include <xen/cpu.h>
 #include <xen/lib.h>
 #include <xen/irq.h>
+#include <xen/notifier.h>
+#include <xen/param.h>
 #include <xen/smp.h>
 #include <xen/time.h>
 #include <xen/spinlock.h>
@@ -11,11 +14,77 @@ 
 
 #ifdef CONFIG_DEBUG_LOCKS
 
+/* Max. number of entries in locks_taken array. */
+static unsigned int __ro_after_init lock_depth_size = 64;
+integer_param("lock-depth-size", lock_depth_size);
+
+/*
+ * Array of addresses of taken locks.
+ * nr_locks_taken is the index after the last entry. As locks tend to be
+ * nested cleanly, when freeing a lock it will probably be the one before
+ * nr_locks_taken, and new entries can be entered at that index. It is fine
+ * for a lock to be released out of order, though.
+ */
+static DEFINE_PER_CPU(const union lock_debug **, locks_taken);
+static DEFINE_PER_CPU(unsigned int, nr_locks_taken);
+static bool __read_mostly max_depth_reached;
+
 static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);
 
+static int cf_check cpu_lockdebug_callback(struct notifier_block *nfb,
+                                           unsigned long action,
+                                           void *hcpu)
+{
+    unsigned int cpu = (unsigned long)hcpu;
+
+    switch ( action )
+    {
+    case CPU_UP_PREPARE:
+        if ( !per_cpu(locks_taken, cpu) )
+            per_cpu(locks_taken, cpu) = xzalloc_array(const union lock_debug *,
+                                                      lock_depth_size);
+        if ( !per_cpu(locks_taken, cpu) )
+            printk(XENLOG_WARNING
+                   "cpu %u: failed to allocate lock recursion check area\n",
+                   cpu);
+        break;
+
+    case CPU_UP_CANCELED:
+    case CPU_DEAD:
+        XFREE(per_cpu(locks_taken, cpu));
+        break;
+
+    default:
+        break;
+    }
+
+    return 0;
+}
+
+static struct notifier_block cpu_lockdebug_nfb = {
+    .notifier_call = cpu_lockdebug_callback,
+};
+
+static int __init cf_check lockdebug_init(void)
+{
+    if ( lock_depth_size )
+    {
+        register_cpu_notifier(&cpu_lockdebug_nfb);
+        cpu_lockdebug_callback(&cpu_lockdebug_nfb, CPU_UP_PREPARE,
+                               (void *)(unsigned long)smp_processor_id());
+    }
+
+    return 0;
+}
+presmp_initcall(lockdebug_init);
+
 void check_lock(union lock_debug *debug, bool try)
 {
     bool irq_safe = !local_irq_is_enabled();
+    unsigned int cpu = smp_processor_id();
+    const union lock_debug *const *taken = per_cpu(locks_taken, cpu);
+    unsigned int nr_taken = per_cpu(nr_locks_taken, cpu);
+    unsigned int i;
 
     BUILD_BUG_ON(LOCK_DEBUG_PAD_BITS <= 0);
 
@@ -63,6 +132,16 @@  void check_lock(union lock_debug *debug, bool try)
             BUG();
         }
     }
+
+    if ( try )
+        return;
+
+    for ( i = 0; i < nr_taken; i++ )
+        if ( taken[i] == debug )
+        {
+            printk("CHECKLOCK FAILURE: lock at %p taken recursively\n", debug);
+            BUG();
+        }
 }
 
 static void check_barrier(union lock_debug *debug)
@@ -84,15 +163,81 @@  static void check_barrier(union lock_debug *debug)
     BUG_ON(!local_irq_is_enabled() && !debug->irq_safe);
 }
 
+void lock_enter(const union lock_debug *debug)
+{
+    unsigned int cpu = smp_processor_id();
+    const union lock_debug **taken = per_cpu(locks_taken, cpu);
+    unsigned int *nr_taken = &per_cpu(nr_locks_taken, cpu);
+    unsigned long flags;
+
+    if ( !taken )
+        return;
+
+    local_irq_save(flags);
+
+    if ( *nr_taken < lock_depth_size )
+        taken[(*nr_taken)++] = debug;
+    else if ( !max_depth_reached )
+    {
+        max_depth_reached = true;
+        printk("CHECKLOCK max lock depth %u reached!\n", lock_depth_size);
+        WARN();
+    }
+
+    local_irq_restore(flags);
+}
+
+void lock_exit(const union lock_debug *debug)
+{
+    unsigned int cpu = smp_processor_id();
+    const union lock_debug **taken = per_cpu(locks_taken, cpu);
+    unsigned int *nr_taken = &per_cpu(nr_locks_taken, cpu);
+    unsigned int i;
+    unsigned long flags;
+
+    if ( !taken )
+        return;
+
+    local_irq_save(flags);
+
+    for ( i = *nr_taken; i > 0; i-- )
+    {
+        if ( taken[i - 1] == debug )
+        {
+            memmove(taken + i - 1, taken + i,
+                    (*nr_taken - i) * sizeof(*taken));
+            (*nr_taken)--;
+            taken[*nr_taken] = NULL;
+
+            local_irq_restore(flags);
+
+            return;
+        }
+    }
+
+    if ( !max_depth_reached )
+    {
+        printk("CHECKLOCK released lock at %p not recorded!\n", debug);
+        WARN();
+    }
+
+    local_irq_restore(flags);
+}
+
 static void got_lock(union lock_debug *debug)
 {
     debug->cpu = smp_processor_id();
+
+    lock_enter(debug);
 }
 
 static void rel_lock(union lock_debug *debug)
 {
     if ( atomic_read(&spin_debug) > 0 )
         BUG_ON(debug->cpu != smp_processor_id());
+
+    lock_exit(debug);
+
     debug->cpu = SPINLOCK_NO_CPU;
 }
 
diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h
index 0cc9167715..b8d52a5aa9 100644
--- a/xen/include/xen/rwlock.h
+++ b/xen/include/xen/rwlock.h
@@ -66,7 +66,10 @@  static inline int _read_trylock(rwlock_t *lock)
          * arch_lock_acquire_barrier().
          */
         if ( likely(_can_read_lock(cnts)) )
+        {
+            lock_enter(&lock->lock.debug);
             return 1;
+        }
         atomic_sub(_QR_BIAS, &lock->cnts);
     }
     preempt_enable();
@@ -91,6 +94,7 @@  static inline void _read_lock(rwlock_t *lock)
     {
         /* The slow path calls check_lock() via spin_lock(). */
         check_lock(&lock->lock.debug, false);
+        lock_enter(&lock->lock.debug);
         return;
     }
 
@@ -123,6 +127,8 @@  static inline unsigned long _read_lock_irqsave(rwlock_t *lock)
  */
 static inline void _read_unlock(rwlock_t *lock)
 {
+    lock_exit(&lock->lock.debug);
+
     arch_lock_release_barrier();
     /*
      * Atomically decrement the reader count
@@ -170,6 +176,7 @@  static inline void _write_lock(rwlock_t *lock)
     {
         /* The slow path calls check_lock() via spin_lock(). */
         check_lock(&lock->lock.debug, false);
+        lock_enter(&lock->lock.debug);
         return;
     }
 
@@ -215,6 +222,8 @@  static inline int _write_trylock(rwlock_t *lock)
         return 0;
     }
 
+    lock_enter(&lock->lock.debug);
+
     /*
      * atomic_cmpxchg() is a full barrier so no need for an
      * arch_lock_acquire_barrier().
@@ -225,6 +234,9 @@  static inline int _write_trylock(rwlock_t *lock)
 static inline void _write_unlock(rwlock_t *lock)
 {
     ASSERT(_is_write_locked_by_me(atomic_read(&lock->cnts)));
+
+    lock_exit(&lock->lock.debug);
+
     arch_lock_release_barrier();
     atomic_and(~(_QW_CPUMASK | _QW_WMASK), &lock->cnts);
     preempt_enable();
@@ -343,6 +355,8 @@  static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
         /* All other paths have implicit check_lock() calls via read_lock(). */
         check_lock(&percpu_rwlock->rwlock.lock.debug, false);
     }
+
+    lock_enter(&percpu_rwlock->rwlock.lock.debug);
 }
 
 static inline void _percpu_read_unlock(percpu_rwlock_t **per_cpudata,
@@ -353,6 +367,9 @@  static inline void _percpu_read_unlock(percpu_rwlock_t **per_cpudata,
 
     /* Verify the read lock was taken for this lock */
     ASSERT(this_cpu_ptr(per_cpudata) != NULL);
+
+    lock_exit(&percpu_rwlock->rwlock.lock.debug);
+
     /*
      * Detect using a second percpu_rwlock_t simulatenously and fallback
      * to standard read_unlock.
@@ -379,6 +396,9 @@  static inline void _percpu_write_unlock(percpu_rwlock_t **per_cpudata,
 
     ASSERT(percpu_rwlock->writer_activating);
     percpu_rwlock->writer_activating = 0;
+
+    lock_exit(&percpu_rwlock->rwlock.lock.debug);
+
     write_unlock(&percpu_rwlock->rwlock);
 }
 
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index 961891bea4..2fa6ba3654 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -22,12 +22,16 @@  union lock_debug {
 };
 #define _LOCK_DEBUG { LOCK_DEBUG_INITVAL }
 void check_lock(union lock_debug *debug, bool try);
+void lock_enter(const union lock_debug *debug);
+void lock_exit(const union lock_debug *debug);
 void spin_debug_enable(void);
 void spin_debug_disable(void);
 #else
 union lock_debug { };
 #define _LOCK_DEBUG { }
 #define check_lock(l, t) ((void)0)
+#define lock_enter(l) ((void)0)
+#define lock_exit(l) ((void)0)
 #define spin_debug_enable() ((void)0)
 #define spin_debug_disable() ((void)0)
 #endif
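
As a usage illustration (the function and lock below are made up, not part
of the series): with CONFIG_DEBUG_LOCKS enabled, nesting read_lock() on the
same rwlock on one CPU now trips the recursion test in check_lock() and
crashes with the "CHECKLOCK FAILURE" message, instead of deadlocking only
when a writer happens to interleave:

    /* Hypothetical caller code, for illustration only. */
    static DEFINE_RWLOCK(my_lock);

    static void buggy_nested_readers(void)
    {
        read_lock(&my_lock);    /* recorded in the per-cpu locks_taken[]  */
        read_lock(&my_lock);    /* check_lock() finds the same lock_debug */
                                /* pointer in locks_taken[] and BUG()s    */
        read_unlock(&my_lock);
        read_unlock(&my_lock);
    }

The depth of the per-cpu tracking array is tunable with lock-depth-size=<n>
on the Xen command line; lock-depth-size=0 disables the recursion tracking
altogether, as documented in the xen-command-line.pandoc hunk above.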