[v2,4/5] util/qht: use striped locks under TSAN

Message ID 20230109224954.161672-5-cota@braap.org (mailing list archive)
State New, archived
Series: tsan fixes

Commit Message

Emilio Cota Jan. 9, 2023, 10:49 p.m. UTC
Fixes this tsan crash, easy to reproduce with any large enough program:

$ tests/unit/test-qht
1..2
ThreadSanitizer: CHECK failed: sanitizer_deadlock_detector.h:67 "((n_all_locks_)) < (((sizeof(all_locks_with_contexts_)/sizeof((all_locks_with_contexts_)[0]))))" (0x40, 0x40) (tid=1821568)
    #0 __tsan::CheckUnwind() ../../../../src/libsanitizer/tsan/tsan_rtl.cpp:353 (libtsan.so.2+0x90034)
    #1 __sanitizer::CheckFailed(char const*, int, char const*, unsigned long long, unsigned long long) ../../../../src/libsanitizer/sanitizer_common/sanitizer_termination.cpp:86 (libtsan.so.2+0xca555)
    #2 __sanitizer::DeadlockDetectorTLS<__sanitizer::TwoLevelBitVector<1ul, __sanitizer::BasicBitVector<unsigned long> > >::addLock(unsigned long, unsigned long, unsigned int) ../../../../src/libsanitizer/sanitizer_common/sanitizer_deadlock_detector.h:67 (libtsan.so.2+0xb3616)
    #3 __sanitizer::DeadlockDetectorTLS<__sanitizer::TwoLevelBitVector<1ul, __sanitizer::BasicBitVector<unsigned long> > >::addLock(unsigned long, unsigned long, unsigned int) ../../../../src/libsanitizer/sanitizer_common/sanitizer_deadlock_detector.h:59 (libtsan.so.2+0xb3616)
    #4 __sanitizer::DeadlockDetector<__sanitizer::TwoLevelBitVector<1ul, __sanitizer::BasicBitVector<unsigned long> > >::onLockAfter(__sanitizer::DeadlockDetectorTLS<__sanitizer::TwoLevelBitVector<1ul, __sanitizer::BasicBitVector<unsigned long> > >*, unsigned long, unsigned int) ../../../../src/libsanitizer/sanitizer_common/sanitizer_deadlock_detector.h:216 (libtsan.so.2+0xb3616)
    #5 __sanitizer::DD::MutexAfterLock(__sanitizer::DDCallback*, __sanitizer::DDMutex*, bool, bool) ../../../../src/libsanitizer/sanitizer_common/sanitizer_deadlock_detector1.cpp:169 (libtsan.so.2+0xb3616)
    #6 __tsan::MutexPostLock(__tsan::ThreadState*, unsigned long, unsigned long, unsigned int, int) ../../../../src/libsanitizer/tsan/tsan_rtl_mutex.cpp:200 (libtsan.so.2+0xa3382)
    #7 __tsan_mutex_post_lock ../../../../src/libsanitizer/tsan/tsan_interface_ann.cpp:384 (libtsan.so.2+0x76bc3)
    #8 qemu_spin_lock /home/cota/src/qemu/include/qemu/thread.h:259 (test-qht+0x44a97)
    #9 qht_map_lock_buckets ../util/qht.c:253 (test-qht+0x44a97)
    #10 do_qht_iter ../util/qht.c:809 (test-qht+0x45f33)
    #11 qht_iter ../util/qht.c:821 (test-qht+0x45f33)
    #12 iter_check ../tests/unit/test-qht.c:121 (test-qht+0xe473)
    #13 qht_do_test ../tests/unit/test-qht.c:202 (test-qht+0xe473)
    #14 qht_test ../tests/unit/test-qht.c:240 (test-qht+0xe7c1)
    #15 test_default ../tests/unit/test-qht.c:246 (test-qht+0xe828)
    #16 <null> <null> (libglib-2.0.so.0+0x7daed)
    #17 <null> <null> (libglib-2.0.so.0+0x7d80a)
    #18 <null> <null> (libglib-2.0.so.0+0x7d80a)
    #19 g_test_run_suite <null> (libglib-2.0.so.0+0x7dfe9)
    #20 g_test_run <null> (libglib-2.0.so.0+0x7e055)
    #21 main ../tests/unit/test-qht.c:259 (test-qht+0xd2c6)
    #22 __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 (libc.so.6+0x29d8f)
    #23 __libc_start_main_impl ../csu/libc-start.c:392 (libc.so.6+0x29e3f)
    #24 _start <null> (test-qht+0xdb44)

Signed-off-by: Emilio Cota <cota@braap.org>
---
 util/qht.c | 101 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 87 insertions(+), 14 deletions(-)
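
Background, not part of the patch: lock striping replaces the one-lock-per-bucket
scheme with a small, fixed array of locks, and a bucket's lock is selected by the
low bits of its index. Locking "all buckets" then means taking each stripe lock
once, so the number of locks held at any time is bounded by the stripe size (16
here) rather than by the number of buckets, which keeps TSAN well under its
hardcoded limit of 64 locks held per thread. A minimal standalone sketch of the
idea, using plain pthread mutexes and illustrative names rather than the patch's
QemuSpin-based helpers:

    /* Illustrative sketch of lock striping; not the QEMU code itself. */
    #include <pthread.h>
    #include <stddef.h>

    #define STRIPE_BITS  4
    #define STRIPE_LOCKS (1 << STRIPE_BITS)  /* 16; must be a power of two */

    static pthread_mutex_t stripe[STRIPE_LOCKS];

    static void stripe_init(void)
    {
        for (size_t i = 0; i < STRIPE_LOCKS; i++) {
            pthread_mutex_init(&stripe[i], NULL);
        }
    }

    /* Every bucket shares one of the 16 locks, chosen by its low index bits. */
    static pthread_mutex_t *bucket_lock(size_t bucket_idx)
    {
        return &stripe[bucket_idx & (STRIPE_LOCKS - 1)];
    }

    /*
     * Locking "all buckets" (as qht does when resizing or iterating) takes
     * each stripe lock exactly once, so at most 16 locks are held at a time,
     * comfortably below TSAN's per-thread limit of 64 held locks.
     */
    static void lock_all_buckets(void)
    {
        for (size_t i = 0; i < STRIPE_LOCKS; i++) {
            pthread_mutex_lock(&stripe[i]);
        }
    }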

Comments

Alex Bennée Jan. 10, 2023, 8:58 p.m. UTC | #1
Emilio Cota <cota@braap.org> writes:

> Fixes this tsan crash, easy to reproduce with any large enough program:
>
> $ tests/unit/test-qht
> 1..2
> ThreadSanitizer: CHECK failed: sanitizer_deadlock_detector.h:67 "((n_all_locks_)) < (((sizeof(all_locks_with_contexts_)/sizeof((all_locks_with_contexts_)[0]))))" (0x40, 0x40) (tid=1821568)
>     #0 __tsan::CheckUnwind() ../../../../src/libsanitizer/tsan/tsan_rtl.cpp:353 (libtsan.so.2+0x90034)
>     #1 __sanitizer::CheckFailed(char const*, int, char const*, unsigned long long, unsigned long long) ../../../../src/libsanitizer/sanitizer_common/sanitizer_termination.cpp:86 (libtsan.so.2+0xca555)
>     #2 __sanitizer::DeadlockDetectorTLS<__sanitizer::TwoLevelBitVector<1ul, __sanitizer::BasicBitVector<unsigned long> > >::addLock(unsigned long, unsigned long, unsigned int) ../../../../src/libsanitizer/sanitizer_common/sanitizer_deadlock_detector.h:67 (libtsan.so.2+0xb3616)
>     #3 __sanitizer::DeadlockDetectorTLS<__sanitizer::TwoLevelBitVector<1ul, __sanitizer::BasicBitVector<unsigned long> > >::addLock(unsigned long, unsigned long, unsigned int) ../../../../src/libsanitizer/sanitizer_common/sanitizer_deadlock_detector.h:59 (libtsan.so.2+0xb3616)
>     #4 __sanitizer::DeadlockDetector<__sanitizer::TwoLevelBitVector<1ul, __sanitizer::BasicBitVector<unsigned long> > >::onLockAfter(__sanitizer::DeadlockDetectorTLS<__sanitizer::TwoLevelBitVector<1ul, __sanitizer::BasicBitVector<unsigned long> > >*, unsigned long, unsigned int) ../../../../src/libsanitizer/sanitizer_common/sanitizer_deadlock_detector.h:216 (libtsan.so.2+0xb3616)
>     #5 __sanitizer::DD::MutexAfterLock(__sanitizer::DDCallback*, __sanitizer::DDMutex*, bool, bool) ../../../../src/libsanitizer/sanitizer_common/sanitizer_deadlock_detector1.cpp:169 (libtsan.so.2+0xb3616)
>     #6 __tsan::MutexPostLock(__tsan::ThreadState*, unsigned long, unsigned long, unsigned int, int) ../../../../src/libsanitizer/tsan/tsan_rtl_mutex.cpp:200 (libtsan.so.2+0xa3382)
>     #7 __tsan_mutex_post_lock ../../../../src/libsanitizer/tsan/tsan_interface_ann.cpp:384 (libtsan.so.2+0x76bc3)
>     #8 qemu_spin_lock /home/cota/src/qemu/include/qemu/thread.h:259 (test-qht+0x44a97)
>     #9 qht_map_lock_buckets ../util/qht.c:253 (test-qht+0x44a97)
>     #10 do_qht_iter ../util/qht.c:809 (test-qht+0x45f33)
>     #11 qht_iter ../util/qht.c:821 (test-qht+0x45f33)
>     #12 iter_check ../tests/unit/test-qht.c:121 (test-qht+0xe473)
>     #13 qht_do_test ../tests/unit/test-qht.c:202 (test-qht+0xe473)
>     #14 qht_test ../tests/unit/test-qht.c:240 (test-qht+0xe7c1)
>     #15 test_default ../tests/unit/test-qht.c:246 (test-qht+0xe828)
>     #16 <null> <null> (libglib-2.0.so.0+0x7daed)
>     #17 <null> <null> (libglib-2.0.so.0+0x7d80a)
>     #18 <null> <null> (libglib-2.0.so.0+0x7d80a)
>     #19 g_test_run_suite <null> (libglib-2.0.so.0+0x7dfe9)
>     #20 g_test_run <null> (libglib-2.0.so.0+0x7e055)
>     #21 main ../tests/unit/test-qht.c:259 (test-qht+0xd2c6)
>     #22 __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 (libc.so.6+0x29d8f)
>     #23 __libc_start_main_impl ../csu/libc-start.c:392 (libc.so.6+0x29e3f)
>     #24 _start <null> (test-qht+0xdb44)
>
> Signed-off-by: Emilio Cota <cota@braap.org>
> ---
>  util/qht.c | 101 +++++++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 87 insertions(+), 14 deletions(-)
>
> diff --git a/util/qht.c b/util/qht.c
> index 15866299e6..70cc733d5d 100644
> --- a/util/qht.c
> +++ b/util/qht.c
> @@ -151,6 +151,22 @@ struct qht_bucket {
>  
>  QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN);
>  
> +/*
> + * Under TSAN, we use striped locks instead of one lock per bucket chain.
> + * This avoids crashing under TSAN, since TSAN aborts the program if more than
> + * 64 locks are held (this is a hardcoded limit in TSAN).
> + * When resizing a QHT we grab all the buckets' locks, which can easily
> + * go over TSAN's limit. By using striped locks, we avoid this problem.
> + *
> + * Note: this number must be a power of two for easy index computation.
> + */
> +#define QHT_TSAN_BUCKET_LOCKS_BITS 4
> +#define QHT_TSAN_BUCKET_LOCKS (1 << QHT_TSAN_BUCKET_LOCKS_BITS)
> +
> +struct qht_tsan_lock {
> +    QemuSpin lock;
> +} QEMU_ALIGNED(QHT_BUCKET_ALIGN);
> +
>  /**
>   * struct qht_map - structure to track an array of buckets
>   * @rcu: used by RCU. Keep it as the top field in the struct to help valgrind
> @@ -160,6 +176,7 @@ QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN);
>   * @n_added_buckets: number of added (i.e. "non-head") buckets
>   * @n_added_buckets_threshold: threshold to trigger an upward resize once the
>   *                             number of added buckets surpasses it.
> + * @tsan_bucket_locks: Array of striped locks to be used only under TSAN.
>   *
>   * Buckets are tracked in what we call a "map", i.e. this structure.
>   */
> @@ -169,6 +186,9 @@ struct qht_map {
>      size_t n_buckets;
>      size_t n_added_buckets;
>      size_t n_added_buckets_threshold;
> +#ifdef CONFIG_TSAN
> +    struct qht_tsan_lock tsan_bucket_locks[QHT_TSAN_BUCKET_LOCKS];
> +#endif
>  };
>  
>  /* trigger a resize when n_added_buckets > n_buckets / div */
> @@ -229,10 +249,62 @@ static inline size_t qht_elems_to_buckets(size_t n_elems)
>      return pow2ceil(n_elems / QHT_BUCKET_ENTRIES);
>  }
>  
> -static inline void qht_head_init(struct qht_bucket *b)
> +/*
> + * When using striped locks (i.e. under TSAN), we have to be careful not
> + * to operate on the same lock twice (e.g. when iterating through all buckets).
> + * We achieve this by operating only on each stripe's first matching lock.
> + */
> +static inline void qht_do_if_first_in_stripe(const struct qht_map *map,
> +                                             struct qht_bucket *b,
> +                                             void (*func)(QemuSpin *spin))
> +{
> +#ifdef CONFIG_TSAN
> +    unsigned long bucket_idx = b - map->buckets;
> +    bool is_first_in_stripe = (bucket_idx >> QHT_TSAN_BUCKET_LOCKS_BITS) == 0;
> +    if (is_first_in_stripe) {
> +        unsigned long lock_idx = bucket_idx & (QHT_TSAN_BUCKET_LOCKS - 1);
> +        func(&map->tsan_bucket_locks[lock_idx]);

Hmm, I ran into an issue with:

     ../util/qht.c:286:10: error: incompatible pointer types passing 'const struct qht_tsan_lock *' to parameter of type 'QemuSpin *' (aka 'struct QemuSpin *') [-Werror,-Wincompatible-pointer-types]
    func(&map->tsan_bucket_locks[lock_idx]);
         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

But changing the line to:

    func(&map->tsan_bucket_locks[lock_idx].lock);

still fails because:

../../util/qht.c:266:14: error: passing 'const QemuSpin *' (aka 'const struct QemuSpin *') to parameter of type 'QemuSpin *' (aka 'struct QemuSpin *') discards qualifiers [-Werror,-Wincompatible-pointer-types-discards-qualifiers]
        func(&map->tsan_bucket_locks[lock_idx].lock);
             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

> +    }
> +#else
> +    func(&b->lock);
> +#endif
> +}
> +
> +static inline void qht_bucket_lock_destroy(const struct qht_map *map,
> +                                           struct qht_bucket *b)
> +{
> +    qht_do_if_first_in_stripe(map, b, qemu_spin_destroy);
> +}

Who is meant to be calling this?

> +
> +static inline void qht_bucket_lock_do(const struct qht_map *map,
> +                                      struct qht_bucket *b,
> +                                      void (*func)(QemuSpin *lock))
> +{
> +#ifdef CONFIG_TSAN
> +    unsigned long bucket_idx = b - map->buckets;
> +    unsigned long lock_idx = bucket_idx & (QHT_TSAN_BUCKET_LOCKS - 1);
> +    func(&map->tsan_bucket_locks[lock_idx]);
> +#else
> +    func(&b->lock);
> +#endif
> +}
> +
> +static inline void qht_bucket_lock(const struct qht_map *map,
> +                                   struct qht_bucket *b)
> +{
> +    qht_bucket_lock_do(map, b, qemu_spin_lock);
> +}
> +
> +static inline void qht_bucket_unlock(const struct qht_map *map,
> +                                     struct qht_bucket *b)
> +{
> +    qht_bucket_lock_do(map, b, qemu_spin_unlock);
> +}
> +
> +static inline void qht_head_init(struct qht_map *map, struct qht_bucket *b)
>  {
>      memset(b, 0, sizeof(*b));
> -    qemu_spin_init(&b->lock);
> +    qht_do_if_first_in_stripe(map, b, qemu_spin_init);
>      seqlock_init(&b->sequence);
>  }
>  
> @@ -250,7 +322,7 @@ static void qht_map_lock_buckets(struct qht_map *map)
>      for (i = 0; i < map->n_buckets; i++) {
>          struct qht_bucket *b = &map->buckets[i];
>  
> -        qemu_spin_lock(&b->lock);
> +        qht_do_if_first_in_stripe(map, b, qemu_spin_lock);
>      }
>  }
>  
> @@ -261,7 +333,7 @@ static void qht_map_unlock_buckets(struct qht_map *map)
>      for (i = 0; i < map->n_buckets; i++) {
>          struct qht_bucket *b = &map->buckets[i];
>  
> -        qemu_spin_unlock(&b->lock);
> +        qht_do_if_first_in_stripe(map, b, qemu_spin_unlock);
>      }
>  }
>  
> @@ -308,7 +380,7 @@ void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap)
>   * Get a head bucket and lock it, making sure its parent map is not stale.
>   * @pmap is filled with a pointer to the bucket's parent map.
>   *
> - * Unlock with qemu_spin_unlock(&b->lock).
> + * Unlock with qht_bucket_unlock.
>   *
>   * Note: callers cannot have ht->lock held.
>   */
> @@ -322,18 +394,18 @@ struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash,
>      map = qatomic_rcu_read(&ht->map);
>      b = qht_map_to_bucket(map, hash);
>  
> -    qemu_spin_lock(&b->lock);
> +    qht_bucket_lock(map, b);
>      if (likely(!qht_map_is_stale__locked(ht, map))) {
>          *pmap = map;
>          return b;
>      }
> -    qemu_spin_unlock(&b->lock);
> +    qht_bucket_unlock(map, b);
>  
>      /* we raced with a resize; acquire ht->lock to see the updated ht->map */
>      qht_lock(ht);
>      map = ht->map;
>      b = qht_map_to_bucket(map, hash);
> -    qemu_spin_lock(&b->lock);
> +    qht_bucket_lock(map, b);
>      qht_unlock(ht);
>      *pmap = map;
>      return b;
> @@ -345,12 +417,13 @@ static inline bool qht_map_needs_resize(const struct qht_map *map)
>             map->n_added_buckets_threshold;
>  }
>  
> -static inline void qht_chain_destroy(const struct qht_bucket *head)
> +static inline void qht_chain_destroy(const struct qht_map *map,
> +                                     struct qht_bucket *head)
>  {
>      struct qht_bucket *curr = head->next;
>      struct qht_bucket *prev;
>  
> -    qemu_spin_destroy(&head->lock);
> +    qht_do_if_first_in_stripe(map, head, qemu_spin_destroy);
>      while (curr) {
>          prev = curr;
>          curr = curr->next;
> @@ -364,7 +437,7 @@ static void qht_map_destroy(struct qht_map *map)
>      size_t i;
>  
>      for (i = 0; i < map->n_buckets; i++) {
> -        qht_chain_destroy(&map->buckets[i]);
> +        qht_chain_destroy(map, &map->buckets[i]);
>      }
>      qemu_vfree(map->buckets);
>      g_free(map);
> @@ -390,7 +463,7 @@ static struct qht_map *qht_map_create(size_t n_buckets)
>      map->buckets = qemu_memalign(QHT_BUCKET_ALIGN,
>                                   sizeof(*map->buckets) * n_buckets);
>      for (i = 0; i < n_buckets; i++) {
> -        qht_head_init(&map->buckets[i]);
> +        qht_head_init(map, &map->buckets[i]);
>      }
>      return map;
>  }
> @@ -638,7 +711,7 @@ bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing)
>      b = qht_bucket_lock__no_stale(ht, hash, &map);
>      prev = qht_insert__locked(ht, map, b, p, hash, &needs_resize);
>      qht_bucket_debug__locked(b);
> -    qemu_spin_unlock(&b->lock);
> +    qht_bucket_unlock(map, b);
>  
>      if (unlikely(needs_resize) && ht->mode & QHT_MODE_AUTO_RESIZE) {
>          qht_grow_maybe(ht);
> @@ -749,7 +822,7 @@ bool qht_remove(struct qht *ht, const void *p, uint32_t hash)
>      b = qht_bucket_lock__no_stale(ht, hash, &map);
>      ret = qht_remove__locked(b, p, hash);
>      qht_bucket_debug__locked(b);
> -    qemu_spin_unlock(&b->lock);
> +    qht_bucket_unlock(map, b);
>      return ret;
>  }
Emilio Cota Jan. 11, 2023, 2:41 a.m. UTC | #2
On Tue, Jan 10, 2023 at 20:58:01 +0000, Alex Bennée wrote:
> Emilio Cota <cota@braap.org> writes:
(snip)
> > +static inline void qht_do_if_first_in_stripe(const struct qht_map *map,
> > +                                             struct qht_bucket *b,
> > +                                             void (*func)(QemuSpin *spin))
> > +{
> > +#ifdef CONFIG_TSAN
> > +    unsigned long bucket_idx = b - map->buckets;
> > +    bool is_first_in_stripe = (bucket_idx >> QHT_TSAN_BUCKET_LOCKS_BITS) == 0;
> > +    if (is_first_in_stripe) {
> > +        unsigned long lock_idx = bucket_idx & (QHT_TSAN_BUCKET_LOCKS - 1);
> > +        func(&map->tsan_bucket_locks[lock_idx]);
> 
> Hmm I ran into an issue with:
> 
>      ../util/qht.c:286:10: error: incompatible pointer types passing 'const struct qht_tsan_lock *' to parameter of type 'QemuSpin *' (aka 'struct QemuSpin *') [-Werror,-Wincompatible-pointer-types]

Gaah, sorry. I didn't notice this because of unrelated noise due
to having to configure with --disable-werror. Fixed now by removing
a bunch of const's and also using .lock.
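
For reference, a rough sketch of what the fixed helper could look like given
the above (non-const map pointer, and passing the embedded QemuSpin member);
the actual v3 code may differ:

    static inline void qht_do_if_first_in_stripe(struct qht_map *map,
                                                 struct qht_bucket *b,
                                                 void (*func)(QemuSpin *spin))
    {
    #ifdef CONFIG_TSAN
        unsigned long bucket_idx = b - map->buckets;
        bool is_first_in_stripe = (bucket_idx >> QHT_TSAN_BUCKET_LOCKS_BITS) == 0;

        if (is_first_in_stripe) {
            unsigned long lock_idx = bucket_idx & (QHT_TSAN_BUCKET_LOCKS - 1);
            /* pass the QemuSpin member, not the wrapper struct */
            func(&map->tsan_bucket_locks[lock_idx].lock);
        }
    #else
        func(&b->lock);
    #endif
    }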

> > +static inline void qht_bucket_lock_destroy(const struct qht_map *map,
> > +                                           struct qht_bucket *b)
> > +{
> > +    qht_do_if_first_in_stripe(map, b, qemu_spin_destroy);
> > +}
> 
> Who is meant to be calling this?

Should have been removed in v2; fixed now.

I've uploaded the v3 series to https://github.com/cota/qemu/tree/tsan-v3

Please let me know if you want me to also mail it to the list.
Thanks,

		Emilio
Alex Bennée Jan. 11, 2023, 1:03 p.m. UTC | #3
Emilio Cota <cota@braap.org> writes:

> On Tue, Jan 10, 2023 at 20:58:01 +0000, Alex Bennée wrote:
>> Emilio Cota <cota@braap.org> writes:
> (snip)
>> > +static inline void qht_do_if_first_in_stripe(const struct qht_map *map,
>> > +                                             struct qht_bucket *b,
>> > +                                             void (*func)(QemuSpin *spin))
>> > +{
>> > +#ifdef CONFIG_TSAN
>> > +    unsigned long bucket_idx = b - map->buckets;
>> > +    bool is_first_in_stripe = (bucket_idx >> QHT_TSAN_BUCKET_LOCKS_BITS) == 0;
>> > +    if (is_first_in_stripe) {
>> > +        unsigned long lock_idx = bucket_idx & (QHT_TSAN_BUCKET_LOCKS - 1);
>> > +        func(&map->tsan_bucket_locks[lock_idx]);
>> 
>> Hmm I ran into an issue with:
>> 
>>      ../util/qht.c:286:10: error: incompatible pointer types passing
>> 'const struct qht_tsan_lock *' to parameter of type 'QemuSpin *'
>> (aka 'struct QemuSpin *') [-Werror,-Wincompatible-pointer-types]
>
> Gaah, sorry. I didn't notice this because of unrelated noise due
> to having to configure with --disable-werror. Fixed now by removing
> a bunch of const's and also using .lock.
>
>> > +static inline void qht_bucket_lock_destroy(const struct qht_map *map,
>> > +                                           struct qht_bucket *b)
>> > +{
>> > +    qht_do_if_first_in_stripe(map, b, qemu_spin_destroy);
>> > +}
>> 
>> Who is meant to be calling this?
>
> Should have been removed in v2; fixed now.
>
> I've uploaded the v3 series to https://github.com/cota/qemu/tree/tsan-v3
>
> Please let me know if you want me to also mail it to the list.

Please do, and then I can be sure I'm up to date when I rebuild the PR.

> Thanks,
>
> 		Emilio

Patch

diff --git a/util/qht.c b/util/qht.c
index 15866299e6..70cc733d5d 100644
--- a/util/qht.c
+++ b/util/qht.c
@@ -151,6 +151,22 @@  struct qht_bucket {
 
 QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN);
 
+/*
+ * Under TSAN, we use striped locks instead of one lock per bucket chain.
+ * This avoids crashing under TSAN, since TSAN aborts the program if more than
+ * 64 locks are held (this is a hardcoded limit in TSAN).
+ * When resizing a QHT we grab all the buckets' locks, which can easily
+ * go over TSAN's limit. By using striped locks, we avoid this problem.
+ *
+ * Note: this number must be a power of two for easy index computation.
+ */
+#define QHT_TSAN_BUCKET_LOCKS_BITS 4
+#define QHT_TSAN_BUCKET_LOCKS (1 << QHT_TSAN_BUCKET_LOCKS_BITS)
+
+struct qht_tsan_lock {
+    QemuSpin lock;
+} QEMU_ALIGNED(QHT_BUCKET_ALIGN);
+
 /**
  * struct qht_map - structure to track an array of buckets
  * @rcu: used by RCU. Keep it as the top field in the struct to help valgrind
@@ -160,6 +176,7 @@  QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN);
  * @n_added_buckets: number of added (i.e. "non-head") buckets
  * @n_added_buckets_threshold: threshold to trigger an upward resize once the
  *                             number of added buckets surpasses it.
+ * @tsan_bucket_locks: Array of striped locks to be used only under TSAN.
  *
  * Buckets are tracked in what we call a "map", i.e. this structure.
  */
@@ -169,6 +186,9 @@  struct qht_map {
     size_t n_buckets;
     size_t n_added_buckets;
     size_t n_added_buckets_threshold;
+#ifdef CONFIG_TSAN
+    struct qht_tsan_lock tsan_bucket_locks[QHT_TSAN_BUCKET_LOCKS];
+#endif
 };
 
 /* trigger a resize when n_added_buckets > n_buckets / div */
@@ -229,10 +249,62 @@  static inline size_t qht_elems_to_buckets(size_t n_elems)
     return pow2ceil(n_elems / QHT_BUCKET_ENTRIES);
 }
 
-static inline void qht_head_init(struct qht_bucket *b)
+/*
+ * When using striped locks (i.e. under TSAN), we have to be careful not
+ * to operate on the same lock twice (e.g. when iterating through all buckets).
+ * We achieve this by operating only on each stripe's first matching lock.
+ */
+static inline void qht_do_if_first_in_stripe(const struct qht_map *map,
+                                             struct qht_bucket *b,
+                                             void (*func)(QemuSpin *spin))
+{
+#ifdef CONFIG_TSAN
+    unsigned long bucket_idx = b - map->buckets;
+    bool is_first_in_stripe = (bucket_idx >> QHT_TSAN_BUCKET_LOCKS_BITS) == 0;
+    if (is_first_in_stripe) {
+        unsigned long lock_idx = bucket_idx & (QHT_TSAN_BUCKET_LOCKS - 1);
+        func(&map->tsan_bucket_locks[lock_idx]);
+    }
+#else
+    func(&b->lock);
+#endif
+}
+
+static inline void qht_bucket_lock_destroy(const struct qht_map *map,
+                                           struct qht_bucket *b)
+{
+    qht_do_if_first_in_stripe(map, b, qemu_spin_destroy);
+}
+
+static inline void qht_bucket_lock_do(const struct qht_map *map,
+                                      struct qht_bucket *b,
+                                      void (*func)(QemuSpin *lock))
+{
+#ifdef CONFIG_TSAN
+    unsigned long bucket_idx = b - map->buckets;
+    unsigned long lock_idx = bucket_idx & (QHT_TSAN_BUCKET_LOCKS - 1);
+    func(&map->tsan_bucket_locks[lock_idx]);
+#else
+    func(&b->lock);
+#endif
+}
+
+static inline void qht_bucket_lock(const struct qht_map *map,
+                                   struct qht_bucket *b)
+{
+    qht_bucket_lock_do(map, b, qemu_spin_lock);
+}
+
+static inline void qht_bucket_unlock(const struct qht_map *map,
+                                     struct qht_bucket *b)
+{
+    qht_bucket_lock_do(map, b, qemu_spin_unlock);
+}
+
+static inline void qht_head_init(struct qht_map *map, struct qht_bucket *b)
 {
     memset(b, 0, sizeof(*b));
-    qemu_spin_init(&b->lock);
+    qht_do_if_first_in_stripe(map, b, qemu_spin_init);
     seqlock_init(&b->sequence);
 }
 
@@ -250,7 +322,7 @@  static void qht_map_lock_buckets(struct qht_map *map)
     for (i = 0; i < map->n_buckets; i++) {
         struct qht_bucket *b = &map->buckets[i];
 
-        qemu_spin_lock(&b->lock);
+        qht_do_if_first_in_stripe(map, b, qemu_spin_lock);
     }
 }
 
@@ -261,7 +333,7 @@  static void qht_map_unlock_buckets(struct qht_map *map)
     for (i = 0; i < map->n_buckets; i++) {
         struct qht_bucket *b = &map->buckets[i];
 
-        qemu_spin_unlock(&b->lock);
+        qht_do_if_first_in_stripe(map, b, qemu_spin_unlock);
     }
 }
 
@@ -308,7 +380,7 @@  void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap)
  * Get a head bucket and lock it, making sure its parent map is not stale.
  * @pmap is filled with a pointer to the bucket's parent map.
  *
- * Unlock with qemu_spin_unlock(&b->lock).
+ * Unlock with qht_bucket_unlock.
  *
  * Note: callers cannot have ht->lock held.
  */
@@ -322,18 +394,18 @@  struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash,
     map = qatomic_rcu_read(&ht->map);
     b = qht_map_to_bucket(map, hash);
 
-    qemu_spin_lock(&b->lock);
+    qht_bucket_lock(map, b);
     if (likely(!qht_map_is_stale__locked(ht, map))) {
         *pmap = map;
         return b;
     }
-    qemu_spin_unlock(&b->lock);
+    qht_bucket_unlock(map, b);
 
     /* we raced with a resize; acquire ht->lock to see the updated ht->map */
     qht_lock(ht);
     map = ht->map;
     b = qht_map_to_bucket(map, hash);
-    qemu_spin_lock(&b->lock);
+    qht_bucket_lock(map, b);
     qht_unlock(ht);
     *pmap = map;
     return b;
@@ -345,12 +417,13 @@  static inline bool qht_map_needs_resize(const struct qht_map *map)
            map->n_added_buckets_threshold;
 }
 
-static inline void qht_chain_destroy(const struct qht_bucket *head)
+static inline void qht_chain_destroy(const struct qht_map *map,
+                                     struct qht_bucket *head)
 {
     struct qht_bucket *curr = head->next;
     struct qht_bucket *prev;
 
-    qemu_spin_destroy(&head->lock);
+    qht_do_if_first_in_stripe(map, head, qemu_spin_destroy);
     while (curr) {
         prev = curr;
         curr = curr->next;
@@ -364,7 +437,7 @@  static void qht_map_destroy(struct qht_map *map)
     size_t i;
 
     for (i = 0; i < map->n_buckets; i++) {
-        qht_chain_destroy(&map->buckets[i]);
+        qht_chain_destroy(map, &map->buckets[i]);
     }
     qemu_vfree(map->buckets);
     g_free(map);
@@ -390,7 +463,7 @@  static struct qht_map *qht_map_create(size_t n_buckets)
     map->buckets = qemu_memalign(QHT_BUCKET_ALIGN,
                                  sizeof(*map->buckets) * n_buckets);
     for (i = 0; i < n_buckets; i++) {
-        qht_head_init(&map->buckets[i]);
+        qht_head_init(map, &map->buckets[i]);
     }
     return map;
 }
@@ -638,7 +711,7 @@  bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing)
     b = qht_bucket_lock__no_stale(ht, hash, &map);
     prev = qht_insert__locked(ht, map, b, p, hash, &needs_resize);
     qht_bucket_debug__locked(b);
-    qemu_spin_unlock(&b->lock);
+    qht_bucket_unlock(map, b);
 
     if (unlikely(needs_resize) && ht->mode & QHT_MODE_AUTO_RESIZE) {
         qht_grow_maybe(ht);
@@ -749,7 +822,7 @@  bool qht_remove(struct qht *ht, const void *p, uint32_t hash)
     b = qht_bucket_lock__no_stale(ht, hash, &map);
     ret = qht_remove__locked(b, p, hash);
     qht_bucket_debug__locked(b);
-    qemu_spin_unlock(&b->lock);
+    qht_bucket_unlock(map, b);
     return ret;
 }