[v2,22/35] percpu-rwsem: enable percpu_sem destruction in atomic context

Message ID 20220128131006.67712-23-michel@lespinasse.org (mailing list archive)
State New
Series Speculative page faults

Commit Message

Michel Lespinasse Jan. 28, 2022, 1:09 p.m. UTC
From: Suren Baghdasaryan <surenb@google.com>

Calling percpu_free_rwsem() in atomic context results in a "scheduling
while atomic" bug being triggered:

BUG: scheduling while atomic: klogd/158/0x00000002
...
  __schedule_bug+0x191/0x290
  schedule_debug+0x97/0x180
  __schedule+0xdc/0xba0
  schedule+0xda/0x250
  schedule_timeout+0x92/0x2d0
  __wait_for_common+0x25b/0x430
  wait_for_completion+0x1f/0x30
  rcu_barrier+0x440/0x4f0
  rcu_sync_dtor+0xaa/0x190
  percpu_free_rwsem+0x41/0x80

Introduce a percpu_rwsem_async_destroy() function to defer semaphore
destruction to a worker thread.

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Michel Lespinasse <michel@lespinasse.org>
---
 include/linux/percpu-rwsem.h  | 13 ++++++++++++-
 kernel/locking/percpu-rwsem.c | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)
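
For illustration, the failure mode above corresponds to a caller pattern
roughly like the following (hypothetical caller and names, not code from
this series): percpu_free_rwsem() ends up in rcu_sync_dtor() ->
rcu_barrier(), which blocks, so it must not run with a spinlock held or
preemption disabled.

	/* Hypothetical caller sketch -- not part of this patch. */
	struct obj {
		spinlock_t lock;
		struct percpu_rw_semaphore *sem;	/* allocated separately */
	};

	static void obj_teardown(struct obj *obj)
	{
		spin_lock(&obj->lock);		/* atomic context from here on */
		percpu_free_rwsem(obj->sem);	/* sleeps in rcu_barrier(): triggers the BUG above */
		kfree(obj->sem);
		obj->sem = NULL;
		spin_unlock(&obj->lock);
	}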

Comments

Hillf Danton Jan. 29, 2022, 12:13 p.m. UTC | #1
On Fri, 28 Jan 2022 05:09:53 -0800 Michel Lespinasse wrote:
> +
> +static LIST_HEAD(destroy_list);
> +static DEFINE_SPINLOCK(destroy_list_lock);

   static bool destroyer_running;

> +
> +static void destroy_list_workfn(struct work_struct *work)
> +{
> +	struct percpu_rw_semaphore *sem, *sem2;
> +	LIST_HEAD(to_destroy);
> +

again:

> +	spin_lock(&destroy_list_lock);

	if (list_empty(&destroy_list)) {
		destroyer_running = false;
		spin_unlock(&destroy_list_lock);
		return;
	}
	destroyer_running = true;

> +	list_splice_init(&destroy_list, &to_destroy);
> +	spin_unlock(&destroy_list_lock);
> +
> +	if (list_empty(&to_destroy))
> +		return;
> +
> +	list_for_each_entry_safe(sem, sem2, &to_destroy, destroy_list_entry) {

		list_del(&sem->destroy_list_entry);

> +		percpu_free_rwsem(sem);
> +		kfree(sem);
> +	}

	goto again;
> +}
> +
> +static DECLARE_WORK(destroy_list_work, destroy_list_workfn);
> +
> +void percpu_rwsem_async_destroy(struct percpu_rw_semaphore *sem)
> +{
> +	spin_lock(&destroy_list_lock);
> +	list_add_tail(&sem->destroy_list_entry, &destroy_list);
> +	spin_unlock(&destroy_list_lock);
> +	schedule_work(&destroy_list_work);

Nits
	spin_lock(&destroy_list_lock);
1/	/* LIFO */
	list_add(&sem->destroy_list_entry, &destroy_list);
2/	/* spawn worker if it is idle */
	if (!destroyer_running)
3/		/* this is not critical work */
		queue_work(system_unbound_wq, &destroy_list_work);
	spin_unlock(&destroy_list_lock);
> +}
> -- 
> 2.20.1
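
Taken together, Hillf's inline suggestions above amount to roughly the
following shape (a sketch of his proposal, not code from the posted
patch; destroyer_running and the requeue loop are his additions):

	static LIST_HEAD(destroy_list);
	static DEFINE_SPINLOCK(destroy_list_lock);
	static bool destroyer_running;

	static void destroy_list_workfn(struct work_struct *work)
	{
		struct percpu_rw_semaphore *sem, *sem2;
		LIST_HEAD(to_destroy);

	again:
		spin_lock(&destroy_list_lock);
		if (list_empty(&destroy_list)) {
			destroyer_running = false;
			spin_unlock(&destroy_list_lock);
			return;
		}
		destroyer_running = true;
		list_splice_init(&destroy_list, &to_destroy);
		spin_unlock(&destroy_list_lock);

		list_for_each_entry_safe(sem, sem2, &to_destroy, destroy_list_entry) {
			list_del(&sem->destroy_list_entry);
			percpu_free_rwsem(sem);
			kfree(sem);
		}
		/* pick up entries queued while the worker was busy freeing */
		goto again;
	}

	static DECLARE_WORK(destroy_list_work, destroy_list_workfn);

	void percpu_rwsem_async_destroy(struct percpu_rw_semaphore *sem)
	{
		spin_lock(&destroy_list_lock);
		list_add(&sem->destroy_list_entry, &destroy_list);	/* LIFO */
		/* only kick the worker if it is idle; this is not critical work */
		if (!destroyer_running)
			queue_work(system_unbound_wq, &destroy_list_work);
		spin_unlock(&destroy_list_lock);
	}
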
Suren Baghdasaryan Jan. 31, 2022, 6:04 p.m. UTC | #2
On Sat, Jan 29, 2022 at 4:13 AM Hillf Danton <hdanton@sina.com> wrote:
>
> [...]
>
> Nits
>         spin_lock(&destroy_list_lock);
> 1/      /* LIFO */
>         list_add(&sem->destroy_list_entry, &destroy_list);
> 2/      /* spawn worker if it is idle */
>         if (!destroyer_running)
> 3/              /* this is not critical work */
>                 queue_work(system_unbound_wq, &destroy_list_work);
>         spin_unlock(&destroy_list_lock);

Thanks for the review! Just to clarify, are you suggesting
simplifications to the current patch or do you see a functional issue?

Hillf Danton Feb. 1, 2022, 2:09 a.m. UTC | #3
On Mon, 31 Jan 2022 10:04:16 -0800 Suren Baghdasaryan wrote:
> [...]
> 
> Thanks for the review! Just to clarify, are you suggesting
> simplifications to the current patch or do you see a functional issue?

Apart from the nits, which can safely be ignored, I wonder whether the
async destroy can be used in contexts that hold a raw_spin_lock.

Hillf

	raw_spin_lock_irq(&foo->lock);
	...
	percpu_rwsem_async_destroy(*sem);
	...
	raw_spin_unlock_irq(&foo->lock);
Suren Baghdasaryan Feb. 7, 2022, 7:31 p.m. UTC | #4
On Mon, Jan 31, 2022 at 6:10 PM Hillf Danton <hdanton@sina.com> wrote:
>
> On Mon, 31 Jan 2022 10:04:16 -0800 Suren Baghdasaryan wrote:
> > [...]
>
> Apart from the nits, which can safely be ignored, I wonder whether the
> async destroy can be used in contexts that hold a raw_spin_lock.
>
> Hillf
>
>         raw_spin_lock_irq(&foo->lock);
>         ...
>         percpu_rwsem_async_destroy(sem);
>         ...
>         raw_spin_unlock_irq(&foo->lock);

Sorry for the delay. Are you concerned about the use of spin_lock()
inside percpu_rwsem_async_destroy() which would become a sleeping lock
in case of PREEMPT_RT? If so, we can use raw_spin_lock() when locking
destroy_list_lock. Please confirm. Thanks!


>
Hillf Danton Feb. 8, 2022, 12:20 a.m. UTC | #5
On Mon, 7 Feb 2022 11:31:38 -0800 Suren Baghdasaryan wrote:
> 
> Sorry for the delay. Are you concerned about the use of spin_lock()
> inside percpu_rwsem_async_destroy() which would become a sleeping lock
> in case of PREEMPT_RT? If so, we can use raw_spin_lock() when locking
> destroy_list_lock. Please confirm. Thanks!

Yes, please replace the spin lock with the raw version, which can fit in
more scenarios.

Hillf
Suren Baghdasaryan Feb. 8, 2022, 1:31 a.m. UTC | #6
On Mon, Feb 7, 2022 at 4:21 PM Hillf Danton <hdanton@sina.com> wrote:
>
> On Mon, 7 Feb 2022 11:31:38 -0800 Suren Baghdasaryan wrote:
> >
> > Sorry for the delay. Are you concerned about the use of spin_lock()
> > inside percpu_rwsem_async_destroy() which would become a sleeping lock
> > in case of PREEMPT_RT? If so, we can use raw_spin_lock() when locking
> > destroy_list_lock. Please confirm. Thanks!
>
> Yes, please replace the spin lock with the raw version, which can fit in
> more scenarios.

Thanks for the confirmation! I'll rework my patch and send it to
Michel to include in the next version of his patchset.

>
> Hillf
>
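
For reference, the rework agreed on above amounts to switching
destroy_list_lock to a raw spinlock, roughly as sketched below (an
illustration based on this discussion; the reworked patch itself was to
be sent with the next revision of the series):

	static LIST_HEAD(destroy_list);
	static DEFINE_RAW_SPINLOCK(destroy_list_lock);

	void percpu_rwsem_async_destroy(struct percpu_rw_semaphore *sem)
	{
		unsigned long flags;

		/*
		 * A raw_spinlock_t never becomes a sleeping lock, not even
		 * on PREEMPT_RT, so this may be called from atomic context,
		 * including sections that hold a raw_spin_lock.
		 */
		raw_spin_lock_irqsave(&destroy_list_lock, flags);
		list_add_tail(&sem->destroy_list_entry, &destroy_list);
		raw_spin_unlock_irqrestore(&destroy_list_lock, flags);
		schedule_work(&destroy_list_work);
	}

destroy_list_workfn() would switch to raw_spin_lock()/raw_spin_unlock()
on the same lock accordingly.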

Patch

diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 5fda40f97fe9..bf1668fc9c5e 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -13,7 +13,14 @@  struct percpu_rw_semaphore {
 	struct rcu_sync		rss;
 	unsigned int __percpu	*read_count;
 	struct rcuwait		writer;
-	wait_queue_head_t	waiters;
+	/*
+	 * destroy_list_entry is used during object destruction when waiters
+	 * can't be used, therefore reusing the same space.
+	 */
+	union {
+		wait_queue_head_t	waiters;
+		struct list_head	destroy_list_entry;
+	};
 	atomic_t		block;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map	dep_map;
@@ -127,8 +134,12 @@  extern void percpu_up_write(struct percpu_rw_semaphore *);
 extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
 				const char *, struct lock_class_key *);
 
+/* Can't be called in atomic context. */
 extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
 
+/* Invokes percpu_free_rwsem and frees the semaphore from a worker thread. */
+extern void percpu_rwsem_async_destroy(struct percpu_rw_semaphore *sem);
+
 #define percpu_init_rwsem(sem)					\
 ({								\
 	static struct lock_class_key rwsem_key;			\
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 70a32a576f3f..a3d37bf83c60 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -7,6 +7,7 @@ 
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/sched/task.h>
+#include <linux/slab.h>
 #include <linux/errno.h>
 
 int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
@@ -268,3 +269,34 @@  void percpu_up_write(struct percpu_rw_semaphore *sem)
 	rcu_sync_exit(&sem->rss);
 }
 EXPORT_SYMBOL_GPL(percpu_up_write);
+
+static LIST_HEAD(destroy_list);
+static DEFINE_SPINLOCK(destroy_list_lock);
+
+static void destroy_list_workfn(struct work_struct *work)
+{
+	struct percpu_rw_semaphore *sem, *sem2;
+	LIST_HEAD(to_destroy);
+
+	spin_lock(&destroy_list_lock);
+	list_splice_init(&destroy_list, &to_destroy);
+	spin_unlock(&destroy_list_lock);
+
+	if (list_empty(&to_destroy))
+		return;
+
+	list_for_each_entry_safe(sem, sem2, &to_destroy, destroy_list_entry) {
+		percpu_free_rwsem(sem);
+		kfree(sem);
+	}
+}
+
+static DECLARE_WORK(destroy_list_work, destroy_list_workfn);
+
+void percpu_rwsem_async_destroy(struct percpu_rw_semaphore *sem)
+{
+	spin_lock(&destroy_list_lock);
+	list_add_tail(&sem->destroy_list_entry, &destroy_list);
+	spin_unlock(&destroy_list_lock);
+	schedule_work(&destroy_list_work);
+}
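
Caller-side usage would look roughly like this (hypothetical caller,
reusing struct obj from the sketch under the commit message; not part of
the series). Note that the semaphore must be a separately allocated
object, because the worker kfree()s it, and it must no longer be in use
when queued, since destroy_list_entry shares storage with the waiters
wait queue:

	static void obj_teardown(struct obj *obj)	/* may run in atomic context */
	{
		spin_lock(&obj->lock);
		/*
		 * Instead of percpu_free_rwsem() + kfree(), which may sleep,
		 * hand the no-longer-used semaphore to the worker.
		 */
		percpu_rwsem_async_destroy(obj->sem);
		obj->sem = NULL;
		spin_unlock(&obj->lock);
	}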