diff mbox series

md: fix 'delete_mutex' deadlock

Message ID 20230621142933.1395629-1-yukuai1@huaweicloud.com (mailing list archive)
State New, archived
Headers show
Series md: fix 'delete_mutex' deadlock | expand

Commit Message

Yu Kuai June 21, 2023, 2:29 p.m. UTC
From: Yu Kuai <yukuai3@huawei.com>

Commit 3ce94ce5d05a ("md: fix duplicate filename for rdev") introduce a
new lock 'delete_mutex', and trigger a new deadlock:

t1: remove rdev			t2: sysfs writer

rdev_attr_store			rdev_attr_store
 mddev_lock
 state_store
 md_kick_rdev_from_array
  lock delete_mutex
  list_add mddev->deleting
  unlock delete_mutex
 mddev_unlock
				 mddev_lock
				 ...
  lock delete_mutex
  kobject_del
  // wait for sysfs writers to be done
				 mddev_unlock
				 lock delete_mutex
				 // wait for delete_mutex, deadlock

'delete_mutex' is used to protect the list 'mddev->deleting', turns out
that this list can be protected by 'reconfig_mutex' directly, and this
lock can be removed.

Fix this problem by removing the lock, and use 'reconfig_mutex' to
protect the list. mddev_unlock() will move this list to a local list to
be handled after 'reconfig_mutex' is dropped.

Fixes: 3ce94ce5d05a ("md: fix duplicate filename for rdev")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/md/md.c | 28 +++++++++-------------------
 drivers/md/md.h |  4 +---
 2 files changed, 10 insertions(+), 22 deletions(-)

Comments

Song Liu June 21, 2023, 11:10 p.m. UTC | #1
On Tue, Jun 20, 2023 at 11:31 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>
> From: Yu Kuai <yukuai3@huawei.com>
>
> Commit 3ce94ce5d05a ("md: fix duplicate filename for rdev") introduce a
> new lock 'delete_mutex', and trigger a new deadlock:
>
> t1: remove rdev                 t2: sysfs writer
>
> rdev_attr_store                 rdev_attr_store
>  mddev_lock
>  state_store
>  md_kick_rdev_from_array
>   lock delete_mutex
>   list_add mddev->deleting
>   unlock delete_mutex
>  mddev_unlock
>                                  mddev_lock
>                                  ...
>   lock delete_mutex
>   kobject_del
>   // wait for sysfs writers to be done
>                                  mddev_unlock
>                                  lock delete_mutex
>                                  // wait for delete_mutex, deadlock
>
> 'delete_mutex' is used to protect the list 'mddev->deleting', turns out
> that this list can be protected by 'reconfig_mutex' directly, and this
> lock can be removed.
>
> Fix this problem by removing the lock, and use 'reconfig_mutex' to
> protect the list. mddev_unlock() will move this list to a local list to
> be handled after 'reconfig_mutex' is dropped.
>
> Fixes: 3ce94ce5d05a ("md: fix duplicate filename for rdev")
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>

Applied to md-next. Thanks for the quick fix!

Song

> ---
>  drivers/md/md.c | 28 +++++++++-------------------
>  drivers/md/md.h |  4 +---
>  2 files changed, 10 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 1086d7282ee7..089f7d7a9052 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -643,7 +643,6 @@ void mddev_init(struct mddev *mddev)
>  {
>         mutex_init(&mddev->open_mutex);
>         mutex_init(&mddev->reconfig_mutex);
> -       mutex_init(&mddev->delete_mutex);
>         mutex_init(&mddev->sync_mutex);
>         mutex_init(&mddev->bitmap_info.mutex);
>         INIT_LIST_HEAD(&mddev->disks);
> @@ -751,26 +750,15 @@ static void mddev_free(struct mddev *mddev)
>
>  static const struct attribute_group md_redundancy_group;
>
> -static void md_free_rdev(struct mddev *mddev)
> +void mddev_unlock(struct mddev *mddev)
>  {
>         struct md_rdev *rdev;
>         struct md_rdev *tmp;
> +       LIST_HEAD(delete);
>
> -       mutex_lock(&mddev->delete_mutex);
> -       if (list_empty(&mddev->deleting))
> -               goto out;
> +       if (!list_empty(&mddev->deleting))
> +               list_splice_init(&mddev->deleting, &delete);
>
> -       list_for_each_entry_safe(rdev, tmp, &mddev->deleting, same_set) {
> -               list_del_init(&rdev->same_set);
> -               kobject_del(&rdev->kobj);
> -               export_rdev(rdev, mddev);
> -       }
> -out:
> -       mutex_unlock(&mddev->delete_mutex);
> -}
> -
> -void mddev_unlock(struct mddev *mddev)
> -{
>         if (mddev->to_remove) {
>                 /* These cannot be removed under reconfig_mutex as
>                  * an access to the files will try to take reconfig_mutex
> @@ -810,7 +798,11 @@ void mddev_unlock(struct mddev *mddev)
>         } else
>                 mutex_unlock(&mddev->reconfig_mutex);
>
> -       md_free_rdev(mddev);
> +       list_for_each_entry_safe(rdev, tmp, &delete, same_set) {
> +               list_del_init(&rdev->same_set);
> +               kobject_del(&rdev->kobj);
> +               export_rdev(rdev, mddev);
> +       }
>
>         md_wakeup_thread(mddev->thread);
>         wake_up(&mddev->sb_wait);
> @@ -2490,9 +2482,7 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev)
>          * reconfig_mutex is held, hence it can't be called under
>          * reconfig_mutex and it's delayed to mddev_unlock().
>          */
> -       mutex_lock(&mddev->delete_mutex);
>         list_add(&rdev->same_set, &mddev->deleting);
> -       mutex_unlock(&mddev->delete_mutex);
>  }
>
>  static void export_array(struct mddev *mddev)
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 892a598a5029..8ae957480976 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -531,11 +531,9 @@ struct mddev {
>
>         /*
>          * Temporarily store rdev that will be finally removed when
> -        * reconfig_mutex is unlocked.
> +        * reconfig_mutex is unlocked, protected by reconfig_mutex.
>          */
>         struct list_head                deleting;
> -       /* Protect the deleting list */
> -       struct mutex                    delete_mutex;
>
>         /* Used to synchronize idle and frozen for action_store() */
>         struct mutex                    sync_mutex;
> --
> 2.39.2
>
diff mbox series

Patch

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1086d7282ee7..089f7d7a9052 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -643,7 +643,6 @@  void mddev_init(struct mddev *mddev)
 {
 	mutex_init(&mddev->open_mutex);
 	mutex_init(&mddev->reconfig_mutex);
-	mutex_init(&mddev->delete_mutex);
 	mutex_init(&mddev->sync_mutex);
 	mutex_init(&mddev->bitmap_info.mutex);
 	INIT_LIST_HEAD(&mddev->disks);
@@ -751,26 +750,15 @@  static void mddev_free(struct mddev *mddev)
 
 static const struct attribute_group md_redundancy_group;
 
-static void md_free_rdev(struct mddev *mddev)
+void mddev_unlock(struct mddev *mddev)
 {
 	struct md_rdev *rdev;
 	struct md_rdev *tmp;
+	LIST_HEAD(delete);
 
-	mutex_lock(&mddev->delete_mutex);
-	if (list_empty(&mddev->deleting))
-		goto out;
+	if (!list_empty(&mddev->deleting))
+		list_splice_init(&mddev->deleting, &delete);
 
-	list_for_each_entry_safe(rdev, tmp, &mddev->deleting, same_set) {
-		list_del_init(&rdev->same_set);
-		kobject_del(&rdev->kobj);
-		export_rdev(rdev, mddev);
-	}
-out:
-	mutex_unlock(&mddev->delete_mutex);
-}
-
-void mddev_unlock(struct mddev *mddev)
-{
 	if (mddev->to_remove) {
 		/* These cannot be removed under reconfig_mutex as
 		 * an access to the files will try to take reconfig_mutex
@@ -810,7 +798,11 @@  void mddev_unlock(struct mddev *mddev)
 	} else
 		mutex_unlock(&mddev->reconfig_mutex);
 
-	md_free_rdev(mddev);
+	list_for_each_entry_safe(rdev, tmp, &delete, same_set) {
+		list_del_init(&rdev->same_set);
+		kobject_del(&rdev->kobj);
+		export_rdev(rdev, mddev);
+	}
 
 	md_wakeup_thread(mddev->thread);
 	wake_up(&mddev->sb_wait);
@@ -2490,9 +2482,7 @@  static void md_kick_rdev_from_array(struct md_rdev *rdev)
 	 * reconfig_mutex is held, hence it can't be called under
 	 * reconfig_mutex and it's delayed to mddev_unlock().
 	 */
-	mutex_lock(&mddev->delete_mutex);
 	list_add(&rdev->same_set, &mddev->deleting);
-	mutex_unlock(&mddev->delete_mutex);
 }
 
 static void export_array(struct mddev *mddev)
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 892a598a5029..8ae957480976 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -531,11 +531,9 @@  struct mddev {
 
 	/*
 	 * Temporarily store rdev that will be finally removed when
-	 * reconfig_mutex is unlocked.
+	 * reconfig_mutex is unlocked, protected by reconfig_mutex.
 	 */
 	struct list_head		deleting;
-	/* Protect the deleting list */
-	struct mutex			delete_mutex;
 
 	/* Used to synchronize idle and frozen for action_store() */
 	struct mutex			sync_mutex;