diff mbox series

[v2,md-6.11,07/12] md: don't fail action_store() if sync_thread is not registered

Message ID 20240611132251.1967786-8-yukuai1@huaweicloud.com (mailing list archive)
State New
Headers show
Series md: refacotor and some fixes related to sync_thread | expand

Commit Message

Yu Kuai June 11, 2024, 1:22 p.m. UTC
From: Yu Kuai <yukuai3@huawei.com>

MD_RECOVERY_RUNNING will always be set when trying to register a new
sync_thread, however, if md_start_sync() turns out to do nothing,
MD_RECOVERY_RUNNING will be cleared in this case. And during the race
window, action_store() will return -EBUSY, which will cause some
mdadm tests to fail. For example:

The test 07reshape5intr will add a new disk to array, then start
reshape:

mdadm /dev/md0 --add /dev/xxx
mdadm --grow /dev/md0 -n 3

And add_bound_rdev() from mdadm --add will set MD_RECOVERY_NEEDED,
then during the race windown, mdadm --grow will fail.

Fix the problem by waiting in action_store() during the race window,
fail only if sync_thread is registered.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/md/md.c | 85 +++++++++++++++++++------------------------------
 drivers/md/md.h |  2 --
 2 files changed, 33 insertions(+), 54 deletions(-)
diff mbox series

Patch

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 44cb18ec1c52..86abd0fe0681 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -753,7 +753,6 @@  int mddev_init(struct mddev *mddev)
 
 	mutex_init(&mddev->open_mutex);
 	mutex_init(&mddev->reconfig_mutex);
-	mutex_init(&mddev->sync_mutex);
 	mutex_init(&mddev->suspend_mutex);
 	mutex_init(&mddev->bitmap_info.mutex);
 	INIT_LIST_HEAD(&mddev->disks);
@@ -5021,34 +5020,6 @@  void md_unfrozen_sync_thread(struct mddev *mddev)
 }
 EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
 
-static void idle_sync_thread(struct mddev *mddev)
-{
-	mutex_lock(&mddev->sync_mutex);
-	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-
-	if (mddev_lock(mddev)) {
-		mutex_unlock(&mddev->sync_mutex);
-		return;
-	}
-
-	stop_sync_thread(mddev, false);
-	mutex_unlock(&mddev->sync_mutex);
-}
-
-static void frozen_sync_thread(struct mddev *mddev)
-{
-	mutex_lock(&mddev->sync_mutex);
-	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-
-	if (mddev_lock(mddev)) {
-		mutex_unlock(&mddev->sync_mutex);
-		return;
-	}
-
-	stop_sync_thread(mddev, false);
-	mutex_unlock(&mddev->sync_mutex);
-}
-
 static int mddev_start_reshape(struct mddev *mddev)
 {
 	int ret;
@@ -5056,24 +5027,13 @@  static int mddev_start_reshape(struct mddev *mddev)
 	if (mddev->pers->start_reshape == NULL)
 		return -EINVAL;
 
-	ret = mddev_lock(mddev);
-	if (ret)
-		return ret;
-
-	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
-		mddev_unlock(mddev);
-		return -EBUSY;
-	}
-
 	if (mddev->reshape_position == MaxSector ||
 	    mddev->pers->check_reshape == NULL ||
 	    mddev->pers->check_reshape(mddev)) {
 		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		ret = mddev->pers->start_reshape(mddev);
-		if (ret) {
-			mddev_unlock(mddev);
+		if (ret)
 			return ret;
-		}
 	} else {
 		/*
 		 * If reshape is still in progress, and md_check_recovery() can
@@ -5083,7 +5043,6 @@  static int mddev_start_reshape(struct mddev *mddev)
 		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 	}
 
-	mddev_unlock(mddev);
 	sysfs_notify_dirent_safe(mddev->sysfs_degraded);
 	return 0;
 }
@@ -5097,36 +5056,53 @@  action_store(struct mddev *mddev, const char *page, size_t len)
 	if (!mddev->pers || !mddev->pers->sync_request)
 		return -EINVAL;
 
+retry:
+	if (work_busy(&mddev->sync_work))
+		flush_work(&mddev->sync_work);
+
+	ret = mddev_lock(mddev);
+	if (ret)
+		return ret;
+
+	if (work_busy(&mddev->sync_work)) {
+		mddev_unlock(mddev);
+		goto retry;
+	}
+
 	action = md_sync_action_by_name(page);
 
 	/* TODO: mdadm rely on "idle" to start sync_thread. */
 	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
 		switch (action) {
 		case ACTION_FROZEN:
-			frozen_sync_thread(mddev);
-			return len;
+			md_frozen_sync_thread(mddev);
+			ret = len;
+			goto out;
 		case ACTION_IDLE:
-			idle_sync_thread(mddev);
+			md_idle_sync_thread(mddev);
 			break;
 		case ACTION_RESHAPE:
 		case ACTION_RECOVER:
 		case ACTION_CHECK:
 		case ACTION_REPAIR:
 		case ACTION_RESYNC:
-			return -EBUSY;
+			ret = -EBUSY;
+			goto out;
 		default:
-			return -EINVAL;
+			ret = -EINVAL;
+			goto out;
 		}
 	} else {
 		switch (action) {
 		case ACTION_FROZEN:
 			set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-			return len;
+			ret = len;
+			goto out;
 		case ACTION_RESHAPE:
 			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 			ret = mddev_start_reshape(mddev);
 			if (ret)
-				return ret;
+				goto out;
 			break;
 		case ACTION_RECOVER:
 			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -5144,7 +5120,8 @@  action_store(struct mddev *mddev, const char *page, size_t len)
 			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 			break;
 		default:
-			return -EINVAL;
+			ret = -EINVAL;
+			goto out;
 		}
 	}
 
@@ -5152,14 +5129,18 @@  action_store(struct mddev *mddev, const char *page, size_t len)
 		/* A write to sync_action is enough to justify
 		 * canceling read-auto mode
 		 */
-		flush_work(&mddev->sync_work);
 		mddev->ro = MD_RDWR;
 		md_wakeup_thread(mddev->sync_thread);
 	}
+
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
 	sysfs_notify_dirent_safe(mddev->sysfs_action);
-	return len;
+	ret = len;
+
+out:
+	mddev_unlock(mddev);
+	return ret;
 }
 
 static struct md_sysfs_entry md_scan_mode =
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 88add162b08e..732053b905b2 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -595,8 +595,6 @@  struct mddev {
 	 */
 	struct list_head		deleting;
 
-	/* Used to synchronize idle and frozen for action_store() */
-	struct mutex			sync_mutex;
 	/* The sequence number for sync thread */
 	atomic_t sync_seq;