@@ -3796,6 +3796,8 @@ static void raid_postsuspend(struct dm_target *ti)
struct raid_set *rs = ti->private;
if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
+ if (test_bit(MD_RECOVERY_WAIT, &rs->md.recovery))
+ clear_bit(MD_RECOVERY_WAIT, &rs->md.recovery);
/* Writes have to be stopped before suspending to avoid deadlocks. */
if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
md_stop_writes(&rs->md);
@@ -4904,6 +4904,7 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
* never happen
*/
md_wakeup_thread_directly(mddev->sync_thread);
+ md_wakeup_thread(mddev->sync_thread);
if (work_pending(&mddev->sync_work))
flush_work(&mddev->sync_work);
MD_RECOVERY_WAIT is used by dmraid to delay reshape process by patch commit 644e2537fdc7 ("dm raid: fix stripe adding reshape deadlock"). Before patch commit f52f5c71f3d4b ("md: fix stopping sync thread") dmraid stopped sync thread directy by calling md_reap_sync_thread. After this patch dmraid stops sync thread asynchronously as md does. This is right. Now the dmraid stop process is like this: 1. raid_postsuspend->md_stop_writes->__md_stop_writes->stop_sync_thread. stop_sync_thread sets MD_RECOVERY_INTR and wait until MD_RECOVERY_RUNNING is cleared 2. md_do_sync finds MD_RECOVERY_WAIT is set and return. (This is the root cause for this deadlock. We hope md_do_sync can set MD_RECOVERY_DONE) 3. md thread calls md_check_recovery (This is the place to reap sync thread. Because MD_RECOVERY_DONE is not set. md thread can't reap sync thread) 4. raid_dtr stops/free struct mddev and release dmraid related resources dmraid only sets MD_RECOVERY_WAIT but doesn't clear it. It needs to clear this bit when stopping the dmraid before stopping sync thread. But the deadlock still can happen sometimes even MD_RECOVERY_WAIT is cleared before stopping sync thread. It's the reason stop_sync_thread only wakes up task. If the task isn't running, it still needs to wake up sync thread too. This deadlock can be reproduced 100% by these commands: modprobe brd rd_size=34816 rd_nr=5 while [ 1 ]; do vgcreate test_vg /dev/ram* lvcreate --type raid5 -L 16M -n test_lv test_vg lvconvert -y --stripes 4 /dev/test_vg/test_lv vgremove test_vg -ff sleep 1 done Fixes: 644e2537fdc7 ("dm raid: fix stripe adding reshape deadlock") Fixes: f52f5c71f3d4 ("md: fix stopping sync thread") Signed-off-by: Xiao Ni <xni@redhat.com> --- drivers/md/dm-raid.c | 2 ++ drivers/md/md.c | 1 + 2 files changed, 3 insertions(+)