Message ID | 20240201092559.910982-5-yukuai1@huaweicloud.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | dm-raid/md/raid: fix v6.7 regressions | expand |
在 2024/2/1 下午5:25, Yu Kuai 写道: > From: Yu Kuai <yukuai3@huawei.com> > > Currently, if reshape is interrupted, then reassemble the array will > register sync_thread directly from pers->run(), in this case > 'MD_RECOVERY_RUNNING' is set directly, however, there is no guarantee > that md_do_sync() will be executed, hence stop_sync_thread() will hang > because 'MD_RECOVERY_RUNNING' can't be cleared. Hi Kuai I have a question here. Is it the reason sync_thread can't run md_do_sync because kthread_should_stop, so it doesn't have the chance to set MD_RECOVERY_DONE? Why creating sync thread in md_check_recovery doesn't have this problem? Could you explain more about this? Best Regards Xiao > > Last patch make sure that md_do_sync() will set MD_RECOVERY_DONE, > however, following hang can still be triggered by dm-raid test > shell/lvconvert-raid-reshape.sh occasionally: > > [root@fedora ~]# cat /proc/1982/stack > [<0>] stop_sync_thread+0x1ab/0x270 [md_mod] > [<0>] md_frozen_sync_thread+0x5c/0xa0 [md_mod] > [<0>] raid_presuspend+0x1e/0x70 [dm_raid] > [<0>] dm_table_presuspend_targets+0x40/0xb0 [dm_mod] > [<0>] __dm_destroy+0x2a5/0x310 [dm_mod] > [<0>] dm_destroy+0x16/0x30 [dm_mod] > [<0>] dev_remove+0x165/0x290 [dm_mod] > [<0>] ctl_ioctl+0x4bb/0x7b0 [dm_mod] > [<0>] dm_ctl_ioctl+0x11/0x20 [dm_mod] > [<0>] vfs_ioctl+0x21/0x60 > [<0>] __x64_sys_ioctl+0xb9/0xe0 > [<0>] do_syscall_64+0xc6/0x230 > [<0>] entry_SYSCALL_64_after_hwframe+0x6c/0x74 > > Meanwhile mddev->recovery is: > MD_RECOVERY_RUNNING | > MD_RECOVERY_INTR | > MD_RECOVERY_RESHAPE | > MD_RECOVERY_FROZEN > > Fix this problem by remove the code to register sync_thread directly > from raid10 and raid5. And let md_check_recovery() to register > sync_thread. > > Fixes: f67055780caa ("[PATCH] md: Checkpoint and allow restart of raid5 reshape") > Fixes: f52f5c71f3d4 ("md: fix stopping sync thread") > Signed-off-by: Yu Kuai <yukuai3@huawei.com> > --- > drivers/md/md.c | 5 ++++- > drivers/md/raid10.c | 16 ++-------------- > drivers/md/raid5.c | 29 ++--------------------------- > 3 files changed, 8 insertions(+), 42 deletions(-) > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index c65dfd156090..6c5d0a372927 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -9372,6 +9372,7 @@ static void md_start_sync(struct work_struct *ws) > struct mddev *mddev = container_of(ws, struct mddev, sync_work); > int spares = 0; > bool suspend = false; > + char *name; > > if (md_spares_need_change(mddev)) > suspend = true; > @@ -9404,8 +9405,10 @@ static void md_start_sync(struct work_struct *ws) > if (spares) > md_bitmap_write_all(mddev->bitmap); > > + name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ? > + "reshape" : "resync"; > rcu_assign_pointer(mddev->sync_thread, > - md_register_thread(md_do_sync, mddev, "resync")); > + md_register_thread(md_do_sync, mddev, name)); > if (!mddev->sync_thread) { > pr_warn("%s: could not start resync thread...\n", > mdname(mddev)); > diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c > index 7412066ea22c..a5f8419e2df1 100644 > --- a/drivers/md/raid10.c > +++ b/drivers/md/raid10.c > @@ -4175,11 +4175,7 @@ static int raid10_run(struct mddev *mddev) > clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); > clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); > set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); > - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); > - rcu_assign_pointer(mddev->sync_thread, > - md_register_thread(md_do_sync, mddev, "reshape")); > - if (!mddev->sync_thread) > - goto out_free_conf; > + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > } > > return 0; > @@ -4573,16 +4569,8 @@ static int raid10_start_reshape(struct mddev *mddev) > clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); > clear_bit(MD_RECOVERY_DONE, &mddev->recovery); > set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); > - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); > - > - rcu_assign_pointer(mddev->sync_thread, > - md_register_thread(md_do_sync, mddev, "reshape")); > - if (!mddev->sync_thread) { > - ret = -EAGAIN; > - goto abort; > - } > + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > conf->reshape_checkpoint = jiffies; > - md_wakeup_thread(mddev->sync_thread); > md_new_event(); > return 0; > > diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c > index 8497880135ee..6a7a32f7fb91 100644 > --- a/drivers/md/raid5.c > +++ b/drivers/md/raid5.c > @@ -7936,11 +7936,7 @@ static int raid5_run(struct mddev *mddev) > clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); > clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); > set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); > - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); > - rcu_assign_pointer(mddev->sync_thread, > - md_register_thread(md_do_sync, mddev, "reshape")); > - if (!mddev->sync_thread) > - goto abort; > + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > } > > /* Ok, everything is just fine now */ > @@ -8506,29 +8502,8 @@ static int raid5_start_reshape(struct mddev *mddev) > clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); > clear_bit(MD_RECOVERY_DONE, &mddev->recovery); > set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); > - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); > - rcu_assign_pointer(mddev->sync_thread, > - md_register_thread(md_do_sync, mddev, "reshape")); > - if (!mddev->sync_thread) { > - mddev->recovery = 0; > - spin_lock_irq(&conf->device_lock); > - write_seqcount_begin(&conf->gen_lock); > - mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; > - mddev->new_chunk_sectors = > - conf->chunk_sectors = conf->prev_chunk_sectors; > - mddev->new_layout = conf->algorithm = conf->prev_algo; > - rdev_for_each(rdev, mddev) > - rdev->new_data_offset = rdev->data_offset; > - smp_wmb(); > - conf->generation --; > - conf->reshape_progress = MaxSector; > - mddev->reshape_position = MaxSector; > - write_seqcount_end(&conf->gen_lock); > - spin_unlock_irq(&conf->device_lock); > - return -EAGAIN; > - } > + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > conf->reshape_checkpoint = jiffies; > - md_wakeup_thread(mddev->sync_thread); > md_new_event(); > return 0; > }
Hi, 在 2024/02/28 20:07, Xiao Ni 写道: > I have a question here. Is it the reason sync_thread can't run > md_do_sync because kthread_should_stop, so it doesn't have the chance to > set MD_RECOVERY_DONE? Why creating sync thread in md_check_recovery > doesn't have this problem? Could you explain more about this? raid10_run() only register sync_thread, without calling md_wakeup_thread() to set the bit 'THREAD_WAKEUP', md_do_sync() will not be executed. raid5 defines 'pers->start' hence md_start() will call md_wakeup_thread(). md_start_sync() will always call md_wakeup_thread() hence there is no such problem. BTW, this patch fix the same problem as you mentioned in your other thread: diff --git a/drivers/md/md.c b/drivers/md/md.c index 2266358d8074..54790261254d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4904,6 +4904,7 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq) * never happen */ md_wakeup_thread_directly(mddev->sync_thread); + md_wakeup_thread(mddev->sync_thread); if (work_pending(&mddev->sync_work)) flush_work(&mddev->sync_work); However, I think the one to register sync_thread is responsible to wake it up. Thanks, Kuai
On Wed, Feb 28, 2024 at 8:44 PM Yu Kuai <yukuai1@huaweicloud.com> wrote: > > Hi, > > 在 2024/02/28 20:07, Xiao Ni 写道: > > I have a question here. Is it the reason sync_thread can't run > > md_do_sync because kthread_should_stop, so it doesn't have the chance to > > set MD_RECOVERY_DONE? Why creating sync thread in md_check_recovery > > doesn't have this problem? Could you explain more about this? > > raid10_run() only register sync_thread, without calling > md_wakeup_thread() to set the bit 'THREAD_WAKEUP', md_do_sync() will not > be executed. I c. The user is responsible to wake up the thread. If raid10 wakes up the thread in the right way, we don't need to move register reshape thread to md_check_recovery, right? > > raid5 defines 'pers->start' hence md_start() will call > md_wakeup_thread(). > > md_start_sync() will always call md_wakeup_thread() hence there is no > such problem. > > BTW, this patch fix the same problem as you mentioned in your other > thread: > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index 2266358d8074..54790261254d 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -4904,6 +4904,7 @@ static void stop_sync_thread(struct mddev *mddev, > bool locked, bool check_seq) > * never happen > */ > md_wakeup_thread_directly(mddev->sync_thread); > + md_wakeup_thread(mddev->sync_thread); > if (work_pending(&mddev->sync_work)) > flush_work(&mddev->sync_work); The first patch of my patch set has this already. Maybe it's the reason that my patch01 can fix this similar problem. > > However, I think the one to register sync_thread is responsible to wake > it up. Agree, the user that registers thread should wake it up. So start/stop sync thread apis are common. And they can be called by many users. Best Regards Xiao > > Thanks, > Kuai >
diff --git a/drivers/md/md.c b/drivers/md/md.c index c65dfd156090..6c5d0a372927 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9372,6 +9372,7 @@ static void md_start_sync(struct work_struct *ws) struct mddev *mddev = container_of(ws, struct mddev, sync_work); int spares = 0; bool suspend = false; + char *name; if (md_spares_need_change(mddev)) suspend = true; @@ -9404,8 +9405,10 @@ static void md_start_sync(struct work_struct *ws) if (spares) md_bitmap_write_all(mddev->bitmap); + name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ? + "reshape" : "resync"; rcu_assign_pointer(mddev->sync_thread, - md_register_thread(md_do_sync, mddev, "resync")); + md_register_thread(md_do_sync, mddev, name)); if (!mddev->sync_thread) { pr_warn("%s: could not start resync thread...\n", mdname(mddev)); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7412066ea22c..a5f8419e2df1 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4175,11 +4175,7 @@ static int raid10_run(struct mddev *mddev) clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - rcu_assign_pointer(mddev->sync_thread, - md_register_thread(md_do_sync, mddev, "reshape")); - if (!mddev->sync_thread) - goto out_free_conf; + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } return 0; @@ -4573,16 +4569,8 @@ static int raid10_start_reshape(struct mddev *mddev) clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - - rcu_assign_pointer(mddev->sync_thread, - md_register_thread(md_do_sync, mddev, "reshape")); - if (!mddev->sync_thread) { - ret = -EAGAIN; - goto abort; - } + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); conf->reshape_checkpoint = jiffies; - md_wakeup_thread(mddev->sync_thread); md_new_event(); return 0; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 8497880135ee..6a7a32f7fb91 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7936,11 +7936,7 @@ static int raid5_run(struct mddev *mddev) clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - rcu_assign_pointer(mddev->sync_thread, - md_register_thread(md_do_sync, mddev, "reshape")); - if (!mddev->sync_thread) - goto abort; + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } /* Ok, everything is just fine now */ @@ -8506,29 +8502,8 @@ static int raid5_start_reshape(struct mddev *mddev) clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - rcu_assign_pointer(mddev->sync_thread, - md_register_thread(md_do_sync, mddev, "reshape")); - if (!mddev->sync_thread) { - mddev->recovery = 0; - spin_lock_irq(&conf->device_lock); - write_seqcount_begin(&conf->gen_lock); - mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; - mddev->new_chunk_sectors = - conf->chunk_sectors = conf->prev_chunk_sectors; - mddev->new_layout = conf->algorithm = conf->prev_algo; - rdev_for_each(rdev, mddev) - rdev->new_data_offset = rdev->data_offset; - smp_wmb(); - conf->generation --; - conf->reshape_progress = MaxSector; - mddev->reshape_position = MaxSector; - write_seqcount_end(&conf->gen_lock); - spin_unlock_irq(&conf->device_lock); - return -EAGAIN; - } + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); conf->reshape_checkpoint = jiffies; - md_wakeup_thread(mddev->sync_thread); md_new_event(); return 0; }