Message ID | 20231221071109.1562530-3-linan666@huaweicloud.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | md: bugfix of creating symlink with disk holder | expand |
Hi, 在 2023/12/21 15:11, linan666@huaweicloud.com 写道: > From: Li Nan <linan122@huawei.com> > > There is a risk of deadlock when a process gets disk->open_mutex after > suspending mddev, because other processes may hold open_mutex while > submitting io. For example: > > T1 T2 > blkdev_open > bdev_open_by_dev > mutex_lock(&disk->open_mutex) > md_ioctl > mddev_suspend_and_lock > mddev_suspend > md_add_new_disk > bind_rdev_to_array > bd_link_disk_holder > //wait open_mutex > blkdev_get_whole > bdev_disk_changed > efi_partition > read_lba > ... > md_submit_bio > md_handle_request > //wait resume > Nice catch! This patch looks good except that the new flag 'SymlinkCreated' doesn't look accurate, perhaps 'HolderLinked' will make more sense. Thanks, Kuai > Fix it by getting disk->open_mutex after mddev resume, iterating each > mddev->disk to create symlink for rdev which has not been created yet. > and moving bd_unlink_disk_holder() to mddev_unlock(), rdev has been > deleted from mddev->disks here, which can avoid concurrent bind and unbind, > > Fixes: 1b0a2d950ee2 ("md: use new apis to suspend array for ioctls involed array reconfiguration") > Signed-off-by: Li Nan <linan122@huawei.com> > --- > drivers/md/md.c | 39 +++++++++++++++++++++++++++++---------- > 1 file changed, 29 insertions(+), 10 deletions(-) > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index d6612b922c76..c128570f2a5d 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -521,6 +521,20 @@ void mddev_resume(struct mddev *mddev) > } > EXPORT_SYMBOL_GPL(mddev_resume); > > +static void md_link_disk_holder(struct mddev *mddev) > +{ > + struct md_rdev *rdev; > + > + rcu_read_lock(); > + rdev_for_each_rcu(rdev, mddev) { > + if (test_bit(SymlinkCreated, &rdev->flags)) > + continue; > + if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk)) > + set_bit(SymlinkCreated, &rdev->flags); > + } > + rcu_read_unlock(); > +} > + > /* > * Generic flush handling for md > */ > @@ -902,6 +916,11 @@ void mddev_unlock(struct mddev *mddev) > > list_for_each_entry_safe(rdev, tmp, &delete, same_set) { > list_del_init(&rdev->same_set); > + if (test_bit(SymlinkCreated, &rdev->flags)) { > + bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); > + clear_bit(SymlinkCreated, &rdev->flags); > + } > + rdev->mddev = NULL; > kobject_del(&rdev->kobj); > export_rdev(rdev, mddev); > } > @@ -2526,8 +2545,6 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) > sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks"); > > list_add_rcu(&rdev->same_set, &mddev->disks); > - if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk)) > - set_bit(SymlinkCreated, &rdev->flags); > > /* May as well allow recovery to be retried once */ > mddev->recovery_disabled++; > @@ -2562,14 +2579,9 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev) > { > struct mddev *mddev = rdev->mddev; > > - if (test_bit(SymlinkCreated, &rdev->flags)) { > - bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); > - clear_bit(SymlinkCreated, &rdev->flags); > - } > list_del_rcu(&rdev->same_set); > pr_debug("md: unbind<%pg>\n", rdev->bdev); > mddev_destroy_serial_pool(rdev->mddev, rdev); > - rdev->mddev = NULL; > sysfs_remove_link(&rdev->kobj, "block"); > sysfs_put(rdev->sysfs_state); > sysfs_put(rdev->sysfs_unack_badblocks); > @@ -4667,8 +4679,10 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) > if (err) > export_rdev(rdev, mddev); > mddev_unlock_and_resume(mddev); > - if (!err) > + if (!err) { > + md_link_disk_holder(mddev); > md_new_event(); > + } > return err ? err : len; > } > > @@ -6606,6 +6620,7 @@ static void autorun_devices(int part) > } > autorun_array(mddev); > mddev_unlock_and_resume(mddev); > + md_link_disk_holder(mddev); > } > /* on success, candidates will be empty, on error > * it won't... > @@ -7832,8 +7847,12 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode, > err != -EINVAL) > mddev->hold_active = 0; > > - md_ioctl_need_suspend(cmd) ? mddev_unlock_and_resume(mddev) : > - mddev_unlock(mddev); > + if (md_ioctl_need_suspend(cmd)) { > + mddev_unlock_and_resume(mddev); > + md_link_disk_holder(mddev); > + } else { > + mddev_unlock(mddev); > + } > > out: > if(did_set_md_closing) >
Hi, Thorsten here, the Linux kernel's regression tracker. On 21.12.23 09:49, Yu Kuai wrote: > 在 2023/12/21 15:11, linan666@huaweicloud.com 写道: >> From: Li Nan <linan122@huawei.com> >> >> There is a risk of deadlock when a process gets disk->open_mutex after >> suspending mddev, because other processes may hold open_mutex while >> submitting io. For example: >> [...] > Nice catch! This patch looks good except that the new flag > 'SymlinkCreated' doesn't look accurate, perhaps 'HolderLinked' > will make more sense. > >> Fix it by getting disk->open_mutex after mddev resume, iterating each >> mddev->disk to create symlink for rdev which has not been created yet. >> and moving bd_unlink_disk_holder() to mddev_unlock(), rdev has been >> deleted from mddev->disks here, which can avoid concurrent bind and >> unbind, >> >> Fixes: 1b0a2d950ee2 ("md: use new apis to suspend array for ioctls >> involed array reconfiguration") Hey, what happened to that patch? It looks a lot like things stalled here. I'm asking, because there is a regression report that claims 1b0a2d950ee2 to be the culprit that might or might not be causes by the problem this patch tries to fix: https://bugzilla.kernel.org/show_bug.cgi?id=218459 Ciao, Thorsten >> Signed-off-by: Li Nan <linan122@huawei.com> >> --- >> drivers/md/md.c | 39 +++++++++++++++++++++++++++++---------- >> 1 file changed, 29 insertions(+), 10 deletions(-) >> >> diff --git a/drivers/md/md.c b/drivers/md/md.c >> index d6612b922c76..c128570f2a5d 100644 >> --- a/drivers/md/md.c >> +++ b/drivers/md/md.c >> @@ -521,6 +521,20 @@ void mddev_resume(struct mddev *mddev) >> } >> EXPORT_SYMBOL_GPL(mddev_resume); >> +static void md_link_disk_holder(struct mddev *mddev) >> +{ >> + struct md_rdev *rdev; >> + >> + rcu_read_lock(); >> + rdev_for_each_rcu(rdev, mddev) { >> + if (test_bit(SymlinkCreated, &rdev->flags)) >> + continue; >> + if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk)) >> + set_bit(SymlinkCreated, &rdev->flags); >> + } >> + rcu_read_unlock(); >> +} >> + >> /* >> * Generic flush handling for md >> */ >> @@ -902,6 +916,11 @@ void mddev_unlock(struct mddev *mddev) >> list_for_each_entry_safe(rdev, tmp, &delete, same_set) { >> list_del_init(&rdev->same_set); >> + if (test_bit(SymlinkCreated, &rdev->flags)) { >> + bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); >> + clear_bit(SymlinkCreated, &rdev->flags); >> + } >> + rdev->mddev = NULL; >> kobject_del(&rdev->kobj); >> export_rdev(rdev, mddev); >> } >> @@ -2526,8 +2545,6 @@ static int bind_rdev_to_array(struct md_rdev >> *rdev, struct mddev *mddev) >> sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks"); >> list_add_rcu(&rdev->same_set, &mddev->disks); >> - if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk)) >> - set_bit(SymlinkCreated, &rdev->flags); >> /* May as well allow recovery to be retried once */ >> mddev->recovery_disabled++; >> @@ -2562,14 +2579,9 @@ static void md_kick_rdev_from_array(struct >> md_rdev *rdev) >> { >> struct mddev *mddev = rdev->mddev; >> - if (test_bit(SymlinkCreated, &rdev->flags)) { >> - bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); >> - clear_bit(SymlinkCreated, &rdev->flags); >> - } >> list_del_rcu(&rdev->same_set); >> pr_debug("md: unbind<%pg>\n", rdev->bdev); >> mddev_destroy_serial_pool(rdev->mddev, rdev); >> - rdev->mddev = NULL; >> sysfs_remove_link(&rdev->kobj, "block"); >> sysfs_put(rdev->sysfs_state); >> sysfs_put(rdev->sysfs_unack_badblocks); >> @@ -4667,8 +4679,10 @@ new_dev_store(struct mddev *mddev, const char >> *buf, size_t len) >> if (err) >> export_rdev(rdev, mddev); >> mddev_unlock_and_resume(mddev); >> - if (!err) >> + if (!err) { >> + md_link_disk_holder(mddev); >> md_new_event(); >> + } >> return err ? err : len; >> } >> @@ -6606,6 +6620,7 @@ static void autorun_devices(int part) >> } >> autorun_array(mddev); >> mddev_unlock_and_resume(mddev); >> + md_link_disk_holder(mddev); >> } >> /* on success, candidates will be empty, on error >> * it won't... >> @@ -7832,8 +7847,12 @@ static int md_ioctl(struct block_device *bdev, >> blk_mode_t mode, >> err != -EINVAL) >> mddev->hold_active = 0; >> - md_ioctl_need_suspend(cmd) ? mddev_unlock_and_resume(mddev) : >> - mddev_unlock(mddev); >> + if (md_ioctl_need_suspend(cmd)) { >> + mddev_unlock_and_resume(mddev); >> + md_link_disk_holder(mddev); >> + } else { >> + mddev_unlock(mddev); >> + } >> out: >> if(did_set_md_closing) >> >
On Tue, Feb 6, 2024 at 6:46 AM Linux regression tracking (Thorsten Leemhuis) <regressions@leemhuis.info> wrote: > > Hi, Thorsten here, the Linux kernel's regression tracker. > > On 21.12.23 09:49, Yu Kuai wrote: > > 在 2023/12/21 15:11, linan666@huaweicloud.com 写道: > >> From: Li Nan <linan122@huawei.com> > >> > >> There is a risk of deadlock when a process gets disk->open_mutex after > >> suspending mddev, because other processes may hold open_mutex while > >> submitting io. For example: > >> [...] > > Nice catch! This patch looks good except that the new flag > > 'SymlinkCreated' doesn't look accurate, perhaps 'HolderLinked' > > will make more sense. > > > >> Fix it by getting disk->open_mutex after mddev resume, iterating each > >> mddev->disk to create symlink for rdev which has not been created yet. > >> and moving bd_unlink_disk_holder() to mddev_unlock(), rdev has been > >> deleted from mddev->disks here, which can avoid concurrent bind and > >> unbind, > >> > >> Fixes: 1b0a2d950ee2 ("md: use new apis to suspend array for ioctls > >> involed array reconfiguration") > > Hey, what happened to that patch? It looks a lot like things stalled > here. I'm asking, because there is a regression report that claims > 1b0a2d950ee2 to be the culprit that might or might not be causes by the > problem this patch tries to fix: > https://bugzilla.kernel.org/show_bug.cgi?id=218459 Thanks for the heads-up. Replied to the thread. Song
diff --git a/drivers/md/md.c b/drivers/md/md.c index d6612b922c76..c128570f2a5d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -521,6 +521,20 @@ void mddev_resume(struct mddev *mddev) } EXPORT_SYMBOL_GPL(mddev_resume); +static void md_link_disk_holder(struct mddev *mddev) +{ + struct md_rdev *rdev; + + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) { + if (test_bit(SymlinkCreated, &rdev->flags)) + continue; + if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk)) + set_bit(SymlinkCreated, &rdev->flags); + } + rcu_read_unlock(); +} + /* * Generic flush handling for md */ @@ -902,6 +916,11 @@ void mddev_unlock(struct mddev *mddev) list_for_each_entry_safe(rdev, tmp, &delete, same_set) { list_del_init(&rdev->same_set); + if (test_bit(SymlinkCreated, &rdev->flags)) { + bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); + clear_bit(SymlinkCreated, &rdev->flags); + } + rdev->mddev = NULL; kobject_del(&rdev->kobj); export_rdev(rdev, mddev); } @@ -2526,8 +2545,6 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks"); list_add_rcu(&rdev->same_set, &mddev->disks); - if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk)) - set_bit(SymlinkCreated, &rdev->flags); /* May as well allow recovery to be retried once */ mddev->recovery_disabled++; @@ -2562,14 +2579,9 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev) { struct mddev *mddev = rdev->mddev; - if (test_bit(SymlinkCreated, &rdev->flags)) { - bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); - clear_bit(SymlinkCreated, &rdev->flags); - } list_del_rcu(&rdev->same_set); pr_debug("md: unbind<%pg>\n", rdev->bdev); mddev_destroy_serial_pool(rdev->mddev, rdev); - rdev->mddev = NULL; sysfs_remove_link(&rdev->kobj, "block"); sysfs_put(rdev->sysfs_state); sysfs_put(rdev->sysfs_unack_badblocks); @@ -4667,8 +4679,10 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) if (err) export_rdev(rdev, mddev); mddev_unlock_and_resume(mddev); - if (!err) + if (!err) { + md_link_disk_holder(mddev); md_new_event(); + } return err ? err : len; } @@ -6606,6 +6620,7 @@ static void autorun_devices(int part) } autorun_array(mddev); mddev_unlock_and_resume(mddev); + md_link_disk_holder(mddev); } /* on success, candidates will be empty, on error * it won't... @@ -7832,8 +7847,12 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode, err != -EINVAL) mddev->hold_active = 0; - md_ioctl_need_suspend(cmd) ? mddev_unlock_and_resume(mddev) : - mddev_unlock(mddev); + if (md_ioctl_need_suspend(cmd)) { + mddev_unlock_and_resume(mddev); + md_link_disk_holder(mddev); + } else { + mddev_unlock(mddev); + } out: if(did_set_md_closing)