Message ID | 20240124091421.1261579-6-yukuai3@huawei.com (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | dm-raid: fix v6.7 regressions | expand |
On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote: > > The new heleprs will be used in dm-raid in later patches to fix > regressions and prevent calling md_reap_sync_thread() directly. > > Signed-off-by: Yu Kuai <yukuai3@huawei.com> > --- > drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++---- > drivers/md/md.h | 3 +++ > 2 files changed, 40 insertions(+), 4 deletions(-) > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index 6c5d0a372927..90cf31b53804 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq) > mddev_lock_nointr(mddev); > } > > -static void idle_sync_thread(struct mddev *mddev) > +void md_idle_sync_thread(struct mddev *mddev) > { > + lockdep_assert_held(mddev->reconfig_mutex); > + Hi Kuai There is a building error. It should give a pointer to lockdep_assert_held. And same with the other two places in this patch. Regards Xiao > mutex_lock(&mddev->sync_mutex); > clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > + stop_sync_thread(mddev, true, true); > + mutex_unlock(&mddev->sync_mutex); > +} > +EXPORT_SYMBOL_GPL(md_idle_sync_thread); > + > +void md_frozen_sync_thread(struct mddev *mddev) > +{ > + lockdep_assert_held(mddev->reconfig_mutex); > + > + mutex_lock(&mddev->sync_mutex); > + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > + stop_sync_thread(mddev, true, false); > + mutex_unlock(&mddev->sync_mutex); > +} > +EXPORT_SYMBOL_GPL(md_frozen_sync_thread); > > +void md_unfrozen_sync_thread(struct mddev *mddev) > +{ > + lockdep_assert_held(mddev->reconfig_mutex); > + > + mutex_lock(&mddev->sync_mutex); > + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > + md_wakeup_thread(mddev->thread); > + sysfs_notify_dirent_safe(mddev->sysfs_action); > + mutex_unlock(&mddev->sync_mutex); > +} > +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread); > + > +static void idle_sync_thread(struct mddev *mddev) > +{ > if (mddev_lock(mddev)) { > mutex_unlock(&mddev->sync_mutex); > return; > } > > + mutex_lock(&mddev->sync_mutex); > + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > stop_sync_thread(mddev, false, true); > mutex_unlock(&mddev->sync_mutex); > } > > static void frozen_sync_thread(struct mddev *mddev) > { > - mutex_lock(&mddev->sync_mutex); > - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > - > if (mddev_lock(mddev)) { > mutex_unlock(&mddev->sync_mutex); > return; > } > > + mutex_lock(&mddev->sync_mutex); > + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > stop_sync_thread(mddev, false, false); > mutex_unlock(&mddev->sync_mutex); > } > diff --git a/drivers/md/md.h b/drivers/md/md.h > index 8d881cc59799..437ab70ce79b 100644 > --- a/drivers/md/md.h > +++ b/drivers/md/md.h > @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev); > extern void md_handle_request(struct mddev *mddev, struct bio *bio); > extern int mddev_suspend(struct mddev *mddev, bool interruptible); > extern void mddev_resume(struct mddev *mddev); > +extern void md_idle_sync_thread(struct mddev *mddev); > +extern void md_frozen_sync_thread(struct mddev *mddev); > +extern void md_unfrozen_sync_thread(struct mddev *mddev); > > extern void md_reload_sb(struct mddev *mddev, int raid_disk); > extern void md_update_sb(struct mddev *mddev, int force); > -- > 2.39.2 >
Hi, 在 2024/01/25 15:51, Xiao Ni 写道: > On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote: >> >> The new heleprs will be used in dm-raid in later patches to fix >> regressions and prevent calling md_reap_sync_thread() directly. >> >> Signed-off-by: Yu Kuai <yukuai3@huawei.com> >> --- >> drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++---- >> drivers/md/md.h | 3 +++ >> 2 files changed, 40 insertions(+), 4 deletions(-) >> >> diff --git a/drivers/md/md.c b/drivers/md/md.c >> index 6c5d0a372927..90cf31b53804 100644 >> --- a/drivers/md/md.c >> +++ b/drivers/md/md.c >> @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq) >> mddev_lock_nointr(mddev); >> } >> >> -static void idle_sync_thread(struct mddev *mddev) >> +void md_idle_sync_thread(struct mddev *mddev) >> { >> + lockdep_assert_held(mddev->reconfig_mutex); >> + > > Hi Kuai > > There is a building error. It should give a pointer to > lockdep_assert_held. And same with the other two places in this patch. Yes, I forgot that I disabled all the debug config in order to let tests finish quickly. Thanks for the notince, will fix this in v3. Thanks, Kuai > > Regards > Xiao > >> mutex_lock(&mddev->sync_mutex); >> clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> + stop_sync_thread(mddev, true, true); >> + mutex_unlock(&mddev->sync_mutex); >> +} >> +EXPORT_SYMBOL_GPL(md_idle_sync_thread); >> + >> +void md_frozen_sync_thread(struct mddev *mddev) >> +{ >> + lockdep_assert_held(mddev->reconfig_mutex); >> + >> + mutex_lock(&mddev->sync_mutex); >> + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> + stop_sync_thread(mddev, true, false); >> + mutex_unlock(&mddev->sync_mutex); >> +} >> +EXPORT_SYMBOL_GPL(md_frozen_sync_thread); >> >> +void md_unfrozen_sync_thread(struct mddev *mddev) >> +{ >> + lockdep_assert_held(mddev->reconfig_mutex); >> + >> + mutex_lock(&mddev->sync_mutex); >> + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); >> + md_wakeup_thread(mddev->thread); >> + sysfs_notify_dirent_safe(mddev->sysfs_action); >> + mutex_unlock(&mddev->sync_mutex); >> +} >> +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread); >> + >> +static void idle_sync_thread(struct mddev *mddev) >> +{ >> if (mddev_lock(mddev)) { >> mutex_unlock(&mddev->sync_mutex); >> return; >> } >> >> + mutex_lock(&mddev->sync_mutex); >> + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> stop_sync_thread(mddev, false, true); >> mutex_unlock(&mddev->sync_mutex); >> } >> >> static void frozen_sync_thread(struct mddev *mddev) >> { >> - mutex_lock(&mddev->sync_mutex); >> - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> - >> if (mddev_lock(mddev)) { >> mutex_unlock(&mddev->sync_mutex); >> return; >> } >> >> + mutex_lock(&mddev->sync_mutex); >> + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> stop_sync_thread(mddev, false, false); >> mutex_unlock(&mddev->sync_mutex); >> } >> diff --git a/drivers/md/md.h b/drivers/md/md.h >> index 8d881cc59799..437ab70ce79b 100644 >> --- a/drivers/md/md.h >> +++ b/drivers/md/md.h >> @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev); >> extern void md_handle_request(struct mddev *mddev, struct bio *bio); >> extern int mddev_suspend(struct mddev *mddev, bool interruptible); >> extern void mddev_resume(struct mddev *mddev); >> +extern void md_idle_sync_thread(struct mddev *mddev); >> +extern void md_frozen_sync_thread(struct mddev *mddev); >> +extern void md_unfrozen_sync_thread(struct mddev *mddev); >> >> extern void md_reload_sb(struct mddev *mddev, int raid_disk); >> extern void md_update_sb(struct mddev *mddev, int force); >> -- >> 2.39.2 >> > > . >
Hi all This is the result of lvm2 tests: make check ### 426 tests: 319 passed, 74 skipped, 0 timed out, 5 warned, 28 failed in 56:04.914 make[1]: *** [Makefile:138: check] Error 1 make[1]: Leaving directory '/root/lvm2/test' make: *** [Makefile:89: check] Error 2 Do you know where to check which cases fail? Best Regards Xiao On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote: > > The new heleprs will be used in dm-raid in later patches to fix > regressions and prevent calling md_reap_sync_thread() directly. > > Signed-off-by: Yu Kuai <yukuai3@huawei.com> > --- > drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++---- > drivers/md/md.h | 3 +++ > 2 files changed, 40 insertions(+), 4 deletions(-) > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index 6c5d0a372927..90cf31b53804 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq) > mddev_lock_nointr(mddev); > } > > -static void idle_sync_thread(struct mddev *mddev) > +void md_idle_sync_thread(struct mddev *mddev) > { > + lockdep_assert_held(mddev->reconfig_mutex); > + > mutex_lock(&mddev->sync_mutex); > clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > + stop_sync_thread(mddev, true, true); > + mutex_unlock(&mddev->sync_mutex); > +} > +EXPORT_SYMBOL_GPL(md_idle_sync_thread); > + > +void md_frozen_sync_thread(struct mddev *mddev) > +{ > + lockdep_assert_held(mddev->reconfig_mutex); > + > + mutex_lock(&mddev->sync_mutex); > + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > + stop_sync_thread(mddev, true, false); > + mutex_unlock(&mddev->sync_mutex); > +} > +EXPORT_SYMBOL_GPL(md_frozen_sync_thread); > > +void md_unfrozen_sync_thread(struct mddev *mddev) > +{ > + lockdep_assert_held(mddev->reconfig_mutex); > + > + mutex_lock(&mddev->sync_mutex); > + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > + md_wakeup_thread(mddev->thread); > + sysfs_notify_dirent_safe(mddev->sysfs_action); > + mutex_unlock(&mddev->sync_mutex); > +} > +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread); > + > +static void idle_sync_thread(struct mddev *mddev) > +{ > if (mddev_lock(mddev)) { > mutex_unlock(&mddev->sync_mutex); > return; > } > > + mutex_lock(&mddev->sync_mutex); > + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > stop_sync_thread(mddev, false, true); > mutex_unlock(&mddev->sync_mutex); > } > > static void frozen_sync_thread(struct mddev *mddev) > { > - mutex_lock(&mddev->sync_mutex); > - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > - > if (mddev_lock(mddev)) { > mutex_unlock(&mddev->sync_mutex); > return; > } > > + mutex_lock(&mddev->sync_mutex); > + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > stop_sync_thread(mddev, false, false); > mutex_unlock(&mddev->sync_mutex); > } > diff --git a/drivers/md/md.h b/drivers/md/md.h > index 8d881cc59799..437ab70ce79b 100644 > --- a/drivers/md/md.h > +++ b/drivers/md/md.h > @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev); > extern void md_handle_request(struct mddev *mddev, struct bio *bio); > extern int mddev_suspend(struct mddev *mddev, bool interruptible); > extern void mddev_resume(struct mddev *mddev); > +extern void md_idle_sync_thread(struct mddev *mddev); > +extern void md_frozen_sync_thread(struct mddev *mddev); > +extern void md_unfrozen_sync_thread(struct mddev *mddev); > > extern void md_reload_sb(struct mddev *mddev, int raid_disk); > extern void md_update_sb(struct mddev *mddev, int force); > -- > 2.39.2 >
Hi, 在 2024/01/25 19:35, Xiao Ni 写道: > Hi all > > This is the result of lvm2 tests: > make check > ### 426 tests: 319 passed, 74 skipped, 0 timed out, 5 warned, 28 > failed in 56:04.914 Are you testing with this patchset? 28 failed is much more than my test result in following: > make[1]: *** [Makefile:138: check] Error 1 > make[1]: Leaving directory '/root/lvm2/test' > make: *** [Makefile:89: check] Error 2 > > Do you know where to check which cases fail? I saved logs and grep keyword "failed:", and the result will look like this: You can find each test log in the dir test/result/ > > Best Regards > Xiao > > On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote: >> >> The new heleprs will be used in dm-raid in later patches to fix >> regressions and prevent calling md_reap_sync_thread() directly. >> >> Signed-off-by: Yu Kuai <yukuai3@huawei.com> >> --- >> drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++---- >> drivers/md/md.h | 3 +++ >> 2 files changed, 40 insertions(+), 4 deletions(-) >> >> diff --git a/drivers/md/md.c b/drivers/md/md.c >> index 6c5d0a372927..90cf31b53804 100644 >> --- a/drivers/md/md.c >> +++ b/drivers/md/md.c >> @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq) >> mddev_lock_nointr(mddev); >> } >> >> -static void idle_sync_thread(struct mddev *mddev) >> +void md_idle_sync_thread(struct mddev *mddev) >> { >> + lockdep_assert_held(mddev->reconfig_mutex); >> + >> mutex_lock(&mddev->sync_mutex); >> clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> + stop_sync_thread(mddev, true, true); >> + mutex_unlock(&mddev->sync_mutex); >> +} >> +EXPORT_SYMBOL_GPL(md_idle_sync_thread); >> + >> +void md_frozen_sync_thread(struct mddev *mddev) >> +{ >> + lockdep_assert_held(mddev->reconfig_mutex); >> + >> + mutex_lock(&mddev->sync_mutex); >> + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> + stop_sync_thread(mddev, true, false); >> + mutex_unlock(&mddev->sync_mutex); >> +} >> +EXPORT_SYMBOL_GPL(md_frozen_sync_thread); >> >> +void md_unfrozen_sync_thread(struct mddev *mddev) >> +{ >> + lockdep_assert_held(mddev->reconfig_mutex); >> + >> + mutex_lock(&mddev->sync_mutex); >> + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); >> + md_wakeup_thread(mddev->thread); >> + sysfs_notify_dirent_safe(mddev->sysfs_action); >> + mutex_unlock(&mddev->sync_mutex); >> +} >> +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread); >> + >> +static void idle_sync_thread(struct mddev *mddev) >> +{ >> if (mddev_lock(mddev)) { >> mutex_unlock(&mddev->sync_mutex); >> return; >> } >> >> + mutex_lock(&mddev->sync_mutex); >> + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> stop_sync_thread(mddev, false, true); >> mutex_unlock(&mddev->sync_mutex); >> } >> >> static void frozen_sync_thread(struct mddev *mddev) >> { >> - mutex_lock(&mddev->sync_mutex); >> - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> - >> if (mddev_lock(mddev)) { >> mutex_unlock(&mddev->sync_mutex); >> return; >> } >> >> + mutex_lock(&mddev->sync_mutex); >> + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> stop_sync_thread(mddev, false, false); >> mutex_unlock(&mddev->sync_mutex); >> } >> diff --git a/drivers/md/md.h b/drivers/md/md.h >> index 8d881cc59799..437ab70ce79b 100644 >> --- a/drivers/md/md.h >> +++ b/drivers/md/md.h >> @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev); >> extern void md_handle_request(struct mddev *mddev, struct bio *bio); >> extern int mddev_suspend(struct mddev *mddev, bool interruptible); >> extern void mddev_resume(struct mddev *mddev); >> +extern void md_idle_sync_thread(struct mddev *mddev); >> +extern void md_frozen_sync_thread(struct mddev *mddev); >> +extern void md_unfrozen_sync_thread(struct mddev *mddev); >> >> extern void md_reload_sb(struct mddev *mddev, int raid_disk); >> extern void md_update_sb(struct mddev *mddev, int force); >> -- >> 2.39.2 >> > > . >
Hi, 在 2024/01/25 19:35, Xiao Ni 写道: > Hi all > > This is the result of lvm2 tests: > make check > ### 426 tests: 319 passed, 74 skipped, 0 timed out, 5 warned, 28 > failed in 56:04.914 Are you testing with this patchset? 28 failed is much more than my test result in following: ### 426 tests: 346 passed, 70 skipped, 0 timed out, 4 warned, 6 failed in 118:31.437 > make[1]: *** [Makefile:138: check] Error 1 > make[1]: Leaving directory '/root/lvm2/test' > make: *** [Makefile:89: check] Error 2 > > Do you know where to check which cases fail? I saved logs and grep keyword "failed:", and the result will look like this: ### failed: [ndev-vanilla] shell/duplicate-vgid.sh ### failed: [ndev-vanilla] shell/fsadm-crypt.sh ### failed: [ndev-vanilla] shell/lvchange-raid1-writemostly.sh ### failed: [ndev-vanilla] shell/lvconvert-repair-raid.sh ### failed: [ndev-vanilla] shell/lvextend-raid.sh ### failed: [ndev-vanilla] shell/select-report.sh BTW, you can find each test log in the dir(by default) test/result/. Thanks, Kuai > > Best Regards > Xiao > > On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote: >> >> The new heleprs will be used in dm-raid in later patches to fix >> regressions and prevent calling md_reap_sync_thread() directly. >> >> Signed-off-by: Yu Kuai <yukuai3@huawei.com> >> --- >> drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++---- >> drivers/md/md.h | 3 +++ >> 2 files changed, 40 insertions(+), 4 deletions(-) >> >> diff --git a/drivers/md/md.c b/drivers/md/md.c >> index 6c5d0a372927..90cf31b53804 100644 >> --- a/drivers/md/md.c >> +++ b/drivers/md/md.c >> @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq) >> mddev_lock_nointr(mddev); >> } >> >> -static void idle_sync_thread(struct mddev *mddev) >> +void md_idle_sync_thread(struct mddev *mddev) >> { >> + lockdep_assert_held(mddev->reconfig_mutex); >> + >> mutex_lock(&mddev->sync_mutex); >> clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> + stop_sync_thread(mddev, true, true); >> + mutex_unlock(&mddev->sync_mutex); >> +} >> +EXPORT_SYMBOL_GPL(md_idle_sync_thread); >> + >> +void md_frozen_sync_thread(struct mddev *mddev) >> +{ >> + lockdep_assert_held(mddev->reconfig_mutex); >> + >> + mutex_lock(&mddev->sync_mutex); >> + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> + stop_sync_thread(mddev, true, false); >> + mutex_unlock(&mddev->sync_mutex); >> +} >> +EXPORT_SYMBOL_GPL(md_frozen_sync_thread); >> >> +void md_unfrozen_sync_thread(struct mddev *mddev) >> +{ >> + lockdep_assert_held(mddev->reconfig_mutex); >> + >> + mutex_lock(&mddev->sync_mutex); >> + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); >> + md_wakeup_thread(mddev->thread); >> + sysfs_notify_dirent_safe(mddev->sysfs_action); >> + mutex_unlock(&mddev->sync_mutex); >> +} >> +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread); >> + >> +static void idle_sync_thread(struct mddev *mddev) >> +{ >> if (mddev_lock(mddev)) { >> mutex_unlock(&mddev->sync_mutex); >> return; >> } >> >> + mutex_lock(&mddev->sync_mutex); >> + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> stop_sync_thread(mddev, false, true); >> mutex_unlock(&mddev->sync_mutex); >> } >> >> static void frozen_sync_thread(struct mddev *mddev) >> { >> - mutex_lock(&mddev->sync_mutex); >> - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> - >> if (mddev_lock(mddev)) { >> mutex_unlock(&mddev->sync_mutex); >> return; >> } >> >> + mutex_lock(&mddev->sync_mutex); >> + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >> stop_sync_thread(mddev, false, false); >> mutex_unlock(&mddev->sync_mutex); >> } >> diff --git a/drivers/md/md.h b/drivers/md/md.h >> index 8d881cc59799..437ab70ce79b 100644 >> --- a/drivers/md/md.h >> +++ b/drivers/md/md.h >> @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev); >> extern void md_handle_request(struct mddev *mddev, struct bio *bio); >> extern int mddev_suspend(struct mddev *mddev, bool interruptible); >> extern void mddev_resume(struct mddev *mddev); >> +extern void md_idle_sync_thread(struct mddev *mddev); >> +extern void md_frozen_sync_thread(struct mddev *mddev); >> +extern void md_unfrozen_sync_thread(struct mddev *mddev); >> >> extern void md_reload_sb(struct mddev *mddev, int raid_disk); >> extern void md_update_sb(struct mddev *mddev, int force); >> -- >> 2.39.2 >> > > . >
On Thu, Jan 25, 2024 at 7:42 PM Yu Kuai <yukuai3@huawei.com> wrote: > > Hi, > > 在 2024/01/25 19:35, Xiao Ni 写道: > > Hi all > > > > This is the result of lvm2 tests: > > make check > > ### 426 tests: 319 passed, 74 skipped, 0 timed out, 5 warned, 28 > > failed in 56:04.914 > > Are you testing with this patchset? 28 failed is much more than my > test result in following: Yes, 6.7.0-rc8 with this patch set. > > > make[1]: *** [Makefile:138: check] Error 1 > > make[1]: Leaving directory '/root/lvm2/test' > > make: *** [Makefile:89: check] Error 2 > > > > Do you know where to check which cases fail? > > I saved logs and grep keyword "failed:", and the result will look like > this: I'll use this way to collect the failed cases. Regards Xiao > > > You can find each test log in the dir test/result/ > > > > Best Regards > > Xiao > > > > On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote: > >> > >> The new heleprs will be used in dm-raid in later patches to fix > >> regressions and prevent calling md_reap_sync_thread() directly. > >> > >> Signed-off-by: Yu Kuai <yukuai3@huawei.com> > >> --- > >> drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++---- > >> drivers/md/md.h | 3 +++ > >> 2 files changed, 40 insertions(+), 4 deletions(-) > >> > >> diff --git a/drivers/md/md.c b/drivers/md/md.c > >> index 6c5d0a372927..90cf31b53804 100644 > >> --- a/drivers/md/md.c > >> +++ b/drivers/md/md.c > >> @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq) > >> mddev_lock_nointr(mddev); > >> } > >> > >> -static void idle_sync_thread(struct mddev *mddev) > >> +void md_idle_sync_thread(struct mddev *mddev) > >> { > >> + lockdep_assert_held(mddev->reconfig_mutex); > >> + > >> mutex_lock(&mddev->sync_mutex); > >> clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > >> + stop_sync_thread(mddev, true, true); > >> + mutex_unlock(&mddev->sync_mutex); > >> +} > >> +EXPORT_SYMBOL_GPL(md_idle_sync_thread); > >> + > >> +void md_frozen_sync_thread(struct mddev *mddev) > >> +{ > >> + lockdep_assert_held(mddev->reconfig_mutex); > >> + > >> + mutex_lock(&mddev->sync_mutex); > >> + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > >> + stop_sync_thread(mddev, true, false); > >> + mutex_unlock(&mddev->sync_mutex); > >> +} > >> +EXPORT_SYMBOL_GPL(md_frozen_sync_thread); > >> > >> +void md_unfrozen_sync_thread(struct mddev *mddev) > >> +{ > >> + lockdep_assert_held(mddev->reconfig_mutex); > >> + > >> + mutex_lock(&mddev->sync_mutex); > >> + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > >> + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > >> + md_wakeup_thread(mddev->thread); > >> + sysfs_notify_dirent_safe(mddev->sysfs_action); > >> + mutex_unlock(&mddev->sync_mutex); > >> +} > >> +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread); > >> + > >> +static void idle_sync_thread(struct mddev *mddev) > >> +{ > >> if (mddev_lock(mddev)) { > >> mutex_unlock(&mddev->sync_mutex); > >> return; > >> } > >> > >> + mutex_lock(&mddev->sync_mutex); > >> + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > >> stop_sync_thread(mddev, false, true); > >> mutex_unlock(&mddev->sync_mutex); > >> } > >> > >> static void frozen_sync_thread(struct mddev *mddev) > >> { > >> - mutex_lock(&mddev->sync_mutex); > >> - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > >> - > >> if (mddev_lock(mddev)) { > >> mutex_unlock(&mddev->sync_mutex); > >> return; > >> } > >> > >> + mutex_lock(&mddev->sync_mutex); > >> + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > >> stop_sync_thread(mddev, false, false); > >> mutex_unlock(&mddev->sync_mutex); > >> } > >> diff --git a/drivers/md/md.h b/drivers/md/md.h > >> index 8d881cc59799..437ab70ce79b 100644 > >> --- a/drivers/md/md.h > >> +++ b/drivers/md/md.h > >> @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev); > >> extern void md_handle_request(struct mddev *mddev, struct bio *bio); > >> extern int mddev_suspend(struct mddev *mddev, bool interruptible); > >> extern void mddev_resume(struct mddev *mddev); > >> +extern void md_idle_sync_thread(struct mddev *mddev); > >> +extern void md_frozen_sync_thread(struct mddev *mddev); > >> +extern void md_unfrozen_sync_thread(struct mddev *mddev); > >> > >> extern void md_reload_sb(struct mddev *mddev, int raid_disk); > >> extern void md_update_sb(struct mddev *mddev, int force); > >> -- > >> 2.39.2 > >> > > > > . > > >
Hi, 在 2024/01/25 19:52, Xiao Ni 写道: > On Thu, Jan 25, 2024 at 7:42 PM Yu Kuai <yukuai3@huawei.com> wrote: >> >> Hi, >> >> 在 2024/01/25 19:35, Xiao Ni 写道: >>> Hi all >>> >>> This is the result of lvm2 tests: >>> make check >>> ### 426 tests: 319 passed, 74 skipped, 0 timed out, 5 warned, 28 >>> failed in 56:04.914 >> >> Are you testing with this patchset? 28 failed is much more than my >> test result in following: > > Yes, 6.7.0-rc8 with this patch set. >> >>> make[1]: *** [Makefile:138: check] Error 1 >>> make[1]: Leaving directory '/root/lvm2/test' >>> make: *** [Makefile:89: check] Error 2 >>> >>> Do you know where to check which cases fail? >> >> I saved logs and grep keyword "failed:", and the result will look like >> this: > > I'll use this way to collect the failed cases. I somehow send out this draft. Please check the other reply. Kuai > > Regards > Xiao >> >> >> You can find each test log in the dir test/result/ >>> >>> Best Regards >>> Xiao >>> >>> On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote: >>>> >>>> The new heleprs will be used in dm-raid in later patches to fix >>>> regressions and prevent calling md_reap_sync_thread() directly. >>>> >>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com> >>>> --- >>>> drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++---- >>>> drivers/md/md.h | 3 +++ >>>> 2 files changed, 40 insertions(+), 4 deletions(-) >>>> >>>> diff --git a/drivers/md/md.c b/drivers/md/md.c >>>> index 6c5d0a372927..90cf31b53804 100644 >>>> --- a/drivers/md/md.c >>>> +++ b/drivers/md/md.c >>>> @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq) >>>> mddev_lock_nointr(mddev); >>>> } >>>> >>>> -static void idle_sync_thread(struct mddev *mddev) >>>> +void md_idle_sync_thread(struct mddev *mddev) >>>> { >>>> + lockdep_assert_held(mddev->reconfig_mutex); >>>> + >>>> mutex_lock(&mddev->sync_mutex); >>>> clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >>>> + stop_sync_thread(mddev, true, true); >>>> + mutex_unlock(&mddev->sync_mutex); >>>> +} >>>> +EXPORT_SYMBOL_GPL(md_idle_sync_thread); >>>> + >>>> +void md_frozen_sync_thread(struct mddev *mddev) >>>> +{ >>>> + lockdep_assert_held(mddev->reconfig_mutex); >>>> + >>>> + mutex_lock(&mddev->sync_mutex); >>>> + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >>>> + stop_sync_thread(mddev, true, false); >>>> + mutex_unlock(&mddev->sync_mutex); >>>> +} >>>> +EXPORT_SYMBOL_GPL(md_frozen_sync_thread); >>>> >>>> +void md_unfrozen_sync_thread(struct mddev *mddev) >>>> +{ >>>> + lockdep_assert_held(mddev->reconfig_mutex); >>>> + >>>> + mutex_lock(&mddev->sync_mutex); >>>> + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >>>> + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); >>>> + md_wakeup_thread(mddev->thread); >>>> + sysfs_notify_dirent_safe(mddev->sysfs_action); >>>> + mutex_unlock(&mddev->sync_mutex); >>>> +} >>>> +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread); >>>> + >>>> +static void idle_sync_thread(struct mddev *mddev) >>>> +{ >>>> if (mddev_lock(mddev)) { >>>> mutex_unlock(&mddev->sync_mutex); >>>> return; >>>> } >>>> >>>> + mutex_lock(&mddev->sync_mutex); >>>> + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >>>> stop_sync_thread(mddev, false, true); >>>> mutex_unlock(&mddev->sync_mutex); >>>> } >>>> >>>> static void frozen_sync_thread(struct mddev *mddev) >>>> { >>>> - mutex_lock(&mddev->sync_mutex); >>>> - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >>>> - >>>> if (mddev_lock(mddev)) { >>>> mutex_unlock(&mddev->sync_mutex); >>>> return; >>>> } >>>> >>>> + mutex_lock(&mddev->sync_mutex); >>>> + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >>>> stop_sync_thread(mddev, false, false); >>>> mutex_unlock(&mddev->sync_mutex); >>>> } >>>> diff --git a/drivers/md/md.h b/drivers/md/md.h >>>> index 8d881cc59799..437ab70ce79b 100644 >>>> --- a/drivers/md/md.h >>>> +++ b/drivers/md/md.h >>>> @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev); >>>> extern void md_handle_request(struct mddev *mddev, struct bio *bio); >>>> extern int mddev_suspend(struct mddev *mddev, bool interruptible); >>>> extern void mddev_resume(struct mddev *mddev); >>>> +extern void md_idle_sync_thread(struct mddev *mddev); >>>> +extern void md_frozen_sync_thread(struct mddev *mddev); >>>> +extern void md_unfrozen_sync_thread(struct mddev *mddev); >>>> >>>> extern void md_reload_sb(struct mddev *mddev, int raid_disk); >>>> extern void md_update_sb(struct mddev *mddev, int force); >>>> -- >>>> 2.39.2 >>>> >>> >>> . >>> >> > > > . >
Hi all I build the kernel 6.7.0-rc8 with this patch set. The lvm2 regression test result: ### failed: [ndev-vanilla] shell/integrity.sh ### failed: [ndev-vanilla] shell/lvchange-partial-raid10.sh ### failed: [ndev-vanilla] shell/lvchange-raid-transient-failures.sh ### failed: [ndev-vanilla] shell/lvchange-raid10.sh ### failed: [ndev-vanilla] shell/lvchange-rebuild-raid.sh ### failed: [ndev-vanilla] shell/lvconvert-cache-abort.sh ### failed: [ndev-vanilla] shell/lvconvert-raid-regionsize.sh ### failed: [ndev-vanilla] shell/lvconvert-raid-reshape-linear_to_raid6-single-type.sh ### failed: [ndev-vanilla] shell/lvconvert-raid-reshape.sh ### failed: [ndev-vanilla] shell/lvconvert-raid-takeover-alloc-failure.sh ### failed: [ndev-vanilla] shell/lvconvert-raid-takeover-thin.sh ### failed: [ndev-vanilla] shell/lvconvert-raid-takeover.sh ### failed: [ndev-vanilla] shell/lvconvert-raid0-striped.sh ### failed: [ndev-vanilla] shell/lvconvert-raid0_to_raid10.sh ### failed: [ndev-vanilla] shell/lvconvert-raid10.sh ### failed: [ndev-vanilla] shell/lvconvert-raid5_to_raid10.sh ### failed: [ndev-vanilla] shell/lvconvert-repair-raid.sh ### failed: [ndev-vanilla] shell/lvconvert-striped-raid0.sh ### failed: [ndev-vanilla] shell/lvcreate-large-raid10.sh ### failed: [ndev-vanilla] shell/lvcreate-raid-nosync.sh ### failed: [ndev-vanilla] shell/lvcreate-raid10.sh ### failed: [ndev-vanilla] shell/lvdisplay-raid.sh ### failed: [ndev-vanilla] shell/lvextend-thin-raid.sh ### failed: [ndev-vanilla] shell/lvresize-fs-crypt.sh ### failed: [ndev-vanilla] shell/lvresize-raid.sh ### failed: [ndev-vanilla] shell/lvresize-raid10.sh ### failed: [ndev-vanilla] shell/pvck-dump.sh ### failed: [ndev-vanilla] shell/pvmove-raid-segtypes.sh ### failed: [ndev-vanilla] shell/select-report.sh Regards Xiao On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote: > > The new heleprs will be used in dm-raid in later patches to fix > regressions and prevent calling md_reap_sync_thread() directly. > > Signed-off-by: Yu Kuai <yukuai3@huawei.com> > --- > drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++---- > drivers/md/md.h | 3 +++ > 2 files changed, 40 insertions(+), 4 deletions(-) > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index 6c5d0a372927..90cf31b53804 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq) > mddev_lock_nointr(mddev); > } > > -static void idle_sync_thread(struct mddev *mddev) > +void md_idle_sync_thread(struct mddev *mddev) > { > + lockdep_assert_held(mddev->reconfig_mutex); > + > mutex_lock(&mddev->sync_mutex); > clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > + stop_sync_thread(mddev, true, true); > + mutex_unlock(&mddev->sync_mutex); > +} > +EXPORT_SYMBOL_GPL(md_idle_sync_thread); > + > +void md_frozen_sync_thread(struct mddev *mddev) > +{ > + lockdep_assert_held(mddev->reconfig_mutex); > + > + mutex_lock(&mddev->sync_mutex); > + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > + stop_sync_thread(mddev, true, false); > + mutex_unlock(&mddev->sync_mutex); > +} > +EXPORT_SYMBOL_GPL(md_frozen_sync_thread); > > +void md_unfrozen_sync_thread(struct mddev *mddev) > +{ > + lockdep_assert_held(mddev->reconfig_mutex); > + > + mutex_lock(&mddev->sync_mutex); > + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > + md_wakeup_thread(mddev->thread); > + sysfs_notify_dirent_safe(mddev->sysfs_action); > + mutex_unlock(&mddev->sync_mutex); > +} > +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread); > + > +static void idle_sync_thread(struct mddev *mddev) > +{ > if (mddev_lock(mddev)) { > mutex_unlock(&mddev->sync_mutex); > return; > } > > + mutex_lock(&mddev->sync_mutex); > + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > stop_sync_thread(mddev, false, true); > mutex_unlock(&mddev->sync_mutex); > } > > static void frozen_sync_thread(struct mddev *mddev) > { > - mutex_lock(&mddev->sync_mutex); > - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > - > if (mddev_lock(mddev)) { > mutex_unlock(&mddev->sync_mutex); > return; > } > > + mutex_lock(&mddev->sync_mutex); > + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > stop_sync_thread(mddev, false, false); > mutex_unlock(&mddev->sync_mutex); > } > diff --git a/drivers/md/md.h b/drivers/md/md.h > index 8d881cc59799..437ab70ce79b 100644 > --- a/drivers/md/md.h > +++ b/drivers/md/md.h > @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev); > extern void md_handle_request(struct mddev *mddev, struct bio *bio); > extern int mddev_suspend(struct mddev *mddev, bool interruptible); > extern void mddev_resume(struct mddev *mddev); > +extern void md_idle_sync_thread(struct mddev *mddev); > +extern void md_frozen_sync_thread(struct mddev *mddev); > +extern void md_unfrozen_sync_thread(struct mddev *mddev); > > extern void md_reload_sb(struct mddev *mddev, int raid_disk); > extern void md_update_sb(struct mddev *mddev, int force); > -- > 2.39.2 >
Hi Xiao, On Thu, Jan 25, 2024 at 5:33 AM Xiao Ni <xni@redhat.com> wrote: > > Hi all > > I build the kernel 6.7.0-rc8 with this patch set. The lvm2 regression > test result: I believe the patchset is built on top of upstream (6.8-rc1). There are quite some md and dm changes between 6.7-rc8 and 6.8-rc1. Could you please rerun the test on top of 6.8-rc1? Once we identify the right set of fixes, we will see which ones to back port to older kernels. Thanks, Song > ### failed: [ndev-vanilla] shell/integrity.sh > ### failed: [ndev-vanilla] shell/lvchange-partial-raid10.sh > ### failed: [ndev-vanilla] shell/lvchange-raid-transient-failures.sh > ### failed: [ndev-vanilla] shell/lvchange-raid10.sh > ### failed: [ndev-vanilla] shell/lvchange-rebuild-raid.sh > ### failed: [ndev-vanilla] shell/lvconvert-cache-abort.sh > ### failed: [ndev-vanilla] shell/lvconvert-raid-regionsize.sh > ### failed: [ndev-vanilla] > shell/lvconvert-raid-reshape-linear_to_raid6-single-type.sh > ### failed: [ndev-vanilla] shell/lvconvert-raid-reshape.sh > ### failed: [ndev-vanilla] shell/lvconvert-raid-takeover-alloc-failure.sh > ### failed: [ndev-vanilla] shell/lvconvert-raid-takeover-thin.sh > ### failed: [ndev-vanilla] shell/lvconvert-raid-takeover.sh > ### failed: [ndev-vanilla] shell/lvconvert-raid0-striped.sh > ### failed: [ndev-vanilla] shell/lvconvert-raid0_to_raid10.sh > ### failed: [ndev-vanilla] shell/lvconvert-raid10.sh > ### failed: [ndev-vanilla] shell/lvconvert-raid5_to_raid10.sh > ### failed: [ndev-vanilla] shell/lvconvert-repair-raid.sh > ### failed: [ndev-vanilla] shell/lvconvert-striped-raid0.sh > ### failed: [ndev-vanilla] shell/lvcreate-large-raid10.sh > ### failed: [ndev-vanilla] shell/lvcreate-raid-nosync.sh > ### failed: [ndev-vanilla] shell/lvcreate-raid10.sh > ### failed: [ndev-vanilla] shell/lvdisplay-raid.sh > ### failed: [ndev-vanilla] shell/lvextend-thin-raid.sh > ### failed: [ndev-vanilla] shell/lvresize-fs-crypt.sh > ### failed: [ndev-vanilla] shell/lvresize-raid.sh > ### failed: [ndev-vanilla] shell/lvresize-raid10.sh > ### failed: [ndev-vanilla] shell/pvck-dump.sh > ### failed: [ndev-vanilla] shell/pvmove-raid-segtypes.sh > ### failed: [ndev-vanilla] shell/select-report.sh
Hi, 在 2024/01/25 15:57, Yu Kuai 写道: > Hi, > > 在 2024/01/25 15:51, Xiao Ni 写道: >> On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote: >>> >>> The new heleprs will be used in dm-raid in later patches to fix >>> regressions and prevent calling md_reap_sync_thread() directly. >>> >>> Signed-off-by: Yu Kuai <yukuai3@huawei.com> >>> --- >>> drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++---- >>> drivers/md/md.h | 3 +++ >>> 2 files changed, 40 insertions(+), 4 deletions(-) >>> >>> diff --git a/drivers/md/md.c b/drivers/md/md.c >>> index 6c5d0a372927..90cf31b53804 100644 >>> --- a/drivers/md/md.c >>> +++ b/drivers/md/md.c >>> @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev >>> *mddev, bool locked, bool check_seq) >>> mddev_lock_nointr(mddev); >>> } >>> >>> -static void idle_sync_thread(struct mddev *mddev) >>> +void md_idle_sync_thread(struct mddev *mddev) >>> { >>> + lockdep_assert_held(mddev->reconfig_mutex); >>> + >> >> Hi Kuai >> >> There is a building error. It should give a pointer to >> lockdep_assert_held. And same with the other two places in this patch. > > Yes, I forgot that I disabled all the debug config in order to let tests > finish quickly. I enabled these debuging conifg, and turns out this patch has some problem, see below. > > Thanks for the notince, will fix this in v3. > > Thanks, > Kuai > >> >> Regards >> Xiao >> >>> mutex_lock(&mddev->sync_mutex); >>> clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >>> + stop_sync_thread(mddev, true, true); >>> + mutex_unlock(&mddev->sync_mutex); >>> +} >>> +EXPORT_SYMBOL_GPL(md_idle_sync_thread); >>> + >>> +void md_frozen_sync_thread(struct mddev *mddev) >>> +{ >>> + lockdep_assert_held(mddev->reconfig_mutex); >>> + >>> + mutex_lock(&mddev->sync_mutex); >>> + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >>> + stop_sync_thread(mddev, true, false); stop_sync_thread() may release 'reconfig_mutex' and grab it again, hence it's not right to grab 'sync_mutex' before 'reconfig_mutex'. Since 'sync_mutex' is introduced to serialize sysfs api 'sync_action' writers, while is not involved for dm-raid. I'll remove 'sync_mutex' for the new helper in v3. Thanks, Kuai >>> + mutex_unlock(&mddev->sync_mutex); >>> +} >>> +EXPORT_SYMBOL_GPL(md_frozen_sync_thread); >>> >>> +void md_unfrozen_sync_thread(struct mddev *mddev) >>> +{ >>> + lockdep_assert_held(mddev->reconfig_mutex); >>> + >>> + mutex_lock(&mddev->sync_mutex); >>> + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >>> + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); >>> + md_wakeup_thread(mddev->thread); >>> + sysfs_notify_dirent_safe(mddev->sysfs_action); >>> + mutex_unlock(&mddev->sync_mutex); >>> +} >>> +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread); >>> + >>> +static void idle_sync_thread(struct mddev *mddev) >>> +{ >>> if (mddev_lock(mddev)) { >>> mutex_unlock(&mddev->sync_mutex); >>> return; >>> } >>> >>> + mutex_lock(&mddev->sync_mutex); >>> + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >>> stop_sync_thread(mddev, false, true); >>> mutex_unlock(&mddev->sync_mutex); >>> } >>> >>> static void frozen_sync_thread(struct mddev *mddev) >>> { >>> - mutex_lock(&mddev->sync_mutex); >>> - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >>> - >>> if (mddev_lock(mddev)) { >>> mutex_unlock(&mddev->sync_mutex); >>> return; >>> } >>> >>> + mutex_lock(&mddev->sync_mutex); >>> + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); >>> stop_sync_thread(mddev, false, false); >>> mutex_unlock(&mddev->sync_mutex); >>> } >>> diff --git a/drivers/md/md.h b/drivers/md/md.h >>> index 8d881cc59799..437ab70ce79b 100644 >>> --- a/drivers/md/md.h >>> +++ b/drivers/md/md.h >>> @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev); >>> extern void md_handle_request(struct mddev *mddev, struct bio *bio); >>> extern int mddev_suspend(struct mddev *mddev, bool interruptible); >>> extern void mddev_resume(struct mddev *mddev); >>> +extern void md_idle_sync_thread(struct mddev *mddev); >>> +extern void md_frozen_sync_thread(struct mddev *mddev); >>> +extern void md_unfrozen_sync_thread(struct mddev *mddev); >>> >>> extern void md_reload_sb(struct mddev *mddev, int raid_disk); >>> extern void md_update_sb(struct mddev *mddev, int force); >>> -- >>> 2.39.2 >>> >> >> . >> > > . >
On Fri, Jan 26, 2024 at 8:14 AM Song Liu <song@kernel.org> wrote: > > Hi Xiao, > > On Thu, Jan 25, 2024 at 5:33 AM Xiao Ni <xni@redhat.com> wrote: > > > > Hi all > > > > I build the kernel 6.7.0-rc8 with this patch set. The lvm2 regression > > test result: > > I believe the patchset is built on top of upstream (6.8-rc1). There are > quite some md and dm changes between 6.7-rc8 and 6.8-rc1. Could > you please rerun the test on top of 6.8-rc1? Once we identify the right > set of fixes, we will see which ones to back port to older kernels. > > Thanks, > Song Hi all I tried to run tests on 6.8-rc1, the failure number increases to 72. And I ran the tests on 6.6, there are only 4 failed cases ### failed: [ndev-vanilla] shell/lvconvert-cache-abort.sh ### failed: [ndev-vanilla] shell/lvresize-fs-crypt.sh ### failed: [ndev-vanilla] shell/pvck-dump.sh ### failed: [ndev-vanilla] shell/select-report.sh Best Regards Xiao
diff --git a/drivers/md/md.c b/drivers/md/md.c index 6c5d0a372927..90cf31b53804 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq) mddev_lock_nointr(mddev); } -static void idle_sync_thread(struct mddev *mddev) +void md_idle_sync_thread(struct mddev *mddev) { + lockdep_assert_held(mddev->reconfig_mutex); + mutex_lock(&mddev->sync_mutex); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + stop_sync_thread(mddev, true, true); + mutex_unlock(&mddev->sync_mutex); +} +EXPORT_SYMBOL_GPL(md_idle_sync_thread); + +void md_frozen_sync_thread(struct mddev *mddev) +{ + lockdep_assert_held(mddev->reconfig_mutex); + + mutex_lock(&mddev->sync_mutex); + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + stop_sync_thread(mddev, true, false); + mutex_unlock(&mddev->sync_mutex); +} +EXPORT_SYMBOL_GPL(md_frozen_sync_thread); +void md_unfrozen_sync_thread(struct mddev *mddev) +{ + lockdep_assert_held(mddev->reconfig_mutex); + + mutex_lock(&mddev->sync_mutex); + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + md_wakeup_thread(mddev->thread); + sysfs_notify_dirent_safe(mddev->sysfs_action); + mutex_unlock(&mddev->sync_mutex); +} +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread); + +static void idle_sync_thread(struct mddev *mddev) +{ if (mddev_lock(mddev)) { mutex_unlock(&mddev->sync_mutex); return; } + mutex_lock(&mddev->sync_mutex); + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); stop_sync_thread(mddev, false, true); mutex_unlock(&mddev->sync_mutex); } static void frozen_sync_thread(struct mddev *mddev) { - mutex_lock(&mddev->sync_mutex); - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - if (mddev_lock(mddev)) { mutex_unlock(&mddev->sync_mutex); return; } + mutex_lock(&mddev->sync_mutex); + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); stop_sync_thread(mddev, false, false); mutex_unlock(&mddev->sync_mutex); } diff --git a/drivers/md/md.h b/drivers/md/md.h index 8d881cc59799..437ab70ce79b 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev); extern void md_handle_request(struct mddev *mddev, struct bio *bio); extern int mddev_suspend(struct mddev *mddev, bool interruptible); extern void mddev_resume(struct mddev *mddev); +extern void md_idle_sync_thread(struct mddev *mddev); +extern void md_frozen_sync_thread(struct mddev *mddev); +extern void md_unfrozen_sync_thread(struct mddev *mddev); extern void md_reload_sb(struct mddev *mddev, int raid_disk); extern void md_update_sb(struct mddev *mddev, int force);
The new heleprs will be used in dm-raid in later patches to fix regressions and prevent calling md_reap_sync_thread() directly. Signed-off-by: Yu Kuai <yukuai3@huawei.com> --- drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++---- drivers/md/md.h | 3 +++ 2 files changed, 40 insertions(+), 4 deletions(-)