diff mbox series

[v2,05/11] md: export helpers to stop sync_thread

Message ID 20240124091421.1261579-6-yukuai3@huawei.com (mailing list archive)
State Superseded, archived
Headers show
Series dm-raid: fix v6.7 regressions | expand

Commit Message

Yu Kuai Jan. 24, 2024, 9:14 a.m. UTC
The new heleprs will be used in dm-raid in later patches to fix
regressions and prevent calling md_reap_sync_thread() directly.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++----
 drivers/md/md.h |  3 +++
 2 files changed, 40 insertions(+), 4 deletions(-)

Comments

Xiao Ni Jan. 25, 2024, 7:51 a.m. UTC | #1
On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote:
>
> The new heleprs will be used in dm-raid in later patches to fix
> regressions and prevent calling md_reap_sync_thread() directly.
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++----
>  drivers/md/md.h |  3 +++
>  2 files changed, 40 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 6c5d0a372927..90cf31b53804 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
>                 mddev_lock_nointr(mddev);
>  }
>
> -static void idle_sync_thread(struct mddev *mddev)
> +void md_idle_sync_thread(struct mddev *mddev)
>  {
> +       lockdep_assert_held(mddev->reconfig_mutex);
> +

Hi Kuai

There is a building error. It should give a pointer to
lockdep_assert_held. And same with the other two places in this patch.

Regards
Xiao

>         mutex_lock(&mddev->sync_mutex);
>         clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       stop_sync_thread(mddev, true, true);
> +       mutex_unlock(&mddev->sync_mutex);
> +}
> +EXPORT_SYMBOL_GPL(md_idle_sync_thread);
> +
> +void md_frozen_sync_thread(struct mddev *mddev)
> +{
> +       lockdep_assert_held(mddev->reconfig_mutex);
> +
> +       mutex_lock(&mddev->sync_mutex);
> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       stop_sync_thread(mddev, true, false);
> +       mutex_unlock(&mddev->sync_mutex);
> +}
> +EXPORT_SYMBOL_GPL(md_frozen_sync_thread);
>
> +void md_unfrozen_sync_thread(struct mddev *mddev)
> +{
> +       lockdep_assert_held(mddev->reconfig_mutex);
> +
> +       mutex_lock(&mddev->sync_mutex);
> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> +       md_wakeup_thread(mddev->thread);
> +       sysfs_notify_dirent_safe(mddev->sysfs_action);
> +       mutex_unlock(&mddev->sync_mutex);
> +}
> +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
> +
> +static void idle_sync_thread(struct mddev *mddev)
> +{
>         if (mddev_lock(mddev)) {
>                 mutex_unlock(&mddev->sync_mutex);
>                 return;
>         }
>
> +       mutex_lock(&mddev->sync_mutex);
> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>         stop_sync_thread(mddev, false, true);
>         mutex_unlock(&mddev->sync_mutex);
>  }
>
>  static void frozen_sync_thread(struct mddev *mddev)
>  {
> -       mutex_lock(&mddev->sync_mutex);
> -       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> -
>         if (mddev_lock(mddev)) {
>                 mutex_unlock(&mddev->sync_mutex);
>                 return;
>         }
>
> +       mutex_lock(&mddev->sync_mutex);
> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>         stop_sync_thread(mddev, false, false);
>         mutex_unlock(&mddev->sync_mutex);
>  }
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 8d881cc59799..437ab70ce79b 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev);
>  extern void md_handle_request(struct mddev *mddev, struct bio *bio);
>  extern int mddev_suspend(struct mddev *mddev, bool interruptible);
>  extern void mddev_resume(struct mddev *mddev);
> +extern void md_idle_sync_thread(struct mddev *mddev);
> +extern void md_frozen_sync_thread(struct mddev *mddev);
> +extern void md_unfrozen_sync_thread(struct mddev *mddev);
>
>  extern void md_reload_sb(struct mddev *mddev, int raid_disk);
>  extern void md_update_sb(struct mddev *mddev, int force);
> --
> 2.39.2
>
Yu Kuai Jan. 25, 2024, 7:57 a.m. UTC | #2
Hi,

在 2024/01/25 15:51, Xiao Ni 写道:
> On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote:
>>
>> The new heleprs will be used in dm-raid in later patches to fix
>> regressions and prevent calling md_reap_sync_thread() directly.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++----
>>   drivers/md/md.h |  3 +++
>>   2 files changed, 40 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index 6c5d0a372927..90cf31b53804 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
>>                  mddev_lock_nointr(mddev);
>>   }
>>
>> -static void idle_sync_thread(struct mddev *mddev)
>> +void md_idle_sync_thread(struct mddev *mddev)
>>   {
>> +       lockdep_assert_held(mddev->reconfig_mutex);
>> +
> 
> Hi Kuai
> 
> There is a building error. It should give a pointer to
> lockdep_assert_held. And same with the other two places in this patch.

Yes, I forgot that I disabled all the debug config in order to let tests
finish quickly.

Thanks for the notince, will fix this in v3.

Thanks,
Kuai

> 
> Regards
> Xiao
> 
>>          mutex_lock(&mddev->sync_mutex);
>>          clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> +       stop_sync_thread(mddev, true, true);
>> +       mutex_unlock(&mddev->sync_mutex);
>> +}
>> +EXPORT_SYMBOL_GPL(md_idle_sync_thread);
>> +
>> +void md_frozen_sync_thread(struct mddev *mddev)
>> +{
>> +       lockdep_assert_held(mddev->reconfig_mutex);
>> +
>> +       mutex_lock(&mddev->sync_mutex);
>> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> +       stop_sync_thread(mddev, true, false);
>> +       mutex_unlock(&mddev->sync_mutex);
>> +}
>> +EXPORT_SYMBOL_GPL(md_frozen_sync_thread);
>>
>> +void md_unfrozen_sync_thread(struct mddev *mddev)
>> +{
>> +       lockdep_assert_held(mddev->reconfig_mutex);
>> +
>> +       mutex_lock(&mddev->sync_mutex);
>> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> +       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
>> +       md_wakeup_thread(mddev->thread);
>> +       sysfs_notify_dirent_safe(mddev->sysfs_action);
>> +       mutex_unlock(&mddev->sync_mutex);
>> +}
>> +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
>> +
>> +static void idle_sync_thread(struct mddev *mddev)
>> +{
>>          if (mddev_lock(mddev)) {
>>                  mutex_unlock(&mddev->sync_mutex);
>>                  return;
>>          }
>>
>> +       mutex_lock(&mddev->sync_mutex);
>> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>          stop_sync_thread(mddev, false, true);
>>          mutex_unlock(&mddev->sync_mutex);
>>   }
>>
>>   static void frozen_sync_thread(struct mddev *mddev)
>>   {
>> -       mutex_lock(&mddev->sync_mutex);
>> -       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> -
>>          if (mddev_lock(mddev)) {
>>                  mutex_unlock(&mddev->sync_mutex);
>>                  return;
>>          }
>>
>> +       mutex_lock(&mddev->sync_mutex);
>> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>          stop_sync_thread(mddev, false, false);
>>          mutex_unlock(&mddev->sync_mutex);
>>   }
>> diff --git a/drivers/md/md.h b/drivers/md/md.h
>> index 8d881cc59799..437ab70ce79b 100644
>> --- a/drivers/md/md.h
>> +++ b/drivers/md/md.h
>> @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev);
>>   extern void md_handle_request(struct mddev *mddev, struct bio *bio);
>>   extern int mddev_suspend(struct mddev *mddev, bool interruptible);
>>   extern void mddev_resume(struct mddev *mddev);
>> +extern void md_idle_sync_thread(struct mddev *mddev);
>> +extern void md_frozen_sync_thread(struct mddev *mddev);
>> +extern void md_unfrozen_sync_thread(struct mddev *mddev);
>>
>>   extern void md_reload_sb(struct mddev *mddev, int raid_disk);
>>   extern void md_update_sb(struct mddev *mddev, int force);
>> --
>> 2.39.2
>>
> 
> .
>
Xiao Ni Jan. 25, 2024, 11:35 a.m. UTC | #3
Hi all

This is the result of lvm2 tests:
make check
### 426 tests: 319 passed, 74 skipped, 0 timed out, 5 warned, 28
failed   in 56:04.914
make[1]: *** [Makefile:138: check] Error 1
make[1]: Leaving directory '/root/lvm2/test'
make: *** [Makefile:89: check] Error 2

Do you know where to check which cases fail?

Best Regards
Xiao

On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote:
>
> The new heleprs will be used in dm-raid in later patches to fix
> regressions and prevent calling md_reap_sync_thread() directly.
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++----
>  drivers/md/md.h |  3 +++
>  2 files changed, 40 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 6c5d0a372927..90cf31b53804 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
>                 mddev_lock_nointr(mddev);
>  }
>
> -static void idle_sync_thread(struct mddev *mddev)
> +void md_idle_sync_thread(struct mddev *mddev)
>  {
> +       lockdep_assert_held(mddev->reconfig_mutex);
> +
>         mutex_lock(&mddev->sync_mutex);
>         clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       stop_sync_thread(mddev, true, true);
> +       mutex_unlock(&mddev->sync_mutex);
> +}
> +EXPORT_SYMBOL_GPL(md_idle_sync_thread);
> +
> +void md_frozen_sync_thread(struct mddev *mddev)
> +{
> +       lockdep_assert_held(mddev->reconfig_mutex);
> +
> +       mutex_lock(&mddev->sync_mutex);
> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       stop_sync_thread(mddev, true, false);
> +       mutex_unlock(&mddev->sync_mutex);
> +}
> +EXPORT_SYMBOL_GPL(md_frozen_sync_thread);
>
> +void md_unfrozen_sync_thread(struct mddev *mddev)
> +{
> +       lockdep_assert_held(mddev->reconfig_mutex);
> +
> +       mutex_lock(&mddev->sync_mutex);
> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> +       md_wakeup_thread(mddev->thread);
> +       sysfs_notify_dirent_safe(mddev->sysfs_action);
> +       mutex_unlock(&mddev->sync_mutex);
> +}
> +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
> +
> +static void idle_sync_thread(struct mddev *mddev)
> +{
>         if (mddev_lock(mddev)) {
>                 mutex_unlock(&mddev->sync_mutex);
>                 return;
>         }
>
> +       mutex_lock(&mddev->sync_mutex);
> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>         stop_sync_thread(mddev, false, true);
>         mutex_unlock(&mddev->sync_mutex);
>  }
>
>  static void frozen_sync_thread(struct mddev *mddev)
>  {
> -       mutex_lock(&mddev->sync_mutex);
> -       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> -
>         if (mddev_lock(mddev)) {
>                 mutex_unlock(&mddev->sync_mutex);
>                 return;
>         }
>
> +       mutex_lock(&mddev->sync_mutex);
> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>         stop_sync_thread(mddev, false, false);
>         mutex_unlock(&mddev->sync_mutex);
>  }
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 8d881cc59799..437ab70ce79b 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev);
>  extern void md_handle_request(struct mddev *mddev, struct bio *bio);
>  extern int mddev_suspend(struct mddev *mddev, bool interruptible);
>  extern void mddev_resume(struct mddev *mddev);
> +extern void md_idle_sync_thread(struct mddev *mddev);
> +extern void md_frozen_sync_thread(struct mddev *mddev);
> +extern void md_unfrozen_sync_thread(struct mddev *mddev);
>
>  extern void md_reload_sb(struct mddev *mddev, int raid_disk);
>  extern void md_update_sb(struct mddev *mddev, int force);
> --
> 2.39.2
>
Yu Kuai Jan. 25, 2024, 11:42 a.m. UTC | #4
Hi,

在 2024/01/25 19:35, Xiao Ni 写道:
> Hi all
> 
> This is the result of lvm2 tests:
> make check
> ### 426 tests: 319 passed, 74 skipped, 0 timed out, 5 warned, 28
> failed   in 56:04.914

Are you testing with this patchset? 28 failed is much more than my
test result in following:

> make[1]: *** [Makefile:138: check] Error 1
> make[1]: Leaving directory '/root/lvm2/test'
> make: *** [Makefile:89: check] Error 2
> 
> Do you know where to check which cases fail?

I saved logs and grep keyword "failed:", and the result will look like
this:


You can find each test log in the dir test/result/
> 
> Best Regards
> Xiao
> 
> On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote:
>>
>> The new heleprs will be used in dm-raid in later patches to fix
>> regressions and prevent calling md_reap_sync_thread() directly.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++----
>>   drivers/md/md.h |  3 +++
>>   2 files changed, 40 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index 6c5d0a372927..90cf31b53804 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
>>                  mddev_lock_nointr(mddev);
>>   }
>>
>> -static void idle_sync_thread(struct mddev *mddev)
>> +void md_idle_sync_thread(struct mddev *mddev)
>>   {
>> +       lockdep_assert_held(mddev->reconfig_mutex);
>> +
>>          mutex_lock(&mddev->sync_mutex);
>>          clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> +       stop_sync_thread(mddev, true, true);
>> +       mutex_unlock(&mddev->sync_mutex);
>> +}
>> +EXPORT_SYMBOL_GPL(md_idle_sync_thread);
>> +
>> +void md_frozen_sync_thread(struct mddev *mddev)
>> +{
>> +       lockdep_assert_held(mddev->reconfig_mutex);
>> +
>> +       mutex_lock(&mddev->sync_mutex);
>> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> +       stop_sync_thread(mddev, true, false);
>> +       mutex_unlock(&mddev->sync_mutex);
>> +}
>> +EXPORT_SYMBOL_GPL(md_frozen_sync_thread);
>>
>> +void md_unfrozen_sync_thread(struct mddev *mddev)
>> +{
>> +       lockdep_assert_held(mddev->reconfig_mutex);
>> +
>> +       mutex_lock(&mddev->sync_mutex);
>> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> +       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
>> +       md_wakeup_thread(mddev->thread);
>> +       sysfs_notify_dirent_safe(mddev->sysfs_action);
>> +       mutex_unlock(&mddev->sync_mutex);
>> +}
>> +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
>> +
>> +static void idle_sync_thread(struct mddev *mddev)
>> +{
>>          if (mddev_lock(mddev)) {
>>                  mutex_unlock(&mddev->sync_mutex);
>>                  return;
>>          }
>>
>> +       mutex_lock(&mddev->sync_mutex);
>> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>          stop_sync_thread(mddev, false, true);
>>          mutex_unlock(&mddev->sync_mutex);
>>   }
>>
>>   static void frozen_sync_thread(struct mddev *mddev)
>>   {
>> -       mutex_lock(&mddev->sync_mutex);
>> -       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> -
>>          if (mddev_lock(mddev)) {
>>                  mutex_unlock(&mddev->sync_mutex);
>>                  return;
>>          }
>>
>> +       mutex_lock(&mddev->sync_mutex);
>> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>          stop_sync_thread(mddev, false, false);
>>          mutex_unlock(&mddev->sync_mutex);
>>   }
>> diff --git a/drivers/md/md.h b/drivers/md/md.h
>> index 8d881cc59799..437ab70ce79b 100644
>> --- a/drivers/md/md.h
>> +++ b/drivers/md/md.h
>> @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev);
>>   extern void md_handle_request(struct mddev *mddev, struct bio *bio);
>>   extern int mddev_suspend(struct mddev *mddev, bool interruptible);
>>   extern void mddev_resume(struct mddev *mddev);
>> +extern void md_idle_sync_thread(struct mddev *mddev);
>> +extern void md_frozen_sync_thread(struct mddev *mddev);
>> +extern void md_unfrozen_sync_thread(struct mddev *mddev);
>>
>>   extern void md_reload_sb(struct mddev *mddev, int raid_disk);
>>   extern void md_update_sb(struct mddev *mddev, int force);
>> --
>> 2.39.2
>>
> 
> .
>
Yu Kuai Jan. 25, 2024, 11:45 a.m. UTC | #5
Hi,

在 2024/01/25 19:35, Xiao Ni 写道:
> Hi all
> 
> This is the result of lvm2 tests:
> make check
> ### 426 tests: 319 passed, 74 skipped, 0 timed out, 5 warned, 28
> failed   in 56:04.914

Are you testing with this patchset? 28 failed is much more than my
test result in following:

### 426 tests: 346 passed, 70 skipped, 0 timed out, 4 warned, 6 failed 
in 118:31.437
> make[1]: *** [Makefile:138: check] Error 1
> make[1]: Leaving directory '/root/lvm2/test'
> make: *** [Makefile:89: check] Error 2
> 
> Do you know where to check which cases fail?

I saved logs and grep keyword "failed:", and the result will look like
this:

###       failed: [ndev-vanilla] shell/duplicate-vgid.sh
###       failed: [ndev-vanilla] shell/fsadm-crypt.sh
###       failed: [ndev-vanilla] shell/lvchange-raid1-writemostly.sh
###       failed: [ndev-vanilla] shell/lvconvert-repair-raid.sh
###       failed: [ndev-vanilla] shell/lvextend-raid.sh
###       failed: [ndev-vanilla] shell/select-report.sh

BTW, you can find each test log in the dir(by default) test/result/.

Thanks,
Kuai

> 
> Best Regards
> Xiao
> 
> On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote:
>>
>> The new heleprs will be used in dm-raid in later patches to fix
>> regressions and prevent calling md_reap_sync_thread() directly.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++----
>>   drivers/md/md.h |  3 +++
>>   2 files changed, 40 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index 6c5d0a372927..90cf31b53804 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
>>                  mddev_lock_nointr(mddev);
>>   }
>>
>> -static void idle_sync_thread(struct mddev *mddev)
>> +void md_idle_sync_thread(struct mddev *mddev)
>>   {
>> +       lockdep_assert_held(mddev->reconfig_mutex);
>> +
>>          mutex_lock(&mddev->sync_mutex);
>>          clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> +       stop_sync_thread(mddev, true, true);
>> +       mutex_unlock(&mddev->sync_mutex);
>> +}
>> +EXPORT_SYMBOL_GPL(md_idle_sync_thread);
>> +
>> +void md_frozen_sync_thread(struct mddev *mddev)
>> +{
>> +       lockdep_assert_held(mddev->reconfig_mutex);
>> +
>> +       mutex_lock(&mddev->sync_mutex);
>> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> +       stop_sync_thread(mddev, true, false);
>> +       mutex_unlock(&mddev->sync_mutex);
>> +}
>> +EXPORT_SYMBOL_GPL(md_frozen_sync_thread);
>>
>> +void md_unfrozen_sync_thread(struct mddev *mddev)
>> +{
>> +       lockdep_assert_held(mddev->reconfig_mutex);
>> +
>> +       mutex_lock(&mddev->sync_mutex);
>> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> +       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
>> +       md_wakeup_thread(mddev->thread);
>> +       sysfs_notify_dirent_safe(mddev->sysfs_action);
>> +       mutex_unlock(&mddev->sync_mutex);
>> +}
>> +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
>> +
>> +static void idle_sync_thread(struct mddev *mddev)
>> +{
>>          if (mddev_lock(mddev)) {
>>                  mutex_unlock(&mddev->sync_mutex);
>>                  return;
>>          }
>>
>> +       mutex_lock(&mddev->sync_mutex);
>> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>          stop_sync_thread(mddev, false, true);
>>          mutex_unlock(&mddev->sync_mutex);
>>   }
>>
>>   static void frozen_sync_thread(struct mddev *mddev)
>>   {
>> -       mutex_lock(&mddev->sync_mutex);
>> -       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>> -
>>          if (mddev_lock(mddev)) {
>>                  mutex_unlock(&mddev->sync_mutex);
>>                  return;
>>          }
>>
>> +       mutex_lock(&mddev->sync_mutex);
>> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>          stop_sync_thread(mddev, false, false);
>>          mutex_unlock(&mddev->sync_mutex);
>>   }
>> diff --git a/drivers/md/md.h b/drivers/md/md.h
>> index 8d881cc59799..437ab70ce79b 100644
>> --- a/drivers/md/md.h
>> +++ b/drivers/md/md.h
>> @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev);
>>   extern void md_handle_request(struct mddev *mddev, struct bio *bio);
>>   extern int mddev_suspend(struct mddev *mddev, bool interruptible);
>>   extern void mddev_resume(struct mddev *mddev);
>> +extern void md_idle_sync_thread(struct mddev *mddev);
>> +extern void md_frozen_sync_thread(struct mddev *mddev);
>> +extern void md_unfrozen_sync_thread(struct mddev *mddev);
>>
>>   extern void md_reload_sb(struct mddev *mddev, int raid_disk);
>>   extern void md_update_sb(struct mddev *mddev, int force);
>> --
>> 2.39.2
>>
> 
> .
>
Xiao Ni Jan. 25, 2024, 11:52 a.m. UTC | #6
On Thu, Jan 25, 2024 at 7:42 PM Yu Kuai <yukuai3@huawei.com> wrote:
>
> Hi,
>
> 在 2024/01/25 19:35, Xiao Ni 写道:
> > Hi all
> >
> > This is the result of lvm2 tests:
> > make check
> > ### 426 tests: 319 passed, 74 skipped, 0 timed out, 5 warned, 28
> > failed   in 56:04.914
>
> Are you testing with this patchset? 28 failed is much more than my
> test result in following:

Yes, 6.7.0-rc8 with this patch set.
>
> > make[1]: *** [Makefile:138: check] Error 1
> > make[1]: Leaving directory '/root/lvm2/test'
> > make: *** [Makefile:89: check] Error 2
> >
> > Do you know where to check which cases fail?
>
> I saved logs and grep keyword "failed:", and the result will look like
> this:

I'll use this way to collect the failed cases.

Regards
Xiao
>
>
> You can find each test log in the dir test/result/
> >
> > Best Regards
> > Xiao
> >
> > On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote:
> >>
> >> The new heleprs will be used in dm-raid in later patches to fix
> >> regressions and prevent calling md_reap_sync_thread() directly.
> >>
> >> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> >> ---
> >>   drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++----
> >>   drivers/md/md.h |  3 +++
> >>   2 files changed, 40 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/md/md.c b/drivers/md/md.c
> >> index 6c5d0a372927..90cf31b53804 100644
> >> --- a/drivers/md/md.c
> >> +++ b/drivers/md/md.c
> >> @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
> >>                  mddev_lock_nointr(mddev);
> >>   }
> >>
> >> -static void idle_sync_thread(struct mddev *mddev)
> >> +void md_idle_sync_thread(struct mddev *mddev)
> >>   {
> >> +       lockdep_assert_held(mddev->reconfig_mutex);
> >> +
> >>          mutex_lock(&mddev->sync_mutex);
> >>          clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> >> +       stop_sync_thread(mddev, true, true);
> >> +       mutex_unlock(&mddev->sync_mutex);
> >> +}
> >> +EXPORT_SYMBOL_GPL(md_idle_sync_thread);
> >> +
> >> +void md_frozen_sync_thread(struct mddev *mddev)
> >> +{
> >> +       lockdep_assert_held(mddev->reconfig_mutex);
> >> +
> >> +       mutex_lock(&mddev->sync_mutex);
> >> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> >> +       stop_sync_thread(mddev, true, false);
> >> +       mutex_unlock(&mddev->sync_mutex);
> >> +}
> >> +EXPORT_SYMBOL_GPL(md_frozen_sync_thread);
> >>
> >> +void md_unfrozen_sync_thread(struct mddev *mddev)
> >> +{
> >> +       lockdep_assert_held(mddev->reconfig_mutex);
> >> +
> >> +       mutex_lock(&mddev->sync_mutex);
> >> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> >> +       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> >> +       md_wakeup_thread(mddev->thread);
> >> +       sysfs_notify_dirent_safe(mddev->sysfs_action);
> >> +       mutex_unlock(&mddev->sync_mutex);
> >> +}
> >> +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
> >> +
> >> +static void idle_sync_thread(struct mddev *mddev)
> >> +{
> >>          if (mddev_lock(mddev)) {
> >>                  mutex_unlock(&mddev->sync_mutex);
> >>                  return;
> >>          }
> >>
> >> +       mutex_lock(&mddev->sync_mutex);
> >> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> >>          stop_sync_thread(mddev, false, true);
> >>          mutex_unlock(&mddev->sync_mutex);
> >>   }
> >>
> >>   static void frozen_sync_thread(struct mddev *mddev)
> >>   {
> >> -       mutex_lock(&mddev->sync_mutex);
> >> -       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> >> -
> >>          if (mddev_lock(mddev)) {
> >>                  mutex_unlock(&mddev->sync_mutex);
> >>                  return;
> >>          }
> >>
> >> +       mutex_lock(&mddev->sync_mutex);
> >> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> >>          stop_sync_thread(mddev, false, false);
> >>          mutex_unlock(&mddev->sync_mutex);
> >>   }
> >> diff --git a/drivers/md/md.h b/drivers/md/md.h
> >> index 8d881cc59799..437ab70ce79b 100644
> >> --- a/drivers/md/md.h
> >> +++ b/drivers/md/md.h
> >> @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev);
> >>   extern void md_handle_request(struct mddev *mddev, struct bio *bio);
> >>   extern int mddev_suspend(struct mddev *mddev, bool interruptible);
> >>   extern void mddev_resume(struct mddev *mddev);
> >> +extern void md_idle_sync_thread(struct mddev *mddev);
> >> +extern void md_frozen_sync_thread(struct mddev *mddev);
> >> +extern void md_unfrozen_sync_thread(struct mddev *mddev);
> >>
> >>   extern void md_reload_sb(struct mddev *mddev, int raid_disk);
> >>   extern void md_update_sb(struct mddev *mddev, int force);
> >> --
> >> 2.39.2
> >>
> >
> > .
> >
>
Yu Kuai Jan. 25, 2024, 11:56 a.m. UTC | #7
Hi,

在 2024/01/25 19:52, Xiao Ni 写道:
> On Thu, Jan 25, 2024 at 7:42 PM Yu Kuai <yukuai3@huawei.com> wrote:
>>
>> Hi,
>>
>> 在 2024/01/25 19:35, Xiao Ni 写道:
>>> Hi all
>>>
>>> This is the result of lvm2 tests:
>>> make check
>>> ### 426 tests: 319 passed, 74 skipped, 0 timed out, 5 warned, 28
>>> failed   in 56:04.914
>>
>> Are you testing with this patchset? 28 failed is much more than my
>> test result in following:
> 
> Yes, 6.7.0-rc8 with this patch set.
>>
>>> make[1]: *** [Makefile:138: check] Error 1
>>> make[1]: Leaving directory '/root/lvm2/test'
>>> make: *** [Makefile:89: check] Error 2
>>>
>>> Do you know where to check which cases fail?
>>
>> I saved logs and grep keyword "failed:", and the result will look like
>> this:
> 
> I'll use this way to collect the failed cases.

I somehow send out this draft. Please check the other reply.

Kuai
> 
> Regards
> Xiao
>>
>>
>> You can find each test log in the dir test/result/
>>>
>>> Best Regards
>>> Xiao
>>>
>>> On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote:
>>>>
>>>> The new heleprs will be used in dm-raid in later patches to fix
>>>> regressions and prevent calling md_reap_sync_thread() directly.
>>>>
>>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>>>> ---
>>>>    drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++----
>>>>    drivers/md/md.h |  3 +++
>>>>    2 files changed, 40 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>>>> index 6c5d0a372927..90cf31b53804 100644
>>>> --- a/drivers/md/md.c
>>>> +++ b/drivers/md/md.c
>>>> @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
>>>>                   mddev_lock_nointr(mddev);
>>>>    }
>>>>
>>>> -static void idle_sync_thread(struct mddev *mddev)
>>>> +void md_idle_sync_thread(struct mddev *mddev)
>>>>    {
>>>> +       lockdep_assert_held(mddev->reconfig_mutex);
>>>> +
>>>>           mutex_lock(&mddev->sync_mutex);
>>>>           clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>>> +       stop_sync_thread(mddev, true, true);
>>>> +       mutex_unlock(&mddev->sync_mutex);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(md_idle_sync_thread);
>>>> +
>>>> +void md_frozen_sync_thread(struct mddev *mddev)
>>>> +{
>>>> +       lockdep_assert_held(mddev->reconfig_mutex);
>>>> +
>>>> +       mutex_lock(&mddev->sync_mutex);
>>>> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>>> +       stop_sync_thread(mddev, true, false);
>>>> +       mutex_unlock(&mddev->sync_mutex);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(md_frozen_sync_thread);
>>>>
>>>> +void md_unfrozen_sync_thread(struct mddev *mddev)
>>>> +{
>>>> +       lockdep_assert_held(mddev->reconfig_mutex);
>>>> +
>>>> +       mutex_lock(&mddev->sync_mutex);
>>>> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>>> +       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
>>>> +       md_wakeup_thread(mddev->thread);
>>>> +       sysfs_notify_dirent_safe(mddev->sysfs_action);
>>>> +       mutex_unlock(&mddev->sync_mutex);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
>>>> +
>>>> +static void idle_sync_thread(struct mddev *mddev)
>>>> +{
>>>>           if (mddev_lock(mddev)) {
>>>>                   mutex_unlock(&mddev->sync_mutex);
>>>>                   return;
>>>>           }
>>>>
>>>> +       mutex_lock(&mddev->sync_mutex);
>>>> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>>>           stop_sync_thread(mddev, false, true);
>>>>           mutex_unlock(&mddev->sync_mutex);
>>>>    }
>>>>
>>>>    static void frozen_sync_thread(struct mddev *mddev)
>>>>    {
>>>> -       mutex_lock(&mddev->sync_mutex);
>>>> -       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>>> -
>>>>           if (mddev_lock(mddev)) {
>>>>                   mutex_unlock(&mddev->sync_mutex);
>>>>                   return;
>>>>           }
>>>>
>>>> +       mutex_lock(&mddev->sync_mutex);
>>>> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>>>           stop_sync_thread(mddev, false, false);
>>>>           mutex_unlock(&mddev->sync_mutex);
>>>>    }
>>>> diff --git a/drivers/md/md.h b/drivers/md/md.h
>>>> index 8d881cc59799..437ab70ce79b 100644
>>>> --- a/drivers/md/md.h
>>>> +++ b/drivers/md/md.h
>>>> @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev);
>>>>    extern void md_handle_request(struct mddev *mddev, struct bio *bio);
>>>>    extern int mddev_suspend(struct mddev *mddev, bool interruptible);
>>>>    extern void mddev_resume(struct mddev *mddev);
>>>> +extern void md_idle_sync_thread(struct mddev *mddev);
>>>> +extern void md_frozen_sync_thread(struct mddev *mddev);
>>>> +extern void md_unfrozen_sync_thread(struct mddev *mddev);
>>>>
>>>>    extern void md_reload_sb(struct mddev *mddev, int raid_disk);
>>>>    extern void md_update_sb(struct mddev *mddev, int force);
>>>> --
>>>> 2.39.2
>>>>
>>>
>>> .
>>>
>>
> 
> 
> .
>
Xiao Ni Jan. 25, 2024, 1:33 p.m. UTC | #8
Hi all

I build the kernel 6.7.0-rc8 with this patch set. The lvm2 regression
test result:
###       failed: [ndev-vanilla] shell/integrity.sh
###       failed: [ndev-vanilla] shell/lvchange-partial-raid10.sh
###       failed: [ndev-vanilla] shell/lvchange-raid-transient-failures.sh
###       failed: [ndev-vanilla] shell/lvchange-raid10.sh
###       failed: [ndev-vanilla] shell/lvchange-rebuild-raid.sh
###       failed: [ndev-vanilla] shell/lvconvert-cache-abort.sh
###       failed: [ndev-vanilla] shell/lvconvert-raid-regionsize.sh
###       failed: [ndev-vanilla]
shell/lvconvert-raid-reshape-linear_to_raid6-single-type.sh
###       failed: [ndev-vanilla] shell/lvconvert-raid-reshape.sh
###       failed: [ndev-vanilla] shell/lvconvert-raid-takeover-alloc-failure.sh
###       failed: [ndev-vanilla] shell/lvconvert-raid-takeover-thin.sh
###       failed: [ndev-vanilla] shell/lvconvert-raid-takeover.sh
###       failed: [ndev-vanilla] shell/lvconvert-raid0-striped.sh
###       failed: [ndev-vanilla] shell/lvconvert-raid0_to_raid10.sh
###       failed: [ndev-vanilla] shell/lvconvert-raid10.sh
###       failed: [ndev-vanilla] shell/lvconvert-raid5_to_raid10.sh
###       failed: [ndev-vanilla] shell/lvconvert-repair-raid.sh
###       failed: [ndev-vanilla] shell/lvconvert-striped-raid0.sh
###       failed: [ndev-vanilla] shell/lvcreate-large-raid10.sh
###       failed: [ndev-vanilla] shell/lvcreate-raid-nosync.sh
###       failed: [ndev-vanilla] shell/lvcreate-raid10.sh
###       failed: [ndev-vanilla] shell/lvdisplay-raid.sh
###       failed: [ndev-vanilla] shell/lvextend-thin-raid.sh
###       failed: [ndev-vanilla] shell/lvresize-fs-crypt.sh
###       failed: [ndev-vanilla] shell/lvresize-raid.sh
###       failed: [ndev-vanilla] shell/lvresize-raid10.sh
###       failed: [ndev-vanilla] shell/pvck-dump.sh
###       failed: [ndev-vanilla] shell/pvmove-raid-segtypes.sh
###       failed: [ndev-vanilla] shell/select-report.sh

Regards
Xiao

On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote:
>
> The new heleprs will be used in dm-raid in later patches to fix
> regressions and prevent calling md_reap_sync_thread() directly.
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++----
>  drivers/md/md.h |  3 +++
>  2 files changed, 40 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 6c5d0a372927..90cf31b53804 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
>                 mddev_lock_nointr(mddev);
>  }
>
> -static void idle_sync_thread(struct mddev *mddev)
> +void md_idle_sync_thread(struct mddev *mddev)
>  {
> +       lockdep_assert_held(mddev->reconfig_mutex);
> +
>         mutex_lock(&mddev->sync_mutex);
>         clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       stop_sync_thread(mddev, true, true);
> +       mutex_unlock(&mddev->sync_mutex);
> +}
> +EXPORT_SYMBOL_GPL(md_idle_sync_thread);
> +
> +void md_frozen_sync_thread(struct mddev *mddev)
> +{
> +       lockdep_assert_held(mddev->reconfig_mutex);
> +
> +       mutex_lock(&mddev->sync_mutex);
> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       stop_sync_thread(mddev, true, false);
> +       mutex_unlock(&mddev->sync_mutex);
> +}
> +EXPORT_SYMBOL_GPL(md_frozen_sync_thread);
>
> +void md_unfrozen_sync_thread(struct mddev *mddev)
> +{
> +       lockdep_assert_held(mddev->reconfig_mutex);
> +
> +       mutex_lock(&mddev->sync_mutex);
> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> +       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
> +       md_wakeup_thread(mddev->thread);
> +       sysfs_notify_dirent_safe(mddev->sysfs_action);
> +       mutex_unlock(&mddev->sync_mutex);
> +}
> +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
> +
> +static void idle_sync_thread(struct mddev *mddev)
> +{
>         if (mddev_lock(mddev)) {
>                 mutex_unlock(&mddev->sync_mutex);
>                 return;
>         }
>
> +       mutex_lock(&mddev->sync_mutex);
> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>         stop_sync_thread(mddev, false, true);
>         mutex_unlock(&mddev->sync_mutex);
>  }
>
>  static void frozen_sync_thread(struct mddev *mddev)
>  {
> -       mutex_lock(&mddev->sync_mutex);
> -       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> -
>         if (mddev_lock(mddev)) {
>                 mutex_unlock(&mddev->sync_mutex);
>                 return;
>         }
>
> +       mutex_lock(&mddev->sync_mutex);
> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>         stop_sync_thread(mddev, false, false);
>         mutex_unlock(&mddev->sync_mutex);
>  }
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 8d881cc59799..437ab70ce79b 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev);
>  extern void md_handle_request(struct mddev *mddev, struct bio *bio);
>  extern int mddev_suspend(struct mddev *mddev, bool interruptible);
>  extern void mddev_resume(struct mddev *mddev);
> +extern void md_idle_sync_thread(struct mddev *mddev);
> +extern void md_frozen_sync_thread(struct mddev *mddev);
> +extern void md_unfrozen_sync_thread(struct mddev *mddev);
>
>  extern void md_reload_sb(struct mddev *mddev, int raid_disk);
>  extern void md_update_sb(struct mddev *mddev, int force);
> --
> 2.39.2
>
Song Liu Jan. 26, 2024, 12:14 a.m. UTC | #9
Hi Xiao,

On Thu, Jan 25, 2024 at 5:33 AM Xiao Ni <xni@redhat.com> wrote:
>
> Hi all
>
> I build the kernel 6.7.0-rc8 with this patch set. The lvm2 regression
> test result:

I believe the patchset is built on top of upstream (6.8-rc1). There are
quite some md and dm changes between 6.7-rc8 and 6.8-rc1. Could
you please rerun the test on top of 6.8-rc1? Once we identify the right
set of fixes, we will see which ones to back port to older kernels.

Thanks,
Song

> ###       failed: [ndev-vanilla] shell/integrity.sh
> ###       failed: [ndev-vanilla] shell/lvchange-partial-raid10.sh
> ###       failed: [ndev-vanilla] shell/lvchange-raid-transient-failures.sh
> ###       failed: [ndev-vanilla] shell/lvchange-raid10.sh
> ###       failed: [ndev-vanilla] shell/lvchange-rebuild-raid.sh
> ###       failed: [ndev-vanilla] shell/lvconvert-cache-abort.sh
> ###       failed: [ndev-vanilla] shell/lvconvert-raid-regionsize.sh
> ###       failed: [ndev-vanilla]
> shell/lvconvert-raid-reshape-linear_to_raid6-single-type.sh
> ###       failed: [ndev-vanilla] shell/lvconvert-raid-reshape.sh
> ###       failed: [ndev-vanilla] shell/lvconvert-raid-takeover-alloc-failure.sh
> ###       failed: [ndev-vanilla] shell/lvconvert-raid-takeover-thin.sh
> ###       failed: [ndev-vanilla] shell/lvconvert-raid-takeover.sh
> ###       failed: [ndev-vanilla] shell/lvconvert-raid0-striped.sh
> ###       failed: [ndev-vanilla] shell/lvconvert-raid0_to_raid10.sh
> ###       failed: [ndev-vanilla] shell/lvconvert-raid10.sh
> ###       failed: [ndev-vanilla] shell/lvconvert-raid5_to_raid10.sh
> ###       failed: [ndev-vanilla] shell/lvconvert-repair-raid.sh
> ###       failed: [ndev-vanilla] shell/lvconvert-striped-raid0.sh
> ###       failed: [ndev-vanilla] shell/lvcreate-large-raid10.sh
> ###       failed: [ndev-vanilla] shell/lvcreate-raid-nosync.sh
> ###       failed: [ndev-vanilla] shell/lvcreate-raid10.sh
> ###       failed: [ndev-vanilla] shell/lvdisplay-raid.sh
> ###       failed: [ndev-vanilla] shell/lvextend-thin-raid.sh
> ###       failed: [ndev-vanilla] shell/lvresize-fs-crypt.sh
> ###       failed: [ndev-vanilla] shell/lvresize-raid.sh
> ###       failed: [ndev-vanilla] shell/lvresize-raid10.sh
> ###       failed: [ndev-vanilla] shell/pvck-dump.sh
> ###       failed: [ndev-vanilla] shell/pvmove-raid-segtypes.sh
> ###       failed: [ndev-vanilla] shell/select-report.sh
Yu Kuai Jan. 26, 2024, 2:38 a.m. UTC | #10
Hi,

在 2024/01/25 15:57, Yu Kuai 写道:
> Hi,
> 
> 在 2024/01/25 15:51, Xiao Ni 写道:
>> On Wed, Jan 24, 2024 at 5:19 PM Yu Kuai <yukuai3@huawei.com> wrote:
>>>
>>> The new heleprs will be used in dm-raid in later patches to fix
>>> regressions and prevent calling md_reap_sync_thread() directly.
>>>
>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>>> ---
>>>   drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++----
>>>   drivers/md/md.h |  3 +++
>>>   2 files changed, 40 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>>> index 6c5d0a372927..90cf31b53804 100644
>>> --- a/drivers/md/md.c
>>> +++ b/drivers/md/md.c
>>> @@ -4915,30 +4915,63 @@ static void stop_sync_thread(struct mddev 
>>> *mddev, bool locked, bool check_seq)
>>>                  mddev_lock_nointr(mddev);
>>>   }
>>>
>>> -static void idle_sync_thread(struct mddev *mddev)
>>> +void md_idle_sync_thread(struct mddev *mddev)
>>>   {
>>> +       lockdep_assert_held(mddev->reconfig_mutex);
>>> +
>>
>> Hi Kuai
>>
>> There is a building error. It should give a pointer to
>> lockdep_assert_held. And same with the other two places in this patch.
> 
> Yes, I forgot that I disabled all the debug config in order to let tests
> finish quickly.

I enabled these debuging conifg, and turns out this patch has some
problem, see below.
> 
> Thanks for the notince, will fix this in v3.
> 
> Thanks,
> Kuai
> 
>>
>> Regards
>> Xiao
>>
>>>          mutex_lock(&mddev->sync_mutex);
>>>          clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>> +       stop_sync_thread(mddev, true, true);
>>> +       mutex_unlock(&mddev->sync_mutex);
>>> +}
>>> +EXPORT_SYMBOL_GPL(md_idle_sync_thread);
>>> +
>>> +void md_frozen_sync_thread(struct mddev *mddev)
>>> +{
>>> +       lockdep_assert_held(mddev->reconfig_mutex);
>>> +
>>> +       mutex_lock(&mddev->sync_mutex);
>>> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>> +       stop_sync_thread(mddev, true, false);

stop_sync_thread() may release 'reconfig_mutex' and grab it again, hence
it's not right to grab 'sync_mutex' before 'reconfig_mutex'.

Since 'sync_mutex' is introduced to serialize sysfs api 'sync_action'
writers, while is not involved for dm-raid. I'll remove 'sync_mutex' for
the new helper in v3.

Thanks,
Kuai

>>> +       mutex_unlock(&mddev->sync_mutex);
>>> +}
>>> +EXPORT_SYMBOL_GPL(md_frozen_sync_thread);
>>>
>>> +void md_unfrozen_sync_thread(struct mddev *mddev)
>>> +{
>>> +       lockdep_assert_held(mddev->reconfig_mutex);
>>> +
>>> +       mutex_lock(&mddev->sync_mutex);
>>> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>> +       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
>>> +       md_wakeup_thread(mddev->thread);
>>> +       sysfs_notify_dirent_safe(mddev->sysfs_action);
>>> +       mutex_unlock(&mddev->sync_mutex);
>>> +}
>>> +EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
>>> +
>>> +static void idle_sync_thread(struct mddev *mddev)
>>> +{
>>>          if (mddev_lock(mddev)) {
>>>                  mutex_unlock(&mddev->sync_mutex);
>>>                  return;
>>>          }
>>>
>>> +       mutex_lock(&mddev->sync_mutex);
>>> +       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>>          stop_sync_thread(mddev, false, true);
>>>          mutex_unlock(&mddev->sync_mutex);
>>>   }
>>>
>>>   static void frozen_sync_thread(struct mddev *mddev)
>>>   {
>>> -       mutex_lock(&mddev->sync_mutex);
>>> -       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>> -
>>>          if (mddev_lock(mddev)) {
>>>                  mutex_unlock(&mddev->sync_mutex);
>>>                  return;
>>>          }
>>>
>>> +       mutex_lock(&mddev->sync_mutex);
>>> +       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
>>>          stop_sync_thread(mddev, false, false);
>>>          mutex_unlock(&mddev->sync_mutex);
>>>   }
>>> diff --git a/drivers/md/md.h b/drivers/md/md.h
>>> index 8d881cc59799..437ab70ce79b 100644
>>> --- a/drivers/md/md.h
>>> +++ b/drivers/md/md.h
>>> @@ -781,6 +781,9 @@ extern void md_rdev_clear(struct md_rdev *rdev);
>>>   extern void md_handle_request(struct mddev *mddev, struct bio *bio);
>>>   extern int mddev_suspend(struct mddev *mddev, bool interruptible);
>>>   extern void mddev_resume(struct mddev *mddev);
>>> +extern void md_idle_sync_thread(struct mddev *mddev);
>>> +extern void md_frozen_sync_thread(struct mddev *mddev);
>>> +extern void md_unfrozen_sync_thread(struct mddev *mddev);
>>>
>>>   extern void md_reload_sb(struct mddev *mddev, int raid_disk);
>>>   extern void md_update_sb(struct mddev *mddev, int force);
>>> -- 
>>> 2.39.2
>>>
>>
>> .
>>
> 
> .
>
Xiao Ni Jan. 26, 2024, 6:54 a.m. UTC | #11
On Fri, Jan 26, 2024 at 8:14 AM Song Liu <song@kernel.org> wrote:
>
> Hi Xiao,
>
> On Thu, Jan 25, 2024 at 5:33 AM Xiao Ni <xni@redhat.com> wrote:
> >
> > Hi all
> >
> > I build the kernel 6.7.0-rc8 with this patch set. The lvm2 regression
> > test result:
>
> I believe the patchset is built on top of upstream (6.8-rc1). There are
> quite some md and dm changes between 6.7-rc8 and 6.8-rc1. Could
> you please rerun the test on top of 6.8-rc1? Once we identify the right
> set of fixes, we will see which ones to back port to older kernels.
>
> Thanks,
> Song

Hi all

I tried to run tests on 6.8-rc1, the failure number increases to 72.

And I ran the tests on 6.6, there are only 4 failed cases
###       failed: [ndev-vanilla] shell/lvconvert-cache-abort.sh
###       failed: [ndev-vanilla] shell/lvresize-fs-crypt.sh
###       failed: [ndev-vanilla] shell/pvck-dump.sh
###       failed: [ndev-vanilla] shell/select-report.sh

Best Regards
Xiao
diff mbox series

Patch

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6c5d0a372927..90cf31b53804 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4915,30 +4915,63 @@  static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
 		mddev_lock_nointr(mddev);
 }
 
-static void idle_sync_thread(struct mddev *mddev)
+void md_idle_sync_thread(struct mddev *mddev)
 {
+	lockdep_assert_held(mddev->reconfig_mutex);
+
 	mutex_lock(&mddev->sync_mutex);
 	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+	stop_sync_thread(mddev, true, true);
+	mutex_unlock(&mddev->sync_mutex);
+}
+EXPORT_SYMBOL_GPL(md_idle_sync_thread);
+
+void md_frozen_sync_thread(struct mddev *mddev)
+{
+	lockdep_assert_held(mddev->reconfig_mutex);
+
+	mutex_lock(&mddev->sync_mutex);
+	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+	stop_sync_thread(mddev, true, false);
+	mutex_unlock(&mddev->sync_mutex);
+}
+EXPORT_SYMBOL_GPL(md_frozen_sync_thread);
 
+void md_unfrozen_sync_thread(struct mddev *mddev)
+{
+	lockdep_assert_held(mddev->reconfig_mutex);
+
+	mutex_lock(&mddev->sync_mutex);
+	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	md_wakeup_thread(mddev->thread);
+	sysfs_notify_dirent_safe(mddev->sysfs_action);
+	mutex_unlock(&mddev->sync_mutex);
+}
+EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
+
+static void idle_sync_thread(struct mddev *mddev)
+{
 	if (mddev_lock(mddev)) {
 		mutex_unlock(&mddev->sync_mutex);
 		return;
 	}
 
+	mutex_lock(&mddev->sync_mutex);
+	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 	stop_sync_thread(mddev, false, true);
 	mutex_unlock(&mddev->sync_mutex);
 }
 
 static void frozen_sync_thread(struct mddev *mddev)
 {
-	mutex_lock(&mddev->sync_mutex);
-	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-
 	if (mddev_lock(mddev)) {
 		mutex_unlock(&mddev->sync_mutex);
 		return;
 	}
 
+	mutex_lock(&mddev->sync_mutex);
+	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 	stop_sync_thread(mddev, false, false);
 	mutex_unlock(&mddev->sync_mutex);
 }
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 8d881cc59799..437ab70ce79b 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -781,6 +781,9 @@  extern void md_rdev_clear(struct md_rdev *rdev);
 extern void md_handle_request(struct mddev *mddev, struct bio *bio);
 extern int mddev_suspend(struct mddev *mddev, bool interruptible);
 extern void mddev_resume(struct mddev *mddev);
+extern void md_idle_sync_thread(struct mddev *mddev);
+extern void md_frozen_sync_thread(struct mddev *mddev);
+extern void md_unfrozen_sync_thread(struct mddev *mddev);
 
 extern void md_reload_sb(struct mddev *mddev, int raid_disk);
 extern void md_update_sb(struct mddev *mddev, int force);