Message ID | 20240229154941.99557-2-xni@redhat.com (mailing list archive) |
---|---|
State | Superseded, archived |
Delegated to: | Mike Snitzer |
Headers | show |
Series | Fix dmraid regression bugs | expand |
Hi, 在 2024/02/29 23:49, Xiao Ni 写道: > This reverts commit ad39c08186f8a0f221337985036ba86731d6aafe. > > Function stop_sync_thread only wakes up sync task. It also needs to > wake up sync thread. This problem will be fixed in the following > patch. I don't think so, unlike mddev->thread, sync_thread will only be executed once and must be executed each time it's registered, and caller must make sure to wake up registered sync_thread. Thanks, Kuai > > Signed-off-by: Xiao Ni <xni@redhat.com> > --- > drivers/md/md.c | 5 +---- > drivers/md/raid10.c | 16 ++++++++++++++-- > drivers/md/raid5.c | 29 +++++++++++++++++++++++++++-- > 3 files changed, 42 insertions(+), 8 deletions(-) > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index 9e41a9aaba8b..db4743ba7f6c 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -9376,7 +9376,6 @@ static void md_start_sync(struct work_struct *ws) > struct mddev *mddev = container_of(ws, struct mddev, sync_work); > int spares = 0; > bool suspend = false; > - char *name; > > /* > * If reshape is still in progress, spares won't be added or removed > @@ -9414,10 +9413,8 @@ static void md_start_sync(struct work_struct *ws) > if (spares) > md_bitmap_write_all(mddev->bitmap); > > - name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ? 
> - "reshape" : "resync"; > rcu_assign_pointer(mddev->sync_thread, > - md_register_thread(md_do_sync, mddev, name)); > + md_register_thread(md_do_sync, mddev, "resync")); > if (!mddev->sync_thread) { > pr_warn("%s: could not start resync thread...\n", > mdname(mddev)); > diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c > index a5f8419e2df1..7412066ea22c 100644 > --- a/drivers/md/raid10.c > +++ b/drivers/md/raid10.c > @@ -4175,7 +4175,11 @@ static int raid10_run(struct mddev *mddev) > clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); > clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); > set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); > - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); > + rcu_assign_pointer(mddev->sync_thread, > + md_register_thread(md_do_sync, mddev, "reshape")); > + if (!mddev->sync_thread) > + goto out_free_conf; > } > > return 0; > @@ -4569,8 +4573,16 @@ static int raid10_start_reshape(struct mddev *mddev) > clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); > clear_bit(MD_RECOVERY_DONE, &mddev->recovery); > set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); > - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); > + > + rcu_assign_pointer(mddev->sync_thread, > + md_register_thread(md_do_sync, mddev, "reshape")); > + if (!mddev->sync_thread) { > + ret = -EAGAIN; > + goto abort; > + } > conf->reshape_checkpoint = jiffies; > + md_wakeup_thread(mddev->sync_thread); > md_new_event(); > return 0; > > diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c > index 6a7a32f7fb91..8497880135ee 100644 > --- a/drivers/md/raid5.c > +++ b/drivers/md/raid5.c > @@ -7936,7 +7936,11 @@ static int raid5_run(struct mddev *mddev) > clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); > clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); > set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); > - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); 
> + rcu_assign_pointer(mddev->sync_thread, > + md_register_thread(md_do_sync, mddev, "reshape")); > + if (!mddev->sync_thread) > + goto abort; > } > > /* Ok, everything is just fine now */ > @@ -8502,8 +8506,29 @@ static int raid5_start_reshape(struct mddev *mddev) > clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); > clear_bit(MD_RECOVERY_DONE, &mddev->recovery); > set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); > - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); > + rcu_assign_pointer(mddev->sync_thread, > + md_register_thread(md_do_sync, mddev, "reshape")); > + if (!mddev->sync_thread) { > + mddev->recovery = 0; > + spin_lock_irq(&conf->device_lock); > + write_seqcount_begin(&conf->gen_lock); > + mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; > + mddev->new_chunk_sectors = > + conf->chunk_sectors = conf->prev_chunk_sectors; > + mddev->new_layout = conf->algorithm = conf->prev_algo; > + rdev_for_each(rdev, mddev) > + rdev->new_data_offset = rdev->data_offset; > + smp_wmb(); > + conf->generation--; > + conf->reshape_progress = MaxSector; > + mddev->reshape_position = MaxSector; > + write_seqcount_end(&conf->gen_lock); > + spin_unlock_irq(&conf->device_lock); > + return -EAGAIN; > + } > conf->reshape_checkpoint = jiffies; > + md_wakeup_thread(mddev->sync_thread); > md_new_event(); > return 0; > } >
On Fri, Mar 1, 2024 at 10:38 AM Yu Kuai <yukuai1@huaweicloud.com> wrote: > > Hi, > > 在 2024/02/29 23:49, Xiao Ni 写道: > > This reverts commit ad39c08186f8a0f221337985036ba86731d6aafe. > > > > Function stop_sync_thread only wakes up sync task. It also needs to > > wake up sync thread. This problem will be fixed in the following > > patch. > > I don't think so, unlike mddev->thread, sync_thread will only be > executed once and must be executed each time it's registered, and caller > must make sure to wake up registered sync_thread. Hi Kuai I'll modify the comments. But it should be right to wake_up(mddev->sync_thread) in function stop_sync_thread too? You gave the same patch yesterday too. I know the caller should wake up sync thread too. "However, I think the one to register sync_thread is responsible to wake it up." I put your comments here. If I understand correctly, we can do something like this? --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7937,6 +7937,7 @@ static int raid5_run(struct mddev *mddev) set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); rcu_assign_pointer(mddev->sync_thread, md_register_thread(md_do_sync, mddev, "reshape")); + md_wakeup_thread(mddev->sync_thread); if (!mddev->sync_thread) goto abort; } And at first, I didn't revert ad39c08186f8a0f221337985036ba86731d6aafe. But with my patch set, it can cause failure in lvm2 test suite. And the patch you gave yesterday is part of my patch01, so I revert it. Are you good if I change the comments and with the modification (wake up sync thread after registering reshape)? 
Best Regards Xiao > > Thanks, > Kuai > > > > Signed-off-by: Xiao Ni <xni@redhat.com> > > --- > > drivers/md/md.c | 5 +---- > > drivers/md/raid10.c | 16 ++++++++++++++-- > > drivers/md/raid5.c | 29 +++++++++++++++++++++++++++-- > > 3 files changed, 42 insertions(+), 8 deletions(-) > > > > diff --git a/drivers/md/md.c b/drivers/md/md.c > > index 9e41a9aaba8b..db4743ba7f6c 100644 > > --- a/drivers/md/md.c > > +++ b/drivers/md/md.c > > @@ -9376,7 +9376,6 @@ static void md_start_sync(struct work_struct *ws) > > struct mddev *mddev = container_of(ws, struct mddev, sync_work); > > int spares = 0; > > bool suspend = false; > > - char *name; > > > > /* > > * If reshape is still in progress, spares won't be added or removed > > @@ -9414,10 +9413,8 @@ static void md_start_sync(struct work_struct *ws) > > if (spares) > > md_bitmap_write_all(mddev->bitmap); > > > > - name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ? > > - "reshape" : "resync"; > > rcu_assign_pointer(mddev->sync_thread, > > - md_register_thread(md_do_sync, mddev, name)); > > + md_register_thread(md_do_sync, mddev, "resync")); > > if (!mddev->sync_thread) { > > pr_warn("%s: could not start resync thread...\n", > > mdname(mddev)); > > diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c > > index a5f8419e2df1..7412066ea22c 100644 > > --- a/drivers/md/raid10.c > > +++ b/drivers/md/raid10.c > > @@ -4175,7 +4175,11 @@ static int raid10_run(struct mddev *mddev) > > clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); > > clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); > > set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); > > - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > > + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); > > + rcu_assign_pointer(mddev->sync_thread, > > + md_register_thread(md_do_sync, mddev, "reshape")); > > + if (!mddev->sync_thread) > > + goto out_free_conf; > > } > > > > return 0; > > @@ -4569,8 +4573,16 @@ static int raid10_start_reshape(struct mddev *mddev) > > clear_bit(MD_RECOVERY_CHECK, 
&mddev->recovery); > > clear_bit(MD_RECOVERY_DONE, &mddev->recovery); > > set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); > > - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > > + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); > > + > > + rcu_assign_pointer(mddev->sync_thread, > > + md_register_thread(md_do_sync, mddev, "reshape")); > > + if (!mddev->sync_thread) { > > + ret = -EAGAIN; > > + goto abort; > > + } > > conf->reshape_checkpoint = jiffies; > > + md_wakeup_thread(mddev->sync_thread); > > md_new_event(); > > return 0; > > > > diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c > > index 6a7a32f7fb91..8497880135ee 100644 > > --- a/drivers/md/raid5.c > > +++ b/drivers/md/raid5.c > > @@ -7936,7 +7936,11 @@ static int raid5_run(struct mddev *mddev) > > clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); > > clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); > > set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); > > - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > > + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); > > + rcu_assign_pointer(mddev->sync_thread, > > + md_register_thread(md_do_sync, mddev, "reshape")); > > + if (!mddev->sync_thread) > > + goto abort; > > } > > > > /* Ok, everything is just fine now */ > > @@ -8502,8 +8506,29 @@ static int raid5_start_reshape(struct mddev *mddev) > > clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); > > clear_bit(MD_RECOVERY_DONE, &mddev->recovery); > > set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); > > - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > > + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); > > + rcu_assign_pointer(mddev->sync_thread, > > + md_register_thread(md_do_sync, mddev, "reshape")); > > + if (!mddev->sync_thread) { > > + mddev->recovery = 0; > > + spin_lock_irq(&conf->device_lock); > > + write_seqcount_begin(&conf->gen_lock); > > + mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; > > + mddev->new_chunk_sectors = > > + conf->chunk_sectors = conf->prev_chunk_sectors; > > + 
mddev->new_layout = conf->algorithm = conf->prev_algo; > > + rdev_for_each(rdev, mddev) > > + rdev->new_data_offset = rdev->data_offset; > > + smp_wmb(); > > + conf->generation--; > > + conf->reshape_progress = MaxSector; > > + mddev->reshape_position = MaxSector; > > + write_seqcount_end(&conf->gen_lock); > > + spin_unlock_irq(&conf->device_lock); > > + return -EAGAIN; > > + } > > conf->reshape_checkpoint = jiffies; > > + md_wakeup_thread(mddev->sync_thread); > > md_new_event(); > > return 0; > > } > > >
diff --git a/drivers/md/md.c b/drivers/md/md.c index 9e41a9aaba8b..db4743ba7f6c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9376,7 +9376,6 @@ static void md_start_sync(struct work_struct *ws) struct mddev *mddev = container_of(ws, struct mddev, sync_work); int spares = 0; bool suspend = false; - char *name; /* * If reshape is still in progress, spares won't be added or removed @@ -9414,10 +9413,8 @@ static void md_start_sync(struct work_struct *ws) if (spares) md_bitmap_write_all(mddev->bitmap); - name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ? - "reshape" : "resync"; rcu_assign_pointer(mddev->sync_thread, - md_register_thread(md_do_sync, mddev, name)); + md_register_thread(md_do_sync, mddev, "resync")); if (!mddev->sync_thread) { pr_warn("%s: could not start resync thread...\n", mdname(mddev)); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index a5f8419e2df1..7412066ea22c 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4175,7 +4175,11 @@ static int raid10_run(struct mddev *mddev) clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); + rcu_assign_pointer(mddev->sync_thread, + md_register_thread(md_do_sync, mddev, "reshape")); + if (!mddev->sync_thread) + goto out_free_conf; } return 0; @@ -4569,8 +4573,16 @@ static int raid10_start_reshape(struct mddev *mddev) clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); + + rcu_assign_pointer(mddev->sync_thread, + md_register_thread(md_do_sync, mddev, "reshape")); + if (!mddev->sync_thread) { + ret = -EAGAIN; + goto abort; + } conf->reshape_checkpoint = jiffies; + md_wakeup_thread(mddev->sync_thread); 
md_new_event(); return 0; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 6a7a32f7fb91..8497880135ee 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7936,7 +7936,11 @@ static int raid5_run(struct mddev *mddev) clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); + rcu_assign_pointer(mddev->sync_thread, + md_register_thread(md_do_sync, mddev, "reshape")); + if (!mddev->sync_thread) + goto abort; } /* Ok, everything is just fine now */ @@ -8502,8 +8506,29 @@ static int raid5_start_reshape(struct mddev *mddev) clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); + rcu_assign_pointer(mddev->sync_thread, + md_register_thread(md_do_sync, mddev, "reshape")); + if (!mddev->sync_thread) { + mddev->recovery = 0; + spin_lock_irq(&conf->device_lock); + write_seqcount_begin(&conf->gen_lock); + mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; + mddev->new_chunk_sectors = + conf->chunk_sectors = conf->prev_chunk_sectors; + mddev->new_layout = conf->algorithm = conf->prev_algo; + rdev_for_each(rdev, mddev) + rdev->new_data_offset = rdev->data_offset; + smp_wmb(); + conf->generation--; + conf->reshape_progress = MaxSector; + mddev->reshape_position = MaxSector; + write_seqcount_end(&conf->gen_lock); + spin_unlock_irq(&conf->device_lock); + return -EAGAIN; + } conf->reshape_checkpoint = jiffies; + md_wakeup_thread(mddev->sync_thread); md_new_event(); return 0; }
This reverts commit ad39c08186f8a0f221337985036ba86731d6aafe. Function stop_sync_thread only wakes up sync task. It also needs to wake up sync thread. This problem will be fixed in the following patch. Signed-off-by: Xiao Ni <xni@redhat.com> --- drivers/md/md.c | 5 +---- drivers/md/raid10.c | 16 ++++++++++++++-- drivers/md/raid5.c | 29 +++++++++++++++++++++++++++-- 3 files changed, 42 insertions(+), 8 deletions(-)