@@ -9376,7 +9376,6 @@ static void md_start_sync(struct work_struct *ws)
struct mddev *mddev = container_of(ws, struct mddev, sync_work);
int spares = 0;
bool suspend = false;
- char *name;
/*
* If reshape is still in progress, spares won't be added or removed
@@ -9414,10 +9413,8 @@ static void md_start_sync(struct work_struct *ws)
if (spares)
md_bitmap_write_all(mddev->bitmap);
- name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ?
- "reshape" : "resync";
rcu_assign_pointer(mddev->sync_thread,
- md_register_thread(md_do_sync, mddev, name));
+ md_register_thread(md_do_sync, mddev, "resync"));
if (!mddev->sync_thread) {
pr_warn("%s: could not start resync thread...\n",
mdname(mddev));
@@ -4175,7 +4175,11 @@ static int raid10_run(struct mddev *mddev)
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+ rcu_assign_pointer(mddev->sync_thread,
+ md_register_thread(md_do_sync, mddev, "reshape"));
+ if (!mddev->sync_thread)
+ goto out_free_conf;
}
return 0;
@@ -4569,8 +4573,16 @@ static int raid10_start_reshape(struct mddev *mddev)
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+
+ rcu_assign_pointer(mddev->sync_thread,
+ md_register_thread(md_do_sync, mddev, "reshape"));
+ if (!mddev->sync_thread) {
+ ret = -EAGAIN;
+ goto abort;
+ }
conf->reshape_checkpoint = jiffies;
+ md_wakeup_thread(mddev->sync_thread);
md_new_event();
return 0;
@@ -7936,7 +7936,11 @@ static int raid5_run(struct mddev *mddev)
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+ rcu_assign_pointer(mddev->sync_thread,
+ md_register_thread(md_do_sync, mddev, "reshape"));
+ if (!mddev->sync_thread)
+ goto abort;
}
/* Ok, everything is just fine now */
@@ -8502,8 +8506,29 @@ static int raid5_start_reshape(struct mddev *mddev)
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+ rcu_assign_pointer(mddev->sync_thread,
+ md_register_thread(md_do_sync, mddev, "reshape"));
+ if (!mddev->sync_thread) {
+ mddev->recovery = 0;
+ spin_lock_irq(&conf->device_lock);
+ write_seqcount_begin(&conf->gen_lock);
+ mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
+ mddev->new_chunk_sectors =
+ conf->chunk_sectors = conf->prev_chunk_sectors;
+ mddev->new_layout = conf->algorithm = conf->prev_algo;
+ rdev_for_each(rdev, mddev)
+ rdev->new_data_offset = rdev->data_offset;
+ smp_wmb();
+ conf->generation--;
+ conf->reshape_progress = MaxSector;
+ mddev->reshape_position = MaxSector;
+ write_seqcount_end(&conf->gen_lock);
+ spin_unlock_irq(&conf->device_lock);
+ return -EAGAIN;
+ }
conf->reshape_checkpoint = jiffies;
+ md_wakeup_thread(mddev->sync_thread);
md_new_event();
return 0;
}
This reverts commit ad39c08186f8a0f221337985036ba86731d6aafe. The reverted patch says there is no way to guarantee that md_do_sync will be executed. Users should choose a sutiable chance to wake up sync thread after registering sync thread. And this patch set tries to use a minimal change to fix dmraid regressions. With patch03 and patch04 and commit 82ec0ae59d02 ("md: Make sure md_do_sync() will set MD_RECOVERY_DONE"), all deadlock problems can be fixed. So revert this one and we can rethink about this in future. Signed-off-by: Xiao Ni <xni@redhat.com> --- drivers/md/md.c | 5 +---- drivers/md/raid10.c | 16 ++++++++++++++-- drivers/md/raid5.c | 29 +++++++++++++++++++++++++++-- 3 files changed, 42 insertions(+), 8 deletions(-)