Message ID | 20231125065419.3518254-1-yukuai1@huaweicloud.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [-next,v2] md: synchronize flush io with array reconfiguration | expand |
On Fri, Nov 24, 2023 at 10:54 PM Yu Kuai <yukuai1@huaweicloud.com> wrote: > > From: Yu Kuai <yukuai3@huawei.com> > > Currently rcu is used to protect iterating rdev from submit_flushes(): > > submit_flushes remove_and_add_spares > synchronize_rcu > pers->hot_remove_disk() > rcu_read_lock() > rdev_for_each_rcu > if (rdev->raid_disk >= 0) > rdev->radi_disk = -1; > atomic_inc(&rdev->nr_pending) > rcu_read_unlock() > bi = bio_alloc_bioset() > bi->bi_end_io = md_end_flush > bi->private = rdev > submit_bio > // issue io for removed rdev > > Fix this problem by grabbing 'acive_io' before iterating rdev, make sure > that remove_and_add_spares() won't concurrent with submit_flushes(). > > Fixes: a2826aa92e2e ("md: support barrier requests on all personalities.") > Signed-off-by: Yu Kuai <yukuai3@huawei.com> > --- > Changes v2: > - Add WARN_ON in case md_flush_request() is not called from > md_handle_request() in future. > > drivers/md/md.c | 22 ++++++++++++++++------ > 1 file changed, 16 insertions(+), 6 deletions(-) > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index 86efc9c2ae56..2ffedc39edd6 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -538,6 +538,9 @@ static void md_end_flush(struct bio *bio) > rdev_dec_pending(rdev, mddev); > > if (atomic_dec_and_test(&mddev->flush_pending)) { > + /* The pair is percpu_ref_tryget() from md_flush_request() */ > + percpu_ref_put(&mddev->active_io); > + > /* The pre-request flush has finished */ > queue_work(md_wq, &mddev->flush_work); > } > @@ -557,12 +560,8 @@ static void submit_flushes(struct work_struct *ws) > rdev_for_each_rcu(rdev, mddev) > if (rdev->raid_disk >= 0 && > !test_bit(Faulty, &rdev->flags)) { > - /* Take two references, one is dropped > - * when request finishes, one after > - * we reclaim rcu_read_lock > - */ > struct bio *bi; > - atomic_inc(&rdev->nr_pending); > + > atomic_inc(&rdev->nr_pending); > rcu_read_unlock(); > bi = bio_alloc_bioset(rdev->bdev, 0, > @@ -573,7 +572,6 @@ static void 
submit_flushes(struct work_struct *ws) > atomic_inc(&mddev->flush_pending); > submit_bio(bi); > rcu_read_lock(); > - rdev_dec_pending(rdev, mddev); > } > rcu_read_unlock(); > if (atomic_dec_and_test(&mddev->flush_pending)) > @@ -626,6 +624,18 @@ bool md_flush_request(struct mddev *mddev, struct bio *bio) > /* new request after previous flush is completed */ > if (ktime_after(req_start, mddev->prev_flush_start)) { > WARN_ON(mddev->flush_bio); > + /* > + * Grab a reference to make sure mddev_suspend() will wait for > + * this flush to be done. > + * > + * md_flush_reqeust() is called under md_handle_request() and > + * 'active_io' is already grabbed, hence percpu_ref_tryget() > + * won't fail, percpu_ref_tryget_live() can't be used because > + * percpu_ref_kill() can be called by mddev_suspend() > + * concurrently. > + */ > + if (WARN_ON(percpu_ref_tryget(&mddev->active_io))) This should be "if (!WARN_ON(..))", right? Song > + percpu_ref_get(&mddev->active_io); > mddev->flush_bio = bio; > bio = NULL; > } > -- > 2.39.2 > >
On Mon, Nov 27, 2023 at 2:16 PM Song Liu <song@kernel.org> wrote: > > On Fri, Nov 24, 2023 at 10:54 PM Yu Kuai <yukuai1@huaweicloud.com> wrote: > > > > From: Yu Kuai <yukuai3@huawei.com> > > > > Currently rcu is used to protect iterating rdev from submit_flushes(): > > > > submit_flushes remove_and_add_spares > > synchronize_rcu > > pers->hot_remove_disk() > > rcu_read_lock() > > rdev_for_each_rcu > > if (rdev->raid_disk >= 0) > > rdev->radi_disk = -1; > > atomic_inc(&rdev->nr_pending) > > rcu_read_unlock() > > bi = bio_alloc_bioset() > > bi->bi_end_io = md_end_flush > > bi->private = rdev > > submit_bio > > // issue io for removed rdev > > > > Fix this problem by grabbing 'acive_io' before iterating rdev, make sure > > that remove_and_add_spares() won't concurrent with submit_flushes(). > > > > Fixes: a2826aa92e2e ("md: support barrier requests on all personalities.") > > Signed-off-by: Yu Kuai <yukuai3@huawei.com> > > --- > > Changes v2: > > - Add WARN_ON in case md_flush_request() is not called from > > md_handle_request() in future. 
> > > > drivers/md/md.c | 22 ++++++++++++++++------ > > 1 file changed, 16 insertions(+), 6 deletions(-) > > > > diff --git a/drivers/md/md.c b/drivers/md/md.c > > index 86efc9c2ae56..2ffedc39edd6 100644 > > --- a/drivers/md/md.c > > +++ b/drivers/md/md.c > > @@ -538,6 +538,9 @@ static void md_end_flush(struct bio *bio) > > rdev_dec_pending(rdev, mddev); > > > > if (atomic_dec_and_test(&mddev->flush_pending)) { > > + /* The pair is percpu_ref_tryget() from md_flush_request() */ > > + percpu_ref_put(&mddev->active_io); > > + > > /* The pre-request flush has finished */ > > queue_work(md_wq, &mddev->flush_work); > > } > > @@ -557,12 +560,8 @@ static void submit_flushes(struct work_struct *ws) > > rdev_for_each_rcu(rdev, mddev) > > if (rdev->raid_disk >= 0 && > > !test_bit(Faulty, &rdev->flags)) { > > - /* Take two references, one is dropped > > - * when request finishes, one after > > - * we reclaim rcu_read_lock > > - */ > > struct bio *bi; > > - atomic_inc(&rdev->nr_pending); > > + > > atomic_inc(&rdev->nr_pending); > > rcu_read_unlock(); > > bi = bio_alloc_bioset(rdev->bdev, 0, > > @@ -573,7 +572,6 @@ static void submit_flushes(struct work_struct *ws) > > atomic_inc(&mddev->flush_pending); > > submit_bio(bi); > > rcu_read_lock(); > > - rdev_dec_pending(rdev, mddev); > > } > > rcu_read_unlock(); > > if (atomic_dec_and_test(&mddev->flush_pending)) > > @@ -626,6 +624,18 @@ bool md_flush_request(struct mddev *mddev, struct bio *bio) > > /* new request after previous flush is completed */ > > if (ktime_after(req_start, mddev->prev_flush_start)) { > > WARN_ON(mddev->flush_bio); > > + /* > > + * Grab a reference to make sure mddev_suspend() will wait for > > + * this flush to be done. > > + * > > + * md_flush_reqeust() is called under md_handle_request() and > > + * 'active_io' is already grabbed, hence percpu_ref_tryget() > > + * won't fail, percpu_ref_tryget_live() can't be used because > > + * percpu_ref_kill() can be called by mddev_suspend() > > + * concurrently. 
> > + */ > > + if (WARN_ON(percpu_ref_tryget(&mddev->active_io))) > > This should be "if (!WARN_ON(..))", right? > > Song > > > + percpu_ref_get(&mddev->active_io); Actually, we can just use percpu_ref_get(), no? Thanks, Song
Hi, 在 2023/11/28 7:32, Song Liu 写道: > On Mon, Nov 27, 2023 at 2:16 PM Song Liu <song@kernel.org> wrote: >> >> On Fri, Nov 24, 2023 at 10:54 PM Yu Kuai <yukuai1@huaweicloud.com> wrote: >>> >>> From: Yu Kuai <yukuai3@huawei.com> >>> >>> Currently rcu is used to protect iterating rdev from submit_flushes(): >>> >>> submit_flushes remove_and_add_spares >>> synchronize_rcu >>> pers->hot_remove_disk() >>> rcu_read_lock() >>> rdev_for_each_rcu >>> if (rdev->raid_disk >= 0) >>> rdev->radi_disk = -1; >>> atomic_inc(&rdev->nr_pending) >>> rcu_read_unlock() >>> bi = bio_alloc_bioset() >>> bi->bi_end_io = md_end_flush >>> bi->private = rdev >>> submit_bio >>> // issue io for removed rdev >>> >>> Fix this problem by grabbing 'acive_io' before iterating rdev, make sure >>> that remove_and_add_spares() won't concurrent with submit_flushes(). >>> >>> Fixes: a2826aa92e2e ("md: support barrier requests on all personalities.") >>> Signed-off-by: Yu Kuai <yukuai3@huawei.com> >>> --- >>> Changes v2: >>> - Add WARN_ON in case md_flush_request() is not called from >>> md_handle_request() in future. 
>>> >>> drivers/md/md.c | 22 ++++++++++++++++------ >>> 1 file changed, 16 insertions(+), 6 deletions(-) >>> >>> diff --git a/drivers/md/md.c b/drivers/md/md.c >>> index 86efc9c2ae56..2ffedc39edd6 100644 >>> --- a/drivers/md/md.c >>> +++ b/drivers/md/md.c >>> @@ -538,6 +538,9 @@ static void md_end_flush(struct bio *bio) >>> rdev_dec_pending(rdev, mddev); >>> >>> if (atomic_dec_and_test(&mddev->flush_pending)) { >>> + /* The pair is percpu_ref_tryget() from md_flush_request() */ >>> + percpu_ref_put(&mddev->active_io); >>> + >>> /* The pre-request flush has finished */ >>> queue_work(md_wq, &mddev->flush_work); >>> } >>> @@ -557,12 +560,8 @@ static void submit_flushes(struct work_struct *ws) >>> rdev_for_each_rcu(rdev, mddev) >>> if (rdev->raid_disk >= 0 && >>> !test_bit(Faulty, &rdev->flags)) { >>> - /* Take two references, one is dropped >>> - * when request finishes, one after >>> - * we reclaim rcu_read_lock >>> - */ >>> struct bio *bi; >>> - atomic_inc(&rdev->nr_pending); >>> + >>> atomic_inc(&rdev->nr_pending); >>> rcu_read_unlock(); >>> bi = bio_alloc_bioset(rdev->bdev, 0, >>> @@ -573,7 +572,6 @@ static void submit_flushes(struct work_struct *ws) >>> atomic_inc(&mddev->flush_pending); >>> submit_bio(bi); >>> rcu_read_lock(); >>> - rdev_dec_pending(rdev, mddev); >>> } >>> rcu_read_unlock(); >>> if (atomic_dec_and_test(&mddev->flush_pending)) >>> @@ -626,6 +624,18 @@ bool md_flush_request(struct mddev *mddev, struct bio *bio) >>> /* new request after previous flush is completed */ >>> if (ktime_after(req_start, mddev->prev_flush_start)) { >>> WARN_ON(mddev->flush_bio); >>> + /* >>> + * Grab a reference to make sure mddev_suspend() will wait for >>> + * this flush to be done. >>> + * >>> + * md_flush_reqeust() is called under md_handle_request() and >>> + * 'active_io' is already grabbed, hence percpu_ref_tryget() >>> + * won't fail, percpu_ref_tryget_live() can't be used because >>> + * percpu_ref_kill() can be called by mddev_suspend() >>> + * concurrently. 
>>> + */ >>> + if (WARN_ON(percpu_ref_tryget(&mddev->active_io))) >> >> This should be "if (!WARN_ON(..))", right? Sorry for the mistake, this actually should be: if (WARN_ON(!percpu_ref_tryget(...)) >> >> Song >> >>> + percpu_ref_get(&mddev->active_io); > > Actually, we can just use percpu_ref_get(), no? Yes, we can, but if someone else doesn't call md_flush_request() under md_handle_request() in the fulture, there will be problem and percpu_ref_get() can't catch this, do you think it'll make sense to prevent such case? Thanks, Kuai > > Thanks, > Song > . >
On Mon, Nov 27, 2023 at 6:12 PM Yu Kuai <yukuai1@huaweicloud.com> wrote: > > Hi, > > 在 2023/11/28 7:32, Song Liu 写道: > > On Mon, Nov 27, 2023 at 2:16 PM Song Liu <song@kernel.org> wrote: > >> > >> On Fri, Nov 24, 2023 at 10:54 PM Yu Kuai <yukuai1@huaweicloud.com> wrote: > >>> > >>> From: Yu Kuai <yukuai3@huawei.com> > >>> > >>> Currently rcu is used to protect iterating rdev from submit_flushes(): > >>> > >>> submit_flushes remove_and_add_spares > >>> synchronize_rcu > >>> pers->hot_remove_disk() > >>> rcu_read_lock() > >>> rdev_for_each_rcu > >>> if (rdev->raid_disk >= 0) > >>> rdev->radi_disk = -1; > >>> atomic_inc(&rdev->nr_pending) > >>> rcu_read_unlock() > >>> bi = bio_alloc_bioset() > >>> bi->bi_end_io = md_end_flush > >>> bi->private = rdev > >>> submit_bio > >>> // issue io for removed rdev > >>> > >>> Fix this problem by grabbing 'acive_io' before iterating rdev, make sure > >>> that remove_and_add_spares() won't concurrent with submit_flushes(). > >>> > >>> Fixes: a2826aa92e2e ("md: support barrier requests on all personalities.") > >>> Signed-off-by: Yu Kuai <yukuai3@huawei.com> > >>> --- > >>> Changes v2: > >>> - Add WARN_ON in case md_flush_request() is not called from > >>> md_handle_request() in future. 
> >>> > >>> drivers/md/md.c | 22 ++++++++++++++++------ > >>> 1 file changed, 16 insertions(+), 6 deletions(-) > >>> > >>> diff --git a/drivers/md/md.c b/drivers/md/md.c > >>> index 86efc9c2ae56..2ffedc39edd6 100644 > >>> --- a/drivers/md/md.c > >>> +++ b/drivers/md/md.c > >>> @@ -538,6 +538,9 @@ static void md_end_flush(struct bio *bio) > >>> rdev_dec_pending(rdev, mddev); > >>> > >>> if (atomic_dec_and_test(&mddev->flush_pending)) { > >>> + /* The pair is percpu_ref_tryget() from md_flush_request() */ > >>> + percpu_ref_put(&mddev->active_io); > >>> + > >>> /* The pre-request flush has finished */ > >>> queue_work(md_wq, &mddev->flush_work); > >>> } > >>> @@ -557,12 +560,8 @@ static void submit_flushes(struct work_struct *ws) > >>> rdev_for_each_rcu(rdev, mddev) > >>> if (rdev->raid_disk >= 0 && > >>> !test_bit(Faulty, &rdev->flags)) { > >>> - /* Take two references, one is dropped > >>> - * when request finishes, one after > >>> - * we reclaim rcu_read_lock > >>> - */ > >>> struct bio *bi; > >>> - atomic_inc(&rdev->nr_pending); > >>> + > >>> atomic_inc(&rdev->nr_pending); > >>> rcu_read_unlock(); > >>> bi = bio_alloc_bioset(rdev->bdev, 0, > >>> @@ -573,7 +572,6 @@ static void submit_flushes(struct work_struct *ws) > >>> atomic_inc(&mddev->flush_pending); > >>> submit_bio(bi); > >>> rcu_read_lock(); > >>> - rdev_dec_pending(rdev, mddev); > >>> } > >>> rcu_read_unlock(); > >>> if (atomic_dec_and_test(&mddev->flush_pending)) > >>> @@ -626,6 +624,18 @@ bool md_flush_request(struct mddev *mddev, struct bio *bio) > >>> /* new request after previous flush is completed */ > >>> if (ktime_after(req_start, mddev->prev_flush_start)) { > >>> WARN_ON(mddev->flush_bio); > >>> + /* > >>> + * Grab a reference to make sure mddev_suspend() will wait for > >>> + * this flush to be done. 
> >>> + * > >>> + * md_flush_reqeust() is called under md_handle_request() and > >>> + * 'active_io' is already grabbed, hence percpu_ref_tryget() > >>> + * won't fail, percpu_ref_tryget_live() can't be used because > >>> + * percpu_ref_kill() can be called by mddev_suspend() > >>> + * concurrently. > >>> + */ > >>> + if (WARN_ON(percpu_ref_tryget(&mddev->active_io))) > >> > >> This should be "if (!WARN_ON(..))", right? > > Sorry for the mistake, this actually should be: > > if (WARN_ON(!percpu_ref_tryget(...)) > >> > >> Song > >> > >>> + percpu_ref_get(&mddev->active_io); > > > > Actually, we can just use percpu_ref_get(), no? > > Yes, we can, but if someone else doesn't call md_flush_request() under > md_handle_request() in the fulture, there will be problem and > percpu_ref_get() can't catch this, do you think it'll make sense to > prevent such case? This combination is really weird + if (WARN_ON(percpu_ref_tryget(&mddev->active_io))) + percpu_ref_get(&mddev->active_io); We can use percpu_ref_get() here, and add WARN_ON(percpu_ref_is_zero()) earlier in the function. Does this make sense? Thanks, Song
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 86efc9c2ae56..2ffedc39edd6 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -538,6 +538,9 @@ static void md_end_flush(struct bio *bio)
 	rdev_dec_pending(rdev, mddev);
 
 	if (atomic_dec_and_test(&mddev->flush_pending)) {
+		/* The pair is percpu_ref_get() from md_flush_request() */
+		percpu_ref_put(&mddev->active_io);
+
 		/* The pre-request flush has finished */
 		queue_work(md_wq, &mddev->flush_work);
 	}
@@ -557,12 +560,8 @@ static void submit_flushes(struct work_struct *ws)
 	rdev_for_each_rcu(rdev, mddev)
 		if (rdev->raid_disk >= 0 &&
 		    !test_bit(Faulty, &rdev->flags)) {
-			/* Take two references, one is dropped
-			 * when request finishes, one after
-			 * we reclaim rcu_read_lock
-			 */
 			struct bio *bi;
-			atomic_inc(&rdev->nr_pending);
+
 			atomic_inc(&rdev->nr_pending);
 			rcu_read_unlock();
 			bi = bio_alloc_bioset(rdev->bdev, 0,
@@ -573,7 +572,6 @@ static void submit_flushes(struct work_struct *ws)
 			atomic_inc(&mddev->flush_pending);
 			submit_bio(bi);
 			rcu_read_lock();
-			rdev_dec_pending(rdev, mddev);
 		}
 	rcu_read_unlock();
 	if (atomic_dec_and_test(&mddev->flush_pending))
@@ -626,6 +624,18 @@ bool md_flush_request(struct mddev *mddev, struct bio *bio)
 	/* new request after previous flush is completed */
 	if (ktime_after(req_start, mddev->prev_flush_start)) {
 		WARN_ON(mddev->flush_bio);
+		/*
+		 * Grab a reference to make sure mddev_suspend() will wait for
+		 * this flush to be done.
+		 *
+		 * md_flush_request() is called under md_handle_request() and
+		 * 'active_io' is already grabbed, hence percpu_ref_is_zero()
+		 * won't pass, percpu_ref_tryget_live() can't be used because
+		 * percpu_ref_kill() can be called by mddev_suspend()
+		 * concurrently.
+		 */
+		WARN_ON(percpu_ref_is_zero(&mddev->active_io));
+		percpu_ref_get(&mddev->active_io);
 		mddev->flush_bio = bio;
 		bio = NULL;
 	}