Message ID | 20240830072721.2112006-5-yukuai1@huaweicloud.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | md: enhance faulty checking for blocked handling | expand |
Context | Check | Description |
---|---|---|
mdraidci/vmtest-md-6_12-PR | success | PR summary |
mdraidci/vmtest-md-6_12-VM_Test-0 | success | Logs for per-patch-testing |
On Fri, 30 Aug 2024 15:27:18 +0800 Yu Kuai <yukuai1@huaweicloud.com> wrote: > From: Yu Kuai <yukuai3@huawei.com> > > Currently raid1 is preparing IO for underlying disks while checking if > any disk is blocked, if so allocated resources must be released, then > waiting for rdev to be unblocked and try to prepare IO again. > > Make code cleaner by checking blocked rdev first, it doesn't matter if > rdev is blocked while issuing this IO. > > Signed-off-by: Yu Kuai <yukuai3@huawei.com> > --- > drivers/md/raid1.c | 84 ++++++++++++++++++++++++++-------------------- > 1 file changed, 48 insertions(+), 36 deletions(-) > > diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c > index f55c8e67d059..aa30c3240c85 100644 > --- a/drivers/md/raid1.c > +++ b/drivers/md/raid1.c > @@ -1406,6 +1406,49 @@ static void raid1_read_request(struct mddev *mddev, > struct bio *bio, submit_bio_noacct(read_bio); > } > > +static bool wait_blocked_rdev(struct mddev *mddev, struct bio *bio) > +{ > + struct r1conf *conf = mddev->private; > + int disks = conf->raid_disks * 2; > + int i; > + > +retry: > + for (i = 0; i < disks; i++) { > + struct md_rdev *rdev = conf->mirrors[i].rdev; > + > + if (!rdev) > + continue; > + > + if (test_bit(Blocked, &rdev->flags)) { Don't we need unlikely here? > + if (bio->bi_opf & REQ_NOWAIT) > + return false; > + > + mddev_add_trace_msg(rdev->mddev, "raid1 wait rdev %d > blocked", > + rdev->raid_disk); > + atomic_inc(&rdev->nr_pending); retry moves us before for (ugh, ugly) and "theoretically" we can back here with the same disk and increase nr_pending twice or more because rdve can become block again from different thread. This is what I suspect but it could be wrong. Mariusz
Hi, 在 2024/08/30 19:06, Mariusz Tkaczyk 写道: > On Fri, 30 Aug 2024 15:27:18 +0800 > Yu Kuai <yukuai1@huaweicloud.com> wrote: > >> From: Yu Kuai <yukuai3@huawei.com> >> >> Currently raid1 is preparing IO for underlying disks while checking if >> any disk is blocked, if so allocated resources must be released, then >> waiting for rdev to be unblocked and try to prepare IO again. >> >> Make code cleaner by checking blocked rdev first, it doesn't matter if >> rdev is blocked while issuing this IO. >> >> Signed-off-by: Yu Kuai <yukuai3@huawei.com> >> --- >> drivers/md/raid1.c | 84 ++++++++++++++++++++++++++-------------------- >> 1 file changed, 48 insertions(+), 36 deletions(-) >> >> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c >> index f55c8e67d059..aa30c3240c85 100644 >> --- a/drivers/md/raid1.c >> +++ b/drivers/md/raid1.c >> @@ -1406,6 +1406,49 @@ static void raid1_read_request(struct mddev *mddev, >> struct bio *bio, submit_bio_noacct(read_bio); >> } >> >> +static bool wait_blocked_rdev(struct mddev *mddev, struct bio *bio) >> +{ >> + struct r1conf *conf = mddev->private; >> + int disks = conf->raid_disks * 2; >> + int i; >> + >> +retry: >> + for (i = 0; i < disks; i++) { >> + struct md_rdev *rdev = conf->mirrors[i].rdev; >> + >> + if (!rdev) >> + continue; >> + >> + if (test_bit(Blocked, &rdev->flags)) { > Don't we need unlikely here? > > >> + if (bio->bi_opf & REQ_NOWAIT) >> + return false; >> + >> + mddev_add_trace_msg(rdev->mddev, "raid1 wait rdev %d >> blocked", >> + rdev->raid_disk); >> + atomic_inc(&rdev->nr_pending); > > > retry moves us before for (ugh, ugly) and "theoretically" we can back here > with the same disk and increase nr_pending twice or more because rdve can become > block again from different thread. Rety is always after md_wait_for_blocked_rdev(), which decrease the nr_pending. Thanks, Kuai > > This is what I suspect but it could be wrong. > > Mariusz > > > > . >
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f55c8e67d059..aa30c3240c85 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1406,6 +1406,49 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, submit_bio_noacct(read_bio); } +static bool wait_blocked_rdev(struct mddev *mddev, struct bio *bio) +{ + struct r1conf *conf = mddev->private; + int disks = conf->raid_disks * 2; + int i; + +retry: + for (i = 0; i < disks; i++) { + struct md_rdev *rdev = conf->mirrors[i].rdev; + + if (!rdev) + continue; + + if (test_bit(Blocked, &rdev->flags)) { + if (bio->bi_opf & REQ_NOWAIT) + return false; + + mddev_add_trace_msg(rdev->mddev, "raid1 wait rdev %d blocked", + rdev->raid_disk); + atomic_inc(&rdev->nr_pending); + md_wait_for_blocked_rdev(rdev, rdev->mddev); + goto retry; + } + + /* don't write here until the bad block is acknowledged */ + if (test_bit(WriteErrorSeen, &rdev->flags) && + rdev_has_badblock(rdev, bio->bi_iter.bi_sector, + bio_sectors(bio)) < 0) { + if (bio->bi_opf & REQ_NOWAIT) + return false; + + set_bit(BlockedBadBlocks, &rdev->flags); + mddev_add_trace_msg(rdev->mddev, "raid1 wait rdev %d blocked", + rdev->raid_disk); + atomic_inc(&rdev->nr_pending); + md_wait_for_blocked_rdev(rdev, rdev->mddev); + goto retry; + } + } + + return true; +} + static void raid1_write_request(struct mddev *mddev, struct bio *bio, int max_write_sectors) { @@ -1413,7 +1456,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, struct r1bio *r1_bio; int i, disks; unsigned long flags; - struct md_rdev *blocked_rdev; int first_clone; int max_sectors; bool write_behind = false; @@ -1451,7 +1493,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, return; } - retry_write: + if (!wait_blocked_rdev(mddev, bio)) { + bio_wouldblock_error(bio); + return; + } + r1_bio = alloc_r1bio(mddev, bio); r1_bio->sectors = max_write_sectors; @@ -1467,7 +1513,6 @@ static void raid1_write_request(struct mddev *mddev, struct 
bio *bio, */ disks = conf->raid_disks * 2; - blocked_rdev = NULL; max_sectors = r1_bio->sectors; for (i = 0; i < disks; i++) { struct md_rdev *rdev = conf->mirrors[i].rdev; @@ -1480,11 +1525,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, if (!is_discard && rdev && test_bit(WriteMostly, &rdev->flags)) write_behind = true; - if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { - atomic_inc(&rdev->nr_pending); - blocked_rdev = rdev; - break; - } r1_bio->bios[i] = NULL; if (!rdev || test_bit(Faulty, &rdev->flags)) { if (i < conf->raid_disks) @@ -1500,13 +1540,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, is_bad = is_badblock(rdev, r1_bio->sector, max_sectors, &first_bad, &bad_sectors); - if (is_bad < 0) { - /* mustn't write here until the bad block is - * acknowledged*/ - set_bit(BlockedBadBlocks, &rdev->flags); - blocked_rdev = rdev; - break; - } if (is_bad && first_bad <= r1_bio->sector) { /* Cannot write here at all */ bad_sectors -= (r1_bio->sector - first_bad); @@ -1537,27 +1570,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, r1_bio->bios[i] = bio; } - if (unlikely(blocked_rdev)) { - /* Wait for this device to become unblocked */ - int j; - - for (j = 0; j < i; j++) - if (r1_bio->bios[j]) - rdev_dec_pending(conf->mirrors[j].rdev, mddev); - mempool_free(r1_bio, &conf->r1bio_pool); - allow_barrier(conf, bio->bi_iter.bi_sector); - - if (bio->bi_opf & REQ_NOWAIT) { - bio_wouldblock_error(bio); - return; - } - mddev_add_trace_msg(mddev, "raid1 wait rdev %d blocked", - blocked_rdev->raid_disk); - md_wait_for_blocked_rdev(blocked_rdev, mddev); - wait_barrier(conf, bio->bi_iter.bi_sector, false); - goto retry_write; - } - /* * When using a bitmap, we may call alloc_behind_master_bio below. * alloc_behind_master_bio allocates a copy of the data payload a page