
[md-6.12,4/7] md/raid1: factor out helper to handle blocked rdev from raid1_write_request()

Message ID 20240830072721.2112006-5-yukuai1@huaweicloud.com (mailing list archive)
State Superseded
Series md: enhance faulty checking for blocked handling

Checks

Context                            Check    Description
mdraidci/vmtest-md-6_12-PR         success  PR summary
mdraidci/vmtest-md-6_12-VM_Test-0  success  Logs for per-patch-testing

Commit Message

Yu Kuai Aug. 30, 2024, 7:27 a.m. UTC
From: Yu Kuai <yukuai3@huawei.com>

Currently raid1 prepares IO for the underlying disks while checking whether
any disk is blocked; if one is, the allocated resources must be released,
then the thread waits for the rdev to become unblocked and retries preparing
the IO.

Make the code cleaner by checking for blocked rdevs first; it doesn't matter
if an rdev becomes blocked while this IO is being issued.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/md/raid1.c | 84 ++++++++++++++++++++++++++--------------------
 1 file changed, 48 insertions(+), 36 deletions(-)
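
In short, the control flow changes roughly like this (a simplified sketch of
the before/after logic, not the literal patch):

	/* before: a blocked rdev is detected mid-preparation */
 retry_write:
	r1_bio = alloc_r1bio(mddev, bio);
	/* ... set up per-disk bios, possibly finding a blocked rdev ... */
	if (blocked_rdev) {
		/* undo everything, wait, start over */
		mempool_free(r1_bio, &conf->r1bio_pool);
		md_wait_for_blocked_rdev(blocked_rdev, mddev);
		goto retry_write;
	}

	/* after: settle all blocked rdevs up front, then prepare once */
	if (!wait_blocked_rdev(mddev, bio)) {
		bio_wouldblock_error(bio);	/* REQ_NOWAIT caller */
		return;
	}
	r1_bio = alloc_r1bio(mddev, bio);
	/* ... set up per-disk bios; no unwind path needed ... */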

Comments

Mariusz Tkaczyk Aug. 30, 2024, 11:06 a.m. UTC | #1
On Fri, 30 Aug 2024 15:27:18 +0800
Yu Kuai <yukuai1@huaweicloud.com> wrote:

> From: Yu Kuai <yukuai3@huawei.com>
> 
> Currently raid1 prepares IO for the underlying disks while checking whether
> any disk is blocked; if one is, the allocated resources must be released,
> then the thread waits for the rdev to become unblocked and retries preparing
> the IO.
> 
> Make the code cleaner by checking for blocked rdevs first; it doesn't matter
> if an rdev becomes blocked while this IO is being issued.
> 
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  drivers/md/raid1.c | 84 ++++++++++++++++++++++++++--------------------
>  1 file changed, 48 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index f55c8e67d059..aa30c3240c85 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -1406,6 +1406,49 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
>  	submit_bio_noacct(read_bio);
>  }
>  
> +static bool wait_blocked_rdev(struct mddev *mddev, struct bio *bio)
> +{
> +	struct r1conf *conf = mddev->private;
> +	int disks = conf->raid_disks * 2;
> +	int i;
> +
> +retry:
> +	for (i = 0; i < disks; i++) {
> +		struct md_rdev *rdev = conf->mirrors[i].rdev;
> +
> +		if (!rdev)
> +			continue;
> +
> +		if (test_bit(Blocked, &rdev->flags)) {
Don't we need unlikely here?
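
For reference, the check this patch removes does wrap it in unlikely():

	if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {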


> +			if (bio->bi_opf & REQ_NOWAIT)
> +				return false;
> +
> +			mddev_add_trace_msg(rdev->mddev, "raid1 wait rdev %d blocked",
> +					    rdev->raid_disk);
> +			atomic_inc(&rdev->nr_pending);


The retry label moves us back before the for loop (ugh, ugly), and
theoretically we can come back here with the same disk and increase
nr_pending twice or more, because the rdev can become blocked again from a
different thread.
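
Roughly the interleaving I have in mind (hypothetical, simplified):

	/* writer thread */
	if (test_bit(Blocked, &rdev->flags)) {		/* rdev is blocked */
		atomic_inc(&rdev->nr_pending);		/* first increment */
		md_wait_for_blocked_rdev(rdev, rdev->mddev);
		goto retry;		/* meanwhile another thread blocks
					 * the same rdev again */
	}
	/* back at retry: Blocked is set again on the same rdev */
	if (test_bit(Blocked, &rdev->flags)) {
		atomic_inc(&rdev->nr_pending);		/* second increment? */
		...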

This is what I suspect, but I could be wrong.

Mariusz
Yu Kuai Aug. 31, 2024, 1:13 a.m. UTC | #2
Hi,

On 2024/08/30 19:06, Mariusz Tkaczyk wrote:
> On Fri, 30 Aug 2024 15:27:18 +0800
> Yu Kuai <yukuai1@huaweicloud.com> wrote:
> 
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> Currently raid1 prepares IO for the underlying disks while checking whether
>> any disk is blocked; if one is, the allocated resources must be released,
>> then the thread waits for the rdev to become unblocked and retries preparing
>> the IO.
>>
>> Make the code cleaner by checking for blocked rdevs first; it doesn't matter
>> if an rdev becomes blocked while this IO is being issued.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   drivers/md/raid1.c | 84 ++++++++++++++++++++++++++--------------------
>>   1 file changed, 48 insertions(+), 36 deletions(-)
>>
>> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
>> index f55c8e67d059..aa30c3240c85 100644
>> --- a/drivers/md/raid1.c
>> +++ b/drivers/md/raid1.c
>> @@ -1406,6 +1406,49 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
>>  	submit_bio_noacct(read_bio);
>>   }
>>   
>> +static bool wait_blocked_rdev(struct mddev *mddev, struct bio *bio)
>> +{
>> +	struct r1conf *conf = mddev->private;
>> +	int disks = conf->raid_disks * 2;
>> +	int i;
>> +
>> +retry:
>> +	for (i = 0; i < disks; i++) {
>> +		struct md_rdev *rdev = conf->mirrors[i].rdev;
>> +
>> +		if (!rdev)
>> +			continue;
>> +
>> +		if (test_bit(Blocked, &rdev->flags)) {
> Don't we need unlikely here?
> 
> 
>> +			if (bio->bi_opf & REQ_NOWAIT)
>> +				return false;
>> +
>> +			mddev_add_trace_msg(rdev->mddev, "raid1 wait rdev %d blocked",
>> +					    rdev->raid_disk);
>> +			atomic_inc(&rdev->nr_pending);
> 
> 
> The retry label moves us back before the for loop (ugh, ugly), and
> theoretically we can come back here with the same disk and increase
> nr_pending twice or more, because the rdev can become blocked again from a
> different thread.

The retry always happens after md_wait_for_blocked_rdev(), which decreases
nr_pending.
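
The helper drops the reference itself before returning; from
drivers/md/md.c, roughly:

	void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
	{
		sysfs_notify_dirent_safe(rdev->sysfs_state);
		wait_event_timeout(rdev->blocked_wait,
				   !test_bit(Blocked, &rdev->flags) &&
				   !test_bit(BlockedBadBlocks, &rdev->flags),
				   msecs_to_jiffies(5000));
		rdev_dec_pending(rdev, mddev);	/* pairs with the atomic_inc() */
	}

So each trip through the loop is balanced.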

Thanks,
Kuai

> 
> This is what I suspect, but I could be wrong.
> 
> Mariusz

Patch

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index f55c8e67d059..aa30c3240c85 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1406,6 +1406,49 @@  static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 	submit_bio_noacct(read_bio);
 }
 
+static bool wait_blocked_rdev(struct mddev *mddev, struct bio *bio)
+{
+	struct r1conf *conf = mddev->private;
+	int disks = conf->raid_disks * 2;
+	int i;
+
+retry:
+	for (i = 0; i < disks; i++) {
+		struct md_rdev *rdev = conf->mirrors[i].rdev;
+
+		if (!rdev)
+			continue;
+
+		if (test_bit(Blocked, &rdev->flags)) {
+			if (bio->bi_opf & REQ_NOWAIT)
+				return false;
+
+			mddev_add_trace_msg(rdev->mddev, "raid1 wait rdev %d blocked",
+					    rdev->raid_disk);
+			atomic_inc(&rdev->nr_pending);
+			md_wait_for_blocked_rdev(rdev, rdev->mddev);
+			goto retry;
+		}
+
+		/* don't write here until the bad block is acknowledged */
+		if (test_bit(WriteErrorSeen, &rdev->flags) &&
+		    rdev_has_badblock(rdev, bio->bi_iter.bi_sector,
+				      bio_sectors(bio)) < 0) {
+			if (bio->bi_opf & REQ_NOWAIT)
+				return false;
+
+			set_bit(BlockedBadBlocks, &rdev->flags);
+			mddev_add_trace_msg(rdev->mddev, "raid1 wait rdev %d blocked",
+					    rdev->raid_disk);
+			atomic_inc(&rdev->nr_pending);
+			md_wait_for_blocked_rdev(rdev, rdev->mddev);
+			goto retry;
+		}
+	}
+
+	return true;
+}
+
 static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 				int max_write_sectors)
 {
@@ -1413,7 +1456,6 @@  static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 	struct r1bio *r1_bio;
 	int i, disks;
 	unsigned long flags;
-	struct md_rdev *blocked_rdev;
 	int first_clone;
 	int max_sectors;
 	bool write_behind = false;
@@ -1451,7 +1493,11 @@  static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 		return;
 	}
 
- retry_write:
+	if (!wait_blocked_rdev(mddev, bio)) {
+		bio_wouldblock_error(bio);
+		return;
+	}
+
 	r1_bio = alloc_r1bio(mddev, bio);
 	r1_bio->sectors = max_write_sectors;
 
@@ -1467,7 +1513,6 @@  static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 	 */
 
 	disks = conf->raid_disks * 2;
-	blocked_rdev = NULL;
 	max_sectors = r1_bio->sectors;
 	for (i = 0;  i < disks; i++) {
 		struct md_rdev *rdev = conf->mirrors[i].rdev;
@@ -1480,11 +1525,6 @@  static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 		if (!is_discard && rdev && test_bit(WriteMostly, &rdev->flags))
 			write_behind = true;
 
-		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
-			atomic_inc(&rdev->nr_pending);
-			blocked_rdev = rdev;
-			break;
-		}
 		r1_bio->bios[i] = NULL;
 		if (!rdev || test_bit(Faulty, &rdev->flags)) {
 			if (i < conf->raid_disks)
@@ -1500,13 +1540,6 @@  static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 
 			is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
 					     &first_bad, &bad_sectors);
-			if (is_bad < 0) {
-				/* mustn't write here until the bad block is
-				 * acknowledged*/
-				set_bit(BlockedBadBlocks, &rdev->flags);
-				blocked_rdev = rdev;
-				break;
-			}
 			if (is_bad && first_bad <= r1_bio->sector) {
 				/* Cannot write here at all */
 				bad_sectors -= (r1_bio->sector - first_bad);
@@ -1537,27 +1570,6 @@  static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 		r1_bio->bios[i] = bio;
 	}
 
-	if (unlikely(blocked_rdev)) {
-		/* Wait for this device to become unblocked */
-		int j;
-
-		for (j = 0; j < i; j++)
-			if (r1_bio->bios[j])
-				rdev_dec_pending(conf->mirrors[j].rdev, mddev);
-		mempool_free(r1_bio, &conf->r1bio_pool);
-		allow_barrier(conf, bio->bi_iter.bi_sector);
-
-		if (bio->bi_opf & REQ_NOWAIT) {
-			bio_wouldblock_error(bio);
-			return;
-		}
-		mddev_add_trace_msg(mddev, "raid1 wait rdev %d blocked",
-				blocked_rdev->raid_disk);
-		md_wait_for_blocked_rdev(blocked_rdev, mddev);
-		wait_barrier(conf, bio->bi_iter.bi_sector, false);
-		goto retry_write;
-	}
-
 	/*
 	 * When using a bitmap, we may call alloc_behind_master_bio below.
 	 * alloc_behind_master_bio allocates a copy of the data payload a page