diff mbox series

[01/11] block: improve handling of all zones reset operation

Message ID 20210519025529.707897-2-damien.lemoal@wdc.com (mailing list archive)
State Superseded, archived
Delegated to: Mike Snitzer
Headers show
Series dm: Improve zoned block device support | expand

Commit Message

Damien Le Moal May 19, 2021, 2:55 a.m. UTC
SCSI, ZNS and null_blk zoned devices support resetting all zones using
a single command (REQ_OP_ZONE_RESET_ALL), as indicated using the device
request queue flag QUEUE_FLAG_ZONE_RESETALL. This flag is not set for
device mapper targets creating zoned devices. In this case, a user
request for resetting all zones of a device is processed in
blkdev_zone_mgmt() by issuing a REQ_OP_ZONE_RESET operation for each
zone of the device. This leads to different behaviors of the
BLKRESETZONE ioctl() depending on the target device support for the
reset all operation. E.g.

blkzone reset /dev/sdX

will reset all zones of a SCSI device using a single command that will
ignore conventional, read-only or offline zones.

But a dm-linear device including conventional, read-only or offline
zones cannot be reset in the same manner as some of the single zone
reset operations issued by blkdev_zone_mgmt() will fail. E.g.:

blkzone reset /dev/dm-Y
blkzone: /dev/dm-0: BLKRESETZONE ioctl failed: Remote I/O error

To simplify applications and tools development, unify the behavior of
an all-zone reset operation by modifying blkdev_zone_mgmt() to not
issue a zone reset operation for conventional, read-only and offline
zones, thus mimicking what an actual reset-all device command does on a
device supporting REQ_OP_ZONE_RESET_ALL. The zones needing a reset are
identified using a bitmap that is initialized using a zone report.
Since empty zones do not need a reset, also ignore these zones.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
---
 block/blk-zoned.c | 87 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 63 insertions(+), 24 deletions(-)

Comments

Christoph Hellwig May 19, 2021, 9:35 a.m. UTC | #1
The logic looks fine to me, but this makes blkdev_zone_mgmt extremely
convoluted.  What do you think of the version below that splits out
two self contained helpers instead?

---
>From 3ff31f2502cf032ac1331122c9f1a018b769b577 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 19 May 2021 11:55:19 +0900
Subject: block: improve handling of all zones reset operation

SCSI, ZNS and null_blk zoned devices support resetting all zones using
a single command (REQ_OP_ZONE_RESET_ALL), as indicated using the device
request queue flag QUEUE_FLAG_ZONE_RESETALL. This flag is not set for
device mapper targets creating zoned devices. In this case, a user
request for resetting all zones of a device is processed in
blkdev_zone_mgmt() by issuing a REQ_OP_ZONE_RESET operation for each
zone of the device. This leads to different behaviors of the
BLKRESETZONE ioctl() depending on the target device support for the
reset all operation. E.g.

blkzone reset /dev/sdX

will reset all zones of a SCSI device using a single command that will
ignore conventional, read-only or offline zones.

But a dm-linear device including conventional, read-only or offline
zones cannot be reset in the same manner as some of the single zone
reset operations issued by blkdev_zone_mgmt() will fail. E.g.:

blkzone reset /dev/dm-Y
blkzone: /dev/dm-0: BLKRESETZONE ioctl failed: Remote I/O error

To simplify applications and tools development, unify the behavior of
an all-zone reset operation by modifying blkdev_zone_mgmt() to not
issue a zone reset operation for conventional, read-only and offline
zones, thus mimicking what an actual reset-all device command does on a
device supporting REQ_OP_ZONE_RESET_ALL. The zones needing a reset are
identified using a bitmap that is initialized using a zone report.
Since empty zones do not need a reset, also ignore these zones.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
[hch: split into multiple functions]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-zoned.c | 115 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 88 insertions(+), 27 deletions(-)

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 250cb76ee615..48e8376c1db8 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -161,18 +161,85 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
 }
 EXPORT_SYMBOL_GPL(blkdev_report_zones);
 
-static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
-						sector_t sector,
-						sector_t nr_sectors)
+static inline unsigned long *blk_alloc_zone_bitmap(int node,
+						   unsigned int nr_zones)
 {
-	if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
-		return false;
+	return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
+			    GFP_NOIO, node);
+}
 
+static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
+				  void *data)
+{
 	/*
-	 * REQ_OP_ZONE_RESET_ALL can be executed only if the number of sectors
-	 * of the applicable zone range is the entire disk.
+	 * For an all-zones reset, ignore conventional, empty, read-only
+	 * and offline zones.
 	 */
-	return !sector && nr_sectors == get_capacity(bdev->bd_disk);
+	switch (zone->cond) {
+	case BLK_ZONE_COND_NOT_WP:
+	case BLK_ZONE_COND_EMPTY:
+	case BLK_ZONE_COND_READONLY:
+	case BLK_ZONE_COND_OFFLINE:
+		return 0;
+	default:
+		set_bit(idx, (unsigned long *)data);
+		return 0;
+	}
+}
+
+static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
+		     gfp_t gfp_mask)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+	sector_t capacity = get_capacity(bdev->bd_disk);
+	sector_t zone_sectors = blk_queue_zone_sectors(q);
+	unsigned long *need_reset;
+	sector_t sector;
+	struct bio *bio = NULL;
+	int ret;
+
+	need_reset = blk_alloc_zone_bitmap(q->node, q->nr_zones);
+	if (!need_reset)
+		return -ENOMEM;
+	ret = bdev->bd_disk->fops->report_zones(bdev->bd_disk, 0,
+				q->nr_zones, blk_zone_need_reset_cb,
+				need_reset);
+	if (ret < 0)
+		goto out_free_need_reset;
+
+	ret = 0;
+	while (sector < capacity) {
+		if (!test_bit(blk_queue_zone_no(q, sector), need_reset)) {
+			sector += zone_sectors;
+			continue;
+		}
+		bio = blk_next_bio(bio, 0, gfp_mask);
+		bio_set_dev(bio, bdev);
+		bio->bi_opf = REQ_OP_ZONE_RESET | REQ_SYNC;
+		bio->bi_iter.bi_sector = sector;
+		sector += zone_sectors;
+
+		/* This may take a while, so be nice to others */
+		cond_resched();
+	}
+
+	if (bio) {
+		ret = submit_bio_wait(bio);
+		bio_put(bio);
+	}
+out_free_need_reset:
+	kfree(need_reset);
+	return ret;
+}
+
+static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
+{
+	struct bio bio;
+
+	bio_init(&bio, NULL, 0);
+	bio_set_dev(&bio, bdev);
+	bio.bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
+	return submit_bio_wait(&bio);
 }
 
 /**
@@ -200,7 +267,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
 	sector_t capacity = get_capacity(bdev->bd_disk);
 	sector_t end_sector = sector + nr_sectors;
 	struct bio *bio = NULL;
-	int ret;
+	int ret = 0;
 
 	if (!blk_queue_is_zoned(q))
 		return -EOPNOTSUPP;
@@ -222,20 +289,21 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
 	if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity)
 		return -EINVAL;
 
+	/*
+	 * In the case of a zone reset operation over all zones,
+	 * REQ_OP_ZONE_RESET_ALL can be used with devices supporting this
+	 * command. For other devices, we emulate this command behavior by
+	 * identifying the zones needing a reset.
+	 */
+	if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) {
+		if (!blk_queue_zone_resetall(q))
+			return blkdev_zone_reset_all_emulated(bdev, gfp_mask);
+		return blkdev_zone_reset_all(bdev, gfp_mask);
+	}
+
 	while (sector < end_sector) {
 		bio = blk_next_bio(bio, 0, gfp_mask);
 		bio_set_dev(bio, bdev);
-
-		/*
-		 * Special case for the zone reset operation that reset all
-		 * zones, this is useful for applications like mkfs.
-		 */
-		if (op == REQ_OP_ZONE_RESET &&
-		    blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
-			bio->bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
-			break;
-		}
-
 		bio->bi_opf = op | REQ_SYNC;
 		bio->bi_iter.bi_sector = sector;
 		sector += zone_sectors;
@@ -396,13 +464,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
 	return ret;
 }
 
-static inline unsigned long *blk_alloc_zone_bitmap(int node,
-						   unsigned int nr_zones)
-{
-	return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
-			    GFP_NOIO, node);
-}
-
 void blk_queue_free_zone_bitmaps(struct request_queue *q)
 {
 	kfree(q->conv_zones_bitmap);
Damien Le Moal May 19, 2021, 9:48 a.m. UTC | #2
On 2021/05/19 18:36, Christoph Hellwig wrote:
> The logic looks fine to me, but this makes blkdev_zone_mgmt extremely
> convoluted.  What do you think of the version below that splits out
> two self contained helpers instead?

Yep, that works for me. Will use this in v2.

> 
> ---
> From 3ff31f2502cf032ac1331122c9f1a018b769b577 Mon Sep 17 00:00:00 2001
> From: Damien Le Moal <damien.lemoal@wdc.com>
> Date: Wed, 19 May 2021 11:55:19 +0900
> Subject: block: improve handling of all zones reset operation
> 
> SCSI, ZNS and null_blk zoned devices support resetting all zones using
> a single command (REQ_OP_ZONE_RESET_ALL), as indicated using the device
> request queue flag QUEUE_FLAG_ZONE_RESETALL. This flag is not set for
> device mapper targets creating zoned devices. In this case, a user
> request for resetting all zones of a device is processed in
> blkdev_zone_mgmt() by issuing a REQ_OP_ZONE_RESET operation for each
> zone of the device. This leads to different behaviors of the
> BLKRESETZONE ioctl() depending on the target device support for the
> reset all operation. E.g.
> 
> blkzone reset /dev/sdX
> 
> will reset all zones of a SCSI device using a single command that will
> ignore conventional, read-only or offline zones.
> 
> But a dm-linear device including conventional, read-only or offline
> zones cannot be reset in the same manner as some of the single zone
> reset operations issued by blkdev_zone_mgmt() will fail. E.g.:
> 
> blkzone reset /dev/dm-Y
> blkzone: /dev/dm-0: BLKRESETZONE ioctl failed: Remote I/O error
> 
> To simplify applications and tools development, unify the behavior of
> an all-zone reset operation by modifying blkdev_zone_mgmt() to not
> issue a zone reset operation for conventional, read-only and offline
> zones, thus mimicking what an actual reset-all device command does on a
> device supporting REQ_OP_ZONE_RESET_ALL. The zones needing a reset are
> identified using a bitmap that is initialized using a zone report.
> Since empty zones do not need a reset, also ignore these zones.
> 
> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
> [hch: split into multiple functions]
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  block/blk-zoned.c | 115 +++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 88 insertions(+), 27 deletions(-)
> 
> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
> index 250cb76ee615..48e8376c1db8 100644
> --- a/block/blk-zoned.c
> +++ b/block/blk-zoned.c
> @@ -161,18 +161,85 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
>  }
>  EXPORT_SYMBOL_GPL(blkdev_report_zones);
>  
> -static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
> -						sector_t sector,
> -						sector_t nr_sectors)
> +static inline unsigned long *blk_alloc_zone_bitmap(int node,
> +						   unsigned int nr_zones)
>  {
> -	if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
> -		return false;
> +	return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
> +			    GFP_NOIO, node);
> +}
>  
> +static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
> +				  void *data)
> +{
>  	/*
> -	 * REQ_OP_ZONE_RESET_ALL can be executed only if the number of sectors
> -	 * of the applicable zone range is the entire disk.
> +	 * For an all-zones reset, ignore conventional, empty, read-only
> +	 * and offline zones.
>  	 */
> -	return !sector && nr_sectors == get_capacity(bdev->bd_disk);
> +	switch (zone->cond) {
> +	case BLK_ZONE_COND_NOT_WP:
> +	case BLK_ZONE_COND_EMPTY:
> +	case BLK_ZONE_COND_READONLY:
> +	case BLK_ZONE_COND_OFFLINE:
> +		return 0;
> +	default:
> +		set_bit(idx, (unsigned long *)data);
> +		return 0;
> +	}
> +}
> +
> +static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
> +		     gfp_t gfp_mask)
> +{
> +	struct request_queue *q = bdev_get_queue(bdev);
> +	sector_t capacity = get_capacity(bdev->bd_disk);
> +	sector_t zone_sectors = blk_queue_zone_sectors(q);
> +	unsigned long *need_reset;
> +	sector_t sector;
> +	struct bio *bio = NULL;
> +	int ret;
> +
> +	need_reset = blk_alloc_zone_bitmap(q->node, q->nr_zones);
> +	if (!need_reset)
> +		return -ENOMEM;
> +	ret = bdev->bd_disk->fops->report_zones(bdev->bd_disk, 0,
> +				q->nr_zones, blk_zone_need_reset_cb,
> +				need_reset);
> +	if (ret < 0)
> +		goto out_free_need_reset;
> +
> +	ret = 0;
> +	while (sector < capacity) {
> +		if (!test_bit(blk_queue_zone_no(q, sector), need_reset)) {
> +			sector += zone_sectors;
> +			continue;
> +		}
> +		bio = blk_next_bio(bio, 0, gfp_mask);
> +		bio_set_dev(bio, bdev);
> +		bio->bi_opf = REQ_OP_ZONE_RESET | REQ_SYNC;
> +		bio->bi_iter.bi_sector = sector;
> +		sector += zone_sectors;
> +
> +		/* This may take a while, so be nice to others */
> +		cond_resched();
> +	}
> +
> +	if (bio) {
> +		ret = submit_bio_wait(bio);
> +		bio_put(bio);
> +	}
> +out_free_need_reset:
> +	kfree(need_reset);
> +	return ret;
> +}
> +
> +static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
> +{
> +	struct bio bio;
> +
> +	bio_init(&bio, NULL, 0);
> +	bio_set_dev(&bio, bdev);
> +	bio.bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
> +	return submit_bio_wait(&bio);
>  }
>  
>  /**
> @@ -200,7 +267,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
>  	sector_t capacity = get_capacity(bdev->bd_disk);
>  	sector_t end_sector = sector + nr_sectors;
>  	struct bio *bio = NULL;
> -	int ret;
> +	int ret = 0;
>  
>  	if (!blk_queue_is_zoned(q))
>  		return -EOPNOTSUPP;
> @@ -222,20 +289,21 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
>  	if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity)
>  		return -EINVAL;
>  
> +	/*
> +	 * In the case of a zone reset operation over all zones,
> +	 * REQ_OP_ZONE_RESET_ALL can be used with devices supporting this
> +	 * command. For other devices, we emulate this command behavior by
> +	 * identifying the zones needing a reset.
> +	 */
> +	if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) {
> +		if (!blk_queue_zone_resetall(q))
> +			return blkdev_zone_reset_all_emulated(bdev, gfp_mask);
> +		return blkdev_zone_reset_all(bdev, gfp_mask);
> +	}
> +
>  	while (sector < end_sector) {
>  		bio = blk_next_bio(bio, 0, gfp_mask);
>  		bio_set_dev(bio, bdev);
> -
> -		/*
> -		 * Special case for the zone reset operation that reset all
> -		 * zones, this is useful for applications like mkfs.
> -		 */
> -		if (op == REQ_OP_ZONE_RESET &&
> -		    blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
> -			bio->bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
> -			break;
> -		}
> -
>  		bio->bi_opf = op | REQ_SYNC;
>  		bio->bi_iter.bi_sector = sector;
>  		sector += zone_sectors;
> @@ -396,13 +464,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>  	return ret;
>  }
>  
> -static inline unsigned long *blk_alloc_zone_bitmap(int node,
> -						   unsigned int nr_zones)
> -{
> -	return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
> -			    GFP_NOIO, node);
> -}
> -
>  void blk_queue_free_zone_bitmaps(struct request_queue *q)
>  {
>  	kfree(q->conv_zones_bitmap);
>
diff mbox series

Patch

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 250cb76ee615..13f053c06d9e 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -161,18 +161,30 @@  int blkdev_report_zones(struct block_device *bdev, sector_t sector,
 }
 EXPORT_SYMBOL_GPL(blkdev_report_zones);
 
-static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
-						sector_t sector,
-						sector_t nr_sectors)
+static inline unsigned long *blk_alloc_zone_bitmap(int node,
+						   unsigned int nr_zones)
 {
-	if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
-		return false;
+	return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
+			    GFP_NOIO, node);
+}
 
+static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
+				  void *data)
+{
 	/*
-	 * REQ_OP_ZONE_RESET_ALL can be executed only if the number of sectors
-	 * of the applicable zone range is the entire disk.
+	 * For an all-zones reset, ignore conventional, empty, read-only
+	 * and offline zones.
 	 */
-	return !sector && nr_sectors == get_capacity(bdev->bd_disk);
+	switch (zone->cond) {
+	case BLK_ZONE_COND_NOT_WP:
+	case BLK_ZONE_COND_EMPTY:
+	case BLK_ZONE_COND_READONLY:
+	case BLK_ZONE_COND_OFFLINE:
+		return 0;
+	default:
+		set_bit(idx, (unsigned long *)data);
+		return 0;
+	}
 }
 
 /**
@@ -199,8 +211,10 @@  int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
 	sector_t zone_sectors = blk_queue_zone_sectors(q);
 	sector_t capacity = get_capacity(bdev->bd_disk);
 	sector_t end_sector = sector + nr_sectors;
+	unsigned long *need_reset = NULL;
 	struct bio *bio = NULL;
-	int ret;
+	bool reset_all;
+	int ret = 0;
 
 	if (!blk_queue_is_zoned(q))
 		return -EOPNOTSUPP;
@@ -222,16 +236,44 @@  int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
 	if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity)
 		return -EINVAL;
 
+	/*
+	 * In the case of a zone reset operation over all zones,
+	 * REQ_OP_ZONE_RESET_ALL can be used with devices supporting this
+	 * command. For other devices, we emulate this command behavior by
+	 * identifying the zones needing a reset.
+	 */
+	reset_all = op == REQ_OP_ZONE_RESET &&
+		!sector && nr_sectors == capacity;
+	if (reset_all && !blk_queue_zone_resetall(q)) {
+		need_reset = blk_alloc_zone_bitmap(q->node, q->nr_zones);
+		if (!need_reset)
+			return -ENOMEM;
+		ret = bdev->bd_disk->fops->report_zones(bdev->bd_disk, 0,
+					q->nr_zones, blk_zone_need_reset_cb,
+					need_reset);
+		if (ret < 0)
+			return ret;
+		ret = 0;
+	}
+
 	while (sector < end_sector) {
-		bio = blk_next_bio(bio, 0, gfp_mask);
-		bio_set_dev(bio, bdev);
 
 		/*
-		 * Special case for the zone reset operation that reset all
-		 * zones, this is useful for applications like mkfs.
+		 * For an all zone reset operation, if the device does not
+		 + support REQ_OP_ZONE_RESET_ALL, skip zones that do not
+		 * need a reset.
 		 */
-		if (op == REQ_OP_ZONE_RESET &&
-		    blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
+		if (reset_all && !blk_queue_zone_resetall(q) &&
+		    !test_bit(blk_queue_zone_no(q, sector), need_reset)) {
+			sector += zone_sectors;
+			continue;
+		}
+
+		bio = blk_next_bio(bio, 0, gfp_mask);
+		bio_set_dev(bio, bdev);
+
+		if (reset_all && blk_queue_zone_resetall(q)) {
+			/* The device supports REQ_OP_ZONE_RESET_ALL */
 			bio->bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
 			break;
 		}
@@ -244,8 +286,12 @@  int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
 		cond_resched();
 	}
 
-	ret = submit_bio_wait(bio);
-	bio_put(bio);
+	if (bio) {
+		ret = submit_bio_wait(bio);
+		bio_put(bio);
+	}
+
+	kfree(need_reset);
 
 	return ret;
 }
@@ -396,13 +442,6 @@  int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
 	return ret;
 }
 
-static inline unsigned long *blk_alloc_zone_bitmap(int node,
-						   unsigned int nr_zones)
-{
-	return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
-			    GFP_NOIO, node);
-}
-
 void blk_queue_free_zone_bitmaps(struct request_queue *q)
 {
 	kfree(q->conv_zones_bitmap);