diff mbox series

[2/5] null_blk: introduce zone capacity for zoned device

Message ID 20200615233424.13458-3-keith.busch@wdc.com (mailing list archive)
State New, archived
Headers show
Series nvme support for zoned namespace command set | expand

Commit Message

Keith Busch June 15, 2020, 11:34 p.m. UTC
From: Aravind Ramesh <aravind.ramesh@wdc.com>

Allow emulation of a zoned device with a per zone capacity smaller than
the zone size as as defined in the Zoned Namespace (ZNS) Command Set
specification. The zone capacity defaults to the zone size if not
specified and must be smaller than the zone size otherwise.

Signed-off-by: Aravind Ramesh <aravind.ramesh@wdc.com>
---
 drivers/block/null_blk.h       |  2 ++
 drivers/block/null_blk_main.c  |  9 ++++++++-
 drivers/block/null_blk_zoned.c | 17 +++++++++++++++--
 3 files changed, 25 insertions(+), 3 deletions(-)

Comments

Chaitanya Kulkarni June 15, 2020, 11:46 p.m. UTC | #1
Apart from couple of nits this looks good.

Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>

On 6/15/20 4:35 PM, Keith Busch wrote:

> From: Aravind Ramesh <aravind.ramesh@wdc.com>
> 
> Allow emulation of a zoned device with a per zone capacity smaller than
> the zone size as as defined in the Zoned Namespace (ZNS) Command Set
> specification. The zone capacity defaults to the zone size if not
> specified and must be smaller than the zone size otherwise.
> 
> Signed-off-by: Aravind Ramesh <aravind.ramesh@wdc.com>
> ---
>   drivers/block/null_blk.h       |  2 ++
>   drivers/block/null_blk_main.c  |  9 ++++++++-
>   drivers/block/null_blk_zoned.c | 17 +++++++++++++++--
>   3 files changed, 25 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
> index 81b311c9d781..7eadf190528c 100644
> --- a/drivers/block/null_blk.h
> +++ b/drivers/block/null_blk.h
> @@ -44,11 +44,13 @@ struct nullb_device {
>   	unsigned int nr_zones;
>   	struct blk_zone *zones;
>   	sector_t zone_size_sects;
> +	sector_t zone_capacity_sects;
>   
>   	unsigned long size; /* device size in MB */
>   	unsigned long completion_nsec; /* time in ns to complete a request */
>   	unsigned long cache_size; /* disk cache size in MB */
>   	unsigned long zone_size; /* zone size in MB if device is zoned */
> +	unsigned long zone_capacity; /* zone cap in MB if device is zoned */
>   	unsigned int zone_nr_conv; /* number of conventional zones */
>   	unsigned int submit_queues; /* number of submission queues */
>   	unsigned int home_node; /* home node for the device */
> diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
> index 87b31f9ca362..54c5b5df399d 100644
> --- a/drivers/block/null_blk_main.c
> +++ b/drivers/block/null_blk_main.c
> @@ -200,6 +200,10 @@ static unsigned long g_zone_size = 256;
>   module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
>   MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256");
>   
> +static unsigned long g_zone_capacity;
> +module_param_named(zone_capacity, g_zone_capacity, ulong, 0444);
> +MODULE_PARM_DESC(zone_capacity, "Zone capacity in MB when block device is zoned. Can be less than or equal to zone size. Default: Zone size");
> +
>   static unsigned int g_zone_nr_conv;
>   module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
>   MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0");
> @@ -341,6 +345,7 @@ NULLB_DEVICE_ATTR(mbps, uint, NULL);
>   NULLB_DEVICE_ATTR(cache_size, ulong, NULL);
>   NULLB_DEVICE_ATTR(zoned, bool, NULL);
>   NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
> +NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
>   NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
>   
>   static ssize_t nullb_device_power_show(struct config_item *item, char *page)
> @@ -457,6 +462,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
>   	&nullb_device_attr_badblocks,
>   	&nullb_device_attr_zoned,
>   	&nullb_device_attr_zone_size,
> +	&nullb_device_attr_zone_capacity,
>   	&nullb_device_attr_zone_nr_conv,
>   	NULL,
>   };
> @@ -510,7 +516,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
>   
>   static ssize_t memb_group_features_show(struct config_item *item, char *page)
>   {
> -	return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_nr_conv\n");
> +	return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv\n");
This line is becoming ridiculously long even for new 100 char limit. 
Maybe we should do a cleanup patch for feature to string conversion.
>   }
>   
>   CONFIGFS_ATTR_RO(memb_group_, features);
> @@ -571,6 +577,7 @@ static struct nullb_device *null_alloc_dev(void)
>   	dev->use_per_node_hctx = g_use_per_node_hctx;
>   	dev->zoned = g_zoned;
>   	dev->zone_size = g_zone_size;
> +	dev->zone_capacity = g_zone_capacity;
>   	dev->zone_nr_conv = g_zone_nr_conv;
>   	return dev;
>   }
> diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
> index 624aac09b005..b05832eb21b2 100644
> --- a/drivers/block/null_blk_zoned.c
> +++ b/drivers/block/null_blk_zoned.c
> @@ -28,7 +28,17 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
>   		return -EINVAL;
>   	}
>   
> +	if (!dev->zone_capacity)
> +		dev->zone_capacity = dev->zone_size;
> +
> +	if (dev->zone_capacity > dev->zone_size) {
> +		pr_err("null_blk: zone capacity %lu more than its size %lu\n",
> +					dev->zone_capacity, dev->zone_size);
> +		return -EINVAL;
> +	}
> +
>   	dev->zone_size_sects = dev->zone_size << ZONE_SIZE_SHIFT;
> +	dev->zone_capacity_sects = dev->zone_capacity << ZONE_SIZE_SHIFT;
Do we really need zone_capacity_sects ? As far as I can see its is not 
used into fast path, correct me if I'm wrong.
>   	dev->nr_zones = dev_size >>
>   				(SECTOR_SHIFT + ilog2(dev->zone_size_sects));
>   	dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct blk_zone),
> @@ -60,7 +70,7 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
>   
>   		zone->start = zone->wp = sector;
>   		zone->len = dev->zone_size_sects;
> -		zone->capacity = zone->len;
> +		zone->capacity = dev->zone_capacity_sects;
We can just dev->zone_capacity << ZONE_SIZE_SHIFT here.
>   		zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
>   		zone->cond = BLK_ZONE_COND_EMPTY;
>   
> @@ -187,6 +197,9 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
>   			return BLK_STS_IOERR;
>   		}
>   
> +		if (zone->wp + nr_sectors > zone->start + zone->capacity)
> +			return BLK_STS_IOERR;
> +
>   		if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
>   			zone->cond = BLK_ZONE_COND_IMP_OPEN;
>   
> @@ -195,7 +208,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
>   			return ret;
>   
>   		zone->wp += nr_sectors;
> -		if (zone->wp == zone->start + zone->len)
> +		if (zone->wp == zone->start + zone->capacity)
>   			zone->cond = BLK_ZONE_COND_FULL;
>   		return BLK_STS_OK;
>   	default:
>
Daniel Wagner June 16, 2020, 2:18 p.m. UTC | #2
On Tue, Jun 16, 2020 at 08:34:21AM +0900, Keith Busch wrote:
> From: Aravind Ramesh <aravind.ramesh@wdc.com>
> 
> Allow emulation of a zoned device with a per zone capacity smaller than
> the zone size as as defined in the Zoned Namespace (ZNS) Command Set
> specification. The zone capacity defaults to the zone size if not
> specified and must be smaller than the zone size otherwise.
> 
> Signed-off-by: Aravind Ramesh <aravind.ramesh@wdc.com>

Just a little nitpick.

Reviewed-by: Daniel Wagner <dwagner@suse.de>

> ---
>  drivers/block/null_blk.h       |  2 ++
>  drivers/block/null_blk_main.c  |  9 ++++++++-
>  drivers/block/null_blk_zoned.c | 17 +++++++++++++++--
>  3 files changed, 25 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
> index 81b311c9d781..7eadf190528c 100644
> --- a/drivers/block/null_blk.h
> +++ b/drivers/block/null_blk.h
> @@ -44,11 +44,13 @@ struct nullb_device {
>  	unsigned int nr_zones;
>  	struct blk_zone *zones;
>  	sector_t zone_size_sects;
> +	sector_t zone_capacity_sects;
>  
>  	unsigned long size; /* device size in MB */
>  	unsigned long completion_nsec; /* time in ns to complete a request */
>  	unsigned long cache_size; /* disk cache size in MB */
>  	unsigned long zone_size; /* zone size in MB if device is zoned */
> +	unsigned long zone_capacity; /* zone cap in MB if device is zoned */

Maybe also use zone capacity in the comment instead of the abbreviation?
Martin K. Petersen June 16, 2020, 3:48 p.m. UTC | #3
Keith,

> diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
> index 624aac09b005..b05832eb21b2 100644
> --- a/drivers/block/null_blk_zoned.c
> +++ b/drivers/block/null_blk_zoned.c
> @@ -28,7 +28,17 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
>  		return -EINVAL;
>  	}
>  
> +	if (!dev->zone_capacity)
> +		dev->zone_capacity = dev->zone_size;
> +
> +	if (dev->zone_capacity > dev->zone_size) {
> +		pr_err("null_blk: zone capacity %lu more than its size %lu\n",
> +					dev->zone_capacity, dev->zone_size);
> +		return -EINVAL;
> +	}

Minor nit. I found the the error message hard to grok. Maybe something
like:

"null_blk: zone capacity (%lu MB) larger than zone size (%lu MB)\n"

Otherwise this looks fine.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
diff mbox series

Patch

diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index 81b311c9d781..7eadf190528c 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -44,11 +44,13 @@  struct nullb_device {
 	unsigned int nr_zones;
 	struct blk_zone *zones;
 	sector_t zone_size_sects;
+	sector_t zone_capacity_sects;
 
 	unsigned long size; /* device size in MB */
 	unsigned long completion_nsec; /* time in ns to complete a request */
 	unsigned long cache_size; /* disk cache size in MB */
 	unsigned long zone_size; /* zone size in MB if device is zoned */
+	unsigned long zone_capacity; /* zone cap in MB if device is zoned */
 	unsigned int zone_nr_conv; /* number of conventional zones */
 	unsigned int submit_queues; /* number of submission queues */
 	unsigned int home_node; /* home node for the device */
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 87b31f9ca362..54c5b5df399d 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -200,6 +200,10 @@  static unsigned long g_zone_size = 256;
 module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
 MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256");
 
+static unsigned long g_zone_capacity;
+module_param_named(zone_capacity, g_zone_capacity, ulong, 0444);
+MODULE_PARM_DESC(zone_capacity, "Zone capacity in MB when block device is zoned. Can be less than or equal to zone size. Default: Zone size");
+
 static unsigned int g_zone_nr_conv;
 module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
 MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0");
@@ -341,6 +345,7 @@  NULLB_DEVICE_ATTR(mbps, uint, NULL);
 NULLB_DEVICE_ATTR(cache_size, ulong, NULL);
 NULLB_DEVICE_ATTR(zoned, bool, NULL);
 NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
+NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
 NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
 
 static ssize_t nullb_device_power_show(struct config_item *item, char *page)
@@ -457,6 +462,7 @@  static struct configfs_attribute *nullb_device_attrs[] = {
 	&nullb_device_attr_badblocks,
 	&nullb_device_attr_zoned,
 	&nullb_device_attr_zone_size,
+	&nullb_device_attr_zone_capacity,
 	&nullb_device_attr_zone_nr_conv,
 	NULL,
 };
@@ -510,7 +516,7 @@  nullb_group_drop_item(struct config_group *group, struct config_item *item)
 
 static ssize_t memb_group_features_show(struct config_item *item, char *page)
 {
-	return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_nr_conv\n");
+	return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv\n");
 }
 
 CONFIGFS_ATTR_RO(memb_group_, features);
@@ -571,6 +577,7 @@  static struct nullb_device *null_alloc_dev(void)
 	dev->use_per_node_hctx = g_use_per_node_hctx;
 	dev->zoned = g_zoned;
 	dev->zone_size = g_zone_size;
+	dev->zone_capacity = g_zone_capacity;
 	dev->zone_nr_conv = g_zone_nr_conv;
 	return dev;
 }
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index 624aac09b005..b05832eb21b2 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -28,7 +28,17 @@  int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
 		return -EINVAL;
 	}
 
+	if (!dev->zone_capacity)
+		dev->zone_capacity = dev->zone_size;
+
+	if (dev->zone_capacity > dev->zone_size) {
+		pr_err("null_blk: zone capacity %lu more than its size %lu\n",
+					dev->zone_capacity, dev->zone_size);
+		return -EINVAL;
+	}
+
 	dev->zone_size_sects = dev->zone_size << ZONE_SIZE_SHIFT;
+	dev->zone_capacity_sects = dev->zone_capacity << ZONE_SIZE_SHIFT;
 	dev->nr_zones = dev_size >>
 				(SECTOR_SHIFT + ilog2(dev->zone_size_sects));
 	dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct blk_zone),
@@ -60,7 +70,7 @@  int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
 
 		zone->start = zone->wp = sector;
 		zone->len = dev->zone_size_sects;
-		zone->capacity = zone->len;
+		zone->capacity = dev->zone_capacity_sects;
 		zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
 		zone->cond = BLK_ZONE_COND_EMPTY;
 
@@ -187,6 +197,9 @@  static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
 			return BLK_STS_IOERR;
 		}
 
+		if (zone->wp + nr_sectors > zone->start + zone->capacity)
+			return BLK_STS_IOERR;
+
 		if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
 			zone->cond = BLK_ZONE_COND_IMP_OPEN;
 
@@ -195,7 +208,7 @@  static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
 			return ret;
 
 		zone->wp += nr_sectors;
-		if (zone->wp == zone->start + zone->len)
+		if (zone->wp == zone->start + zone->capacity)
 			zone->cond = BLK_ZONE_COND_FULL;
 		return BLK_STS_OK;
 	default: