diff mbox series

block: support arbitrary zone size

Message ID 20200210220816.GA7769@avx2 (mailing list archive)
State New, archived
Headers show
Series block: support arbitrary zone size | expand

Commit Message

Alexey Dobriyan Feb. 10, 2020, 10:08 p.m. UTC
SK hynix is going to ship ZNS device with zone size not being power of 2.

Signed-off-by: Alexey Dobriyan (SK hynix) <adobriyan@gmail.com>
---

 block/blk-settings.c           |    4 +---
 block/blk-zoned.c              |   10 +++++-----
 drivers/block/null_blk_main.c  |   11 ++++++-----
 drivers/block/null_blk_zoned.c |   10 ++--------
 4 files changed, 14 insertions(+), 21 deletions(-)

Comments

Bart Van Assche Feb. 10, 2020, 10:23 p.m. UTC | #1
On 2/10/20 2:08 PM, Alexey Dobriyan wrote:
> -	return (get_capacity(disk) + zone_sectors - 1) >> ilog2(zone_sectors);
> +	return div64_u64(get_capacity(disk) + zone_sectors - 1, zone_sectors);

This kind of change impacts the performance negatively of ZNS devices 
for which the zone size is a power of two. I don't think it's fair to 
make others pay a price for a feature that is only needed for one single 
device.

Bart.
Keith Busch Feb. 10, 2020, 10:33 p.m. UTC | #2
On Tue, Feb 11, 2020 at 01:08:16AM +0300, Alexey Dobriyan wrote:
>  void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors)
>  {
> -	BUG_ON(!is_power_of_2(chunk_sectors));
>  	q->limits.chunk_sectors = chunk_sectors;
>  }

This breaks blk_max_size_offset() if the value isn't a power of 2,
but I wouldn't want to replace its cheap mask with an expensive divide
operation anyway. Please keep the power-of-2 requirement.
diff mbox series

Patch

--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -206,15 +206,13 @@  EXPORT_SYMBOL(blk_queue_max_hw_sectors);
  *
  * Description:
  *    If a driver doesn't want IOs to cross a given chunk size, it can set
- *    this limit and prevent merging across chunks. Note that the chunk size
- *    must currently be a power-of-2 in sectors. Also note that the block
+ *    this limit and prevent merging across chunks. Note that the block
  *    layer must accept a page worth of data at any offset. So if the
  *    crossing of chunks is a hard limitation in the driver, it must still be
  *    prepared to split single page bios.
  **/
 void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors)
 {
-	BUG_ON(!is_power_of_2(chunk_sectors));
 	q->limits.chunk_sectors = chunk_sectors;
 }
 EXPORT_SYMBOL(blk_queue_chunk_sectors);
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -83,7 +83,7 @@  unsigned int blkdev_nr_zones(struct gendisk *disk)
 
 	if (!blk_queue_is_zoned(disk->queue))
 		return 0;
-	return (get_capacity(disk) + zone_sectors - 1) >> ilog2(zone_sectors);
+	return div64_u64(get_capacity(disk) + zone_sectors - 1, zone_sectors);
 }
 EXPORT_SYMBOL_GPL(blkdev_nr_zones);
 
@@ -363,14 +363,14 @@  static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
 	 * smaller last zone.
 	 */
 	if (zone->start == 0) {
-		if (zone->len == 0 || !is_power_of_2(zone->len)) {
-			pr_warn("%s: Invalid zoned device with non power of two zone size (%llu)\n",
-				disk->disk_name, zone->len);
+		if (zone->len == 0) {
+			pr_warn("%s: Invalid zoned device with length 0\n",
+				disk->disk_name);
 			return -ENODEV;
 		}
 
 		args->zone_sectors = zone->len;
-		args->nr_zones = (capacity + zone->len - 1) >> ilog2(zone->len);
+		args->nr_zones = div64_u64(capacity + zone->len - 1, zone->len);
 	} else if (zone->start + args->zone_sectors < capacity) {
 		if (zone->len != args->zone_sectors) {
 			pr_warn("%s: Invalid zoned device with non constant zone size\n",
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -187,7 +187,7 @@  MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Defau
 
 static unsigned long g_zone_size = 256;
 module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
-MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256");
+MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Default: 256");
 
 static unsigned int g_zone_nr_conv;
 module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
@@ -1641,10 +1641,11 @@  static int null_validate_conf(struct nullb_device *dev)
 	if (dev->queue_mode == NULL_Q_BIO)
 		dev->mbps = 0;
 
-	if (dev->zoned &&
-	    (!dev->zone_size || !is_power_of_2(dev->zone_size))) {
-		pr_err("zone_size must be power-of-two\n");
-		return -EINVAL;
+	if (dev->zoned) {
+		if (dev->zone_size == 0) {
+			pr_err("zone_size must be positive\n");
+			return -EINVAL;
+		}
 	}
 
 	return 0;
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -7,7 +7,7 @@ 
 
 static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect)
 {
-	return sect >> ilog2(dev->zone_size_sects);
+	return div64_u64(sect, dev->zone_size_sects * SECTOR_SIZE);
 }
 
 int null_zone_init(struct nullb_device *dev)
@@ -16,14 +16,8 @@  int null_zone_init(struct nullb_device *dev)
 	sector_t sector = 0;
 	unsigned int i;
 
-	if (!is_power_of_2(dev->zone_size)) {
-		pr_err("zone_size must be power-of-two\n");
-		return -EINVAL;
-	}
-
 	dev->zone_size_sects = dev->zone_size << ZONE_SIZE_SHIFT;
-	dev->nr_zones = dev_size >>
-				(SECTOR_SHIFT + ilog2(dev->zone_size_sects));
+	dev->nr_zones = div64_u64(dev_size, dev->zone_size_sects * SECTOR_SIZE);
 	dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct blk_zone),
 			GFP_KERNEL | __GFP_ZERO);
 	if (!dev->zones)