diff mbox series

[v2,1/2] block: add max_open_zones to blk-sysfs

Message ID 20200702181922.24190-2-niklas.cassel@wdc.com (mailing list archive)
State Superseded
Headers show
Series Export max open zones and max active zones to sysfs | expand

Commit Message

Niklas Cassel July 2, 2020, 6:19 p.m. UTC
Add a new max_open_zones definition in the sysfs documentation.
This definition will be common for all devices utilizing the zoned block
device support in the kernel.

Export max open zones according to this new definition for NVMe Zoned
Namespace devices, ZAC ATA devices (which are treated as SCSI devices by
the kernel), and ZBC SCSI devices.

Add the new max_open_zones member to struct request_queue, rather
than as a queue limit, since this property cannot be split across stacking
drivers.

Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
Reviewed-by: Javier González <javier@javigon.com>
---
 Documentation/block/queue-sysfs.rst |  7 +++++++
 block/blk-sysfs.c                   | 15 +++++++++++++++
 drivers/nvme/host/zns.c             |  1 +
 drivers/scsi/sd_zbc.c               |  4 ++++
 include/linux/blkdev.h              | 16 ++++++++++++++++
 5 files changed, 43 insertions(+)

Comments

Damien Le Moal July 3, 2020, 5:03 a.m. UTC | #1
On 2020/07/03 3:20, Niklas Cassel wrote:
> Add a new max_open_zones definition in the sysfs documentation.
> This definition will be common for all devices utilizing the zoned block
> device support in the kernel.
> 
> Export max open zones according to this new definition for NVMe Zoned
> Namespace devices, ZAC ATA devices (which are treated as SCSI devices by
> the kernel), and ZBC SCSI devices.
> 
> Add the new max_open_zones member to struct request_queue, rather
> than as a queue limit, since this property cannot be split across stacking
> drivers.
> 
> Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
> Reviewed-by: Javier González <javier@javigon.com>
> ---
>  Documentation/block/queue-sysfs.rst |  7 +++++++
>  block/blk-sysfs.c                   | 15 +++++++++++++++
>  drivers/nvme/host/zns.c             |  1 +
>  drivers/scsi/sd_zbc.c               |  4 ++++
>  include/linux/blkdev.h              | 16 ++++++++++++++++
>  5 files changed, 43 insertions(+)
> 
> diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
> index 6a8513af9201..f01cf8530ae4 100644
> --- a/Documentation/block/queue-sysfs.rst
> +++ b/Documentation/block/queue-sysfs.rst
> @@ -117,6 +117,13 @@ Maximum number of elements in a DMA scatter/gather list with integrity
>  data that will be submitted by the block layer core to the associated
>  block driver.
>  
> +max_open_zones (RO)
> +-------------------
> +For zoned block devices (zoned attribute indicating "host-managed" or
> +"host-aware"), the sum of zones belonging to any of the zone states:
> +EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
> +If this value is 0, there is no limit.
> +
>  max_sectors_kb (RW)
>  -------------------
>  This is the maximum number of kilobytes that the block layer will allow
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 02643e149d5e..fa42961e9678 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -305,6 +305,11 @@ static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
>  	return queue_var_show(blk_queue_nr_zones(q), page);
>  }
>  
> +static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
> +{
> +	return queue_var_show(queue_max_open_zones(q), page);
> +}
> +
>  static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
>  {
>  	return queue_var_show((blk_queue_nomerges(q) << 1) |
> @@ -667,6 +672,11 @@ static struct queue_sysfs_entry queue_nr_zones_entry = {
>  	.show = queue_nr_zones_show,
>  };
>  
> +static struct queue_sysfs_entry queue_max_open_zones_entry = {
> +	.attr = {.name = "max_open_zones", .mode = 0444 },
> +	.show = queue_max_open_zones_show,
> +};
> +
>  static struct queue_sysfs_entry queue_nomerges_entry = {
>  	.attr = {.name = "nomerges", .mode = 0644 },
>  	.show = queue_nomerges_show,
> @@ -765,6 +775,7 @@ static struct attribute *queue_attrs[] = {
>  	&queue_nonrot_entry.attr,
>  	&queue_zoned_entry.attr,
>  	&queue_nr_zones_entry.attr,
> +	&queue_max_open_zones_entry.attr,
>  	&queue_nomerges_entry.attr,
>  	&queue_rq_affinity_entry.attr,
>  	&queue_iostats_entry.attr,
> @@ -792,6 +803,10 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
>  		(!q->mq_ops || !q->mq_ops->timeout))
>  			return 0;
>  
> +	if (attr == &queue_max_open_zones_entry.attr &&
> +	    !blk_queue_is_zoned(q))
> +		return 0;
> +
>  	return attr->mode;
>  }
>  
> diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
> index 04e5b991c00c..3d80b9cf6bfc 100644
> --- a/drivers/nvme/host/zns.c
> +++ b/drivers/nvme/host/zns.c
> @@ -96,6 +96,7 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>  
>  	q->limits.zoned = BLK_ZONED_HM;
>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
> +	blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
>  free_data:
>  	kfree(id);
>  	return status;
> diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
> index 183a20720da9..aa3564139b40 100644
> --- a/drivers/scsi/sd_zbc.c
> +++ b/drivers/scsi/sd_zbc.c
> @@ -717,6 +717,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
>  	/* The drive satisfies the kernel restrictions: set it up */
>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>  	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
> +	if (sdkp->zones_max_open == U32_MAX)
> +		blk_queue_max_open_zones(q, 0);
> +	else
> +		blk_queue_max_open_zones(q, sdkp->zones_max_open);
>  	nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
>  
>  	/* READ16/WRITE16 is mandatory for ZBC disks */
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 8fd900998b4e..fe168abcfdda 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -520,6 +520,7 @@ struct request_queue {
>  	unsigned int		nr_zones;
>  	unsigned long		*conv_zones_bitmap;
>  	unsigned long		*seq_zones_wlock;
> +	unsigned int		max_open_zones;
>  #endif /* CONFIG_BLK_DEV_ZONED */
>  
>  	/*
> @@ -729,6 +730,17 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
>  		return true;
>  	return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
>  }
> +
> +static inline void blk_queue_max_open_zones(struct request_queue *q,
> +		unsigned int max_open_zones)
> +{
> +	q->max_open_zones = max_open_zones;
> +}
> +
> +static inline unsigned int queue_max_open_zones(const struct request_queue *q)
> +{
> +	return q->max_open_zones;
> +}
>  #else /* CONFIG_BLK_DEV_ZONED */
>  static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
>  {
> @@ -744,6 +756,10 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
>  {
>  	return 0;
>  }
> +static inline unsigned int queue_max_open_zones(const struct request_queue *q)
> +{
> +	return 0;
> +}
>  #endif /* CONFIG_BLK_DEV_ZONED */
>  
>  static inline bool rq_is_sync(struct request *rq)
> 

Looks good to me.

Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Johannes Thumshirn July 3, 2020, 8:22 a.m. UTC | #2
On 02/07/2020 20:20, Niklas Cassel wrote:
> Documentation/block/queue-sysfs.rst |  7 +++++++
>  block/blk-sysfs.c                   | 15 +++++++++++++++
>  drivers/nvme/host/zns.c             |  1 +
>  drivers/scsi/sd_zbc.c               |  4 ++++
>  include/linux/blkdev.h              | 16 ++++++++++++++++
>  5 files changed, 43 insertions(+)

Sorry I haven't noticed before, but you forgot null_blk.
Niklas Cassel July 3, 2020, 9:23 a.m. UTC | #3
On Fri, Jul 03, 2020 at 08:22:45AM +0000, Johannes Thumshirn wrote:
> On 02/07/2020 20:20, Niklas Cassel wrote:
> > Documentation/block/queue-sysfs.rst |  7 +++++++
> >  block/blk-sysfs.c                   | 15 +++++++++++++++
> >  drivers/nvme/host/zns.c             |  1 +
> >  drivers/scsi/sd_zbc.c               |  4 ++++
> >  include/linux/blkdev.h              | 16 ++++++++++++++++
> >  5 files changed, 43 insertions(+)
> 
> Sorry I haven't noticed before, but you forgot null_blk.

Hello Johannes,

Actually, I haven't forgotten about null_blk :)

The problem with null_blk is that, compared to these simple patches that
simply exposes the Max Open Zones/Max Active Zones, null_blk additionally
has to keep track of all the zone accounting, and give an error if any
of these limits are exceeded.

null_blk right now follows neither the ZBC nor the ZNS specification
(even though it is almost compliant with ZBC). However, since null_blk
is really great to test with, we want it to support Max Active Zones,
even if that concept does not exist in the ZBC standard.

To add to the problem, open() does not work exactly the same in ZBC and
ZNS. In ZBC, the device always tries to close an implicit zone to make
room for an explicit zone. In ZNS, a controller that doesn't do this is
fully compliant with the spec.

So now for null_blk, you have things like zones being implicit closed when
a new zone is opened. And now we will have an additional limit (Max Active
Zones), that we need to consider before we can even try to close a zone.


I've spent a couple of days trying to implement this already, and I think
that I have a way forward. However, considering that vacations are coming
up, and that I have a bunch of other stuff that I need to do before then,
I'm not 100% sure that I will be able to finish it in time for the coming
merge window.

Therefore, I was hoping that we could merge this series as is, and I will
send out the null_blk changes when they are ready, which might or might
not make it for this merge window.

In the meantime, MAR/MOR properties for null_blk will be exposed as 0,
which means "no limit". (Which is the case when a zoned block device driver
doesn't do an explicit call to blk_queue_max_{open,active}_zones()).


Kind regards,
Niklas
Johannes Thumshirn July 3, 2020, 12:09 p.m. UTC | #4
On 03/07/2020 11:23, Niklas Cassel wrote:
> On Fri, Jul 03, 2020 at 08:22:45AM +0000, Johannes Thumshirn wrote:
>> On 02/07/2020 20:20, Niklas Cassel wrote:
>>> Documentation/block/queue-sysfs.rst |  7 +++++++
>>>  block/blk-sysfs.c                   | 15 +++++++++++++++
>>>  drivers/nvme/host/zns.c             |  1 +
>>>  drivers/scsi/sd_zbc.c               |  4 ++++
>>>  include/linux/blkdev.h              | 16 ++++++++++++++++
>>>  5 files changed, 43 insertions(+)
>>
>> Sorry I haven't noticed before, but you forgot null_blk.
> 
> Hello Johannes,
> 
> Actually, I haven't forgotten about null_blk :)
> 
> The problem with null_blk is that, compared to these simple patches that
> simply exposes the Max Open Zones/Max Active Zones, null_blk additionally
> has to keep track of all the zone accounting, and give an error if any
> of these limits are exceeded.
> 
> null_blk right now follows neither the ZBC nor the ZNS specification
> (even though it is almost compliant with ZBC). However, since null_blk
> is really great to test with, we want it to support Max Active Zones,
> even if that concept does not exist in the ZBC standard.
> 
> To add to the problem, open() does not work exactly the same in ZBC and
> ZNS. In ZBC, the device always tries to close an implicit zone to make
> room for an explicit zone. In ZNS, a controller that doesn't do this is
> fully compliant with the spec.
> 
> So now for null_blk, you have things like zones being implicit closed when
> a new zone is opened. And now we will have an additional limit (Max Active
> Zones), that we need to consider before we can even try to close a zone.
> 
> 
> I've spent a couple of days trying to implement this already, and I think
> that I have a way forward. However, considering that vacations are coming
> up, and that I have a bunch of other stuff that I need to do before then,
> I'm not 100% sure that I will be able to finish it in time for the coming
> merge window.
> 
> Therefore, I was hoping that we could merge this series as is, and I will
> send out the null_blk changes when they are ready, which might or might
> not make it for this merge window.

No problem, I'm just working on MOR support for zonefs and though about how
I'm going to test it. This is where I've noticed null_blk doesn't really 
expose a config knob for MOR. I can do some temporary hacks to test my changes
and wait for your's to materialize. 


> In the meantime, MAR/MOR properties for null_blk will be exposed as 0,
> which means "no limit". (Which is the case when a zoned block device driver
> doesn't do an explicit call to blk_queue_max_{open,active}_zones()).
Johannes Thumshirn July 3, 2020, 1:41 p.m. UTC | #5
On 03/07/2020 14:09, Johannes Thumshirn wrote:
> On 03/07/2020 11:23, Niklas Cassel wrote:
>> On Fri, Jul 03, 2020 at 08:22:45AM +0000, Johannes Thumshirn wrote:
>>> On 02/07/2020 20:20, Niklas Cassel wrote:
>>>> Documentation/block/queue-sysfs.rst |  7 +++++++
>>>>  block/blk-sysfs.c                   | 15 +++++++++++++++
>>>>  drivers/nvme/host/zns.c             |  1 +
>>>>  drivers/scsi/sd_zbc.c               |  4 ++++
>>>>  include/linux/blkdev.h              | 16 ++++++++++++++++
>>>>  5 files changed, 43 insertions(+)
>>>
>>> Sorry I haven't noticed before, but you forgot null_blk.
>>
>> Hello Johannes,
>>
>> Actually, I haven't forgotten about null_blk :)
>>
>> The problem with null_blk is that, compared to these simple patches that
>> simply exposes the Max Open Zones/Max Active Zones, null_blk additionally
>> has to keep track of all the zone accounting, and give an error if any
>> of these limits are exceeded.
>>
>> null_blk right now follows neither the ZBC nor the ZNS specification
>> (even though it is almost compliant with ZBC). However, since null_blk
>> is really great to test with, we want it to support Max Active Zones,
>> even if that concept does not exist in the ZBC standard.
>>
>> To add to the problem, open() does not work exactly the same in ZBC and
>> ZNS. In ZBC, the device always tries to close an implicit zone to make
>> room for an explicit zone. In ZNS, a controller that doesn't do this is
>> fully compliant with the spec.
>>
>> So now for null_blk, you have things like zones being implicit closed when
>> a new zone is opened. And now we will have an additional limit (Max Active
>> Zones), that we need to consider before we can even try to close a zone.
>>
>>
>> I've spent a couple of days trying to implement this already, and I think
>> that I have a way forward. However, considering that vacations are coming
>> up, and that I have a bunch of other stuff that I need to do before then,
>> I'm not 100% sure that I will be able to finish it in time for the coming
>> merge window.
>>
>> Therefore, I was hoping that we could merge this series as is, and I will
>> send out the null_blk changes when they are ready, which might or might
>> not make it for this merge window.
> 
> No problem, I'm just working on MOR support for zonefs and though about how
> I'm going to test it. This is where I've noticed null_blk doesn't really 
> expose a config knob for MOR. I can do some temporary hacks to test my changes
> and wait for your's to materialize. 
> 
> 
>> In the meantime, MAR/MOR properties for null_blk will be exposed as 0,
>> which means "no limit". (Which is the case when a zoned block device driver
>> doesn't do an explicit call to blk_queue_max_{open,active}_zones()).
> 
> 

Another thing I've noticed, can you please introduce the bdev_max_open_zones() 
and bdev_max_open_reagions() functions as well?
Like this (untested):

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bb9e6eb6a7e6..612f4e36828d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1522,6 +1522,15 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev)
        return 0;
 }
 
+static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
+{
+       struct request_queue *q = bdev_get_queue(bdev);
+
+       if (q)
+               return queue_max_open_zones(q);
+       return 0;
+}
+
 static inline int queue_dma_alignment(const struct request_queue *q)
 {
        return q ? q->dma_alignment : 511;
diff mbox series

Patch

diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
index 6a8513af9201..f01cf8530ae4 100644
--- a/Documentation/block/queue-sysfs.rst
+++ b/Documentation/block/queue-sysfs.rst
@@ -117,6 +117,13 @@  Maximum number of elements in a DMA scatter/gather list with integrity
 data that will be submitted by the block layer core to the associated
 block driver.
 
+max_open_zones (RO)
+-------------------
+For zoned block devices (zoned attribute indicating "host-managed" or
+"host-aware"), the sum of zones belonging to any of the zone states:
+EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
+If this value is 0, there is no limit.
+
 max_sectors_kb (RW)
 -------------------
 This is the maximum number of kilobytes that the block layer will allow
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 02643e149d5e..fa42961e9678 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -305,6 +305,11 @@  static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
 	return queue_var_show(blk_queue_nr_zones(q), page);
 }
 
+static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_max_open_zones(q), page);
+}
+
 static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
 {
 	return queue_var_show((blk_queue_nomerges(q) << 1) |
@@ -667,6 +672,11 @@  static struct queue_sysfs_entry queue_nr_zones_entry = {
 	.show = queue_nr_zones_show,
 };
 
+static struct queue_sysfs_entry queue_max_open_zones_entry = {
+	.attr = {.name = "max_open_zones", .mode = 0444 },
+	.show = queue_max_open_zones_show,
+};
+
 static struct queue_sysfs_entry queue_nomerges_entry = {
 	.attr = {.name = "nomerges", .mode = 0644 },
 	.show = queue_nomerges_show,
@@ -765,6 +775,7 @@  static struct attribute *queue_attrs[] = {
 	&queue_nonrot_entry.attr,
 	&queue_zoned_entry.attr,
 	&queue_nr_zones_entry.attr,
+	&queue_max_open_zones_entry.attr,
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
 	&queue_iostats_entry.attr,
@@ -792,6 +803,10 @@  static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
 		(!q->mq_ops || !q->mq_ops->timeout))
 			return 0;
 
+	if (attr == &queue_max_open_zones_entry.attr &&
+	    !blk_queue_is_zoned(q))
+		return 0;
+
 	return attr->mode;
 }
 
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 04e5b991c00c..3d80b9cf6bfc 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -96,6 +96,7 @@  int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
 
 	q->limits.zoned = BLK_ZONED_HM;
 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
+	blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
 free_data:
 	kfree(id);
 	return status;
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 183a20720da9..aa3564139b40 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -717,6 +717,10 @@  int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
 	/* The drive satisfies the kernel restrictions: set it up */
 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
 	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
+	if (sdkp->zones_max_open == U32_MAX)
+		blk_queue_max_open_zones(q, 0);
+	else
+		blk_queue_max_open_zones(q, sdkp->zones_max_open);
 	nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
 
 	/* READ16/WRITE16 is mandatory for ZBC disks */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8fd900998b4e..fe168abcfdda 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -520,6 +520,7 @@  struct request_queue {
 	unsigned int		nr_zones;
 	unsigned long		*conv_zones_bitmap;
 	unsigned long		*seq_zones_wlock;
+	unsigned int		max_open_zones;
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 	/*
@@ -729,6 +730,17 @@  static inline bool blk_queue_zone_is_seq(struct request_queue *q,
 		return true;
 	return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
 }
+
+static inline void blk_queue_max_open_zones(struct request_queue *q,
+		unsigned int max_open_zones)
+{
+	q->max_open_zones = max_open_zones;
+}
+
+static inline unsigned int queue_max_open_zones(const struct request_queue *q)
+{
+	return q->max_open_zones;
+}
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
 {
@@ -744,6 +756,10 @@  static inline unsigned int blk_queue_zone_no(struct request_queue *q,
 {
 	return 0;
 }
+static inline unsigned int queue_max_open_zones(const struct request_queue *q)
+{
+	return 0;
+}
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 static inline bool rq_is_sync(struct request *rq)