| Message ID | 20200702181922.24190-2-niklas.cassel@wdc.com (mailing list archive) |
|---|---|
| State | Superseded |
| Series | Export max open zones and max active zones to sysfs |
On 2020/07/03 3:20, Niklas Cassel wrote:
> Add a new max_open_zones definition in the sysfs documentation.
> This definition will be common for all devices utilizing the zoned block
> device support in the kernel.
>
> Export max open zones according to this new definition for NVMe Zoned
> Namespace devices, ZAC ATA devices (which are treated as SCSI devices by
> the kernel), and ZBC SCSI devices.
>
> Add the new max_open_zones member to struct request_queue, rather
> than as a queue limit, since this property cannot be split across stacking
> drivers.
>
> Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
> Reviewed-by: Javier González <javier@javigon.com>
> ---
> Documentation/block/queue-sysfs.rst | 7 +++++++
> block/blk-sysfs.c | 15 +++++++++++++++
> drivers/nvme/host/zns.c | 1 +
> drivers/scsi/sd_zbc.c | 4 ++++
> include/linux/blkdev.h | 16 ++++++++++++++++
> 5 files changed, 43 insertions(+)

Looks good to me.

Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
On 02/07/2020 20:20, Niklas Cassel wrote:
> Documentation/block/queue-sysfs.rst | 7 +++++++
> block/blk-sysfs.c | 15 +++++++++++++++
> drivers/nvme/host/zns.c | 1 +
> drivers/scsi/sd_zbc.c | 4 ++++
> include/linux/blkdev.h | 16 ++++++++++++++++
> 5 files changed, 43 insertions(+)

Sorry I hadn't noticed before, but you forgot null_blk.
On Fri, Jul 03, 2020 at 08:22:45AM +0000, Johannes Thumshirn wrote:
> On 02/07/2020 20:20, Niklas Cassel wrote:
> > Documentation/block/queue-sysfs.rst | 7 +++++++
> > block/blk-sysfs.c | 15 +++++++++++++++
> > drivers/nvme/host/zns.c | 1 +
> > drivers/scsi/sd_zbc.c | 4 ++++
> > include/linux/blkdev.h | 16 ++++++++++++++++
> > 5 files changed, 43 insertions(+)
>
> Sorry I hadn't noticed before, but you forgot null_blk.

Hello Johannes,

Actually, I haven't forgotten about null_blk :)

The problem with null_blk is that, compared to these simple patches that
simply expose the Max Open Zones/Max Active Zones limits, null_blk
additionally has to keep track of all the zone accounting and return an
error if any of these limits is exceeded.

null_blk right now follows neither the ZBC nor the ZNS specification
(even though it is almost compliant with ZBC). However, since null_blk
is really great to test with, we want it to support Max Active Zones,
even if that concept does not exist in the ZBC standard.

To add to the problem, open() does not work exactly the same way in ZBC
and ZNS. In ZBC, the device always tries to close an implicitly open zone
to make room for an explicitly open zone. In ZNS, a controller that doesn't
do this is still fully compliant with the spec.

So for null_blk, you have things like zones being implicitly closed when
a new zone is opened, and now we will have an additional limit (Max Active
Zones) that we need to consider before we can even try to close a zone.

I've spent a couple of days trying to implement this already, and I think
that I have a way forward. However, considering that vacations are coming
up, and that I have a bunch of other stuff that I need to do before then,
I'm not 100% sure that I will be able to finish it in time for the coming
merge window.

Therefore, I was hoping that we could merge this series as is, and I will
send out the null_blk changes when they are ready, which might or might
not make it for this merge window.

In the meantime, the MAR/MOR properties for null_blk will be exposed as 0,
which means "no limit". (This is the case whenever a zoned block device
driver doesn't make an explicit call to blk_queue_max_{open,active}_zones().)

Kind regards,
Niklas
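The interaction described above (implicitly closing a zone to make room when
the open limit is reached, while also honoring a max-active limit before a new
zone is activated) could be sketched roughly as below. This is purely an
illustrative sketch, not the actual null_blk implementation; all names
(struct zone_dev, nr_imp_open, open_zone_implicitly, ...) are invented, and it
assumes the zone being opened starts out empty.

#include <linux/errno.h>
#include <linux/types.h>

/* Invented bookkeeping structure; 0 in a limit field means "no limit". */
struct zone_dev {
	unsigned int max_open;
	unsigned int max_active;
	unsigned int nr_imp_open;	/* implicitly open zones */
	unsigned int nr_exp_open;	/* explicitly open zones */
	unsigned int nr_closed;		/* closed (still active) zones */
};

static bool can_activate_zone(struct zone_dev *d)
{
	unsigned int active = d->nr_imp_open + d->nr_exp_open + d->nr_closed;

	return !d->max_active || active < d->max_active;
}

static int open_zone_implicitly(struct zone_dev *d)
{
	/*
	 * The zone being opened is assumed empty, so it becomes newly
	 * active; check the active limit before touching any other zone.
	 */
	if (!can_activate_zone(d))
		return -EBUSY;

	/*
	 * If the open limit is reached, make room the ZBC way: close one
	 * implicitly open zone. Closing keeps that zone active, so the
	 * active count is unchanged by this step.
	 */
	if (d->max_open && d->nr_imp_open + d->nr_exp_open >= d->max_open) {
		if (!d->nr_imp_open)
			return -EBUSY;	/* all open zones are explicit */
		d->nr_imp_open--;
		d->nr_closed++;
	}

	d->nr_imp_open++;
	return 0;
}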
On 03/07/2020 11:23, Niklas Cassel wrote:
> Therefore, I was hoping that we could merge this series as is, and I will
> send out the null_blk changes when they are ready, which might or might
> not make it for this merge window.

No problem, I'm just working on MOR support for zonefs and thought about how
I'm going to test it. This is where I noticed that null_blk doesn't really
expose a config knob for MOR. I can do some temporary hacks to test my changes
and wait for yours to materialize.
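A temporary hack of the kind mentioned here might look something like the
following sketch. It is not actual null_blk code: the module parameter,
g_max_open and my_zoned_init() are invented names, and the only real API
assumed is the blk_queue_max_open_zones() helper added by this patch.

#include <linux/module.h>
#include <linux/blkdev.h>

/* Hypothetical knob: 0 keeps the default "no limit" behaviour. */
static unsigned int g_max_open;
module_param_named(max_open_zones, g_max_open, uint, 0444);
MODULE_PARM_DESC(max_open_zones, "Maximum number of open zones (0 = no limit)");

static void my_zoned_init(struct request_queue *q)
{
	/* Only advertises the limit; no zone accounting is enforced here. */
	blk_queue_max_open_zones(q, g_max_open);
}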
On 03/07/2020 14:09, Johannes Thumshirn wrote:
> No problem, I'm just working on MOR support for zonefs and thought about how
> I'm going to test it. This is where I noticed that null_blk doesn't really
> expose a config knob for MOR. I can do some temporary hacks to test my
> changes and wait for yours to materialize.

Another thing I've noticed: can you please introduce the bdev_max_open_zones()
and bdev_max_open_regions() functions as well? Like this (untested):

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bb9e6eb6a7e6..612f4e36828d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1522,6 +1522,15 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev)
 	return 0;
 }
 
+static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (q)
+		return queue_max_open_zones(q);
+	return 0;
+}
+
 static inline int queue_dma_alignment(const struct request_queue *q)
 {
 	return q ? q->dma_alignment : 511;
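With such a helper, a consumer like zonefs could read the limit straight from
the block device. The sketch below is hypothetical (struct zonefs_sb and
sb_max_open_zones are invented names) and only assumes the
bdev_max_open_zones() helper proposed above.

#include <linux/blkdev.h>

struct zonefs_sb {
	unsigned int sb_max_open_zones;
};

static void zonefs_read_open_limit(struct zonefs_sb *sb,
				   struct block_device *bdev)
{
	/* 0 means the device reports no limit on open zones. */
	sb->sb_max_open_zones = bdev_max_open_zones(bdev);
}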
diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
index 6a8513af9201..f01cf8530ae4 100644
--- a/Documentation/block/queue-sysfs.rst
+++ b/Documentation/block/queue-sysfs.rst
@@ -117,6 +117,13 @@ Maximum number of elements in a DMA scatter/gather list with integrity
 data that will be submitted by the block layer core to the associated
 block driver.
 
+max_open_zones (RO)
+-------------------
+For zoned block devices (zoned attribute indicating "host-managed" or
+"host-aware"), the sum of zones belonging to any of the zone states:
+EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
+If this value is 0, there is no limit.
+
 max_sectors_kb (RW)
 -------------------
 This is the maximum number of kilobytes that the block layer will allow
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 02643e149d5e..fa42961e9678 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -305,6 +305,11 @@ static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
 	return queue_var_show(blk_queue_nr_zones(q), page);
 }
 
+static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_max_open_zones(q), page);
+}
+
 static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
 {
 	return queue_var_show((blk_queue_nomerges(q) << 1) |
@@ -667,6 +672,11 @@ static struct queue_sysfs_entry queue_nr_zones_entry = {
 	.show = queue_nr_zones_show,
 };
 
+static struct queue_sysfs_entry queue_max_open_zones_entry = {
+	.attr = {.name = "max_open_zones", .mode = 0444 },
+	.show = queue_max_open_zones_show,
+};
+
 static struct queue_sysfs_entry queue_nomerges_entry = {
 	.attr = {.name = "nomerges", .mode = 0644 },
 	.show = queue_nomerges_show,
@@ -765,6 +775,7 @@ static struct attribute *queue_attrs[] = {
 	&queue_nonrot_entry.attr,
 	&queue_zoned_entry.attr,
 	&queue_nr_zones_entry.attr,
+	&queue_max_open_zones_entry.attr,
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
 	&queue_iostats_entry.attr,
@@ -792,6 +803,10 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
 	    (!q->mq_ops || !q->mq_ops->timeout))
 		return 0;
 
+	if (attr == &queue_max_open_zones_entry.attr &&
+	    !blk_queue_is_zoned(q))
+		return 0;
+
 	return attr->mode;
 }
 
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 04e5b991c00c..3d80b9cf6bfc 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -96,6 +96,7 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
 
 	q->limits.zoned = BLK_ZONED_HM;
 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
+	blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
 free_data:
 	kfree(id);
 	return status;
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 183a20720da9..aa3564139b40 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -717,6 +717,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
 	/* The drive satisfies the kernel restrictions: set it up */
 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
 	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
+	if (sdkp->zones_max_open == U32_MAX)
+		blk_queue_max_open_zones(q, 0);
+	else
+		blk_queue_max_open_zones(q, sdkp->zones_max_open);
 	nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
 
 	/* READ16/WRITE16 is mandatory for ZBC disks */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8fd900998b4e..fe168abcfdda 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -520,6 +520,7 @@ struct request_queue {
 	unsigned int		nr_zones;
 	unsigned long		*conv_zones_bitmap;
 	unsigned long		*seq_zones_wlock;
+	unsigned int		max_open_zones;
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 	/*
@@ -729,6 +730,17 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
 		return true;
 	return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
 }
+
+static inline void blk_queue_max_open_zones(struct request_queue *q,
+		unsigned int max_open_zones)
+{
+	q->max_open_zones = max_open_zones;
+}
+
+static inline unsigned int queue_max_open_zones(const struct request_queue *q)
+{
+	return q->max_open_zones;
+}
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
 {
@@ -744,6 +756,10 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
 {
 	return 0;
 }
+static inline unsigned int queue_max_open_zones(const struct request_queue *q)
+{
+	return 0;
+}
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 static inline bool rq_is_sync(struct request *rq)
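For completeness, this is how the new attribute might be consumed from
userspace. The sketch below is illustrative only (the /sys/block/sda path is
just a placeholder) and follows the documented convention that 0 means no
limit.

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* The attribute is only visible for zoned (host-managed or
	 * host-aware) block devices.
	 */
	FILE *f = fopen("/sys/block/sda/queue/max_open_zones", "r");
	unsigned int max_open;

	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}
	if (fscanf(f, "%u", &max_open) != 1) {
		fclose(f);
		return EXIT_FAILURE;
	}
	fclose(f);

	if (max_open == 0)
		printf("no limit on open zones\n");
	else
		printf("at most %u zones may be open\n", max_open);
	return EXIT_SUCCESS;
}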