diff mbox series

[4/6] block: introduce IOCTL to report dev properties

Message ID 20200625122152.17359-5-javier@javigon.com (mailing list archive)
State New, archived
Headers show
Series ZNS: Extra features for current patches | expand

Commit Message

Javier González June 25, 2020, 12:21 p.m. UTC
From: Javier González <javier.gonz@samsung.com>

With the addition of ZNS, a new set of properties have been added to the
zoned block device. This patch introduces a new IOCTL to expose these
rroperties to user space.

Signed-off-by: Javier González <javier.gonz@samsung.com>
Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
---
 block/blk-zoned.c             | 46 ++++++++++++++++++++++++++
 block/ioctl.c                 |  2 ++
 drivers/nvme/host/core.c      |  2 ++
 drivers/nvme/host/nvme.h      | 11 +++++++
 drivers/nvme/host/zns.c       | 61 +++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h        |  9 ++++++
 include/uapi/linux/blkzoned.h | 13 ++++++++
 7 files changed, 144 insertions(+)

Comments

Matias Bjorling June 25, 2020, 1:10 p.m. UTC | #1
On 25/06/2020 14.21, Javier González wrote:
> From: Javier González <javier.gonz@samsung.com>
>
> With the addition of ZNS, a new set of properties have been added to the
> zoned block device. This patch introduces a new IOCTL to expose these
> rroperties to user space.
>
> Signed-off-by: Javier González <javier.gonz@samsung.com>
> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> ---
>   block/blk-zoned.c             | 46 ++++++++++++++++++++++++++
>   block/ioctl.c                 |  2 ++
>   drivers/nvme/host/core.c      |  2 ++
>   drivers/nvme/host/nvme.h      | 11 +++++++
>   drivers/nvme/host/zns.c       | 61 +++++++++++++++++++++++++++++++++++
>   include/linux/blkdev.h        |  9 ++++++
>   include/uapi/linux/blkzoned.h | 13 ++++++++
>   7 files changed, 144 insertions(+)
>
> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
> index 704fc15813d1..39ec72af9537 100644
> --- a/block/blk-zoned.c
> +++ b/block/blk-zoned.c
> @@ -169,6 +169,17 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
>   }
>   EXPORT_SYMBOL_GPL(blkdev_report_zones);
>   
> +static int blkdev_report_zonedev_prop(struct block_device *bdev,
> +				      struct blk_zone_dev *zprop)
> +{
> +	struct gendisk *disk = bdev->bd_disk;
> +
> +	if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zone_p))
> +		return -EOPNOTSUPP;
> +
> +	return disk->fops->report_zone_p(disk, zprop);
> +}
> +
>   static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
>   						sector_t sector,
>   						sector_t nr_sectors)
> @@ -430,6 +441,41 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>   				GFP_KERNEL);
>   }
>   
> +int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
> +			unsigned int cmd, unsigned long arg)
> +{
> +	void __user *argp = (void __user *)arg;
> +	struct request_queue *q;
> +	struct blk_zone_dev zprop;
> +	int ret;
> +
> +	if (!argp)
> +		return -EINVAL;
> +
> +	q = bdev_get_queue(bdev);
> +	if (!q)
> +		return -ENXIO;
> +
> +	if (!blk_queue_is_zoned(q))
> +		return -ENOTTY;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EACCES;
> +
> +	if (!(mode & FMODE_WRITE))
> +		return -EBADF;
> +
> +	ret = blkdev_report_zonedev_prop(bdev, &zprop);
> +	if (ret)
> +		goto out;
> +
> +	if (copy_to_user(argp, &zprop, sizeof(struct blk_zone_dev)))
> +		return -EFAULT;
> +
> +out:
> +	return ret;
> +}
> +
>   static inline unsigned long *blk_alloc_zone_bitmap(int node,
>   						   unsigned int nr_zones)
>   {
> diff --git a/block/ioctl.c b/block/ioctl.c
> index 0ea29754e7dd..f7b4e0f2dd4c 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -517,6 +517,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
>   		return blkdev_zone_ops_ioctl(bdev, mode, cmd, arg);
>   	case BLKMGMTZONE:
>   		return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
> +	case BLKZONEDEVPROP:
> +		return blkdev_zonedev_prop(bdev, mode, cmd, arg);
>   	case BLKGETZONESZ:
>   		return put_uint(argp, bdev_zone_sectors(bdev));
>   	case BLKGETNRZONES:
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 5b95c81d2a2d..a32c909a915f 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2254,6 +2254,7 @@ static const struct block_device_operations nvme_fops = {
>   	.getgeo		= nvme_getgeo,
>   	.revalidate_disk= nvme_revalidate_disk,
>   	.report_zones	= nvme_report_zones,
> +	.report_zone_p	= nvme_report_zone_prop,
>   	.pr_ops		= &nvme_pr_ops,
>   };
>   
> @@ -2280,6 +2281,7 @@ const struct block_device_operations nvme_ns_head_ops = {
>   	.compat_ioctl	= nvme_compat_ioctl,
>   	.getgeo		= nvme_getgeo,
>   	.report_zones	= nvme_report_zones,
> +	.report_zone_p	= nvme_report_zone_prop,
>   	.pr_ops		= &nvme_pr_ops,
>   };
>   #endif /* CONFIG_NVME_MULTIPATH */
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index ecf443efdf91..172e0531f37f 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -407,6 +407,14 @@ struct nvme_ns {
>   	u8 pi_type;
>   #ifdef CONFIG_BLK_DEV_ZONED
>   	u64 zsze;
> +
> +	u32 nr_zones;
> +	u32 mar;
> +	u32 mor;
> +	u32 rrl;
> +	u32 frl;
> +	u16 zoc;
> +	u16 ozcs;
>   #endif
>   	unsigned long features;
>   	unsigned long flags;
> @@ -704,11 +712,14 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>   int nvme_report_zones(struct gendisk *disk, sector_t sector,
>   		      unsigned int nr_zones, report_zones_cb cb, void *data);
>   
> +int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev *zprop);
> +
>   blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
>   				       struct nvme_command *cmnd,
>   				       enum nvme_zone_mgmt_action action);
>   #else
>   #define nvme_report_zones NULL
> +#define nvme_report_zone_prop NULL
>   
>   static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
>   		struct request *req, struct nvme_command *cmnd,
> diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
> index 2e6512ac6f01..258d03610cc0 100644
> --- a/drivers/nvme/host/zns.c
> +++ b/drivers/nvme/host/zns.c
> @@ -32,6 +32,28 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl)
>   	return 0;
>   }
>   
> +static u64 nvme_zns_nr_zones(struct nvme_ns *ns)
> +{
> +	struct nvme_command c = { };
> +	struct nvme_zone_report report;
> +	int buflen = sizeof(struct nvme_zone_report);
> +	int ret;
> +
> +	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
> +	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
> +	c.zmr.slba = cpu_to_le64(0);
> +	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
> +	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
> +	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
> +	c.zmr.pr = 0;
> +
> +	ret = nvme_submit_sync_cmd(ns->queue, &c, &report, buflen);
> +	if (ret)
> +		return ret;
> +
> +	return le64_to_cpu(report.nr_zones);
> +}
> +
>   int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>   			  unsigned lbaf)
>   {
> @@ -87,6 +109,13 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>   		goto free_data;
>   	}
>   
> +	ns->nr_zones = nvme_zns_nr_zones(ns);
> +	ns->mar = le32_to_cpu(id->mar);
> +	ns->mor = le32_to_cpu(id->mor);
> +	ns->rrl = le32_to_cpu(id->rrl);
> +	ns->frl = le32_to_cpu(id->frl);
> +	ns->zoc = le16_to_cpu(id->zoc);
> +
>   	q->limits.zoned = BLK_ZONED_HM;
>   	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>   free_data:
> @@ -230,6 +259,38 @@ int nvme_report_zones(struct gendisk *disk, sector_t sector,
>   	return ret;
>   }
>   
> +static int nvme_ns_report_zone_prop(struct nvme_ns *ns, struct blk_zone_dev *zprop)
> +{
> +	zprop->nr_zones = ns->nr_zones;
> +	zprop->zoc = ns->zoc;
> +	zprop->ozcs = ns->ozcs;
> +	zprop->mar = ns->mar;
> +	zprop->mor = ns->mor;
> +	zprop->rrl = ns->rrl;
> +	zprop->frl = ns->frl;
> +
> +	return 0;
> +}
> +
> +int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev *zprop)
> +{
> +	struct nvme_ns_head *head = NULL;
> +	struct nvme_ns *ns;
> +	int srcu_idx, ret;
> +
> +	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
> +	if (unlikely(!ns))
> +		return -EWOULDBLOCK;
> +
> +	if (ns->head->ids.csi == NVME_CSI_ZNS)
> +		ret = nvme_ns_report_zone_prop(ns, zprop);
> +	else
> +		ret = -EINVAL;
> +	nvme_put_ns_from_disk(head, srcu_idx);
> +
> +	return ret;
> +}
> +
>   blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
>   		struct nvme_command *c, enum nvme_zone_mgmt_action action)
>   {
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 8308d8a3720b..0c0faa58b7f4 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -372,6 +372,8 @@ extern int blkdev_zone_ops_ioctl(struct block_device *bdev, fmode_t mode,
>   				  unsigned int cmd, unsigned long arg);
>   extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>   				  unsigned int cmd, unsigned long arg);
> +extern int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
> +			unsigned int cmd, unsigned long arg);
>   #else /* CONFIG_BLK_DEV_ZONED */
>   
>   static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
> @@ -400,6 +402,12 @@ static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
>   	return -ENOTTY;
>   }
>   
> +static inline int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
> +				      unsigned int cmd, unsigned long arg)
> +{
> +	return -ENOTTY;
> +}
> +
>   #endif /* CONFIG_BLK_DEV_ZONED */
>   
>   struct request_queue {
> @@ -1770,6 +1778,7 @@ struct block_device_operations {
>   	int (*report_zones)(struct gendisk *, sector_t sector,
>   			unsigned int nr_zones, report_zones_cb cb, void *data);
>   	char *(*devnode)(struct gendisk *disk, umode_t *mode);
> +	int (*report_zone_p)(struct gendisk *disk, struct blk_zone_dev *zprop);
>   	struct module *owner;
>   	const struct pr_ops *pr_ops;
>   };
> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
> index d0978ee10fc7..0c49a4b2ce5d 100644
> --- a/include/uapi/linux/blkzoned.h
> +++ b/include/uapi/linux/blkzoned.h
> @@ -142,6 +142,18 @@ struct blk_zone_range {
>   	__u64		nr_sectors;
>   };
>   
> +struct blk_zone_dev {
> +	__u32	nr_zones;
> +	__u32	mar;
> +	__u32	mor;
> +	__u32	rrl;
> +	__u32	frl;
> +	__u16	zoc;
> +	__u16	ozcs;
> +	__u32	rsv31[2];
> +	__u64	rsv63[4];
> +};
> +
>   /**
>    * enum blk_zone_action - Zone state transitions managed from user-space
>    *
> @@ -209,5 +221,6 @@ struct blk_zone_mgmt {
>   #define BLKCLOSEZONE	_IOW(0x12, 135, struct blk_zone_range)
>   #define BLKFINISHZONE	_IOW(0x12, 136, struct blk_zone_range)
>   #define BLKMGMTZONE	_IOR(0x12, 137, struct blk_zone_mgmt)
> +#define BLKZONEDEVPROP	_IOR(0x12, 138, struct blk_zone_dev)
>   
>   #endif /* _UAPI_BLKZONED_H */

Nak. These properties can already be retrieved using the nvme ioctl 
passthru command and support have also been added to nvme-cli.
Javier González June 25, 2020, 7:42 p.m. UTC | #2
On 25.06.2020 15:10, Matias Bjørling wrote:
>On 25/06/2020 14.21, Javier González wrote:
>>From: Javier González <javier.gonz@samsung.com>
>>
>>With the addition of ZNS, a new set of properties have been added to the
>>zoned block device. This patch introduces a new IOCTL to expose these
>>rroperties to user space.
>>
>>Signed-off-by: Javier González <javier.gonz@samsung.com>
>>Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
>>Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
>>Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
>>---
>>  block/blk-zoned.c             | 46 ++++++++++++++++++++++++++
>>  block/ioctl.c                 |  2 ++
>>  drivers/nvme/host/core.c      |  2 ++
>>  drivers/nvme/host/nvme.h      | 11 +++++++
>>  drivers/nvme/host/zns.c       | 61 +++++++++++++++++++++++++++++++++++
>>  include/linux/blkdev.h        |  9 ++++++
>>  include/uapi/linux/blkzoned.h | 13 ++++++++
>>  7 files changed, 144 insertions(+)
>>
>>diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>>index 704fc15813d1..39ec72af9537 100644
>>--- a/block/blk-zoned.c
>>+++ b/block/blk-zoned.c
>>@@ -169,6 +169,17 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
>>  }
>>  EXPORT_SYMBOL_GPL(blkdev_report_zones);
>>+static int blkdev_report_zonedev_prop(struct block_device *bdev,
>>+				      struct blk_zone_dev *zprop)
>>+{
>>+	struct gendisk *disk = bdev->bd_disk;
>>+
>>+	if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zone_p))
>>+		return -EOPNOTSUPP;
>>+
>>+	return disk->fops->report_zone_p(disk, zprop);
>>+}
>>+
>>  static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
>>  						sector_t sector,
>>  						sector_t nr_sectors)
>>@@ -430,6 +441,41 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>  				GFP_KERNEL);
>>  }
>>+int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>>+			unsigned int cmd, unsigned long arg)
>>+{
>>+	void __user *argp = (void __user *)arg;
>>+	struct request_queue *q;
>>+	struct blk_zone_dev zprop;
>>+	int ret;
>>+
>>+	if (!argp)
>>+		return -EINVAL;
>>+
>>+	q = bdev_get_queue(bdev);
>>+	if (!q)
>>+		return -ENXIO;
>>+
>>+	if (!blk_queue_is_zoned(q))
>>+		return -ENOTTY;
>>+
>>+	if (!capable(CAP_SYS_ADMIN))
>>+		return -EACCES;
>>+
>>+	if (!(mode & FMODE_WRITE))
>>+		return -EBADF;
>>+
>>+	ret = blkdev_report_zonedev_prop(bdev, &zprop);
>>+	if (ret)
>>+		goto out;
>>+
>>+	if (copy_to_user(argp, &zprop, sizeof(struct blk_zone_dev)))
>>+		return -EFAULT;
>>+
>>+out:
>>+	return ret;
>>+}
>>+
>>  static inline unsigned long *blk_alloc_zone_bitmap(int node,
>>  						   unsigned int nr_zones)
>>  {
>>diff --git a/block/ioctl.c b/block/ioctl.c
>>index 0ea29754e7dd..f7b4e0f2dd4c 100644
>>--- a/block/ioctl.c
>>+++ b/block/ioctl.c
>>@@ -517,6 +517,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
>>  		return blkdev_zone_ops_ioctl(bdev, mode, cmd, arg);
>>  	case BLKMGMTZONE:
>>  		return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
>>+	case BLKZONEDEVPROP:
>>+		return blkdev_zonedev_prop(bdev, mode, cmd, arg);
>>  	case BLKGETZONESZ:
>>  		return put_uint(argp, bdev_zone_sectors(bdev));
>>  	case BLKGETNRZONES:
>>diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>>index 5b95c81d2a2d..a32c909a915f 100644
>>--- a/drivers/nvme/host/core.c
>>+++ b/drivers/nvme/host/core.c
>>@@ -2254,6 +2254,7 @@ static const struct block_device_operations nvme_fops = {
>>  	.getgeo		= nvme_getgeo,
>>  	.revalidate_disk= nvme_revalidate_disk,
>>  	.report_zones	= nvme_report_zones,
>>+	.report_zone_p	= nvme_report_zone_prop,
>>  	.pr_ops		= &nvme_pr_ops,
>>  };
>>@@ -2280,6 +2281,7 @@ const struct block_device_operations nvme_ns_head_ops = {
>>  	.compat_ioctl	= nvme_compat_ioctl,
>>  	.getgeo		= nvme_getgeo,
>>  	.report_zones	= nvme_report_zones,
>>+	.report_zone_p	= nvme_report_zone_prop,
>>  	.pr_ops		= &nvme_pr_ops,
>>  };
>>  #endif /* CONFIG_NVME_MULTIPATH */
>>diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
>>index ecf443efdf91..172e0531f37f 100644
>>--- a/drivers/nvme/host/nvme.h
>>+++ b/drivers/nvme/host/nvme.h
>>@@ -407,6 +407,14 @@ struct nvme_ns {
>>  	u8 pi_type;
>>  #ifdef CONFIG_BLK_DEV_ZONED
>>  	u64 zsze;
>>+
>>+	u32 nr_zones;
>>+	u32 mar;
>>+	u32 mor;
>>+	u32 rrl;
>>+	u32 frl;
>>+	u16 zoc;
>>+	u16 ozcs;
>>  #endif
>>  	unsigned long features;
>>  	unsigned long flags;
>>@@ -704,11 +712,14 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>  int nvme_report_zones(struct gendisk *disk, sector_t sector,
>>  		      unsigned int nr_zones, report_zones_cb cb, void *data);
>>+int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev *zprop);
>>+
>>  blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
>>  				       struct nvme_command *cmnd,
>>  				       enum nvme_zone_mgmt_action action);
>>  #else
>>  #define nvme_report_zones NULL
>>+#define nvme_report_zone_prop NULL
>>  static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
>>  		struct request *req, struct nvme_command *cmnd,
>>diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
>>index 2e6512ac6f01..258d03610cc0 100644
>>--- a/drivers/nvme/host/zns.c
>>+++ b/drivers/nvme/host/zns.c
>>@@ -32,6 +32,28 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl)
>>  	return 0;
>>  }
>>+static u64 nvme_zns_nr_zones(struct nvme_ns *ns)
>>+{
>>+	struct nvme_command c = { };
>>+	struct nvme_zone_report report;
>>+	int buflen = sizeof(struct nvme_zone_report);
>>+	int ret;
>>+
>>+	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
>>+	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
>>+	c.zmr.slba = cpu_to_le64(0);
>>+	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
>>+	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
>>+	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
>>+	c.zmr.pr = 0;
>>+
>>+	ret = nvme_submit_sync_cmd(ns->queue, &c, &report, buflen);
>>+	if (ret)
>>+		return ret;
>>+
>>+	return le64_to_cpu(report.nr_zones);
>>+}
>>+
>>  int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>  			  unsigned lbaf)
>>  {
>>@@ -87,6 +109,13 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>  		goto free_data;
>>  	}
>>+	ns->nr_zones = nvme_zns_nr_zones(ns);
>>+	ns->mar = le32_to_cpu(id->mar);
>>+	ns->mor = le32_to_cpu(id->mor);
>>+	ns->rrl = le32_to_cpu(id->rrl);
>>+	ns->frl = le32_to_cpu(id->frl);
>>+	ns->zoc = le16_to_cpu(id->zoc);
>>+
>>  	q->limits.zoned = BLK_ZONED_HM;
>>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>>  free_data:
>>@@ -230,6 +259,38 @@ int nvme_report_zones(struct gendisk *disk, sector_t sector,
>>  	return ret;
>>  }
>>+static int nvme_ns_report_zone_prop(struct nvme_ns *ns, struct blk_zone_dev *zprop)
>>+{
>>+	zprop->nr_zones = ns->nr_zones;
>>+	zprop->zoc = ns->zoc;
>>+	zprop->ozcs = ns->ozcs;
>>+	zprop->mar = ns->mar;
>>+	zprop->mor = ns->mor;
>>+	zprop->rrl = ns->rrl;
>>+	zprop->frl = ns->frl;
>>+
>>+	return 0;
>>+}
>>+
>>+int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev *zprop)
>>+{
>>+	struct nvme_ns_head *head = NULL;
>>+	struct nvme_ns *ns;
>>+	int srcu_idx, ret;
>>+
>>+	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
>>+	if (unlikely(!ns))
>>+		return -EWOULDBLOCK;
>>+
>>+	if (ns->head->ids.csi == NVME_CSI_ZNS)
>>+		ret = nvme_ns_report_zone_prop(ns, zprop);
>>+	else
>>+		ret = -EINVAL;
>>+	nvme_put_ns_from_disk(head, srcu_idx);
>>+
>>+	return ret;
>>+}
>>+
>>  blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
>>  		struct nvme_command *c, enum nvme_zone_mgmt_action action)
>>  {
>>diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>>index 8308d8a3720b..0c0faa58b7f4 100644
>>--- a/include/linux/blkdev.h
>>+++ b/include/linux/blkdev.h
>>@@ -372,6 +372,8 @@ extern int blkdev_zone_ops_ioctl(struct block_device *bdev, fmode_t mode,
>>  				  unsigned int cmd, unsigned long arg);
>>  extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>  				  unsigned int cmd, unsigned long arg);
>>+extern int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>>+			unsigned int cmd, unsigned long arg);
>>  #else /* CONFIG_BLK_DEV_ZONED */
>>  static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
>>@@ -400,6 +402,12 @@ static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
>>  	return -ENOTTY;
>>  }
>>+static inline int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>>+				      unsigned int cmd, unsigned long arg)
>>+{
>>+	return -ENOTTY;
>>+}
>>+
>>  #endif /* CONFIG_BLK_DEV_ZONED */
>>  struct request_queue {
>>@@ -1770,6 +1778,7 @@ struct block_device_operations {
>>  	int (*report_zones)(struct gendisk *, sector_t sector,
>>  			unsigned int nr_zones, report_zones_cb cb, void *data);
>>  	char *(*devnode)(struct gendisk *disk, umode_t *mode);
>>+	int (*report_zone_p)(struct gendisk *disk, struct blk_zone_dev *zprop);
>>  	struct module *owner;
>>  	const struct pr_ops *pr_ops;
>>  };
>>diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
>>index d0978ee10fc7..0c49a4b2ce5d 100644
>>--- a/include/uapi/linux/blkzoned.h
>>+++ b/include/uapi/linux/blkzoned.h
>>@@ -142,6 +142,18 @@ struct blk_zone_range {
>>  	__u64		nr_sectors;
>>  };
>>+struct blk_zone_dev {
>>+	__u32	nr_zones;
>>+	__u32	mar;
>>+	__u32	mor;
>>+	__u32	rrl;
>>+	__u32	frl;
>>+	__u16	zoc;
>>+	__u16	ozcs;
>>+	__u32	rsv31[2];
>>+	__u64	rsv63[4];
>>+};
>>+
>>  /**
>>   * enum blk_zone_action - Zone state transitions managed from user-space
>>   *
>>@@ -209,5 +221,6 @@ struct blk_zone_mgmt {
>>  #define BLKCLOSEZONE	_IOW(0x12, 135, struct blk_zone_range)
>>  #define BLKFINISHZONE	_IOW(0x12, 136, struct blk_zone_range)
>>  #define BLKMGMTZONE	_IOR(0x12, 137, struct blk_zone_mgmt)
>>+#define BLKZONEDEVPROP	_IOR(0x12, 138, struct blk_zone_dev)
>>  #endif /* _UAPI_BLKZONED_H */
>
>Nak. These properties can already be retrieved using the nvme ioctl 
>passthru command and support have also been added to nvme-cli.
>

These properties are intended to be consumed by an application, so
nvme-cli is of not much use. I would also like to avoid sysfs variables.

We can use nvme passthru, but this bypasses the zoned block abstraction.
Why not representing ZNS features in the standard zoned block API? I am
happy to iterate on the actual implementation if you have feedback.

Javier
Matias Bjorling June 25, 2020, 7:58 p.m. UTC | #3
On 25/06/2020 21.42, Javier González wrote:
> On 25.06.2020 15:10, Matias Bjørling wrote:
>> On 25/06/2020 14.21, Javier González wrote:
>>> From: Javier González <javier.gonz@samsung.com>
>>>
>>> With the addition of ZNS, a new set of properties have been added to 
>>> the
>>> zoned block device. This patch introduces a new IOCTL to expose these
>>> rroperties to user space.
>>>
>>> Signed-off-by: Javier González <javier.gonz@samsung.com>
>>> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
>>> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
>>> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
>>> ---
>>>  block/blk-zoned.c             | 46 ++++++++++++++++++++++++++
>>>  block/ioctl.c                 |  2 ++
>>>  drivers/nvme/host/core.c      |  2 ++
>>>  drivers/nvme/host/nvme.h      | 11 +++++++
>>>  drivers/nvme/host/zns.c       | 61 +++++++++++++++++++++++++++++++++++
>>>  include/linux/blkdev.h        |  9 ++++++
>>>  include/uapi/linux/blkzoned.h | 13 ++++++++
>>>  7 files changed, 144 insertions(+)
>>>
>>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>>> index 704fc15813d1..39ec72af9537 100644
>>> --- a/block/blk-zoned.c
>>> +++ b/block/blk-zoned.c
>>> @@ -169,6 +169,17 @@ int blkdev_report_zones(struct block_device 
>>> *bdev, sector_t sector,
>>>  }
>>>  EXPORT_SYMBOL_GPL(blkdev_report_zones);
>>> +static int blkdev_report_zonedev_prop(struct block_device *bdev,
>>> +                      struct blk_zone_dev *zprop)
>>> +{
>>> +    struct gendisk *disk = bdev->bd_disk;
>>> +
>>> +    if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zone_p))
>>> +        return -EOPNOTSUPP;
>>> +
>>> +    return disk->fops->report_zone_p(disk, zprop);
>>> +}
>>> +
>>>  static inline bool blkdev_allow_reset_all_zones(struct block_device 
>>> *bdev,
>>>                          sector_t sector,
>>>                          sector_t nr_sectors)
>>> @@ -430,6 +441,41 @@ int blkdev_zone_mgmt_ioctl(struct block_device 
>>> *bdev, fmode_t mode,
>>>                  GFP_KERNEL);
>>>  }
>>> +int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>>> +            unsigned int cmd, unsigned long arg)
>>> +{
>>> +    void __user *argp = (void __user *)arg;
>>> +    struct request_queue *q;
>>> +    struct blk_zone_dev zprop;
>>> +    int ret;
>>> +
>>> +    if (!argp)
>>> +        return -EINVAL;
>>> +
>>> +    q = bdev_get_queue(bdev);
>>> +    if (!q)
>>> +        return -ENXIO;
>>> +
>>> +    if (!blk_queue_is_zoned(q))
>>> +        return -ENOTTY;
>>> +
>>> +    if (!capable(CAP_SYS_ADMIN))
>>> +        return -EACCES;
>>> +
>>> +    if (!(mode & FMODE_WRITE))
>>> +        return -EBADF;
>>> +
>>> +    ret = blkdev_report_zonedev_prop(bdev, &zprop);
>>> +    if (ret)
>>> +        goto out;
>>> +
>>> +    if (copy_to_user(argp, &zprop, sizeof(struct blk_zone_dev)))
>>> +        return -EFAULT;
>>> +
>>> +out:
>>> +    return ret;
>>> +}
>>> +
>>>  static inline unsigned long *blk_alloc_zone_bitmap(int node,
>>>                             unsigned int nr_zones)
>>>  {
>>> diff --git a/block/ioctl.c b/block/ioctl.c
>>> index 0ea29754e7dd..f7b4e0f2dd4c 100644
>>> --- a/block/ioctl.c
>>> +++ b/block/ioctl.c
>>> @@ -517,6 +517,8 @@ static int blkdev_common_ioctl(struct 
>>> block_device *bdev, fmode_t mode,
>>>          return blkdev_zone_ops_ioctl(bdev, mode, cmd, arg);
>>>      case BLKMGMTZONE:
>>>          return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
>>> +    case BLKZONEDEVPROP:
>>> +        return blkdev_zonedev_prop(bdev, mode, cmd, arg);
>>>      case BLKGETZONESZ:
>>>          return put_uint(argp, bdev_zone_sectors(bdev));
>>>      case BLKGETNRZONES:
>>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>>> index 5b95c81d2a2d..a32c909a915f 100644
>>> --- a/drivers/nvme/host/core.c
>>> +++ b/drivers/nvme/host/core.c
>>> @@ -2254,6 +2254,7 @@ static const struct block_device_operations 
>>> nvme_fops = {
>>>      .getgeo        = nvme_getgeo,
>>>      .revalidate_disk= nvme_revalidate_disk,
>>>      .report_zones    = nvme_report_zones,
>>> +    .report_zone_p    = nvme_report_zone_prop,
>>>      .pr_ops        = &nvme_pr_ops,
>>>  };
>>> @@ -2280,6 +2281,7 @@ const struct block_device_operations 
>>> nvme_ns_head_ops = {
>>>      .compat_ioctl    = nvme_compat_ioctl,
>>>      .getgeo        = nvme_getgeo,
>>>      .report_zones    = nvme_report_zones,
>>> +    .report_zone_p    = nvme_report_zone_prop,
>>>      .pr_ops        = &nvme_pr_ops,
>>>  };
>>>  #endif /* CONFIG_NVME_MULTIPATH */
>>> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
>>> index ecf443efdf91..172e0531f37f 100644
>>> --- a/drivers/nvme/host/nvme.h
>>> +++ b/drivers/nvme/host/nvme.h
>>> @@ -407,6 +407,14 @@ struct nvme_ns {
>>>      u8 pi_type;
>>>  #ifdef CONFIG_BLK_DEV_ZONED
>>>      u64 zsze;
>>> +
>>> +    u32 nr_zones;
>>> +    u32 mar;
>>> +    u32 mor;
>>> +    u32 rrl;
>>> +    u32 frl;
>>> +    u16 zoc;
>>> +    u16 ozcs;
>>>  #endif
>>>      unsigned long features;
>>>      unsigned long flags;
>>> @@ -704,11 +712,14 @@ int nvme_update_zone_info(struct gendisk 
>>> *disk, struct nvme_ns *ns,
>>>  int nvme_report_zones(struct gendisk *disk, sector_t sector,
>>>                unsigned int nr_zones, report_zones_cb cb, void *data);
>>> +int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev 
>>> *zprop);
>>> +
>>>  blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct 
>>> request *req,
>>>                         struct nvme_command *cmnd,
>>>                         enum nvme_zone_mgmt_action action);
>>>  #else
>>>  #define nvme_report_zones NULL
>>> +#define nvme_report_zone_prop NULL
>>>  static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns 
>>> *ns,
>>>          struct request *req, struct nvme_command *cmnd,
>>> diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
>>> index 2e6512ac6f01..258d03610cc0 100644
>>> --- a/drivers/nvme/host/zns.c
>>> +++ b/drivers/nvme/host/zns.c
>>> @@ -32,6 +32,28 @@ static int nvme_set_max_append(struct nvme_ctrl 
>>> *ctrl)
>>>      return 0;
>>>  }
>>> +static u64 nvme_zns_nr_zones(struct nvme_ns *ns)
>>> +{
>>> +    struct nvme_command c = { };
>>> +    struct nvme_zone_report report;
>>> +    int buflen = sizeof(struct nvme_zone_report);
>>> +    int ret;
>>> +
>>> +    c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
>>> +    c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
>>> +    c.zmr.slba = cpu_to_le64(0);
>>> +    c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
>>> +    c.zmr.zra = NVME_ZRA_ZONE_REPORT;
>>> +    c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
>>> +    c.zmr.pr = 0;
>>> +
>>> +    ret = nvme_submit_sync_cmd(ns->queue, &c, &report, buflen);
>>> +    if (ret)
>>> +        return ret;
>>> +
>>> +    return le64_to_cpu(report.nr_zones);
>>> +}
>>> +
>>>  int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>>                unsigned lbaf)
>>>  {
>>> @@ -87,6 +109,13 @@ int nvme_update_zone_info(struct gendisk *disk, 
>>> struct nvme_ns *ns,
>>>          goto free_data;
>>>      }
>>> +    ns->nr_zones = nvme_zns_nr_zones(ns);
>>> +    ns->mar = le32_to_cpu(id->mar);
>>> +    ns->mor = le32_to_cpu(id->mor);
>>> +    ns->rrl = le32_to_cpu(id->rrl);
>>> +    ns->frl = le32_to_cpu(id->frl);
>>> +    ns->zoc = le16_to_cpu(id->zoc);
>>> +
>>>      q->limits.zoned = BLK_ZONED_HM;
>>>      blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>>>  free_data:
>>> @@ -230,6 +259,38 @@ int nvme_report_zones(struct gendisk *disk, 
>>> sector_t sector,
>>>      return ret;
>>>  }
>>> +static int nvme_ns_report_zone_prop(struct nvme_ns *ns, struct 
>>> blk_zone_dev *zprop)
>>> +{
>>> +    zprop->nr_zones = ns->nr_zones;
>>> +    zprop->zoc = ns->zoc;
>>> +    zprop->ozcs = ns->ozcs;
>>> +    zprop->mar = ns->mar;
>>> +    zprop->mor = ns->mor;
>>> +    zprop->rrl = ns->rrl;
>>> +    zprop->frl = ns->frl;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev 
>>> *zprop)
>>> +{
>>> +    struct nvme_ns_head *head = NULL;
>>> +    struct nvme_ns *ns;
>>> +    int srcu_idx, ret;
>>> +
>>> +    ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
>>> +    if (unlikely(!ns))
>>> +        return -EWOULDBLOCK;
>>> +
>>> +    if (ns->head->ids.csi == NVME_CSI_ZNS)
>>> +        ret = nvme_ns_report_zone_prop(ns, zprop);
>>> +    else
>>> +        ret = -EINVAL;
>>> +    nvme_put_ns_from_disk(head, srcu_idx);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>>  blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct 
>>> request *req,
>>>          struct nvme_command *c, enum nvme_zone_mgmt_action action)
>>>  {
>>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>>> index 8308d8a3720b..0c0faa58b7f4 100644
>>> --- a/include/linux/blkdev.h
>>> +++ b/include/linux/blkdev.h
>>> @@ -372,6 +372,8 @@ extern int blkdev_zone_ops_ioctl(struct 
>>> block_device *bdev, fmode_t mode,
>>>                    unsigned int cmd, unsigned long arg);
>>>  extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, 
>>> fmode_t mode,
>>>                    unsigned int cmd, unsigned long arg);
>>> +extern int blkdev_zonedev_prop(struct block_device *bdev, fmode_t 
>>> mode,
>>> +            unsigned int cmd, unsigned long arg);
>>>  #else /* CONFIG_BLK_DEV_ZONED */
>>>  static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
>>> @@ -400,6 +402,12 @@ static inline int blkdev_zone_mgmt_ioctl(struct 
>>> block_device *bdev,
>>>      return -ENOTTY;
>>>  }
>>> +static inline int blkdev_zonedev_prop(struct block_device *bdev, 
>>> fmode_t mode,
>>> +                      unsigned int cmd, unsigned long arg)
>>> +{
>>> +    return -ENOTTY;
>>> +}
>>> +
>>>  #endif /* CONFIG_BLK_DEV_ZONED */
>>>  struct request_queue {
>>> @@ -1770,6 +1778,7 @@ struct block_device_operations {
>>>      int (*report_zones)(struct gendisk *, sector_t sector,
>>>              unsigned int nr_zones, report_zones_cb cb, void *data);
>>>      char *(*devnode)(struct gendisk *disk, umode_t *mode);
>>> +    int (*report_zone_p)(struct gendisk *disk, struct blk_zone_dev 
>>> *zprop);
>>>      struct module *owner;
>>>      const struct pr_ops *pr_ops;
>>>  };
>>> diff --git a/include/uapi/linux/blkzoned.h 
>>> b/include/uapi/linux/blkzoned.h
>>> index d0978ee10fc7..0c49a4b2ce5d 100644
>>> --- a/include/uapi/linux/blkzoned.h
>>> +++ b/include/uapi/linux/blkzoned.h
>>> @@ -142,6 +142,18 @@ struct blk_zone_range {
>>>      __u64        nr_sectors;
>>>  };
>>> +struct blk_zone_dev {
>>> +    __u32    nr_zones;
>>> +    __u32    mar;
>>> +    __u32    mor;
>>> +    __u32    rrl;
>>> +    __u32    frl;
>>> +    __u16    zoc;
>>> +    __u16    ozcs;
>>> +    __u32    rsv31[2];
>>> +    __u64    rsv63[4];
>>> +};
>>> +
>>>  /**
>>>   * enum blk_zone_action - Zone state transitions managed from 
>>> user-space
>>>   *
>>> @@ -209,5 +221,6 @@ struct blk_zone_mgmt {
>>>  #define BLKCLOSEZONE    _IOW(0x12, 135, struct blk_zone_range)
>>>  #define BLKFINISHZONE    _IOW(0x12, 136, struct blk_zone_range)
>>>  #define BLKMGMTZONE    _IOR(0x12, 137, struct blk_zone_mgmt)
>>> +#define BLKZONEDEVPROP    _IOR(0x12, 138, struct blk_zone_dev)
>>>  #endif /* _UAPI_BLKZONED_H */
>>
>> Nak. These properties can already be retrieved using the nvme ioctl 
>> passthru command and support have also been added to nvme-cli.
>>
>
> These properties are intended to be consumed by an application, so
> nvme-cli is of not much use. I would also like to avoid sysfs variables.
>
I can recommend libnvme https://github.com/linux-nvme/libnvme

It provides an easy way to retrieve the options.

> We can use nvme passthru, but this bypasses the zoned block abstraction.
> Why not representing ZNS features in the standard zoned block API? I am
> happy to iterate on the actual implementation if you have feedback.
>
> Javier
>
Keith Busch June 25, 2020, 8:25 p.m. UTC | #4
On Thu, Jun 25, 2020 at 09:42:48PM +0200, Javier González wrote:
> We can use nvme passthru, but this bypasses the zoned block abstraction.
> Why not representing ZNS features in the standard zoned block API?

This looks too nvme zns specific to want the block layer in the middle.
Just use the driver's passthrough interface.
Damien Le Moal June 26, 2020, 12:57 a.m. UTC | #5
On 2020/06/26 4:42, Javier González wrote:
> On 25.06.2020 15:10, Matias Bjørling wrote:
>> On 25/06/2020 14.21, Javier González wrote:
>>> From: Javier González <javier.gonz@samsung.com>
>>>
>>> With the addition of ZNS, a new set of properties have been added to the
>>> zoned block device. This patch introduces a new IOCTL to expose these
>>> rroperties to user space.
>>>
>>> Signed-off-by: Javier González <javier.gonz@samsung.com>
>>> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
>>> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
>>> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
>>> ---
>>>  block/blk-zoned.c             | 46 ++++++++++++++++++++++++++
>>>  block/ioctl.c                 |  2 ++
>>>  drivers/nvme/host/core.c      |  2 ++
>>>  drivers/nvme/host/nvme.h      | 11 +++++++
>>>  drivers/nvme/host/zns.c       | 61 +++++++++++++++++++++++++++++++++++
>>>  include/linux/blkdev.h        |  9 ++++++
>>>  include/uapi/linux/blkzoned.h | 13 ++++++++
>>>  7 files changed, 144 insertions(+)
>>>
>>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>>> index 704fc15813d1..39ec72af9537 100644
>>> --- a/block/blk-zoned.c
>>> +++ b/block/blk-zoned.c
>>> @@ -169,6 +169,17 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
>>>  }
>>>  EXPORT_SYMBOL_GPL(blkdev_report_zones);
>>> +static int blkdev_report_zonedev_prop(struct block_device *bdev,
>>> +				      struct blk_zone_dev *zprop)
>>> +{
>>> +	struct gendisk *disk = bdev->bd_disk;
>>> +
>>> +	if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zone_p))
>>> +		return -EOPNOTSUPP;
>>> +
>>> +	return disk->fops->report_zone_p(disk, zprop);
>>> +}
>>> +
>>>  static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
>>>  						sector_t sector,
>>>  						sector_t nr_sectors)
>>> @@ -430,6 +441,41 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>>  				GFP_KERNEL);
>>>  }
>>> +int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>>> +			unsigned int cmd, unsigned long arg)
>>> +{
>>> +	void __user *argp = (void __user *)arg;
>>> +	struct request_queue *q;
>>> +	struct blk_zone_dev zprop;
>>> +	int ret;
>>> +
>>> +	if (!argp)
>>> +		return -EINVAL;
>>> +
>>> +	q = bdev_get_queue(bdev);
>>> +	if (!q)
>>> +		return -ENXIO;
>>> +
>>> +	if (!blk_queue_is_zoned(q))
>>> +		return -ENOTTY;
>>> +
>>> +	if (!capable(CAP_SYS_ADMIN))
>>> +		return -EACCES;
>>> +
>>> +	if (!(mode & FMODE_WRITE))
>>> +		return -EBADF;
>>> +
>>> +	ret = blkdev_report_zonedev_prop(bdev, &zprop);
>>> +	if (ret)
>>> +		goto out;
>>> +
>>> +	if (copy_to_user(argp, &zprop, sizeof(struct blk_zone_dev)))
>>> +		return -EFAULT;
>>> +
>>> +out:
>>> +	return ret;
>>> +}
>>> +
>>>  static inline unsigned long *blk_alloc_zone_bitmap(int node,
>>>  						   unsigned int nr_zones)
>>>  {
>>> diff --git a/block/ioctl.c b/block/ioctl.c
>>> index 0ea29754e7dd..f7b4e0f2dd4c 100644
>>> --- a/block/ioctl.c
>>> +++ b/block/ioctl.c
>>> @@ -517,6 +517,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
>>>  		return blkdev_zone_ops_ioctl(bdev, mode, cmd, arg);
>>>  	case BLKMGMTZONE:
>>>  		return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
>>> +	case BLKZONEDEVPROP:
>>> +		return blkdev_zonedev_prop(bdev, mode, cmd, arg);
>>>  	case BLKGETZONESZ:
>>>  		return put_uint(argp, bdev_zone_sectors(bdev));
>>>  	case BLKGETNRZONES:
>>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>>> index 5b95c81d2a2d..a32c909a915f 100644
>>> --- a/drivers/nvme/host/core.c
>>> +++ b/drivers/nvme/host/core.c
>>> @@ -2254,6 +2254,7 @@ static const struct block_device_operations nvme_fops = {
>>>  	.getgeo		= nvme_getgeo,
>>>  	.revalidate_disk= nvme_revalidate_disk,
>>>  	.report_zones	= nvme_report_zones,
>>> +	.report_zone_p	= nvme_report_zone_prop,
>>>  	.pr_ops		= &nvme_pr_ops,
>>>  };
>>> @@ -2280,6 +2281,7 @@ const struct block_device_operations nvme_ns_head_ops = {
>>>  	.compat_ioctl	= nvme_compat_ioctl,
>>>  	.getgeo		= nvme_getgeo,
>>>  	.report_zones	= nvme_report_zones,
>>> +	.report_zone_p	= nvme_report_zone_prop,
>>>  	.pr_ops		= &nvme_pr_ops,
>>>  };
>>>  #endif /* CONFIG_NVME_MULTIPATH */
>>> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
>>> index ecf443efdf91..172e0531f37f 100644
>>> --- a/drivers/nvme/host/nvme.h
>>> +++ b/drivers/nvme/host/nvme.h
>>> @@ -407,6 +407,14 @@ struct nvme_ns {
>>>  	u8 pi_type;
>>>  #ifdef CONFIG_BLK_DEV_ZONED
>>>  	u64 zsze;
>>> +
>>> +	u32 nr_zones;
>>> +	u32 mar;
>>> +	u32 mor;
>>> +	u32 rrl;
>>> +	u32 frl;
>>> +	u16 zoc;
>>> +	u16 ozcs;
>>>  #endif
>>>  	unsigned long features;
>>>  	unsigned long flags;
>>> @@ -704,11 +712,14 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>>  int nvme_report_zones(struct gendisk *disk, sector_t sector,
>>>  		      unsigned int nr_zones, report_zones_cb cb, void *data);
>>> +int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev *zprop);
>>> +
>>>  blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
>>>  				       struct nvme_command *cmnd,
>>>  				       enum nvme_zone_mgmt_action action);
>>>  #else
>>>  #define nvme_report_zones NULL
>>> +#define nvme_report_zone_prop NULL
>>>  static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
>>>  		struct request *req, struct nvme_command *cmnd,
>>> diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
>>> index 2e6512ac6f01..258d03610cc0 100644
>>> --- a/drivers/nvme/host/zns.c
>>> +++ b/drivers/nvme/host/zns.c
>>> @@ -32,6 +32,28 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl)
>>>  	return 0;
>>>  }
>>> +static u64 nvme_zns_nr_zones(struct nvme_ns *ns)
>>> +{
>>> +	struct nvme_command c = { };
>>> +	struct nvme_zone_report report;
>>> +	int buflen = sizeof(struct nvme_zone_report);
>>> +	int ret;
>>> +
>>> +	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
>>> +	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
>>> +	c.zmr.slba = cpu_to_le64(0);
>>> +	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
>>> +	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
>>> +	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
>>> +	c.zmr.pr = 0;
>>> +
>>> +	ret = nvme_submit_sync_cmd(ns->queue, &c, &report, buflen);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	return le64_to_cpu(report.nr_zones);
>>> +}
>>> +
>>>  int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>>  			  unsigned lbaf)
>>>  {
>>> @@ -87,6 +109,13 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>>  		goto free_data;
>>>  	}
>>> +	ns->nr_zones = nvme_zns_nr_zones(ns);
>>> +	ns->mar = le32_to_cpu(id->mar);
>>> +	ns->mor = le32_to_cpu(id->mor);
>>> +	ns->rrl = le32_to_cpu(id->rrl);
>>> +	ns->frl = le32_to_cpu(id->frl);
>>> +	ns->zoc = le16_to_cpu(id->zoc);
>>> +
>>>  	q->limits.zoned = BLK_ZONED_HM;
>>>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>>>  free_data:
>>> @@ -230,6 +259,38 @@ int nvme_report_zones(struct gendisk *disk, sector_t sector,
>>>  	return ret;
>>>  }
>>> +static int nvme_ns_report_zone_prop(struct nvme_ns *ns, struct blk_zone_dev *zprop)
>>> +{
>>> +	zprop->nr_zones = ns->nr_zones;
>>> +	zprop->zoc = ns->zoc;
>>> +	zprop->ozcs = ns->ozcs;
>>> +	zprop->mar = ns->mar;
>>> +	zprop->mor = ns->mor;
>>> +	zprop->rrl = ns->rrl;
>>> +	zprop->frl = ns->frl;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev *zprop)
>>> +{
>>> +	struct nvme_ns_head *head = NULL;
>>> +	struct nvme_ns *ns;
>>> +	int srcu_idx, ret;
>>> +
>>> +	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
>>> +	if (unlikely(!ns))
>>> +		return -EWOULDBLOCK;
>>> +
>>> +	if (ns->head->ids.csi == NVME_CSI_ZNS)
>>> +		ret = nvme_ns_report_zone_prop(ns, zprop);
>>> +	else
>>> +		ret = -EINVAL;
>>> +	nvme_put_ns_from_disk(head, srcu_idx);
>>> +
>>> +	return ret;
>>> +}
>>> +
>>>  blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
>>>  		struct nvme_command *c, enum nvme_zone_mgmt_action action)
>>>  {
>>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>>> index 8308d8a3720b..0c0faa58b7f4 100644
>>> --- a/include/linux/blkdev.h
>>> +++ b/include/linux/blkdev.h
>>> @@ -372,6 +372,8 @@ extern int blkdev_zone_ops_ioctl(struct block_device *bdev, fmode_t mode,
>>>  				  unsigned int cmd, unsigned long arg);
>>>  extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>>  				  unsigned int cmd, unsigned long arg);
>>> +extern int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>>> +			unsigned int cmd, unsigned long arg);
>>>  #else /* CONFIG_BLK_DEV_ZONED */
>>>  static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
>>> @@ -400,6 +402,12 @@ static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
>>>  	return -ENOTTY;
>>>  }
>>> +static inline int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>>> +				      unsigned int cmd, unsigned long arg)
>>> +{
>>> +	return -ENOTTY;
>>> +}
>>> +
>>>  #endif /* CONFIG_BLK_DEV_ZONED */
>>>  struct request_queue {
>>> @@ -1770,6 +1778,7 @@ struct block_device_operations {
>>>  	int (*report_zones)(struct gendisk *, sector_t sector,
>>>  			unsigned int nr_zones, report_zones_cb cb, void *data);
>>>  	char *(*devnode)(struct gendisk *disk, umode_t *mode);
>>> +	int (*report_zone_p)(struct gendisk *disk, struct blk_zone_dev *zprop);
>>>  	struct module *owner;
>>>  	const struct pr_ops *pr_ops;
>>>  };
>>> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
>>> index d0978ee10fc7..0c49a4b2ce5d 100644
>>> --- a/include/uapi/linux/blkzoned.h
>>> +++ b/include/uapi/linux/blkzoned.h
>>> @@ -142,6 +142,18 @@ struct blk_zone_range {
>>>  	__u64		nr_sectors;
>>>  };
>>> +struct blk_zone_dev {
>>> +	__u32	nr_zones;
>>> +	__u32	mar;
>>> +	__u32	mor;
>>> +	__u32	rrl;
>>> +	__u32	frl;
>>> +	__u16	zoc;
>>> +	__u16	ozcs;
>>> +	__u32	rsv31[2];
>>> +	__u64	rsv63[4];
>>> +};
>>> +
>>>  /**
>>>   * enum blk_zone_action - Zone state transitions managed from user-space
>>>   *
>>> @@ -209,5 +221,6 @@ struct blk_zone_mgmt {
>>>  #define BLKCLOSEZONE	_IOW(0x12, 135, struct blk_zone_range)
>>>  #define BLKFINISHZONE	_IOW(0x12, 136, struct blk_zone_range)
>>>  #define BLKMGMTZONE	_IOR(0x12, 137, struct blk_zone_mgmt)
>>> +#define BLKZONEDEVPROP	_IOR(0x12, 138, struct blk_zone_dev)
>>>  #endif /* _UAPI_BLKZONED_H */
>>
>> Nak. These properties can already be retrieved using the nvme ioctl 
>> passthru command and support have also been added to nvme-cli.
>>
> 
> These properties are intended to be consumed by an application, so
> nvme-cli is of not much use. I would also like to avoid sysfs variables.

Why not sysfs ? These are device properties, they can be defined as sysfs device
attributes. If there is an equivalent for ZBC/ZAC drives, you could even
consider defining them as queue attributes as long as you also patch sd.c, but
that may be pushing things too far.

In any case, sysfs seems a much better approach to me as that would be limited
to the NVMe driver rather than all this additional code in the block layer.

> 
> We can use nvme passthru, but this bypasses the zoned block abstraction.
> Why not representing ZNS features in the standard zoned block API? I am
> happy to iterate on the actual implementation if you have feedback.
> 
> Javier
> 
>
Damien Le Moal June 26, 2020, 1:38 a.m. UTC | #6
On 2020/06/25 21:22, Javier González wrote:
> From: Javier González <javier.gonz@samsung.com>
> 
> With the addition of ZNS, a new set of properties have been added to the
> zoned block device. This patch introduces a new IOCTL to expose these
> rroperties to user space.
> 
> Signed-off-by: Javier González <javier.gonz@samsung.com>
> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> ---
>  block/blk-zoned.c             | 46 ++++++++++++++++++++++++++
>  block/ioctl.c                 |  2 ++
>  drivers/nvme/host/core.c      |  2 ++
>  drivers/nvme/host/nvme.h      | 11 +++++++
>  drivers/nvme/host/zns.c       | 61 +++++++++++++++++++++++++++++++++++
>  include/linux/blkdev.h        |  9 ++++++
>  include/uapi/linux/blkzoned.h | 13 ++++++++
>  7 files changed, 144 insertions(+)
> 
> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
> index 704fc15813d1..39ec72af9537 100644
> --- a/block/blk-zoned.c
> +++ b/block/blk-zoned.c
> @@ -169,6 +169,17 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
>  }
>  EXPORT_SYMBOL_GPL(blkdev_report_zones);
>  
> +static int blkdev_report_zonedev_prop(struct block_device *bdev,
> +				      struct blk_zone_dev *zprop)
> +{
> +	struct gendisk *disk = bdev->bd_disk;
> +
> +	if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zone_p))
> +		return -EOPNOTSUPP;
> +
> +	return disk->fops->report_zone_p(disk, zprop);
> +}
> +
>  static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
>  						sector_t sector,
>  						sector_t nr_sectors)
> @@ -430,6 +441,41 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>  				GFP_KERNEL);
>  }
>  
> +int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
> +			unsigned int cmd, unsigned long arg)
> +{
> +	void __user *argp = (void __user *)arg;
> +	struct request_queue *q;
> +	struct blk_zone_dev zprop;
> +	int ret;
> +
> +	if (!argp)
> +		return -EINVAL;
> +
> +	q = bdev_get_queue(bdev);
> +	if (!q)
> +		return -ENXIO;
> +
> +	if (!blk_queue_is_zoned(q))
> +		return -ENOTTY;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EACCES;
> +
> +	if (!(mode & FMODE_WRITE))
> +		return -EBADF;
> +
> +	ret = blkdev_report_zonedev_prop(bdev, &zprop);
> +	if (ret)
> +		goto out;
> +
> +	if (copy_to_user(argp, &zprop, sizeof(struct blk_zone_dev)))
> +		return -EFAULT;
> +
> +out:
> +	return ret;
> +}
> +
>  static inline unsigned long *blk_alloc_zone_bitmap(int node,
>  						   unsigned int nr_zones)
>  {
> diff --git a/block/ioctl.c b/block/ioctl.c
> index 0ea29754e7dd..f7b4e0f2dd4c 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -517,6 +517,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
>  		return blkdev_zone_ops_ioctl(bdev, mode, cmd, arg);
>  	case BLKMGMTZONE:
>  		return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
> +	case BLKZONEDEVPROP:
> +		return blkdev_zonedev_prop(bdev, mode, cmd, arg);
>  	case BLKGETZONESZ:
>  		return put_uint(argp, bdev_zone_sectors(bdev));
>  	case BLKGETNRZONES:
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 5b95c81d2a2d..a32c909a915f 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2254,6 +2254,7 @@ static const struct block_device_operations nvme_fops = {
>  	.getgeo		= nvme_getgeo,
>  	.revalidate_disk= nvme_revalidate_disk,
>  	.report_zones	= nvme_report_zones,
> +	.report_zone_p	= nvme_report_zone_prop,
>  	.pr_ops		= &nvme_pr_ops,
>  };
>  
> @@ -2280,6 +2281,7 @@ const struct block_device_operations nvme_ns_head_ops = {
>  	.compat_ioctl	= nvme_compat_ioctl,
>  	.getgeo		= nvme_getgeo,
>  	.report_zones	= nvme_report_zones,
> +	.report_zone_p	= nvme_report_zone_prop,
>  	.pr_ops		= &nvme_pr_ops,
>  };
>  #endif /* CONFIG_NVME_MULTIPATH */
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index ecf443efdf91..172e0531f37f 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -407,6 +407,14 @@ struct nvme_ns {
>  	u8 pi_type;
>  #ifdef CONFIG_BLK_DEV_ZONED
>  	u64 zsze;
> +
> +	u32 nr_zones;
> +	u32 mar;
> +	u32 mor;
> +	u32 rrl;
> +	u32 frl;
> +	u16 zoc;
> +	u16 ozcs;
>  #endif
>  	unsigned long features;
>  	unsigned long flags;
> @@ -704,11 +712,14 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>  int nvme_report_zones(struct gendisk *disk, sector_t sector,
>  		      unsigned int nr_zones, report_zones_cb cb, void *data);
>  
> +int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev *zprop);
> +
>  blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
>  				       struct nvme_command *cmnd,
>  				       enum nvme_zone_mgmt_action action);
>  #else
>  #define nvme_report_zones NULL
> +#define nvme_report_zone_prop NULL
>  
>  static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
>  		struct request *req, struct nvme_command *cmnd,
> diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
> index 2e6512ac6f01..258d03610cc0 100644
> --- a/drivers/nvme/host/zns.c
> +++ b/drivers/nvme/host/zns.c
> @@ -32,6 +32,28 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl)
>  	return 0;
>  }
>  
> +static u64 nvme_zns_nr_zones(struct nvme_ns *ns)
> +{
> +	struct nvme_command c = { };
> +	struct nvme_zone_report report;
> +	int buflen = sizeof(struct nvme_zone_report);
> +	int ret;
> +
> +	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
> +	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
> +	c.zmr.slba = cpu_to_le64(0);
> +	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
> +	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
> +	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
> +	c.zmr.pr = 0;
> +
> +	ret = nvme_submit_sync_cmd(ns->queue, &c, &report, buflen);
> +	if (ret)
> +		return ret;
> +
> +	return le64_to_cpu(report.nr_zones);
> +}
> +
>  int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>  			  unsigned lbaf)
>  {
> @@ -87,6 +109,13 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>  		goto free_data;
>  	}
>  
> +	ns->nr_zones = nvme_zns_nr_zones(ns);
> +	ns->mar = le32_to_cpu(id->mar);
> +	ns->mor = le32_to_cpu(id->mor);
> +	ns->rrl = le32_to_cpu(id->rrl);
> +	ns->frl = le32_to_cpu(id->frl);
> +	ns->zoc = le16_to_cpu(id->zoc);
> +
>  	q->limits.zoned = BLK_ZONED_HM;
>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>  free_data:
> @@ -230,6 +259,38 @@ int nvme_report_zones(struct gendisk *disk, sector_t sector,
>  	return ret;
>  }
>  
> +static int nvme_ns_report_zone_prop(struct nvme_ns *ns, struct blk_zone_dev *zprop)
> +{
> +	zprop->nr_zones = ns->nr_zones;
> +	zprop->zoc = ns->zoc;
> +	zprop->ozcs = ns->ozcs;
> +	zprop->mar = ns->mar;
> +	zprop->mor = ns->mor;
> +	zprop->rrl = ns->rrl;
> +	zprop->frl = ns->frl;
> +
> +	return 0;
> +}
> +
> +int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev *zprop)
> +{
> +	struct nvme_ns_head *head = NULL;
> +	struct nvme_ns *ns;
> +	int srcu_idx, ret;
> +
> +	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
> +	if (unlikely(!ns))
> +		return -EWOULDBLOCK;
> +
> +	if (ns->head->ids.csi == NVME_CSI_ZNS)
> +		ret = nvme_ns_report_zone_prop(ns, zprop);
> +	else
> +		ret = -EINVAL;
> +	nvme_put_ns_from_disk(head, srcu_idx);
> +
> +	return ret;
> +}
> +
>  blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
>  		struct nvme_command *c, enum nvme_zone_mgmt_action action)
>  {
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 8308d8a3720b..0c0faa58b7f4 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -372,6 +372,8 @@ extern int blkdev_zone_ops_ioctl(struct block_device *bdev, fmode_t mode,
>  				  unsigned int cmd, unsigned long arg);
>  extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>  				  unsigned int cmd, unsigned long arg);
> +extern int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
> +			unsigned int cmd, unsigned long arg);
>  #else /* CONFIG_BLK_DEV_ZONED */
>  
>  static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
> @@ -400,6 +402,12 @@ static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
>  	return -ENOTTY;
>  }
>  
> +static inline int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
> +				      unsigned int cmd, unsigned long arg)
> +{
> +	return -ENOTTY;
> +}
> +
>  #endif /* CONFIG_BLK_DEV_ZONED */
>  
>  struct request_queue {
> @@ -1770,6 +1778,7 @@ struct block_device_operations {
>  	int (*report_zones)(struct gendisk *, sector_t sector,
>  			unsigned int nr_zones, report_zones_cb cb, void *data);
>  	char *(*devnode)(struct gendisk *disk, umode_t *mode);
> +	int (*report_zone_p)(struct gendisk *disk, struct blk_zone_dev *zprop);
>  	struct module *owner;
>  	const struct pr_ops *pr_ops;
>  };
> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
> index d0978ee10fc7..0c49a4b2ce5d 100644
> --- a/include/uapi/linux/blkzoned.h
> +++ b/include/uapi/linux/blkzoned.h
> @@ -142,6 +142,18 @@ struct blk_zone_range {
>  	__u64		nr_sectors;
>  };
>  
> +struct blk_zone_dev {
> +	__u32	nr_zones;
> +	__u32	mar;
> +	__u32	mor;
> +	__u32	rrl;
> +	__u32	frl;
> +	__u16	zoc;
> +	__u16	ozcs;
> +	__u32	rsv31[2];
> +	__u64	rsv63[4];
> +};
> +
>  /**
>   * enum blk_zone_action - Zone state transitions managed from user-space
>   *
> @@ -209,5 +221,6 @@ struct blk_zone_mgmt {
>  #define BLKCLOSEZONE	_IOW(0x12, 135, struct blk_zone_range)
>  #define BLKFINISHZONE	_IOW(0x12, 136, struct blk_zone_range)
>  #define BLKMGMTZONE	_IOR(0x12, 137, struct blk_zone_mgmt)
> +#define BLKZONEDEVPROP	_IOR(0x12, 138, struct blk_zone_dev)
>  
>  #endif /* _UAPI_BLKZONED_H */
> 

As commented already, NVMe passthrough or sysfs device attributes would be much
better. See scsi: there is no IOCTL defined to obtain every single log page or
mode page defined. Passthrough is the interface to do that. For frequently used
log pages giving device information, sysfs is used as a cache. See all the
"vpd_pgXX" entries under /sys/block/sdX/device. All of this is done by the
driver. Not the block layer.
Javier González June 26, 2020, 6:22 a.m. UTC | #7
On 26.06.2020 01:38, Damien Le Moal wrote:
>On 2020/06/25 21:22, Javier González wrote:
>> From: Javier González <javier.gonz@samsung.com>
>>
>> With the addition of ZNS, a new set of properties have been added to the
>> zoned block device. This patch introduces a new IOCTL to expose these
>> rroperties to user space.
>>
>> Signed-off-by: Javier González <javier.gonz@samsung.com>
>> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
>> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
>> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
>> ---
>>  block/blk-zoned.c             | 46 ++++++++++++++++++++++++++
>>  block/ioctl.c                 |  2 ++
>>  drivers/nvme/host/core.c      |  2 ++
>>  drivers/nvme/host/nvme.h      | 11 +++++++
>>  drivers/nvme/host/zns.c       | 61 +++++++++++++++++++++++++++++++++++
>>  include/linux/blkdev.h        |  9 ++++++
>>  include/uapi/linux/blkzoned.h | 13 ++++++++
>>  7 files changed, 144 insertions(+)
>>
>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>> index 704fc15813d1..39ec72af9537 100644
>> --- a/block/blk-zoned.c
>> +++ b/block/blk-zoned.c
>> @@ -169,6 +169,17 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
>>  }
>>  EXPORT_SYMBOL_GPL(blkdev_report_zones);
>>
>> +static int blkdev_report_zonedev_prop(struct block_device *bdev,
>> +				      struct blk_zone_dev *zprop)
>> +{
>> +	struct gendisk *disk = bdev->bd_disk;
>> +
>> +	if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zone_p))
>> +		return -EOPNOTSUPP;
>> +
>> +	return disk->fops->report_zone_p(disk, zprop);
>> +}
>> +
>>  static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
>>  						sector_t sector,
>>  						sector_t nr_sectors)
>> @@ -430,6 +441,41 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>  				GFP_KERNEL);
>>  }
>>
>> +int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>> +			unsigned int cmd, unsigned long arg)
>> +{
>> +	void __user *argp = (void __user *)arg;
>> +	struct request_queue *q;
>> +	struct blk_zone_dev zprop;
>> +	int ret;
>> +
>> +	if (!argp)
>> +		return -EINVAL;
>> +
>> +	q = bdev_get_queue(bdev);
>> +	if (!q)
>> +		return -ENXIO;
>> +
>> +	if (!blk_queue_is_zoned(q))
>> +		return -ENOTTY;
>> +
>> +	if (!capable(CAP_SYS_ADMIN))
>> +		return -EACCES;
>> +
>> +	if (!(mode & FMODE_WRITE))
>> +		return -EBADF;
>> +
>> +	ret = blkdev_report_zonedev_prop(bdev, &zprop);
>> +	if (ret)
>> +		goto out;
>> +
>> +	if (copy_to_user(argp, &zprop, sizeof(struct blk_zone_dev)))
>> +		return -EFAULT;
>> +
>> +out:
>> +	return ret;
>> +}
>> +
>>  static inline unsigned long *blk_alloc_zone_bitmap(int node,
>>  						   unsigned int nr_zones)
>>  {
>> diff --git a/block/ioctl.c b/block/ioctl.c
>> index 0ea29754e7dd..f7b4e0f2dd4c 100644
>> --- a/block/ioctl.c
>> +++ b/block/ioctl.c
>> @@ -517,6 +517,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
>>  		return blkdev_zone_ops_ioctl(bdev, mode, cmd, arg);
>>  	case BLKMGMTZONE:
>>  		return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
>> +	case BLKZONEDEVPROP:
>> +		return blkdev_zonedev_prop(bdev, mode, cmd, arg);
>>  	case BLKGETZONESZ:
>>  		return put_uint(argp, bdev_zone_sectors(bdev));
>>  	case BLKGETNRZONES:
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index 5b95c81d2a2d..a32c909a915f 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -2254,6 +2254,7 @@ static const struct block_device_operations nvme_fops = {
>>  	.getgeo		= nvme_getgeo,
>>  	.revalidate_disk= nvme_revalidate_disk,
>>  	.report_zones	= nvme_report_zones,
>> +	.report_zone_p	= nvme_report_zone_prop,
>>  	.pr_ops		= &nvme_pr_ops,
>>  };
>>
>> @@ -2280,6 +2281,7 @@ const struct block_device_operations nvme_ns_head_ops = {
>>  	.compat_ioctl	= nvme_compat_ioctl,
>>  	.getgeo		= nvme_getgeo,
>>  	.report_zones	= nvme_report_zones,
>> +	.report_zone_p	= nvme_report_zone_prop,
>>  	.pr_ops		= &nvme_pr_ops,
>>  };
>>  #endif /* CONFIG_NVME_MULTIPATH */
>> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
>> index ecf443efdf91..172e0531f37f 100644
>> --- a/drivers/nvme/host/nvme.h
>> +++ b/drivers/nvme/host/nvme.h
>> @@ -407,6 +407,14 @@ struct nvme_ns {
>>  	u8 pi_type;
>>  #ifdef CONFIG_BLK_DEV_ZONED
>>  	u64 zsze;
>> +
>> +	u32 nr_zones;
>> +	u32 mar;
>> +	u32 mor;
>> +	u32 rrl;
>> +	u32 frl;
>> +	u16 zoc;
>> +	u16 ozcs;
>>  #endif
>>  	unsigned long features;
>>  	unsigned long flags;
>> @@ -704,11 +712,14 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>  int nvme_report_zones(struct gendisk *disk, sector_t sector,
>>  		      unsigned int nr_zones, report_zones_cb cb, void *data);
>>
>> +int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev *zprop);
>> +
>>  blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
>>  				       struct nvme_command *cmnd,
>>  				       enum nvme_zone_mgmt_action action);
>>  #else
>>  #define nvme_report_zones NULL
>> +#define nvme_report_zone_prop NULL
>>
>>  static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
>>  		struct request *req, struct nvme_command *cmnd,
>> diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
>> index 2e6512ac6f01..258d03610cc0 100644
>> --- a/drivers/nvme/host/zns.c
>> +++ b/drivers/nvme/host/zns.c
>> @@ -32,6 +32,28 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl)
>>  	return 0;
>>  }
>>
>> +static u64 nvme_zns_nr_zones(struct nvme_ns *ns)
>> +{
>> +	struct nvme_command c = { };
>> +	struct nvme_zone_report report;
>> +	int buflen = sizeof(struct nvme_zone_report);
>> +	int ret;
>> +
>> +	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
>> +	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
>> +	c.zmr.slba = cpu_to_le64(0);
>> +	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
>> +	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
>> +	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
>> +	c.zmr.pr = 0;
>> +
>> +	ret = nvme_submit_sync_cmd(ns->queue, &c, &report, buflen);
>> +	if (ret)
>> +		return ret;
>> +
>> +	return le64_to_cpu(report.nr_zones);
>> +}
>> +
>>  int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>  			  unsigned lbaf)
>>  {
>> @@ -87,6 +109,13 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>  		goto free_data;
>>  	}
>>
>> +	ns->nr_zones = nvme_zns_nr_zones(ns);
>> +	ns->mar = le32_to_cpu(id->mar);
>> +	ns->mor = le32_to_cpu(id->mor);
>> +	ns->rrl = le32_to_cpu(id->rrl);
>> +	ns->frl = le32_to_cpu(id->frl);
>> +	ns->zoc = le16_to_cpu(id->zoc);
>> +
>>  	q->limits.zoned = BLK_ZONED_HM;
>>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>>  free_data:
>> @@ -230,6 +259,38 @@ int nvme_report_zones(struct gendisk *disk, sector_t sector,
>>  	return ret;
>>  }
>>
>> +static int nvme_ns_report_zone_prop(struct nvme_ns *ns, struct blk_zone_dev *zprop)
>> +{
>> +	zprop->nr_zones = ns->nr_zones;
>> +	zprop->zoc = ns->zoc;
>> +	zprop->ozcs = ns->ozcs;
>> +	zprop->mar = ns->mar;
>> +	zprop->mor = ns->mor;
>> +	zprop->rrl = ns->rrl;
>> +	zprop->frl = ns->frl;
>> +
>> +	return 0;
>> +}
>> +
>> +int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev *zprop)
>> +{
>> +	struct nvme_ns_head *head = NULL;
>> +	struct nvme_ns *ns;
>> +	int srcu_idx, ret;
>> +
>> +	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
>> +	if (unlikely(!ns))
>> +		return -EWOULDBLOCK;
>> +
>> +	if (ns->head->ids.csi == NVME_CSI_ZNS)
>> +		ret = nvme_ns_report_zone_prop(ns, zprop);
>> +	else
>> +		ret = -EINVAL;
>> +	nvme_put_ns_from_disk(head, srcu_idx);
>> +
>> +	return ret;
>> +}
>> +
>>  blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
>>  		struct nvme_command *c, enum nvme_zone_mgmt_action action)
>>  {
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index 8308d8a3720b..0c0faa58b7f4 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -372,6 +372,8 @@ extern int blkdev_zone_ops_ioctl(struct block_device *bdev, fmode_t mode,
>>  				  unsigned int cmd, unsigned long arg);
>>  extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>  				  unsigned int cmd, unsigned long arg);
>> +extern int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>> +			unsigned int cmd, unsigned long arg);
>>  #else /* CONFIG_BLK_DEV_ZONED */
>>
>>  static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
>> @@ -400,6 +402,12 @@ static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
>>  	return -ENOTTY;
>>  }
>>
>> +static inline int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>> +				      unsigned int cmd, unsigned long arg)
>> +{
>> +	return -ENOTTY;
>> +}
>> +
>>  #endif /* CONFIG_BLK_DEV_ZONED */
>>
>>  struct request_queue {
>> @@ -1770,6 +1778,7 @@ struct block_device_operations {
>>  	int (*report_zones)(struct gendisk *, sector_t sector,
>>  			unsigned int nr_zones, report_zones_cb cb, void *data);
>>  	char *(*devnode)(struct gendisk *disk, umode_t *mode);
>> +	int (*report_zone_p)(struct gendisk *disk, struct blk_zone_dev *zprop);
>>  	struct module *owner;
>>  	const struct pr_ops *pr_ops;
>>  };
>> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
>> index d0978ee10fc7..0c49a4b2ce5d 100644
>> --- a/include/uapi/linux/blkzoned.h
>> +++ b/include/uapi/linux/blkzoned.h
>> @@ -142,6 +142,18 @@ struct blk_zone_range {
>>  	__u64		nr_sectors;
>>  };
>>
>> +struct blk_zone_dev {
>> +	__u32	nr_zones;
>> +	__u32	mar;
>> +	__u32	mor;
>> +	__u32	rrl;
>> +	__u32	frl;
>> +	__u16	zoc;
>> +	__u16	ozcs;
>> +	__u32	rsv31[2];
>> +	__u64	rsv63[4];
>> +};
>> +
>>  /**
>>   * enum blk_zone_action - Zone state transitions managed from user-space
>>   *
>> @@ -209,5 +221,6 @@ struct blk_zone_mgmt {
>>  #define BLKCLOSEZONE	_IOW(0x12, 135, struct blk_zone_range)
>>  #define BLKFINISHZONE	_IOW(0x12, 136, struct blk_zone_range)
>>  #define BLKMGMTZONE	_IOR(0x12, 137, struct blk_zone_mgmt)
>> +#define BLKZONEDEVPROP	_IOR(0x12, 138, struct blk_zone_dev)
>>
>>  #endif /* _UAPI_BLKZONED_H */
>>
>
>As commented already, NVMe passthrough or sysfs device attributes would be much
>better. See scsi: there is no IOCTL defined to obtain every single log page or
>mode page defined. Passthrough is the interface to do that. For frequently used
>log pages giving device information, sysfs is used as a cache. See all the
>"vpd_pgXX" entries under /sys/block/sdX/device. All of this is done by the
>driver. Not the block layer.

Ok. Let me look into that. I was hesitant to do this as (i) it is a lot
of parameters and (ii) naming ATM is a bit inconsistent. Guess we could
group them all under zone_* prefix for new parameters.

Javier
Javier González June 26, 2020, 6:24 a.m. UTC | #8
On 25.06.2020 21:58, Matias Bjørling wrote:
>On 25/06/2020 21.42, Javier González wrote:
>>On 25.06.2020 15:10, Matias Bjørling wrote:
>>>On 25/06/2020 14.21, Javier González wrote:
>>>>From: Javier González <javier.gonz@samsung.com>
>>>>
>>>>With the addition of ZNS, a new set of properties have been 
>>>>added to the
>>>>zoned block device. This patch introduces a new IOCTL to expose these
>>>>rroperties to user space.
>>>>
>>>>Signed-off-by: Javier González <javier.gonz@samsung.com>
>>>>Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
>>>>Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
>>>>Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
>>>>---
>>>> block/blk-zoned.c             | 46 ++++++++++++++++++++++++++
>>>> block/ioctl.c                 |  2 ++
>>>> drivers/nvme/host/core.c      |  2 ++
>>>> drivers/nvme/host/nvme.h      | 11 +++++++
>>>> drivers/nvme/host/zns.c       | 61 +++++++++++++++++++++++++++++++++++
>>>> include/linux/blkdev.h        |  9 ++++++
>>>> include/uapi/linux/blkzoned.h | 13 ++++++++
>>>> 7 files changed, 144 insertions(+)
>>>>
>>>>diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>>>>index 704fc15813d1..39ec72af9537 100644
>>>>--- a/block/blk-zoned.c
>>>>+++ b/block/blk-zoned.c
>>>>@@ -169,6 +169,17 @@ int blkdev_report_zones(struct block_device 
>>>>*bdev, sector_t sector,
>>>> }
>>>> EXPORT_SYMBOL_GPL(blkdev_report_zones);
>>>>+static int blkdev_report_zonedev_prop(struct block_device *bdev,
>>>>+                      struct blk_zone_dev *zprop)
>>>>+{
>>>>+    struct gendisk *disk = bdev->bd_disk;
>>>>+
>>>>+    if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zone_p))
>>>>+        return -EOPNOTSUPP;
>>>>+
>>>>+    return disk->fops->report_zone_p(disk, zprop);
>>>>+}
>>>>+
>>>> static inline bool blkdev_allow_reset_all_zones(struct 
>>>>block_device *bdev,
>>>>                         sector_t sector,
>>>>                         sector_t nr_sectors)
>>>>@@ -430,6 +441,41 @@ int blkdev_zone_mgmt_ioctl(struct 
>>>>block_device *bdev, fmode_t mode,
>>>>                 GFP_KERNEL);
>>>> }
>>>>+int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>>>>+            unsigned int cmd, unsigned long arg)
>>>>+{
>>>>+    void __user *argp = (void __user *)arg;
>>>>+    struct request_queue *q;
>>>>+    struct blk_zone_dev zprop;
>>>>+    int ret;
>>>>+
>>>>+    if (!argp)
>>>>+        return -EINVAL;
>>>>+
>>>>+    q = bdev_get_queue(bdev);
>>>>+    if (!q)
>>>>+        return -ENXIO;
>>>>+
>>>>+    if (!blk_queue_is_zoned(q))
>>>>+        return -ENOTTY;
>>>>+
>>>>+    if (!capable(CAP_SYS_ADMIN))
>>>>+        return -EACCES;
>>>>+
>>>>+    if (!(mode & FMODE_WRITE))
>>>>+        return -EBADF;
>>>>+
>>>>+    ret = blkdev_report_zonedev_prop(bdev, &zprop);
>>>>+    if (ret)
>>>>+        goto out;
>>>>+
>>>>+    if (copy_to_user(argp, &zprop, sizeof(struct blk_zone_dev)))
>>>>+        return -EFAULT;
>>>>+
>>>>+out:
>>>>+    return ret;
>>>>+}
>>>>+
>>>> static inline unsigned long *blk_alloc_zone_bitmap(int node,
>>>>                            unsigned int nr_zones)
>>>> {
>>>>diff --git a/block/ioctl.c b/block/ioctl.c
>>>>index 0ea29754e7dd..f7b4e0f2dd4c 100644
>>>>--- a/block/ioctl.c
>>>>+++ b/block/ioctl.c
>>>>@@ -517,6 +517,8 @@ static int blkdev_common_ioctl(struct 
>>>>block_device *bdev, fmode_t mode,
>>>>         return blkdev_zone_ops_ioctl(bdev, mode, cmd, arg);
>>>>     case BLKMGMTZONE:
>>>>         return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
>>>>+    case BLKZONEDEVPROP:
>>>>+        return blkdev_zonedev_prop(bdev, mode, cmd, arg);
>>>>     case BLKGETZONESZ:
>>>>         return put_uint(argp, bdev_zone_sectors(bdev));
>>>>     case BLKGETNRZONES:
>>>>diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>>>>index 5b95c81d2a2d..a32c909a915f 100644
>>>>--- a/drivers/nvme/host/core.c
>>>>+++ b/drivers/nvme/host/core.c
>>>>@@ -2254,6 +2254,7 @@ static const struct 
>>>>block_device_operations nvme_fops = {
>>>>     .getgeo        = nvme_getgeo,
>>>>     .revalidate_disk= nvme_revalidate_disk,
>>>>     .report_zones    = nvme_report_zones,
>>>>+    .report_zone_p    = nvme_report_zone_prop,
>>>>     .pr_ops        = &nvme_pr_ops,
>>>> };
>>>>@@ -2280,6 +2281,7 @@ const struct block_device_operations 
>>>>nvme_ns_head_ops = {
>>>>     .compat_ioctl    = nvme_compat_ioctl,
>>>>     .getgeo        = nvme_getgeo,
>>>>     .report_zones    = nvme_report_zones,
>>>>+    .report_zone_p    = nvme_report_zone_prop,
>>>>     .pr_ops        = &nvme_pr_ops,
>>>> };
>>>> #endif /* CONFIG_NVME_MULTIPATH */
>>>>diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
>>>>index ecf443efdf91..172e0531f37f 100644
>>>>--- a/drivers/nvme/host/nvme.h
>>>>+++ b/drivers/nvme/host/nvme.h
>>>>@@ -407,6 +407,14 @@ struct nvme_ns {
>>>>     u8 pi_type;
>>>> #ifdef CONFIG_BLK_DEV_ZONED
>>>>     u64 zsze;
>>>>+
>>>>+    u32 nr_zones;
>>>>+    u32 mar;
>>>>+    u32 mor;
>>>>+    u32 rrl;
>>>>+    u32 frl;
>>>>+    u16 zoc;
>>>>+    u16 ozcs;
>>>> #endif
>>>>     unsigned long features;
>>>>     unsigned long flags;
>>>>@@ -704,11 +712,14 @@ int nvme_update_zone_info(struct gendisk 
>>>>*disk, struct nvme_ns *ns,
>>>> int nvme_report_zones(struct gendisk *disk, sector_t sector,
>>>>               unsigned int nr_zones, report_zones_cb cb, void *data);
>>>>+int nvme_report_zone_prop(struct gendisk *disk, struct 
>>>>blk_zone_dev *zprop);
>>>>+
>>>> blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, 
>>>>struct request *req,
>>>>                        struct nvme_command *cmnd,
>>>>                        enum nvme_zone_mgmt_action action);
>>>> #else
>>>> #define nvme_report_zones NULL
>>>>+#define nvme_report_zone_prop NULL
>>>> static inline blk_status_t nvme_setup_zone_mgmt_send(struct 
>>>>nvme_ns *ns,
>>>>         struct request *req, struct nvme_command *cmnd,
>>>>diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
>>>>index 2e6512ac6f01..258d03610cc0 100644
>>>>--- a/drivers/nvme/host/zns.c
>>>>+++ b/drivers/nvme/host/zns.c
>>>>@@ -32,6 +32,28 @@ static int nvme_set_max_append(struct 
>>>>nvme_ctrl *ctrl)
>>>>     return 0;
>>>> }
>>>>+static u64 nvme_zns_nr_zones(struct nvme_ns *ns)
>>>>+{
>>>>+    struct nvme_command c = { };
>>>>+    struct nvme_zone_report report;
>>>>+    int buflen = sizeof(struct nvme_zone_report);
>>>>+    int ret;
>>>>+
>>>>+    c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
>>>>+    c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
>>>>+    c.zmr.slba = cpu_to_le64(0);
>>>>+    c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
>>>>+    c.zmr.zra = NVME_ZRA_ZONE_REPORT;
>>>>+    c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
>>>>+    c.zmr.pr = 0;
>>>>+
>>>>+    ret = nvme_submit_sync_cmd(ns->queue, &c, &report, buflen);
>>>>+    if (ret)
>>>>+        return ret;
>>>>+
>>>>+    return le64_to_cpu(report.nr_zones);
>>>>+}
>>>>+
>>>> int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>>>               unsigned lbaf)
>>>> {
>>>>@@ -87,6 +109,13 @@ int nvme_update_zone_info(struct gendisk 
>>>>*disk, struct nvme_ns *ns,
>>>>         goto free_data;
>>>>     }
>>>>+    ns->nr_zones = nvme_zns_nr_zones(ns);
>>>>+    ns->mar = le32_to_cpu(id->mar);
>>>>+    ns->mor = le32_to_cpu(id->mor);
>>>>+    ns->rrl = le32_to_cpu(id->rrl);
>>>>+    ns->frl = le32_to_cpu(id->frl);
>>>>+    ns->zoc = le16_to_cpu(id->zoc);
>>>>+
>>>>     q->limits.zoned = BLK_ZONED_HM;
>>>>     blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>>>> free_data:
>>>>@@ -230,6 +259,38 @@ int nvme_report_zones(struct gendisk *disk, 
>>>>sector_t sector,
>>>>     return ret;
>>>> }
>>>>+static int nvme_ns_report_zone_prop(struct nvme_ns *ns, struct 
>>>>blk_zone_dev *zprop)
>>>>+{
>>>>+    zprop->nr_zones = ns->nr_zones;
>>>>+    zprop->zoc = ns->zoc;
>>>>+    zprop->ozcs = ns->ozcs;
>>>>+    zprop->mar = ns->mar;
>>>>+    zprop->mor = ns->mor;
>>>>+    zprop->rrl = ns->rrl;
>>>>+    zprop->frl = ns->frl;
>>>>+
>>>>+    return 0;
>>>>+}
>>>>+
>>>>+int nvme_report_zone_prop(struct gendisk *disk, struct 
>>>>blk_zone_dev *zprop)
>>>>+{
>>>>+    struct nvme_ns_head *head = NULL;
>>>>+    struct nvme_ns *ns;
>>>>+    int srcu_idx, ret;
>>>>+
>>>>+    ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
>>>>+    if (unlikely(!ns))
>>>>+        return -EWOULDBLOCK;
>>>>+
>>>>+    if (ns->head->ids.csi == NVME_CSI_ZNS)
>>>>+        ret = nvme_ns_report_zone_prop(ns, zprop);
>>>>+    else
>>>>+        ret = -EINVAL;
>>>>+    nvme_put_ns_from_disk(head, srcu_idx);
>>>>+
>>>>+    return ret;
>>>>+}
>>>>+
>>>> blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, 
>>>>struct request *req,
>>>>         struct nvme_command *c, enum nvme_zone_mgmt_action action)
>>>> {
>>>>diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>>>>index 8308d8a3720b..0c0faa58b7f4 100644
>>>>--- a/include/linux/blkdev.h
>>>>+++ b/include/linux/blkdev.h
>>>>@@ -372,6 +372,8 @@ extern int blkdev_zone_ops_ioctl(struct 
>>>>block_device *bdev, fmode_t mode,
>>>>                   unsigned int cmd, unsigned long arg);
>>>> extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, 
>>>>fmode_t mode,
>>>>                   unsigned int cmd, unsigned long arg);
>>>>+extern int blkdev_zonedev_prop(struct block_device *bdev, 
>>>>fmode_t mode,
>>>>+            unsigned int cmd, unsigned long arg);
>>>> #else /* CONFIG_BLK_DEV_ZONED */
>>>> static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
>>>>@@ -400,6 +402,12 @@ static inline int 
>>>>blkdev_zone_mgmt_ioctl(struct block_device *bdev,
>>>>     return -ENOTTY;
>>>> }
>>>>+static inline int blkdev_zonedev_prop(struct block_device 
>>>>*bdev, fmode_t mode,
>>>>+                      unsigned int cmd, unsigned long arg)
>>>>+{
>>>>+    return -ENOTTY;
>>>>+}
>>>>+
>>>> #endif /* CONFIG_BLK_DEV_ZONED */
>>>> struct request_queue {
>>>>@@ -1770,6 +1778,7 @@ struct block_device_operations {
>>>>     int (*report_zones)(struct gendisk *, sector_t sector,
>>>>             unsigned int nr_zones, report_zones_cb cb, void *data);
>>>>     char *(*devnode)(struct gendisk *disk, umode_t *mode);
>>>>+    int (*report_zone_p)(struct gendisk *disk, struct 
>>>>blk_zone_dev *zprop);
>>>>     struct module *owner;
>>>>     const struct pr_ops *pr_ops;
>>>> };
>>>>diff --git a/include/uapi/linux/blkzoned.h 
>>>>b/include/uapi/linux/blkzoned.h
>>>>index d0978ee10fc7..0c49a4b2ce5d 100644
>>>>--- a/include/uapi/linux/blkzoned.h
>>>>+++ b/include/uapi/linux/blkzoned.h
>>>>@@ -142,6 +142,18 @@ struct blk_zone_range {
>>>>     __u64        nr_sectors;
>>>> };
>>>>+struct blk_zone_dev {
>>>>+    __u32    nr_zones;
>>>>+    __u32    mar;
>>>>+    __u32    mor;
>>>>+    __u32    rrl;
>>>>+    __u32    frl;
>>>>+    __u16    zoc;
>>>>+    __u16    ozcs;
>>>>+    __u32    rsv31[2];
>>>>+    __u64    rsv63[4];
>>>>+};
>>>>+
>>>> /**
>>>>  * enum blk_zone_action - Zone state transitions managed from 
>>>>user-space
>>>>  *
>>>>@@ -209,5 +221,6 @@ struct blk_zone_mgmt {
>>>> #define BLKCLOSEZONE    _IOW(0x12, 135, struct blk_zone_range)
>>>> #define BLKFINISHZONE    _IOW(0x12, 136, struct blk_zone_range)
>>>> #define BLKMGMTZONE    _IOR(0x12, 137, struct blk_zone_mgmt)
>>>>+#define BLKZONEDEVPROP    _IOR(0x12, 138, struct blk_zone_dev)
>>>> #endif /* _UAPI_BLKZONED_H */
>>>
>>>Nak. These properties can already be retrieved using the nvme 
>>>ioctl passthru command and support have also been added to 
>>>nvme-cli.
>>>
>>
>>These properties are intended to be consumed by an application, so
>>nvme-cli is of not much use. I would also like to avoid sysfs variables.
>>
>I can recommend libnvme https://github.com/linux-nvme/libnvme
>
>It provides an easy way to retrieve the options.

Thanks for the advice. We use libnvme already, it is really nice. As I
commented to Damien, I was hesitant to put so many new parameters on
sysfs. I will move it there on a V2.

Javier
Javier González June 26, 2020, 6:27 a.m. UTC | #9
On 26.06.2020 00:57, Damien Le Moal wrote:
>On 2020/06/26 4:42, Javier González wrote:
>> On 25.06.2020 15:10, Matias Bjørling wrote:
>>> On 25/06/2020 14.21, Javier González wrote:
>>>> From: Javier González <javier.gonz@samsung.com>
>>>>
>>>> With the addition of ZNS, a new set of properties have been added to the
>>>> zoned block device. This patch introduces a new IOCTL to expose these
>>>> rroperties to user space.
>>>>
>>>> Signed-off-by: Javier González <javier.gonz@samsung.com>
>>>> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
>>>> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
>>>> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
>>>> ---
>>>>  block/blk-zoned.c             | 46 ++++++++++++++++++++++++++
>>>>  block/ioctl.c                 |  2 ++
>>>>  drivers/nvme/host/core.c      |  2 ++
>>>>  drivers/nvme/host/nvme.h      | 11 +++++++
>>>>  drivers/nvme/host/zns.c       | 61 +++++++++++++++++++++++++++++++++++
>>>>  include/linux/blkdev.h        |  9 ++++++
>>>>  include/uapi/linux/blkzoned.h | 13 ++++++++
>>>>  7 files changed, 144 insertions(+)
>>>>
>>>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>>>> index 704fc15813d1..39ec72af9537 100644
>>>> --- a/block/blk-zoned.c
>>>> +++ b/block/blk-zoned.c
>>>> @@ -169,6 +169,17 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(blkdev_report_zones);
>>>> +static int blkdev_report_zonedev_prop(struct block_device *bdev,
>>>> +				      struct blk_zone_dev *zprop)
>>>> +{
>>>> +	struct gendisk *disk = bdev->bd_disk;
>>>> +
>>>> +	if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zone_p))
>>>> +		return -EOPNOTSUPP;
>>>> +
>>>> +	return disk->fops->report_zone_p(disk, zprop);
>>>> +}
>>>> +
>>>>  static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
>>>>  						sector_t sector,
>>>>  						sector_t nr_sectors)
>>>> @@ -430,6 +441,41 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>>>  				GFP_KERNEL);
>>>>  }
>>>> +int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>>>> +			unsigned int cmd, unsigned long arg)
>>>> +{
>>>> +	void __user *argp = (void __user *)arg;
>>>> +	struct request_queue *q;
>>>> +	struct blk_zone_dev zprop;
>>>> +	int ret;
>>>> +
>>>> +	if (!argp)
>>>> +		return -EINVAL;
>>>> +
>>>> +	q = bdev_get_queue(bdev);
>>>> +	if (!q)
>>>> +		return -ENXIO;
>>>> +
>>>> +	if (!blk_queue_is_zoned(q))
>>>> +		return -ENOTTY;
>>>> +
>>>> +	if (!capable(CAP_SYS_ADMIN))
>>>> +		return -EACCES;
>>>> +
>>>> +	if (!(mode & FMODE_WRITE))
>>>> +		return -EBADF;
>>>> +
>>>> +	ret = blkdev_report_zonedev_prop(bdev, &zprop);
>>>> +	if (ret)
>>>> +		goto out;
>>>> +
>>>> +	if (copy_to_user(argp, &zprop, sizeof(struct blk_zone_dev)))
>>>> +		return -EFAULT;
>>>> +
>>>> +out:
>>>> +	return ret;
>>>> +}
>>>> +
>>>>  static inline unsigned long *blk_alloc_zone_bitmap(int node,
>>>>  						   unsigned int nr_zones)
>>>>  {
>>>> diff --git a/block/ioctl.c b/block/ioctl.c
>>>> index 0ea29754e7dd..f7b4e0f2dd4c 100644
>>>> --- a/block/ioctl.c
>>>> +++ b/block/ioctl.c
>>>> @@ -517,6 +517,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
>>>>  		return blkdev_zone_ops_ioctl(bdev, mode, cmd, arg);
>>>>  	case BLKMGMTZONE:
>>>>  		return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
>>>> +	case BLKZONEDEVPROP:
>>>> +		return blkdev_zonedev_prop(bdev, mode, cmd, arg);
>>>>  	case BLKGETZONESZ:
>>>>  		return put_uint(argp, bdev_zone_sectors(bdev));
>>>>  	case BLKGETNRZONES:
>>>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>>>> index 5b95c81d2a2d..a32c909a915f 100644
>>>> --- a/drivers/nvme/host/core.c
>>>> +++ b/drivers/nvme/host/core.c
>>>> @@ -2254,6 +2254,7 @@ static const struct block_device_operations nvme_fops = {
>>>>  	.getgeo		= nvme_getgeo,
>>>>  	.revalidate_disk= nvme_revalidate_disk,
>>>>  	.report_zones	= nvme_report_zones,
>>>> +	.report_zone_p	= nvme_report_zone_prop,
>>>>  	.pr_ops		= &nvme_pr_ops,
>>>>  };
>>>> @@ -2280,6 +2281,7 @@ const struct block_device_operations nvme_ns_head_ops = {
>>>>  	.compat_ioctl	= nvme_compat_ioctl,
>>>>  	.getgeo		= nvme_getgeo,
>>>>  	.report_zones	= nvme_report_zones,
>>>> +	.report_zone_p	= nvme_report_zone_prop,
>>>>  	.pr_ops		= &nvme_pr_ops,
>>>>  };
>>>>  #endif /* CONFIG_NVME_MULTIPATH */
>>>> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
>>>> index ecf443efdf91..172e0531f37f 100644
>>>> --- a/drivers/nvme/host/nvme.h
>>>> +++ b/drivers/nvme/host/nvme.h
>>>> @@ -407,6 +407,14 @@ struct nvme_ns {
>>>>  	u8 pi_type;
>>>>  #ifdef CONFIG_BLK_DEV_ZONED
>>>>  	u64 zsze;
>>>> +
>>>> +	u32 nr_zones;
>>>> +	u32 mar;
>>>> +	u32 mor;
>>>> +	u32 rrl;
>>>> +	u32 frl;
>>>> +	u16 zoc;
>>>> +	u16 ozcs;
>>>>  #endif
>>>>  	unsigned long features;
>>>>  	unsigned long flags;
>>>> @@ -704,11 +712,14 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>>>  int nvme_report_zones(struct gendisk *disk, sector_t sector,
>>>>  		      unsigned int nr_zones, report_zones_cb cb, void *data);
>>>> +int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev *zprop);
>>>> +
>>>>  blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
>>>>  				       struct nvme_command *cmnd,
>>>>  				       enum nvme_zone_mgmt_action action);
>>>>  #else
>>>>  #define nvme_report_zones NULL
>>>> +#define nvme_report_zone_prop NULL
>>>>  static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
>>>>  		struct request *req, struct nvme_command *cmnd,
>>>> diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
>>>> index 2e6512ac6f01..258d03610cc0 100644
>>>> --- a/drivers/nvme/host/zns.c
>>>> +++ b/drivers/nvme/host/zns.c
>>>> @@ -32,6 +32,28 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl)
>>>>  	return 0;
>>>>  }
>>>> +static u64 nvme_zns_nr_zones(struct nvme_ns *ns)
>>>> +{
>>>> +	struct nvme_command c = { };
>>>> +	struct nvme_zone_report report;
>>>> +	int buflen = sizeof(struct nvme_zone_report);
>>>> +	int ret;
>>>> +
>>>> +	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
>>>> +	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
>>>> +	c.zmr.slba = cpu_to_le64(0);
>>>> +	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
>>>> +	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
>>>> +	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
>>>> +	c.zmr.pr = 0;
>>>> +
>>>> +	ret = nvme_submit_sync_cmd(ns->queue, &c, &report, buflen);
>>>> +	if (ret)
>>>> +		return ret;
>>>> +
>>>> +	return le64_to_cpu(report.nr_zones);
>>>> +}
>>>> +
>>>>  int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>>>  			  unsigned lbaf)
>>>>  {
>>>> @@ -87,6 +109,13 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>>>>  		goto free_data;
>>>>  	}
>>>> +	ns->nr_zones = nvme_zns_nr_zones(ns);
>>>> +	ns->mar = le32_to_cpu(id->mar);
>>>> +	ns->mor = le32_to_cpu(id->mor);
>>>> +	ns->rrl = le32_to_cpu(id->rrl);
>>>> +	ns->frl = le32_to_cpu(id->frl);
>>>> +	ns->zoc = le16_to_cpu(id->zoc);
>>>> +
>>>>  	q->limits.zoned = BLK_ZONED_HM;
>>>>  	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
>>>>  free_data:
>>>> @@ -230,6 +259,38 @@ int nvme_report_zones(struct gendisk *disk, sector_t sector,
>>>>  	return ret;
>>>>  }
>>>> +static int nvme_ns_report_zone_prop(struct nvme_ns *ns, struct blk_zone_dev *zprop)
>>>> +{
>>>> +	zprop->nr_zones = ns->nr_zones;
>>>> +	zprop->zoc = ns->zoc;
>>>> +	zprop->ozcs = ns->ozcs;
>>>> +	zprop->mar = ns->mar;
>>>> +	zprop->mor = ns->mor;
>>>> +	zprop->rrl = ns->rrl;
>>>> +	zprop->frl = ns->frl;
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev *zprop)
>>>> +{
>>>> +	struct nvme_ns_head *head = NULL;
>>>> +	struct nvme_ns *ns;
>>>> +	int srcu_idx, ret;
>>>> +
>>>> +	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
>>>> +	if (unlikely(!ns))
>>>> +		return -EWOULDBLOCK;
>>>> +
>>>> +	if (ns->head->ids.csi == NVME_CSI_ZNS)
>>>> +		ret = nvme_ns_report_zone_prop(ns, zprop);
>>>> +	else
>>>> +		ret = -EINVAL;
>>>> +	nvme_put_ns_from_disk(head, srcu_idx);
>>>> +
>>>> +	return ret;
>>>> +}
>>>> +
>>>>  blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
>>>>  		struct nvme_command *c, enum nvme_zone_mgmt_action action)
>>>>  {
>>>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>>>> index 8308d8a3720b..0c0faa58b7f4 100644
>>>> --- a/include/linux/blkdev.h
>>>> +++ b/include/linux/blkdev.h
>>>> @@ -372,6 +372,8 @@ extern int blkdev_zone_ops_ioctl(struct block_device *bdev, fmode_t mode,
>>>>  				  unsigned int cmd, unsigned long arg);
>>>>  extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
>>>>  				  unsigned int cmd, unsigned long arg);
>>>> +extern int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>>>> +			unsigned int cmd, unsigned long arg);
>>>>  #else /* CONFIG_BLK_DEV_ZONED */
>>>>  static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
>>>> @@ -400,6 +402,12 @@ static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
>>>>  	return -ENOTTY;
>>>>  }
>>>> +static inline int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
>>>> +				      unsigned int cmd, unsigned long arg)
>>>> +{
>>>> +	return -ENOTTY;
>>>> +}
>>>> +
>>>>  #endif /* CONFIG_BLK_DEV_ZONED */
>>>>  struct request_queue {
>>>> @@ -1770,6 +1778,7 @@ struct block_device_operations {
>>>>  	int (*report_zones)(struct gendisk *, sector_t sector,
>>>>  			unsigned int nr_zones, report_zones_cb cb, void *data);
>>>>  	char *(*devnode)(struct gendisk *disk, umode_t *mode);
>>>> +	int (*report_zone_p)(struct gendisk *disk, struct blk_zone_dev *zprop);
>>>>  	struct module *owner;
>>>>  	const struct pr_ops *pr_ops;
>>>>  };
>>>> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
>>>> index d0978ee10fc7..0c49a4b2ce5d 100644
>>>> --- a/include/uapi/linux/blkzoned.h
>>>> +++ b/include/uapi/linux/blkzoned.h
>>>> @@ -142,6 +142,18 @@ struct blk_zone_range {
>>>>  	__u64		nr_sectors;
>>>>  };
>>>> +struct blk_zone_dev {
>>>> +	__u32	nr_zones;
>>>> +	__u32	mar;
>>>> +	__u32	mor;
>>>> +	__u32	rrl;
>>>> +	__u32	frl;
>>>> +	__u16	zoc;
>>>> +	__u16	ozcs;
>>>> +	__u32	rsv31[2];
>>>> +	__u64	rsv63[4];
>>>> +};
>>>> +
>>>>  /**
>>>>   * enum blk_zone_action - Zone state transitions managed from user-space
>>>>   *
>>>> @@ -209,5 +221,6 @@ struct blk_zone_mgmt {
>>>>  #define BLKCLOSEZONE	_IOW(0x12, 135, struct blk_zone_range)
>>>>  #define BLKFINISHZONE	_IOW(0x12, 136, struct blk_zone_range)
>>>>  #define BLKMGMTZONE	_IOR(0x12, 137, struct blk_zone_mgmt)
>>>> +#define BLKZONEDEVPROP	_IOR(0x12, 138, struct blk_zone_dev)
>>>>  #endif /* _UAPI_BLKZONED_H */
>>>
>>> Nak. These properties can already be retrieved using the nvme ioctl
>>> passthru command and support have also been added to nvme-cli.
>>>
>>
>> These properties are intended to be consumed by an application, so
>> nvme-cli is of not much use. I would also like to avoid sysfs variables.
>
>Why not sysfs ? These are device properties, they can be defined as sysfs device
>attributes. If there is an equivalent for ZBC/ZAC drives, you could even
>consider defining them as queue attributes as long as you also patch sd.c, but
>that may be pushing things too far.
>
>In any case, sysfs seems a much better approach to me as that would be limited
>to the NVMe driver rather than all this additional code in the block layer.

Ok. Will send a V2 moving it to sysfs.

Thanks!
Javier
Javier González June 26, 2020, 6:28 a.m. UTC | #10
On 26.06.2020 05:25, Keith Busch wrote:
>On Thu, Jun 25, 2020 at 09:42:48PM +0200, Javier González wrote:
>> We can use nvme passthru, but this bypasses the zoned block abstraction.
>> Why not representing ZNS features in the standard zoned block API?
>
>This looks too nvme zns specific to want the block layer in the middle.
>Just use the driver's passthrough interface.

Ok. Is it OK with you to expose them in sysfs as Damien suggested?

Javier
Keith Busch June 26, 2020, 3:52 p.m. UTC | #11
On Fri, Jun 26, 2020 at 08:28:28AM +0200, Javier González wrote:
> On 26.06.2020 05:25, Keith Busch wrote:
> > On Thu, Jun 25, 2020 at 09:42:48PM +0200, Javier González wrote:
> > > We can use nvme passthru, but this bypasses the zoned block abstraction.
> > > Why not representing ZNS features in the standard zoned block API?
> > 
> > This looks too nvme zns specific to want the block layer in the middle.
> > Just use the driver's passthrough interface.
> 
> Ok. Is it OK with you to expose them in sysfs as Damien suggested?

sysfs sounds fine to me for some attributes (ex: mar/mor), but I don't
think every zns property warrants exposing through this interface. I
just don't want to corner driver maintenance into chasing every spec
field. If applications really want every last detail, they can get it
directly from the device without any kernel dependencies.
Javier González June 26, 2020, 4:25 p.m. UTC | #12
> On 26 Jun 2020, at 17.52, Keith Busch <kbusch@kernel.org> wrote:
> 
> On Fri, Jun 26, 2020 at 08:28:28AM +0200, Javier González wrote:
>>> On 26.06.2020 05:25, Keith Busch wrote:
>>> On Thu, Jun 25, 2020 at 09:42:48PM +0200, Javier González wrote:
>>>> We can use nvme passthru, but this bypasses the zoned block abstraction.
>>>> Why not representing ZNS features in the standard zoned block API?
>>> 
>>> This looks too nvme zns specific to want the block layer in the middle.
>>> Just use the driver's passthrough interface.
>> 
>> Ok. Is it OK with you to expose them in sysfs as Damien suggested?
> 
> sysfs sounds fine to me for some attributes (ex: mar/mor), but I don't
> think every zns property warrants exposing through this interface. I
> just don't want to corner driver maintenance into chasing every spec
> field. If applications really want every last detail, they can get it
> directly from the device without any kernel dependencies.

Ok. Think Niklas sent a patch with mar/mor already. Will check which ones we depend on in user space.
diff mbox series

Patch

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 704fc15813d1..39ec72af9537 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -169,6 +169,17 @@  int blkdev_report_zones(struct block_device *bdev, sector_t sector,
 }
 EXPORT_SYMBOL_GPL(blkdev_report_zones);
 
+static int blkdev_report_zonedev_prop(struct block_device *bdev,
+				      struct blk_zone_dev *zprop)
+{
+	struct gendisk *disk = bdev->bd_disk;
+
+	if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zone_p))
+		return -EOPNOTSUPP;
+
+	return disk->fops->report_zone_p(disk, zprop);
+}
+
 static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
 						sector_t sector,
 						sector_t nr_sectors)
@@ -430,6 +441,41 @@  int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
 				GFP_KERNEL);
 }
 
+int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
+			unsigned int cmd, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	struct request_queue *q;
+	struct blk_zone_dev zprop;
+	int ret;
+
+	if (!argp)
+		return -EINVAL;
+
+	q = bdev_get_queue(bdev);
+	if (!q)
+		return -ENXIO;
+
+	if (!blk_queue_is_zoned(q))
+		return -ENOTTY;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (!(mode & FMODE_WRITE))
+		return -EBADF;
+
+	ret = blkdev_report_zonedev_prop(bdev, &zprop);
+	if (ret)
+		goto out;
+
+	if (copy_to_user(argp, &zprop, sizeof(struct blk_zone_dev)))
+		return -EFAULT;
+
+out:
+	return ret;
+}
+
 static inline unsigned long *blk_alloc_zone_bitmap(int node,
 						   unsigned int nr_zones)
 {
diff --git a/block/ioctl.c b/block/ioctl.c
index 0ea29754e7dd..f7b4e0f2dd4c 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -517,6 +517,8 @@  static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
 		return blkdev_zone_ops_ioctl(bdev, mode, cmd, arg);
 	case BLKMGMTZONE:
 		return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
+	case BLKZONEDEVPROP:
+		return blkdev_zonedev_prop(bdev, mode, cmd, arg);
 	case BLKGETZONESZ:
 		return put_uint(argp, bdev_zone_sectors(bdev));
 	case BLKGETNRZONES:
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 5b95c81d2a2d..a32c909a915f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2254,6 +2254,7 @@  static const struct block_device_operations nvme_fops = {
 	.getgeo		= nvme_getgeo,
 	.revalidate_disk= nvme_revalidate_disk,
 	.report_zones	= nvme_report_zones,
+	.report_zone_p	= nvme_report_zone_prop,
 	.pr_ops		= &nvme_pr_ops,
 };
 
@@ -2280,6 +2281,7 @@  const struct block_device_operations nvme_ns_head_ops = {
 	.compat_ioctl	= nvme_compat_ioctl,
 	.getgeo		= nvme_getgeo,
 	.report_zones	= nvme_report_zones,
+	.report_zone_p	= nvme_report_zone_prop,
 	.pr_ops		= &nvme_pr_ops,
 };
 #endif /* CONFIG_NVME_MULTIPATH */
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index ecf443efdf91..172e0531f37f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -407,6 +407,14 @@  struct nvme_ns {
 	u8 pi_type;
 #ifdef CONFIG_BLK_DEV_ZONED
 	u64 zsze;
+
+	u32 nr_zones;
+	u32 mar;
+	u32 mor;
+	u32 rrl;
+	u32 frl;
+	u16 zoc;
+	u16 ozcs;
 #endif
 	unsigned long features;
 	unsigned long flags;
@@ -704,11 +712,14 @@  int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
 int nvme_report_zones(struct gendisk *disk, sector_t sector,
 		      unsigned int nr_zones, report_zones_cb cb, void *data);
 
+int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev *zprop);
+
 blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
 				       struct nvme_command *cmnd,
 				       enum nvme_zone_mgmt_action action);
 #else
 #define nvme_report_zones NULL
+#define nvme_report_zone_prop NULL
 
 static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
 		struct request *req, struct nvme_command *cmnd,
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 2e6512ac6f01..258d03610cc0 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -32,6 +32,28 @@  static int nvme_set_max_append(struct nvme_ctrl *ctrl)
 	return 0;
 }
 
+static u64 nvme_zns_nr_zones(struct nvme_ns *ns)
+{
+	struct nvme_command c = { };
+	struct nvme_zone_report report;
+	int buflen = sizeof(struct nvme_zone_report);
+	int ret;
+
+	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
+	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
+	c.zmr.slba = cpu_to_le64(0);
+	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
+	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
+	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
+	c.zmr.pr = 0;
+
+	ret = nvme_submit_sync_cmd(ns->queue, &c, &report, buflen);
+	if (ret)
+		return ret;
+
+	return le64_to_cpu(report.nr_zones);
+}
+
 int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
 			  unsigned lbaf)
 {
@@ -87,6 +109,13 @@  int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
 		goto free_data;
 	}
 
+	ns->nr_zones = nvme_zns_nr_zones(ns);
+	ns->mar = le32_to_cpu(id->mar);
+	ns->mor = le32_to_cpu(id->mor);
+	ns->rrl = le32_to_cpu(id->rrl);
+	ns->frl = le32_to_cpu(id->frl);
+	ns->zoc = le16_to_cpu(id->zoc);
+
 	q->limits.zoned = BLK_ZONED_HM;
 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
 free_data:
@@ -230,6 +259,38 @@  int nvme_report_zones(struct gendisk *disk, sector_t sector,
 	return ret;
 }
 
+static int nvme_ns_report_zone_prop(struct nvme_ns *ns, struct blk_zone_dev *zprop)
+{
+	zprop->nr_zones = ns->nr_zones;
+	zprop->zoc = ns->zoc;
+	zprop->ozcs = ns->ozcs;
+	zprop->mar = ns->mar;
+	zprop->mor = ns->mor;
+	zprop->rrl = ns->rrl;
+	zprop->frl = ns->frl;
+
+	return 0;
+}
+
+int nvme_report_zone_prop(struct gendisk *disk, struct blk_zone_dev *zprop)
+{
+	struct nvme_ns_head *head = NULL;
+	struct nvme_ns *ns;
+	int srcu_idx, ret;
+
+	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
+	if (unlikely(!ns))
+		return -EWOULDBLOCK;
+
+	if (ns->head->ids.csi == NVME_CSI_ZNS)
+		ret = nvme_ns_report_zone_prop(ns, zprop);
+	else
+		ret = -EINVAL;
+	nvme_put_ns_from_disk(head, srcu_idx);
+
+	return ret;
+}
+
 blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *c, enum nvme_zone_mgmt_action action)
 {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8308d8a3720b..0c0faa58b7f4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -372,6 +372,8 @@  extern int blkdev_zone_ops_ioctl(struct block_device *bdev, fmode_t mode,
 				  unsigned int cmd, unsigned long arg);
 extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
 				  unsigned int cmd, unsigned long arg);
+extern int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
+			unsigned int cmd, unsigned long arg);
 #else /* CONFIG_BLK_DEV_ZONED */
 
 static inline unsigned int blkdev_nr_zones(struct gendisk *disk)
@@ -400,6 +402,12 @@  static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
 	return -ENOTTY;
 }
 
+static inline int blkdev_zonedev_prop(struct block_device *bdev, fmode_t mode,
+				      unsigned int cmd, unsigned long arg)
+{
+	return -ENOTTY;
+}
+
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 struct request_queue {
@@ -1770,6 +1778,7 @@  struct block_device_operations {
 	int (*report_zones)(struct gendisk *, sector_t sector,
 			unsigned int nr_zones, report_zones_cb cb, void *data);
 	char *(*devnode)(struct gendisk *disk, umode_t *mode);
+	int (*report_zone_p)(struct gendisk *disk, struct blk_zone_dev *zprop);
 	struct module *owner;
 	const struct pr_ops *pr_ops;
 };
diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
index d0978ee10fc7..0c49a4b2ce5d 100644
--- a/include/uapi/linux/blkzoned.h
+++ b/include/uapi/linux/blkzoned.h
@@ -142,6 +142,18 @@  struct blk_zone_range {
 	__u64		nr_sectors;
 };
 
+struct blk_zone_dev {
+	__u32	nr_zones;
+	__u32	mar;
+	__u32	mor;
+	__u32	rrl;
+	__u32	frl;
+	__u16	zoc;
+	__u16	ozcs;
+	__u32	rsv31[2];
+	__u64	rsv63[4];
+};
+
 /**
  * enum blk_zone_action - Zone state transitions managed from user-space
  *
@@ -209,5 +221,6 @@  struct blk_zone_mgmt {
 #define BLKCLOSEZONE	_IOW(0x12, 135, struct blk_zone_range)
 #define BLKFINISHZONE	_IOW(0x12, 136, struct blk_zone_range)
 #define BLKMGMTZONE	_IOR(0x12, 137, struct blk_zone_mgmt)
+#define BLKZONEDEVPROP	_IOR(0x12, 138, struct blk_zone_dev)
 
 #endif /* _UAPI_BLKZONED_H */