diff mbox series

[V2] nvme: enable char device per namespace

Message ID 20201208132934.625-1-javier.gonz@samsung.com (mailing list archive)
State New, archived
Headers show
Series [V2] nvme: enable char device per namespace | expand

Commit Message

Javier González Dec. 8, 2020, 1:29 p.m. UTC
From: Javier González <javier.gonz@samsung.com>

Create a char device per NVMe namespace. This char device is always
initialized, independently of whether the features implemented by the
device are supported by the kernel. User-space can therefore always
issue IOCTLs to the NVMe driver using this char device.

The char device is presented as /dev/nvmeXnYc. This naming aligns with
nvme-cli filters, so the char device should be usable without tool
changes. It also follows a new naming convention to avoid confusion with
the existing hidden device (i.e., nvmeXcYnZ).

Changes since V1:

  - Remove patches 1-3 which are already picked up by Christoph
  - Change the char device and sysfs entries to nvmeXnYc (the trailing
    "c" signals a char device)
  - Address Minwoo's comments on inline functions and style

Signed-off-by: Javier González <javier.gonz@samsung.com>
---
 drivers/nvme/host/core.c | 141 +++++++++++++++++++++++++++++++++++----
 drivers/nvme/host/nvme.h |   8 +++
 2 files changed, 135 insertions(+), 14 deletions(-)

Comments

Christoph Hellwig Dec. 8, 2020, 2:21 p.m. UTC | #1
A bunch of nitpicks (mostly naming as usual, sorry..):

> +static int __nvme_ns_ioctl(struct gendisk *disk, unsigned int cmd,
> +			   unsigned long arg)
>  {

What about nvme_disk_ioctl instead as that is what it operates on?

> +static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
> +		      unsigned int cmd, unsigned long arg)
> +{
> +	return __nvme_ns_ioctl(bdev->bd_disk, cmd, arg);
> +}
> +
> +static long nvme_cdev_ioctl(struct file *file, unsigned int cmd,
> +			    unsigned long arg)
> +{
> +	return __nvme_ns_ioctl((struct gendisk *)file->private_data, cmd, arg);
> +}

No need for the cast.

Also can we keep all the char device methods together close to the
struct file_operations declaration?  I just prefer to keep the code
a little grouped.

> -static int nvme_open(struct block_device *bdev, fmode_t mode)
> +static int __nvme_open(struct nvme_ns *ns)
>  {
> -	struct nvme_ns *ns = bdev->bd_disk->private_data;
> -
>  #ifdef CONFIG_NVME_MULTIPATH
>  	/* should never be called due to GENHD_FL_HIDDEN */
>  	if (WARN_ON_ONCE(ns->head->disk))
> @@ -1846,12 +1859,24 @@ static int nvme_open(struct block_device *bdev, fmode_t mode)
>  	return -ENXIO;
>  }
>  
> +static void __nvme_release(struct nvme_ns *ns)
> +{
> +	module_put(ns->ctrl->ops->module);
> +	nvme_put_ns(ns);
> +}

nvme_ns_open and nvme_ns_release?

> +
> +static int nvme_open(struct block_device *bdev, fmode_t mode)
> +{
> +	struct nvme_ns *ns = bdev->bd_disk->private_data;
> +
> +	return __nvme_open(ns);
> +}
> +
>  static void nvme_release(struct gendisk *disk, fmode_t mode)
>  {
>  	struct nvme_ns *ns = disk->private_data;
>  
> -	module_put(ns->ctrl->ops->module);
> -	nvme_put_ns(ns);
> +	__nvme_release(ns);

No need for the local ns variable in both cases.

> +static int nvme_cdev_open(struct inode *inode, struct file *file)
> +{
> +	struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev);
> +	int ret;
> +
> +	ret = __nvme_open(ns);
> +	if (!ret)
> +		file->private_data = ns->disk;
> +
> +	return ret;

Do we need the ->private_data assignment at all?  I think the ioctl
handler could just grab it directly from i_cdev.

> +	sprintf(cdisk_name, "nvme%dn%dc", ctrl->instance, ns->head->instance);

And the most important naming decision is this.  I have two issues with
naming still:

 - we already use the c for controller in the hidden disk naming.  Although
   that is in a different position, I think this is not super intuitive.
 - this is missing multipath support entirely, so once we want to add
   multipath support we'll run into issues.  So maybe use something
   based off the hidden node naming?  E.g.:

	sprintf(disk_name, "nvme-generic-%dc%dn%d", ctrl->subsys->instance,
		ctrl->instance, ns->head->instance);

> +	/* When the device does not support any of the features required by the
> +	 * kernel (or viceversa), hide the block device. We can still rely on
> +	 * the namespace char device for submitting IOCTLs
> +	 */

Normal kernel comment style is the opening

	/*

on its own line.

>  	if (nvme_update_ns_info(ns, id))
> -		goto out_put_disk;
> +		disk->flags |= GENHD_FL_HIDDEN;

I don't think we can do this based on all the error returns.  I think
we'll have to move the flags manipulation into nvme_update_ns_info to
also cover the revalidate case.
Keith Busch Dec. 8, 2020, 7:03 p.m. UTC | #2
On Tue, Dec 08, 2020 at 03:21:51PM +0100, Christoph Hellwig wrote:
> > +	sprintf(cdisk_name, "nvme%dn%dc", ctrl->instance, ns->head->instance);
> 
> And the most important naming decision is this.  I have two issues with
> naming still:
> 
>  - we already use the c for controller in the hidden disk naming.  Although
>    that is in a different position, I think this is not super intuitive.
>  - this is missing multipath support entirely, so once we want to add
>    multipath support we'll run into issues.  So maybe use something
>    based off the hidden node naming?  E.g.:
> 
> 	sprintf(disk_name, "nvme-generic-%dc%dn%d", ctrl->subsys->instance,
> 		ctrl->instance, ns->head->instance);

+1 for this naming suggestion.
Javier González Dec. 9, 2020, 9:16 a.m. UTC | #3
On 08.12.2020 15:21, Christoph Hellwig wrote:
>A bunch of nitpicks (mostly naming as usual, sorry..):

No worries. Thanks for taking the time.

>
>> +static int __nvme_ns_ioctl(struct gendisk *disk, unsigned int cmd,
>> +			   unsigned long arg)
>>  {
>
>What about nvme_disk_ioctl instead as that is what it operates on?

Sure.

>
>> +static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
>> +		      unsigned int cmd, unsigned long arg)
>> +{
>> +	return __nvme_ns_ioctl(bdev->bd_disk, cmd, arg);
>> +}
>> +
>> +static long nvme_cdev_ioctl(struct file *file, unsigned int cmd,
>> +			    unsigned long arg)
>> +{
>> +	return __nvme_ns_ioctl((struct gendisk *)file->private_data, cmd, arg);
>> +}
>
>No need for the cast.
>
>Also can we keep all the char device methods together close to the
>struct file_operations declaration?  I just prefer to keep the code
>a little grouped.

Perfect.

>
>> -static int nvme_open(struct block_device *bdev, fmode_t mode)
>> +static int __nvme_open(struct nvme_ns *ns)
>>  {
>> -	struct nvme_ns *ns = bdev->bd_disk->private_data;
>> -
>>  #ifdef CONFIG_NVME_MULTIPATH
>>  	/* should never be called due to GENHD_FL_HIDDEN */
>>  	if (WARN_ON_ONCE(ns->head->disk))
>> @@ -1846,12 +1859,24 @@ static int nvme_open(struct block_device *bdev, fmode_t mode)
>>  	return -ENXIO;
>>  }
>>
>> +static void __nvme_release(struct nvme_ns *ns)
>> +{
>> +	module_put(ns->ctrl->ops->module);
>> +	nvme_put_ns(ns);
>> +}
>
>nvme_ns_open and nvme_ns_release?

ok.

>
>> +
>> +static int nvme_open(struct block_device *bdev, fmode_t mode)
>> +{
>> +	struct nvme_ns *ns = bdev->bd_disk->private_data;
>> +
>> +	return __nvme_open(ns);
>> +}
>> +
>>  static void nvme_release(struct gendisk *disk, fmode_t mode)
>>  {
>>  	struct nvme_ns *ns = disk->private_data;
>>
>> -	module_put(ns->ctrl->ops->module);
>> -	nvme_put_ns(ns);
>> +	__nvme_release(ns);
>
>No need for the local ns variable in both cases.

ok.
>
>> +static int nvme_cdev_open(struct inode *inode, struct file *file)
>> +{
>> +	struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev);
>> +	int ret;
>> +
>> +	ret = __nvme_open(ns);
>> +	if (!ret)
>> +		file->private_data = ns->disk;
>> +
>> +	return ret;
>
>Do we need the ->private_data assignment at all?  I think the ioctl
>handler could just grab it directly from i_cdev.

Mmmm. Good point. I'll try that.

>
>> +	sprintf(cdisk_name, "nvme%dn%dc", ctrl->instance, ns->head->instance);
>
>And the most important naming decision is this.  I have two issues with
>naming still:
>
> - we already use the c for controller in the hidden disk naming.  Although
>   that is in a different position, I think this is not super intuitive.
> - this is missing multipath support entirely, so once we want to add
>   multipath support we'll run into issues.  So maybe use something
>   based off the hidden node naming?  E.g.:
>
>	sprintf(disk_name, "nvme-generic-%dc%dn%d", ctrl->subsys->instance,
>		ctrl->instance, ns->head->instance);

Perfect. Sounds like a good compromise to still keep the original hidden
disk. Keith is happy too, so we have a plan.

>> +	/* When the device does not support any of the features required by the
>> +	 * kernel (or viceversa), hide the block device. We can still rely on
>> +	 * the namespace char device for submitting IOCTLs
>> +	 */
>
>Normal kernel comment style is the opening
>
>	/*
>
>on its own line.

OK.

>
>>  	if (nvme_update_ns_info(ns, id))
>> -		goto out_put_disk;
>> +		disk->flags |= GENHD_FL_HIDDEN;
>
>I don't think we can do this based on all the error returns.  I think
>we'll have to move the flags manipulation into nvme_update_ns_info to
>also cover the revalidate case.

Ok.

I am working on the multipath part. I'll send a V3 with all these
comments and then a follow-up patch with multipath.
diff mbox series

Patch

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 99f91efe3824..e446aaba5ccd 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -86,7 +86,9 @@  static DEFINE_MUTEX(nvme_subsystems_lock);
 
 static DEFINE_IDA(nvme_instance_ida);
 static dev_t nvme_ctrl_base_chr_devt;
+static dev_t nvme_ns_base_chr_devt;
 static struct class *nvme_class;
+static struct class *nvme_ns_class;
 static struct class *nvme_subsys_class;
 
 static void nvme_put_subsystem(struct nvme_subsystem *subsys);
@@ -538,6 +540,7 @@  static void nvme_free_ns(struct kref *kref)
 	if (ns->ndev)
 		nvme_nvm_unregister(ns);
 
+	cdev_device_del(&ns->cdev, &ns->cdev_device);
 	put_disk(ns->disk);
 	nvme_put_ns_head(ns->head);
 	nvme_put_ctrl(ns->ctrl);
@@ -1738,15 +1741,15 @@  static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
 	return ret;
 }
 
-static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
-		unsigned int cmd, unsigned long arg)
+static int __nvme_ns_ioctl(struct gendisk *disk, unsigned int cmd,
+			   unsigned long arg)
 {
 	struct nvme_ns_head *head = NULL;
 	void __user *argp = (void __user *)arg;
 	struct nvme_ns *ns;
 	int srcu_idx, ret;
 
-	ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
+	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
 	if (unlikely(!ns))
 		return -EWOULDBLOCK;
 
@@ -1783,6 +1786,18 @@  static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
 	return ret;
 }
 
+static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+		      unsigned int cmd, unsigned long arg)
+{
+	return __nvme_ns_ioctl(bdev->bd_disk, cmd, arg);
+}
+
+static long nvme_cdev_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	return __nvme_ns_ioctl((struct gendisk *)file->private_data, cmd, arg);
+}
+
 #ifdef CONFIG_COMPAT
 struct nvme_user_io32 {
 	__u8	opcode;
@@ -1824,10 +1839,8 @@  static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
 #define nvme_compat_ioctl	NULL
 #endif /* CONFIG_COMPAT */
 
-static int nvme_open(struct block_device *bdev, fmode_t mode)
+static int __nvme_open(struct nvme_ns *ns)
 {
-	struct nvme_ns *ns = bdev->bd_disk->private_data;
-
 #ifdef CONFIG_NVME_MULTIPATH
 	/* should never be called due to GENHD_FL_HIDDEN */
 	if (WARN_ON_ONCE(ns->head->disk))
@@ -1846,12 +1859,24 @@  static int nvme_open(struct block_device *bdev, fmode_t mode)
 	return -ENXIO;
 }
 
+static void __nvme_release(struct nvme_ns *ns)
+{
+	module_put(ns->ctrl->ops->module);
+	nvme_put_ns(ns);
+}
+
+static int nvme_open(struct block_device *bdev, fmode_t mode)
+{
+	struct nvme_ns *ns = bdev->bd_disk->private_data;
+
+	return __nvme_open(ns);
+}
+
 static void nvme_release(struct gendisk *disk, fmode_t mode)
 {
 	struct nvme_ns *ns = disk->private_data;
 
-	module_put(ns->ctrl->ops->module);
-	nvme_put_ns(ns);
+	__nvme_release(ns);
 }
 
 static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@@ -1863,6 +1888,26 @@  static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
+static int nvme_cdev_open(struct inode *inode, struct file *file)
+{
+	struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev);
+	int ret;
+
+	ret = __nvme_open(ns);
+	if (!ret)
+		file->private_data = ns->disk;
+
+	return ret;
+}
+
+static int nvme_cdev_release(struct inode *inode, struct file *file)
+{
+	struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev);
+
+	__nvme_release(ns);
+	return 0;
+}
+
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
 				u32 max_integrity_segments)
@@ -2346,6 +2391,14 @@  static const struct block_device_operations nvme_bdev_ops = {
 	.pr_ops		= &nvme_pr_ops,
 };
 
+static const struct file_operations nvme_cdev_fops = {
+	.owner		= THIS_MODULE,
+	.open		= nvme_cdev_open,
+	.release	= nvme_cdev_release,
+	.unlocked_ioctl	= nvme_cdev_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
+};
+
 #ifdef CONFIG_NVME_MULTIPATH
 static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode)
 {
@@ -3343,6 +3396,9 @@  static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
 {
 	struct gendisk *disk = dev_to_disk(dev);
 
+	if (dev->class == nvme_ns_class)
+		return nvme_get_ns_from_cdev(dev)->head;
+
 	if (disk->fops == &nvme_bdev_ops)
 		return nvme_get_ns_from_dev(dev)->head;
 	else
@@ -3474,6 +3530,11 @@  const struct attribute_group *nvme_ns_id_attr_groups[] = {
 	NULL,
 };
 
+const struct attribute_group *nvme_ns_char_id_attr_groups[] = {
+	&nvme_ns_id_attr_group,
+	NULL,
+};
+
 #define nvme_show_str_function(field)						\
 static ssize_t  field##_show(struct device *dev,				\
 			    struct device_attribute *attr, char *buf)		\
@@ -3866,6 +3927,35 @@  struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 }
 EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU);
 
+static int nvme_alloc_chardev_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
+{
+	char cdisk_name[DISK_NAME_LEN];
+	int ret;
+
+	device_initialize(&ns->cdev_device);
+	ns->cdev_device.devt = MKDEV(MAJOR(nvme_ns_base_chr_devt),
+				     ns->head->instance);
+	ns->cdev_device.class = nvme_ns_class;
+	ns->cdev_device.parent = ctrl->device;
+	ns->cdev_device.groups = nvme_ns_char_id_attr_groups;
+	dev_set_drvdata(&ns->cdev_device, ns);
+
+	sprintf(cdisk_name, "nvme%dn%dc", ctrl->instance, ns->head->instance);
+
+	ret = dev_set_name(&ns->cdev_device, "%s", cdisk_name);
+	if (ret)
+		return ret;
+
+	cdev_init(&ns->cdev, &nvme_cdev_fops);
+	ns->cdev.owner = ctrl->ops->module;
+
+	ret = cdev_device_add(&ns->cdev, &ns->cdev_device);
+	if (ret)
+		kfree_const(ns->cdev_device.kobj.name);
+
+	return ret;
+}
+
 static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 		struct nvme_ns_ids *ids)
 {
@@ -3912,8 +4002,12 @@  static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
 	ns->disk = disk;
 
+	/* When the device does not support any of the features required by the
+	 * kernel (or viceversa), hide the block device. We can still rely on
+	 * the namespace char device for submitting IOCTLs
+	 */
 	if (nvme_update_ns_info(ns, id))
-		goto out_put_disk;
+		disk->flags |= GENHD_FL_HIDDEN;
 
 	if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
 		if (nvme_nvm_register(ns, disk_name, node)) {
@@ -3929,9 +4023,12 @@  static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	nvme_get_ctrl(ctrl);
 
 	device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups);
-
 	nvme_mpath_add_disk(ns, id);
 	nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
+
+	if (nvme_alloc_chardev_ns(ctrl, ns))
+		goto out_put_disk;
+
 	kfree(id);
 
 	return;
@@ -4733,23 +4830,38 @@  static int __init nvme_core_init(void)
 	if (result < 0)
 		goto destroy_delete_wq;
 
+	result = alloc_chrdev_region(&nvme_ns_base_chr_devt, 0,
+			NVME_MINORS, "nvmec");
+	if (result < 0)
+		goto unregister_dev_chrdev;
+
 	nvme_class = class_create(THIS_MODULE, "nvme");
 	if (IS_ERR(nvme_class)) {
 		result = PTR_ERR(nvme_class);
-		goto unregister_chrdev;
+		goto unregister_ns_chrdev;
 	}
 	nvme_class->dev_uevent = nvme_class_uevent;
 
+	nvme_ns_class = class_create(THIS_MODULE, "nvme-ns");
+	if (IS_ERR(nvme_ns_class)) {
+		result = PTR_ERR(nvme_ns_class);
+		goto destroy_dev_class;
+	}
+
 	nvme_subsys_class = class_create(THIS_MODULE, "nvme-subsystem");
 	if (IS_ERR(nvme_subsys_class)) {
 		result = PTR_ERR(nvme_subsys_class);
-		goto destroy_class;
+		goto destroy_ns_class;
 	}
 	return 0;
 
-destroy_class:
+destroy_ns_class:
+	class_destroy(nvme_ns_class);
+destroy_dev_class:
 	class_destroy(nvme_class);
-unregister_chrdev:
+unregister_ns_chrdev:
+	unregister_chrdev_region(nvme_ns_base_chr_devt, NVME_MINORS);
+unregister_dev_chrdev:
 	unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
 destroy_delete_wq:
 	destroy_workqueue(nvme_delete_wq);
@@ -4765,6 +4877,7 @@  static void __exit nvme_core_exit(void)
 {
 	class_destroy(nvme_subsys_class);
 	class_destroy(nvme_class);
+	unregister_chrdev_region(nvme_ns_base_chr_devt, NVME_MINORS);
 	unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
 	destroy_workqueue(nvme_delete_wq);
 	destroy_workqueue(nvme_reset_wq);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index bfcedfa4b057..1dd99f207aee 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -439,6 +439,9 @@  struct nvme_ns {
 	struct kref kref;
 	struct nvme_ns_head *head;
 
+	struct device cdev_device;	/* char device */
+	struct cdev cdev;
+
 	int lba_shift;
 	u16 ms;
 	u16 sgs;
@@ -818,6 +821,11 @@  static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
 	return dev_to_disk(dev)->private_data;
 }
 
+static inline struct nvme_ns *nvme_get_ns_from_cdev(struct device *dev)
+{
+	return dev_get_drvdata(dev);
+}
+
 #ifdef CONFIG_NVME_HWMON
 int nvme_hwmon_init(struct nvme_ctrl *ctrl);
 #else