diff mbox series

[4/4] nvme: enable char device per namespace

Message ID 20201201125610.17138-5-javier.gonz@samsung.com (mailing list archive)
State New, archived
Headers show
Series nvme: enable per-namespace char device | expand

Commit Message

Javier González Dec. 1, 2020, 12:56 p.m. UTC
From: Javier González <javier.gonz@samsung.com>

Create a char device per NVMe namespace. This char device is always
initialized, independently of whether the features implemented by the
device are supported by the kernel. User-space can therefore always
issue IOCTLs to the NVMe driver using this char device.

The char device is presented as /dev/nvmeXcYnZ to follow the hidden
block device. This naming also aligns with nvme-cli filters, so the char
device should be usable without tool changes.

Signed-off-by: Javier González <javier.gonz@samsung.com>
---
 drivers/nvme/host/core.c | 144 +++++++++++++++++++++++++++++++++++----
 drivers/nvme/host/nvme.h |   3 +
 2 files changed, 132 insertions(+), 15 deletions(-)

Comments

Minwoo Im Dec. 1, 2020, 2:03 p.m. UTC | #1
Hello,

On 20-12-01 13:56:10, javier@javigon.com wrote:
> From: Javier González <javier.gonz@samsung.com>
> 
> Create a char device per NVMe namespace. This char device is always
> initialized, independently of whether the features implemented by the
> device are supported by the kernel. User-space can therefore always
> issue IOCTLs to the NVMe driver using this char device.
> 
> The char device is presented as /dev/nvmeXcYnZ to follow the hidden
> block device. This naming also aligns with nvme-cli filters, so the char
> device should be usable without tool changes.
> 
> Signed-off-by: Javier González <javier.gonz@samsung.com>
> ---
>  drivers/nvme/host/core.c | 144 +++++++++++++++++++++++++++++++++++----
>  drivers/nvme/host/nvme.h |   3 +
>  2 files changed, 132 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 2c23ea6dc296..9c4acf2725f3 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -86,7 +86,9 @@ static DEFINE_MUTEX(nvme_subsystems_lock);
>  
>  static DEFINE_IDA(nvme_instance_ida);
>  static dev_t nvme_ctrl_base_chr_devt;
> +static dev_t nvme_ns_base_chr_devt;
>  static struct class *nvme_class;
> +static struct class *nvme_ns_class;
>  static struct class *nvme_subsys_class;
>  
>  static void nvme_put_subsystem(struct nvme_subsystem *subsys);
> @@ -497,6 +499,7 @@ static void nvme_free_ns(struct kref *kref)
>  	if (ns->ndev)
>  		nvme_nvm_unregister(ns);
>  
> +	cdev_device_del(&ns->cdev, &ns->cdev_device);
>  	put_disk(ns->disk);
>  	nvme_put_ns_head(ns->head);
>  	nvme_put_ctrl(ns->ctrl);
> @@ -1696,15 +1699,15 @@ static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
>  	return ret;
>  }
>  
> -static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
> -		unsigned int cmd, unsigned long arg)
> +static int __nvme_ns_ioctl(struct gendisk *disk, unsigned int cmd,
> +			   unsigned long arg)
>  {
>  	struct nvme_ns_head *head = NULL;
>  	void __user *argp = (void __user *)arg;
>  	struct nvme_ns *ns;
>  	int srcu_idx, ret;
>  
> -	ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
> +	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
>  	if (unlikely(!ns))
>  		return -EWOULDBLOCK;
>  
> @@ -1741,6 +1744,18 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
>  	return ret;
>  }
>  
> +static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
> +		      unsigned int cmd, unsigned long arg)
> +{
> +	return __nvme_ns_ioctl(bdev->bd_disk, cmd, arg);
> +}
> +
> +static long nvme_cdev_ioctl(struct file *file, unsigned int cmd,
> +			    unsigned long arg)
> +{
> +	return __nvme_ns_ioctl((struct gendisk *)file->private_data, cmd, arg);
> +}
> +
>  #ifdef CONFIG_COMPAT
>  struct nvme_user_io32 {
>  	__u8	opcode;
> @@ -1782,10 +1797,8 @@ static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
>  #define nvme_compat_ioctl	NULL
>  #endif /* CONFIG_COMPAT */
>  
> -static int nvme_open(struct block_device *bdev, fmode_t mode)
> +static int __nvme_open(struct nvme_ns *ns)
>  {
> -	struct nvme_ns *ns = bdev->bd_disk->private_data;
> -
>  #ifdef CONFIG_NVME_MULTIPATH
>  	/* should never be called due to GENHD_FL_HIDDEN */
>  	if (WARN_ON_ONCE(ns->head->disk))
> @@ -1804,12 +1817,24 @@ static int nvme_open(struct block_device *bdev, fmode_t mode)
>  	return -ENXIO;
>  }
>  
> +static void __nvme_release(struct nvme_ns *ns)
> +{
> +	module_put(ns->ctrl->ops->module);
> +	nvme_put_ns(ns);
> +}
> +
> +static int nvme_open(struct block_device *bdev, fmode_t mode)
> +{
> +	struct nvme_ns *ns = bdev->bd_disk->private_data;
> +
> +	return __nvme_open(ns);
> +}
> +
>  static void nvme_release(struct gendisk *disk, fmode_t mode)
>  {
>  	struct nvme_ns *ns = disk->private_data;
>  
> -	module_put(ns->ctrl->ops->module);
> -	nvme_put_ns(ns);
> +	__nvme_release(ns);
>  }
>  
>  static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
> @@ -1821,6 +1846,26 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
>  	return 0;
>  }
>  
> +static int nvme_cdev_open(struct inode *inode, struct file *file)
> +{
> +	struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev);
> +	int ret;
> +
> +	ret = __nvme_open(ns);
> +	if (!ret)
> +		file->private_data = ns->disk;
> +
> +	return ret;
> +}
> +
> +static int nvme_cdev_release(struct inode *inode, struct file *file)
> +{
> +	struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev);
> +
> +	__nvme_release(ns);
> +	return 0;
> +}
> +
>  #ifdef CONFIG_BLK_DEV_INTEGRITY
>  static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
>  				u32 max_integrity_segments)
> @@ -2303,6 +2348,14 @@ static const struct block_device_operations nvme_bdev_ops = {
>  	.pr_ops		= &nvme_pr_ops,
>  };
>  
> +static const struct file_operations nvme_cdev_fops = {
> +	.owner		= THIS_MODULE,
> +	.open		= nvme_cdev_open,
> +	.release	= nvme_cdev_release,
> +	.unlocked_ioctl	= nvme_cdev_ioctl,
> +	.compat_ioctl	= compat_ptr_ioctl,
> +};
> +
>  #ifdef CONFIG_NVME_MULTIPATH
>  static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode)
>  {
> @@ -3301,6 +3354,9 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
>  {
>  	struct gendisk *disk = dev_to_disk(dev);
>  
> +	if (dev->class == nvme_ns_class)
> +		return ((struct nvme_ns *)dev_get_drvdata(dev))->head;

I think it would be better if it can have inline function
nvme_get_ns_from_cdev() just like nvme_get_ns_from_dev().

> +
>  	if (disk->fops == &nvme_bdev_ops)
>  		return nvme_get_ns_from_dev(dev)->head;
>  	else
> @@ -3390,7 +3446,7 @@ static struct attribute *nvme_ns_id_attrs[] = {
>  };
>  
>  static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
> -		struct attribute *a, int n)
> +	       struct attribute *a, int n)

Unrelated changes for this patch.

>  {
>  	struct device *dev = container_of(kobj, struct device, kobj);
>  	struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids;
> @@ -3432,6 +3488,11 @@ const struct attribute_group *nvme_ns_id_attr_groups[] = {
>  	NULL,
>  };
>  
> +const struct attribute_group *nvme_ns_char_id_attr_groups[] = {
> +	&nvme_ns_id_attr_group,
> +	NULL,
> +};
> +
>  #define nvme_show_str_function(field)						\
>  static ssize_t  field##_show(struct device *dev,				\
>  			    struct device_attribute *attr, char *buf)		\
> @@ -3824,6 +3885,36 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
>  }
>  EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU);
>  
> +static int nvme_alloc_chardev_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
> +{
> +	char cdisk_name[DISK_NAME_LEN];
> +	int ret = 0;

Unnecessary initialization for local variable.

> +
> +	device_initialize(&ns->cdev_device);
> +	ns->cdev_device.devt = MKDEV(MAJOR(nvme_ns_base_chr_devt),
> +				     ns->head->instance);
> +	ns->cdev_device.class = nvme_ns_class;
> +	ns->cdev_device.parent = ctrl->device;
> +	ns->cdev_device.groups = nvme_ns_char_id_attr_groups;
> +	dev_set_drvdata(&ns->cdev_device, ns);
> +
> +	sprintf(cdisk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
> +			ctrl->instance, ns->head->instance);

In multi-path, private namespaces for a head are not in /dev, so I don't
think this will hurt private namespaces (e.g., nvme0c0n1), but it looks
like it will cause a bit of confusion between the chardev and the hidden
blkdev.

I'm not against updating nvme-cli as well, even if the naming convention
ends up being different from nvmeXcYnZ.
Javier González Dec. 1, 2020, 6:57 p.m. UTC | #2
On 01.12.2020 23:03, Minwoo Im wrote:
>Hello,
>
>On 20-12-01 13:56:10, javier@javigon.com wrote:
>> From: Javier González <javier.gonz@samsung.com>
>>
>> Create a char device per NVMe namespace. This char device is always
>> initialized, independently of whether the features implemented by the
>> device are supported by the kernel. User-space can therefore always
>> issue IOCTLs to the NVMe driver using this char device.
>>
>> The char device is presented as /dev/nvmeXcYnZ to follow the hidden
>> block device. This naming also aligns with nvme-cli filters, so the char
>> device should be usable without tool changes.
>>
>> Signed-off-by: Javier González <javier.gonz@samsung.com>
>> ---
>>  drivers/nvme/host/core.c | 144 +++++++++++++++++++++++++++++++++++----
>>  drivers/nvme/host/nvme.h |   3 +
>>  2 files changed, 132 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index 2c23ea6dc296..9c4acf2725f3 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -86,7 +86,9 @@ static DEFINE_MUTEX(nvme_subsystems_lock);
>>
>>  static DEFINE_IDA(nvme_instance_ida);
>>  static dev_t nvme_ctrl_base_chr_devt;
>> +static dev_t nvme_ns_base_chr_devt;
>>  static struct class *nvme_class;
>> +static struct class *nvme_ns_class;
>>  static struct class *nvme_subsys_class;
>>
>>  static void nvme_put_subsystem(struct nvme_subsystem *subsys);
>> @@ -497,6 +499,7 @@ static void nvme_free_ns(struct kref *kref)
>>  	if (ns->ndev)
>>  		nvme_nvm_unregister(ns);
>>
>> +	cdev_device_del(&ns->cdev, &ns->cdev_device);
>>  	put_disk(ns->disk);
>>  	nvme_put_ns_head(ns->head);
>>  	nvme_put_ctrl(ns->ctrl);
>> @@ -1696,15 +1699,15 @@ static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
>>  	return ret;
>>  }
>>
>> -static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
>> -		unsigned int cmd, unsigned long arg)
>> +static int __nvme_ns_ioctl(struct gendisk *disk, unsigned int cmd,
>> +			   unsigned long arg)
>>  {
>>  	struct nvme_ns_head *head = NULL;
>>  	void __user *argp = (void __user *)arg;
>>  	struct nvme_ns *ns;
>>  	int srcu_idx, ret;
>>
>> -	ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
>> +	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
>>  	if (unlikely(!ns))
>>  		return -EWOULDBLOCK;
>>
>> @@ -1741,6 +1744,18 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
>>  	return ret;
>>  }
>>
>> +static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
>> +		      unsigned int cmd, unsigned long arg)
>> +{
>> +	return __nvme_ns_ioctl(bdev->bd_disk, cmd, arg);
>> +}
>> +
>> +static long nvme_cdev_ioctl(struct file *file, unsigned int cmd,
>> +			    unsigned long arg)
>> +{
>> +	return __nvme_ns_ioctl((struct gendisk *)file->private_data, cmd, arg);
>> +}
>> +
>>  #ifdef CONFIG_COMPAT
>>  struct nvme_user_io32 {
>>  	__u8	opcode;
>> @@ -1782,10 +1797,8 @@ static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
>>  #define nvme_compat_ioctl	NULL
>>  #endif /* CONFIG_COMPAT */
>>
>> -static int nvme_open(struct block_device *bdev, fmode_t mode)
>> +static int __nvme_open(struct nvme_ns *ns)
>>  {
>> -	struct nvme_ns *ns = bdev->bd_disk->private_data;
>> -
>>  #ifdef CONFIG_NVME_MULTIPATH
>>  	/* should never be called due to GENHD_FL_HIDDEN */
>>  	if (WARN_ON_ONCE(ns->head->disk))
>> @@ -1804,12 +1817,24 @@ static int nvme_open(struct block_device *bdev, fmode_t mode)
>>  	return -ENXIO;
>>  }
>>
>> +static void __nvme_release(struct nvme_ns *ns)
>> +{
>> +	module_put(ns->ctrl->ops->module);
>> +	nvme_put_ns(ns);
>> +}
>> +
>> +static int nvme_open(struct block_device *bdev, fmode_t mode)
>> +{
>> +	struct nvme_ns *ns = bdev->bd_disk->private_data;
>> +
>> +	return __nvme_open(ns);
>> +}
>> +
>>  static void nvme_release(struct gendisk *disk, fmode_t mode)
>>  {
>>  	struct nvme_ns *ns = disk->private_data;
>>
>> -	module_put(ns->ctrl->ops->module);
>> -	nvme_put_ns(ns);
>> +	__nvme_release(ns);
>>  }
>>
>>  static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
>> @@ -1821,6 +1846,26 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
>>  	return 0;
>>  }
>>
>> +static int nvme_cdev_open(struct inode *inode, struct file *file)
>> +{
>> +	struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev);
>> +	int ret;
>> +
>> +	ret = __nvme_open(ns);
>> +	if (!ret)
>> +		file->private_data = ns->disk;
>> +
>> +	return ret;
>> +}
>> +
>> +static int nvme_cdev_release(struct inode *inode, struct file *file)
>> +{
>> +	struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev);
>> +
>> +	__nvme_release(ns);
>> +	return 0;
>> +}
>> +
>>  #ifdef CONFIG_BLK_DEV_INTEGRITY
>>  static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
>>  				u32 max_integrity_segments)
>> @@ -2303,6 +2348,14 @@ static const struct block_device_operations nvme_bdev_ops = {
>>  	.pr_ops		= &nvme_pr_ops,
>>  };
>>
>> +static const struct file_operations nvme_cdev_fops = {
>> +	.owner		= THIS_MODULE,
>> +	.open		= nvme_cdev_open,
>> +	.release	= nvme_cdev_release,
>> +	.unlocked_ioctl	= nvme_cdev_ioctl,
>> +	.compat_ioctl	= compat_ptr_ioctl,
>> +};
>> +
>>  #ifdef CONFIG_NVME_MULTIPATH
>>  static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode)
>>  {
>> @@ -3301,6 +3354,9 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
>>  {
>>  	struct gendisk *disk = dev_to_disk(dev);
>>
>> +	if (dev->class == nvme_ns_class)
>> +		return ((struct nvme_ns *)dev_get_drvdata(dev))->head;
>
>I think it would be better if it can have inline function
>nvme_get_ns_from_cdev() just like nvme_get_ns_from_dev().

Good point. Will add that.

>
>> +
>>  	if (disk->fops == &nvme_bdev_ops)
>>  		return nvme_get_ns_from_dev(dev)->head;
>>  	else
>> @@ -3390,7 +3446,7 @@ static struct attribute *nvme_ns_id_attrs[] = {
>>  };
>>
>>  static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
>> -		struct attribute *a, int n)
>> +	       struct attribute *a, int n)
>
>Unrelated changes for this patch.
>
>>  {
>>  	struct device *dev = container_of(kobj, struct device, kobj);
>>  	struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids;
>> @@ -3432,6 +3488,11 @@ const struct attribute_group *nvme_ns_id_attr_groups[] = {
>>  	NULL,
>>  };
>>
>> +const struct attribute_group *nvme_ns_char_id_attr_groups[] = {
>> +	&nvme_ns_id_attr_group,
>> +	NULL,
>> +};
>> +
>>  #define nvme_show_str_function(field)						\
>>  static ssize_t  field##_show(struct device *dev,				\
>>  			    struct device_attribute *attr, char *buf)		\
>> @@ -3824,6 +3885,36 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
>>  }
>>  EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU);
>>
>> +static int nvme_alloc_chardev_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
>> +{
>> +	char cdisk_name[DISK_NAME_LEN];
>> +	int ret = 0;
>
>Unnecessary initialization for local variable.
>
>> +
>> +	device_initialize(&ns->cdev_device);
>> +	ns->cdev_device.devt = MKDEV(MAJOR(nvme_ns_base_chr_devt),
>> +				     ns->head->instance);
>> +	ns->cdev_device.class = nvme_ns_class;
>> +	ns->cdev_device.parent = ctrl->device;
>> +	ns->cdev_device.groups = nvme_ns_char_id_attr_groups;
>> +	dev_set_drvdata(&ns->cdev_device, ns);
>> +
>> +	sprintf(cdisk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
>> +			ctrl->instance, ns->head->instance);
>
>In multi-path, private namespaces for a head are not in /dev, so I don't
>think this will hurt private namespaces (e.g., nvme0c0n1), But it looks
>like it will make a little bit confusions between chardev and hidden blkdev.
>
>I don't against to update nvme-cli things also even naming conventions are
>going to become different than nvmeXcYnZ.

Agree. But as I understand it, Keith had a good argument to keep names
aligned with the hidden bdev. It is also true that in that comment he
suggested nesting the char device in /dev/nvme

Keith, would you mind looking over this? Thanks!
Keith Busch Dec. 1, 2020, 7:30 p.m. UTC | #3
On Tue, Dec 01, 2020 at 07:57:32PM +0100, Javier González wrote:
> On 01.12.2020 23:03, Minwoo Im wrote:
> > > +
> > > +	device_initialize(&ns->cdev_device);
> > > +	ns->cdev_device.devt = MKDEV(MAJOR(nvme_ns_base_chr_devt),
> > > +				     ns->head->instance);
> > > +	ns->cdev_device.class = nvme_ns_class;
> > > +	ns->cdev_device.parent = ctrl->device;
> > > +	ns->cdev_device.groups = nvme_ns_char_id_attr_groups;
> > > +	dev_set_drvdata(&ns->cdev_device, ns);
> > > +
> > > +	sprintf(cdisk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
> > > +			ctrl->instance, ns->head->instance);
> > 
> > In multi-path, private namespaces for a head are not in /dev, so I don't
> > think this will hurt private namespaces (e.g., nvme0c0n1), But it looks
> > like it will make a little bit confusions between chardev and hidden blkdev.
> > 
> > I don't against to update nvme-cli things also even naming conventions are
> > going to become different than nvmeXcYnZ.
> 
> Agree. But as I understand it, Keith had a good argument to keep names
> aligned with the hidden bdev. 

My suggested naming makes it as obvious as possible that the character
device in /dev/ and the hidden block device in /sys/ are referring to
the same thing. What is confusing about that?

> It is also true that in that comment he suggested nesting the char
> device in /dev/nvme

Yeah, I'm okay with sub-directories for these special handles, but there
are arguments against it too. I don't feel that strongly about it either
way.
Christoph Hellwig Dec. 1, 2020, 7:38 p.m. UTC | #4
On Wed, Dec 02, 2020 at 04:30:02AM +0900, Keith Busch wrote:
> > > In multi-path, private namespaces for a head are not in /dev, so I don't
> > > think this will hurt private namespaces (e.g., nvme0c0n1), But it looks
> > > like it will make a little bit confusions between chardev and hidden blkdev.
> > > 
> > > I don't against to update nvme-cli things also even naming conventions are
> > > going to become different than nvmeXcYnZ.
> > 
> > Agree. But as I understand it, Keith had a good argument to keep names
> > aligned with the hidden bdev. 
> 
> My suggested naming makes it as obvious as possible that the character
> device in /dev/ and the hidden block device in /sys/ are referring to
> the same thing. What is confusing about that?
> 
> > It is also true that in that comment he suggested nesting the char
> > device in /dev/nvme
> 
> Yeah, I'm okay with sub-directories for these special handles, but there
> are arguments against it too. I don't feel that strongly about it either
> way.

I'd prefer different naming for the char vs the block devices.  Yes,
this will require a little work in the userspace tools to support the
character device, but I think it is much cleaner.

Devices in subdirectories of /dev/ are very rare and keep causing problem
with userspace tooling for the few drivers that use them, so I don't
think they are a good idea.
Javier González Dec. 1, 2020, 8:44 p.m. UTC | #5
On 01.12.2020 20:38, Christoph Hellwig wrote:
>On Wed, Dec 02, 2020 at 04:30:02AM +0900, Keith Busch wrote:
>> > > In multi-path, private namespaces for a head are not in /dev, so I don't
>> > > think this will hurt private namespaces (e.g., nvme0c0n1), But it looks
>> > > like it will make a little bit confusions between chardev and hidden blkdev.
>> > >
>> > > I don't against to update nvme-cli things also even naming conventions are
>> > > going to become different than nvmeXcYnZ.
>> >
>> > Agree. But as I understand it, Keith had a good argument to keep names
>> > aligned with the hidden bdev.
>>
>> My suggested naming makes it as obvious as possible that the character
>> device in /dev/ and the hidden block device in /sys/ are referring to
>> the same thing. What is confusing about that?

Ok. I see your point. I was thinking of the case where the multipath
bdev is also enabled so we would have the same name in different places
referring to a different device.

>>
>> > It is also true that in that comment he suggested nesting the char
>> > device in /dev/nvme
>>
>> Yeah, I'm okay with sub-directories for these special handles, but there
>> are arguments against it too. I don't feel that strongly about it either
>> way.
>
>I'd prefer different naming for the char vs the block devices.  Yes,
>this will require a little work in the userspace tools to support the
>character device, but I think it is much cleaner.
>
>Devices in subdirectories of /dev/ are very rare and keep causing problem
>with userspace tooling for the few drivers that use them, so I don't
>think they are a good idea.

Would something like nvmeXnYc work here, or do you prefer something
different where we need to implement new, dedicated filters for
user-space? I think you suggested nvmegXnY at some point?

Here, the naming would be for both the char device and the sysfs
entry.
Minwoo Im Dec. 2, 2020, 3 a.m. UTC | #6
On 20-12-02 04:30:02, Keith Busch wrote:
> On Tue, Dec 01, 2020 at 07:57:32PM +0100, Javier González wrote:
> > On 01.12.2020 23:03, Minwoo Im wrote:
> > > > +
> > > > +	device_initialize(&ns->cdev_device);
> > > > +	ns->cdev_device.devt = MKDEV(MAJOR(nvme_ns_base_chr_devt),
> > > > +				     ns->head->instance);
> > > > +	ns->cdev_device.class = nvme_ns_class;
> > > > +	ns->cdev_device.parent = ctrl->device;
> > > > +	ns->cdev_device.groups = nvme_ns_char_id_attr_groups;
> > > > +	dev_set_drvdata(&ns->cdev_device, ns);
> > > > +
> > > > +	sprintf(cdisk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
> > > > +			ctrl->instance, ns->head->instance);
> > > 
> > > In multi-path, private namespaces for a head are not in /dev, so I don't
> > > think this will hurt private namespaces (e.g., nvme0c0n1), But it looks
> > > like it will make a little bit confusions between chardev and hidden blkdev.
> > > 
> > > I don't against to update nvme-cli things also even naming conventions are
> > > going to become different than nvmeXcYnZ.
> > 
> > Agree. But as I understand it, Keith had a good argument to keep names
> > aligned with the hidden bdev. 
> 
> My suggested naming makes it as obvious as possible that the character
> device in /dev/ and the hidden block device in /sys/ are referring to
> the same thing. What is confusing about that?

I meant that someone might misunderstand that /dev/nvmeXcYnZ is also a
blkdev just like /dev/nvmeXnY.  I'm just saying it might be, but I'm
fine with the suggested naming as those two are indicating a single
concept (namespace).
Christoph Hellwig Dec. 7, 2020, 2:06 p.m. UTC | #7
On Tue, Dec 01, 2020 at 09:44:56PM +0100, Javier González wrote:
>> Devices in subdirectories of /dev/ are very rare and keep causing problem
>> with userspace tooling for the few drivers that use them, so I don't
>> think they are a good idea.
>
> Would something like nvmeXnYc work here or your prefer something
> different where we need to implement new, dedicated filters for
> user-space? I thing you suggested nvmegXnY at some point?

I think I suggested /dev/ng*, but maybe that is too short for modern
times, so the above would work for me.
diff mbox series

Patch

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2c23ea6dc296..9c4acf2725f3 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -86,7 +86,9 @@  static DEFINE_MUTEX(nvme_subsystems_lock);
 
 static DEFINE_IDA(nvme_instance_ida);
 static dev_t nvme_ctrl_base_chr_devt;
+static dev_t nvme_ns_base_chr_devt;
 static struct class *nvme_class;
+static struct class *nvme_ns_class;
 static struct class *nvme_subsys_class;
 
 static void nvme_put_subsystem(struct nvme_subsystem *subsys);
@@ -497,6 +499,7 @@  static void nvme_free_ns(struct kref *kref)
 	if (ns->ndev)
 		nvme_nvm_unregister(ns);
 
+	cdev_device_del(&ns->cdev, &ns->cdev_device);
 	put_disk(ns->disk);
 	nvme_put_ns_head(ns->head);
 	nvme_put_ctrl(ns->ctrl);
@@ -1696,15 +1699,15 @@  static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
 	return ret;
 }
 
-static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
-		unsigned int cmd, unsigned long arg)
+static int __nvme_ns_ioctl(struct gendisk *disk, unsigned int cmd,
+			   unsigned long arg)
 {
 	struct nvme_ns_head *head = NULL;
 	void __user *argp = (void __user *)arg;
 	struct nvme_ns *ns;
 	int srcu_idx, ret;
 
-	ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
+	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
 	if (unlikely(!ns))
 		return -EWOULDBLOCK;
 
@@ -1741,6 +1744,18 @@  static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
 	return ret;
 }
 
+static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+		      unsigned int cmd, unsigned long arg)
+{
+	return __nvme_ns_ioctl(bdev->bd_disk, cmd, arg);
+}
+
+static long nvme_cdev_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	return __nvme_ns_ioctl((struct gendisk *)file->private_data, cmd, arg);
+}
+
 #ifdef CONFIG_COMPAT
 struct nvme_user_io32 {
 	__u8	opcode;
@@ -1782,10 +1797,8 @@  static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
 #define nvme_compat_ioctl	NULL
 #endif /* CONFIG_COMPAT */
 
-static int nvme_open(struct block_device *bdev, fmode_t mode)
+static int __nvme_open(struct nvme_ns *ns)
 {
-	struct nvme_ns *ns = bdev->bd_disk->private_data;
-
 #ifdef CONFIG_NVME_MULTIPATH
 	/* should never be called due to GENHD_FL_HIDDEN */
 	if (WARN_ON_ONCE(ns->head->disk))
@@ -1804,12 +1817,24 @@  static int nvme_open(struct block_device *bdev, fmode_t mode)
 	return -ENXIO;
 }
 
+static void __nvme_release(struct nvme_ns *ns)
+{
+	module_put(ns->ctrl->ops->module);
+	nvme_put_ns(ns);
+}
+
+static int nvme_open(struct block_device *bdev, fmode_t mode)
+{
+	struct nvme_ns *ns = bdev->bd_disk->private_data;
+
+	return __nvme_open(ns);
+}
+
 static void nvme_release(struct gendisk *disk, fmode_t mode)
 {
 	struct nvme_ns *ns = disk->private_data;
 
-	module_put(ns->ctrl->ops->module);
-	nvme_put_ns(ns);
+	__nvme_release(ns);
 }
 
 static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@@ -1821,6 +1846,26 @@  static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
+static int nvme_cdev_open(struct inode *inode, struct file *file)
+{
+	struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev);
+	int ret;
+
+	ret = __nvme_open(ns);
+	if (!ret)
+		file->private_data = ns->disk;
+
+	return ret;
+}
+
+static int nvme_cdev_release(struct inode *inode, struct file *file)
+{
+	struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev);
+
+	__nvme_release(ns);
+	return 0;
+}
+
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
 				u32 max_integrity_segments)
@@ -2303,6 +2348,14 @@  static const struct block_device_operations nvme_bdev_ops = {
 	.pr_ops		= &nvme_pr_ops,
 };
 
+static const struct file_operations nvme_cdev_fops = {
+	.owner		= THIS_MODULE,
+	.open		= nvme_cdev_open,
+	.release	= nvme_cdev_release,
+	.unlocked_ioctl	= nvme_cdev_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
+};
+
 #ifdef CONFIG_NVME_MULTIPATH
 static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode)
 {
@@ -3301,6 +3354,9 @@  static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
 {
 	struct gendisk *disk = dev_to_disk(dev);
 
+	if (dev->class == nvme_ns_class)
+		return ((struct nvme_ns *)dev_get_drvdata(dev))->head;
+
 	if (disk->fops == &nvme_bdev_ops)
 		return nvme_get_ns_from_dev(dev)->head;
 	else
@@ -3390,7 +3446,7 @@  static struct attribute *nvme_ns_id_attrs[] = {
 };
 
 static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
-		struct attribute *a, int n)
+	       struct attribute *a, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids;
@@ -3432,6 +3488,11 @@  const struct attribute_group *nvme_ns_id_attr_groups[] = {
 	NULL,
 };
 
+const struct attribute_group *nvme_ns_char_id_attr_groups[] = {
+	&nvme_ns_id_attr_group,
+	NULL,
+};
+
 #define nvme_show_str_function(field)						\
 static ssize_t  field##_show(struct device *dev,				\
 			    struct device_attribute *attr, char *buf)		\
@@ -3824,6 +3885,36 @@  struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 }
 EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU);
 
+static int nvme_alloc_chardev_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
+{
+	char cdisk_name[DISK_NAME_LEN];
+	int ret = 0;
+
+	device_initialize(&ns->cdev_device);
+	ns->cdev_device.devt = MKDEV(MAJOR(nvme_ns_base_chr_devt),
+				     ns->head->instance);
+	ns->cdev_device.class = nvme_ns_class;
+	ns->cdev_device.parent = ctrl->device;
+	ns->cdev_device.groups = nvme_ns_char_id_attr_groups;
+	dev_set_drvdata(&ns->cdev_device, ns);
+
+	sprintf(cdisk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
+			ctrl->instance, ns->head->instance);
+
+	ret = dev_set_name(&ns->cdev_device, "%s", cdisk_name);
+	if (ret)
+		return ret;
+
+	cdev_init(&ns->cdev, &nvme_cdev_fops);
+	ns->cdev.owner = ctrl->ops->module;
+
+	ret = cdev_device_add(&ns->cdev, &ns->cdev_device);
+	if (ret)
+		kfree_const(ns->cdev_device.kobj.name);
+
+	return ret;
+}
+
 static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 		struct nvme_ns_ids *ids)
 {
@@ -3870,8 +3961,12 @@  static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
 	ns->disk = disk;
 
+	/* When the device does not support any of the features required by the
+	 * kernel (or viceversa), hide the block device. We can still rely on
+	 * the namespace char device for submitting IOCTLs
+	 */
 	if (nvme_update_ns_info(ns, id))
-		goto out_put_disk;
+		disk->flags |= GENHD_FL_HIDDEN;
 
 	if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
 		if (nvme_nvm_register(ns, disk_name, node)) {
@@ -3887,9 +3982,12 @@  static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	nvme_get_ctrl(ctrl);
 
 	device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups);
-
 	nvme_mpath_add_disk(ns, id);
 	nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
+
+	if (nvme_alloc_chardev_ns(ctrl, ns))
+		goto out_put_disk;
+
 	kfree(id);
 
 	return;
@@ -4685,23 +4783,38 @@  static int __init nvme_core_init(void)
 	if (result < 0)
 		goto destroy_delete_wq;
 
+	result = alloc_chrdev_region(&nvme_ns_base_chr_devt, 0,
+			NVME_MINORS, "nvmec");
+	if (result < 0)
+		goto unregister_dev_chrdev;
+
 	nvme_class = class_create(THIS_MODULE, "nvme");
 	if (IS_ERR(nvme_class)) {
 		result = PTR_ERR(nvme_class);
-		goto unregister_chrdev;
+		goto unregister_ns_chrdev;
 	}
 	nvme_class->dev_uevent = nvme_class_uevent;
 
+	nvme_ns_class = class_create(THIS_MODULE, "nvme-ns");
+	if (IS_ERR(nvme_ns_class)) {
+		result = PTR_ERR(nvme_ns_class);
+		goto destroy_dev_class;
+	}
+
 	nvme_subsys_class = class_create(THIS_MODULE, "nvme-subsystem");
 	if (IS_ERR(nvme_subsys_class)) {
 		result = PTR_ERR(nvme_subsys_class);
-		goto destroy_class;
+		goto destroy_ns_class;
 	}
 	return 0;
 
-destroy_class:
+destroy_ns_class:
+	class_destroy(nvme_ns_class);
+destroy_dev_class:
 	class_destroy(nvme_class);
-unregister_chrdev:
+unregister_ns_chrdev:
+	unregister_chrdev_region(nvme_ns_base_chr_devt, NVME_MINORS);
+unregister_dev_chrdev:
 	unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
 destroy_delete_wq:
 	destroy_workqueue(nvme_delete_wq);
@@ -4717,6 +4830,7 @@  static void __exit nvme_core_exit(void)
 {
 	class_destroy(nvme_subsys_class);
 	class_destroy(nvme_class);
+	unregister_chrdev_region(nvme_ns_base_chr_devt, NVME_MINORS);
 	unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
 	destroy_workqueue(nvme_delete_wq);
 	destroy_workqueue(nvme_reset_wq);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index cdfce7250da0..533bd54f9194 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -436,6 +436,9 @@  struct nvme_ns {
 	struct kref kref;
 	struct nvme_ns_head *head;
 
+	struct device cdev_device;	/* char device */
+	struct cdev cdev;
+
 	int lba_shift;
 	u16 ms;
 	u16 sgs;