Message ID | 20201216181828.21040-1-javier.gonz@samsung.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [V4] nvme: enable char device per namespace | expand |
On 16.12.2020 19:18, javier@javigon.com wrote: >From: Javier González <javier.gonz@samsung.com> > >Create a char device per NVMe namespace. This char device is always >initialized, independently of whether the features implemented by the >device are supported by the kernel. User-space can therefore always >issue IOCTLs to the NVMe driver using the char device. > >The char device is presented as /dev/generic-nvmeXcYnZ. This naming >scheme follows the convention of the hidden device (nvmeXcYnZ). Support >for multipath will follow. > >Keith: Regarding nvme-cli support, should the generic handle show up in >place such as 'nvme list'? If so, I can send a patch for the filters. > >Changes since V3 (merged patch from keith) > - Use a dedicated ida for the generic handle > - Do not abort namespace greation if the generic handle fails > >Changes since V2: > - Apply a number of naming and code structure improvements (from > Christoph) > - Use i_cdev to pull struct nvme_ns in the ioctl path instead of > populating file->private_data (from Christoph) > - Change char device and sysfs entries to /dev/generic-nvmeXcYnZ to > follow the hidden device naming scheme (from Christoph and Keith) > >Changes since V1: > - Remove patches 1-3 which are already picked up by Christoph > - Change the char device and sysfs entries to nvmeXnYc / c signals > char device > - Address Minwoo's comments on inline functions and style > >Signed-off-by: Javier González <javier.gonz@samsung.com> >--- > drivers/nvme/host/core.c | 159 +++++++++++++++++++++++++++++++++++---- > drivers/nvme/host/nvme.h | 9 +++ > 2 files changed, 152 insertions(+), 16 deletions(-) > >diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c >index 99f91efe3824..d1ee462f41fa 100644 >--- a/drivers/nvme/host/core.c >+++ b/drivers/nvme/host/core.c >@@ -86,7 +86,11 @@ static DEFINE_MUTEX(nvme_subsystems_lock); > > static DEFINE_IDA(nvme_instance_ida); > static dev_t nvme_ctrl_base_chr_devt; >+ >+static 
DEFINE_IDA(nvme_gen_minor_ida); >+static dev_t nvme_ns_base_chr_devt; > static struct class *nvme_class; >+static struct class *nvme_ns_class; > static struct class *nvme_subsys_class; > > static void nvme_put_subsystem(struct nvme_subsystem *subsys); >@@ -537,7 +541,10 @@ static void nvme_free_ns(struct kref *kref) > > if (ns->ndev) > nvme_nvm_unregister(ns); >+ if (ns->minor) >+ ida_simple_remove(&nvme_gen_minor_ida, ns->minor - 1); > >+ cdev_device_del(&ns->cdev, &ns->cdev_device); > put_disk(ns->disk); > nvme_put_ns_head(ns->head); > nvme_put_ctrl(ns->ctrl); >@@ -1738,15 +1745,15 @@ static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, > return ret; > } > >-static int nvme_ioctl(struct block_device *bdev, fmode_t mode, >- unsigned int cmd, unsigned long arg) >+static int nvme_disk_ioctl(struct gendisk *disk, unsigned int cmd, >+ unsigned long arg) > { > struct nvme_ns_head *head = NULL; > void __user *argp = (void __user *)arg; > struct nvme_ns *ns; > int srcu_idx, ret; > >- ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); >+ ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx); > if (unlikely(!ns)) > return -EWOULDBLOCK; > >@@ -1783,6 +1790,12 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, > return ret; > } > >+static int nvme_ioctl(struct block_device *bdev, fmode_t mode, >+ unsigned int cmd, unsigned long arg) >+{ >+ return nvme_disk_ioctl(bdev->bd_disk, cmd, arg); >+} >+ > #ifdef CONFIG_COMPAT > struct nvme_user_io32 { > __u8 opcode; >@@ -1824,10 +1837,8 @@ static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, > #define nvme_compat_ioctl NULL > #endif /* CONFIG_COMPAT */ > >-static int nvme_open(struct block_device *bdev, fmode_t mode) >+static int nvme_ns_open(struct nvme_ns *ns) > { >- struct nvme_ns *ns = bdev->bd_disk->private_data; >- > #ifdef CONFIG_NVME_MULTIPATH > /* should never be called due to GENHD_FL_HIDDEN */ > if (WARN_ON_ONCE(ns->head->disk)) >@@ -1846,14 +1857,22 @@ static int 
nvme_open(struct block_device *bdev, fmode_t mode) > return -ENXIO; > } > >-static void nvme_release(struct gendisk *disk, fmode_t mode) >+static void nvme_ns_release(struct nvme_ns *ns) > { >- struct nvme_ns *ns = disk->private_data; >- > module_put(ns->ctrl->ops->module); > nvme_put_ns(ns); > } > >+static int nvme_open(struct block_device *bdev, fmode_t mode) >+{ >+ return nvme_ns_open(bdev->bd_disk->private_data); >+} >+ >+static void nvme_release(struct gendisk *disk, fmode_t mode) >+{ >+ nvme_ns_release(disk->private_data); >+} >+ > static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) > { > /* some standard values */ >@@ -2209,6 +2228,13 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) > return 0; > > out_unfreeze: >+ /* >+ * When the device does not support any of the features required by the >+ * kernel (or viceversa), hide the block device. We can still rely on >+ * the namespace char device for submitting IOCTLs >+ */ >+ ns->disk->flags |= GENHD_FL_HIDDEN; >+ > blk_mq_unfreeze_queue(ns->disk->queue); > return ret; > } >@@ -2346,6 +2372,38 @@ static const struct block_device_operations nvme_bdev_ops = { > .pr_ops = &nvme_pr_ops, > }; > >+static int nvme_cdev_open(struct inode *inode, struct file *file) >+{ >+ struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev); >+ >+ return nvme_ns_open(ns); >+} >+ >+static int nvme_cdev_release(struct inode *inode, struct file *file) >+{ >+ struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev); >+ >+ nvme_ns_release(ns); >+ return 0; >+} >+ >+static long nvme_cdev_ioctl(struct file *file, unsigned int cmd, >+ unsigned long arg) >+{ >+ struct nvme_ns *ns = container_of(file->f_inode->i_cdev, >+ struct nvme_ns, cdev); >+ >+ return nvme_disk_ioctl(ns->disk, cmd, arg); >+} >+ >+static const struct file_operations nvme_cdev_fops = { >+ .owner = THIS_MODULE, >+ .open = nvme_cdev_open, >+ .release = nvme_cdev_release, >+ .unlocked_ioctl = 
nvme_cdev_ioctl, >+ .compat_ioctl = compat_ptr_ioctl, >+}; >+ > #ifdef CONFIG_NVME_MULTIPATH > static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode) > { >@@ -3343,6 +3401,9 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev) > { > struct gendisk *disk = dev_to_disk(dev); > >+ if (dev->class == nvme_ns_class) >+ return nvme_get_ns_from_cdev(dev)->head; >+ > if (disk->fops == &nvme_bdev_ops) > return nvme_get_ns_from_dev(dev)->head; > else >@@ -3474,6 +3535,11 @@ const struct attribute_group *nvme_ns_id_attr_groups[] = { > NULL, > }; > >+const struct attribute_group *nvme_ns_char_id_attr_groups[] = { >+ &nvme_ns_id_attr_group, >+ NULL, >+}; >+ > #define nvme_show_str_function(field) \ > static ssize_t field##_show(struct device *dev, \ > struct device_attribute *attr, char *buf) \ >@@ -3866,6 +3932,47 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) > } > EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU); > >+static int nvme_alloc_chardev_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) >+{ >+ char cdisk_name[DISK_NAME_LEN]; >+ int ret; >+ >+ ret = ida_simple_get(&nvme_gen_minor_ida, 0, 0, GFP_KERNEL); >+ if (ret < 0) >+ return ret; >+ >+ ns->minor = ret + 1; >+ device_initialize(&ns->cdev_device); >+ ns->cdev_device.devt = MKDEV(MAJOR(nvme_ns_base_chr_devt), ret); >+ ns->cdev_device.class = nvme_ns_class; >+ ns->cdev_device.parent = ctrl->device; >+ ns->cdev_device.groups = nvme_ns_char_id_attr_groups; >+ dev_set_drvdata(&ns->cdev_device, ns); >+ >+ sprintf(cdisk_name, "nvme-generic-%dc%dn%d", ctrl->subsys->instance, >+ ctrl->instance, ns->head->instance); >+ >+ ret = dev_set_name(&ns->cdev_device, "%s", cdisk_name); >+ if (ret) >+ goto put_ida; >+ >+ cdev_init(&ns->cdev, &nvme_cdev_fops); >+ ns->cdev.owner = ctrl->ops->module; >+ >+ ret = cdev_device_add(&ns->cdev, &ns->cdev_device); >+ if (ret) >+ goto free_kobj; >+ >+ return ret; >+ >+free_kobj: >+ kfree_const(ns->cdev_device.kobj.name); 
>+put_ida: >+ ida_simple_remove(&nvme_gen_minor_ida, ns->minor - 1); >+ ns->minor = 0; >+ return ret; >+} >+ > static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, > struct nvme_ns_ids *ids) > { >@@ -3912,8 +4019,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, > memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); > ns->disk = disk; > >- if (nvme_update_ns_info(ns, id)) >- goto out_put_disk; >+ nvme_update_ns_info(ns, id); > > if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { > if (nvme_nvm_register(ns, disk_name, node)) { >@@ -3929,9 +4035,14 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, > nvme_get_ctrl(ctrl); > > device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups); >- > nvme_mpath_add_disk(ns, id); > nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name); >+ >+ if (nvme_alloc_chardev_ns(ctrl, ns)) >+ dev_warn(ctrl->device, >+ "failed to create generic handle for nsid:%d\n", >+ nsid); >+ > kfree(id); > > return; >@@ -4733,23 +4844,38 @@ static int __init nvme_core_init(void) > if (result < 0) > goto destroy_delete_wq; > >+ result = alloc_chrdev_region(&nvme_ns_base_chr_devt, 0, >+ NVME_MINORS, "nvmec"); >+ if (result < 0) >+ goto unregister_dev_chrdev; >+ > nvme_class = class_create(THIS_MODULE, "nvme"); > if (IS_ERR(nvme_class)) { > result = PTR_ERR(nvme_class); >- goto unregister_chrdev; >+ goto unregister_ns_chrdev; > } > nvme_class->dev_uevent = nvme_class_uevent; > >+ nvme_ns_class = class_create(THIS_MODULE, "nvme-ns"); >+ if (IS_ERR(nvme_ns_class)) { >+ result = PTR_ERR(nvme_ns_class); >+ goto destroy_dev_class; >+ } >+ > nvme_subsys_class = class_create(THIS_MODULE, "nvme-subsystem"); > if (IS_ERR(nvme_subsys_class)) { > result = PTR_ERR(nvme_subsys_class); >- goto destroy_class; >+ goto destroy_ns_class; > } > return 0; > >-destroy_class: >+destroy_ns_class: >+ class_destroy(nvme_ns_class); >+destroy_dev_class: > class_destroy(nvme_class); >-unregister_chrdev: 
>+unregister_ns_chrdev: >+ unregister_chrdev_region(nvme_ns_base_chr_devt, NVME_MINORS); >+unregister_dev_chrdev: > unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS); > destroy_delete_wq: > destroy_workqueue(nvme_delete_wq); >@@ -4765,6 +4891,7 @@ static void __exit nvme_core_exit(void) > { > class_destroy(nvme_subsys_class); > class_destroy(nvme_class); >+ unregister_chrdev_region(nvme_ns_base_chr_devt, NVME_MINORS); > unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS); > destroy_workqueue(nvme_delete_wq); > destroy_workqueue(nvme_reset_wq); >diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h >index bfcedfa4b057..1e52f7da82de 100644 >--- a/drivers/nvme/host/nvme.h >+++ b/drivers/nvme/host/nvme.h >@@ -439,6 +439,10 @@ struct nvme_ns { > struct kref kref; > struct nvme_ns_head *head; > >+ struct device cdev_device; /* char device */ >+ struct cdev cdev; >+ int minor; >+ > int lba_shift; > u16 ms; > u16 sgs; >@@ -818,6 +822,11 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) > return dev_to_disk(dev)->private_data; > } > >+static inline struct nvme_ns *nvme_get_ns_from_cdev(struct device *dev) >+{ >+ return dev_get_drvdata(dev); >+} >+ > #ifdef CONFIG_NVME_HWMON > int nvme_hwmon_init(struct nvme_ctrl *ctrl); > #else >-- >2.17.1 > Christoph, Keith, Did you have a chance to look into this new version? Javier
Hello Javier, I tested this patch based on nvme-5.11: [ 1.219747] BUG: unable to handle page fault for address: 0000000100000041 [ 1.220518] #PF: supervisor read access in kernel mode [ 1.220582] #PF: error_code(0x0000) - not-present page [ 1.220582] PGD 0 P4D 0 [ 1.220582] Oops: 0000 [#1] SMP PTI [ 1.220582] CPU: 0 PID: 7 Comm: kworker/u16:0 Not tainted 5.11.0-rc1+ #46 [ 1.220582] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [ 1.220582] Workqueue: nvme-wq nvme_scan_work [ 1.220582] RIP: 0010:nvme_ns_id_attrs_are_visible+0x10f/0x152 [ 1.220582] Code: 81 7d d0 80 f9 a1 82 74 0a 48 81 7d d0 a0 f9 a1 82 75 50 48 8b 45 e8 48 89 45 f8 48 8b 45 f8 48 83 e8 60 48 8b 80 60 03 00 00 <48> 8b 40 40 48 3d e0 d1 4d 82 74 07 b8 00 00 00 00 eb 2e 48 8b 45 [ 1.220582] RSP: 0000:ffffc90000047b70 EFLAGS: 00010282 [ 1.220582] RAX: 0000000100000001 RBX: ffffffff824ddb20 RCX: 0000000000000124 [ 1.220582] RDX: ffff8881026eac00 RSI: ffffffff82a1f980 RDI: ffff888102745058 [ 1.220582] RBP: ffffc90000047ba8 R08: ffff888102948718 R09: 0000000000000000 [ 1.220582] R10: 0000000000000000 R11: ffff888100465080 R12: ffff888102745058 [ 1.220582] R13: ffff888102948600 R14: 0000000000000000 R15: ffffffff82a1f548 [ 1.220582] FS: 0000000000000000(0000) GS:ffff88842fc00000(0000) knlGS:0000000000000000 [ 1.220582] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1.220582] CR2: 0000000100000041 CR3: 000000000280c001 CR4: 0000000000370ef0 [ 1.220582] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1.220582] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 1.220582] Call Trace: [ 1.220582] internal_create_group+0xde/0x390 [ 1.220582] internal_create_groups.part.4+0x3e/0xa0 [ 1.220582] device_add+0x3cf/0x830 [ 1.220582] ? cdev_get+0x20/0x20 [ 1.220582] ? cdev_purge+0x60/0x60 [ 1.220582] cdev_device_add+0x44/0x70 [ 1.220582] ? 
cdev_init+0x50/0x60 [ 1.220582] nvme_alloc_chardev_ns+0x187/0x1eb [ 1.220582] nvme_alloc_ns+0x367/0x460 [ 1.220582] nvme_validate_or_alloc_ns+0xe2/0x139 [ 1.220582] nvme_scan_ns_list+0x113/0x17a [ 1.220582] nvme_scan_work+0xa5/0x106 [ 1.220582] process_one_work+0x1dd/0x3e0 [ 1.220582] worker_thread+0x2d/0x3b0 [ 1.220582] ? cancel_delayed_work+0x90/0x90 [ 1.220582] kthread+0x117/0x130 [ 1.220582] ? kthread_park+0x90/0x90 [ 1.220582] ret_from_fork+0x22/0x30 [ 1.220582] Modules linked in: [ 1.220582] CR2: 0000000100000041 [ 1.220582] ---[ end trace b1f509a1bbfbc113 ]--- [ 1.220582] RIP: 0010:nvme_ns_id_attrs_are_visible+0x10f/0x152 [ 1.220582] Code: 81 7d d0 80 f9 a1 82 74 0a 48 81 7d d0 a0 f9 a1 82 75 50 48 8b 45 e8 48 89 45 f8 48 8b 45 f8 48 83 e8 60 48 8b 80 60 03 00 00 <48> 8b 40 40 48 3d e0 d1 4d 82 74 07 b8 00 00 00 00 eb 2e 48 8b 45 [ 1.220582] RSP: 0000:ffffc90000047b70 EFLAGS: 00010282 [ 1.220582] RAX: 0000000100000001 RBX: ffffffff824ddb20 RCX: 0000000000000124 [ 1.220582] RDX: ffff8881026eac00 RSI: ffffffff82a1f980 RDI: ffff888102745058 [ 1.220582] RBP: ffffc90000047ba8 R08: ffff888102948718 R09: 0000000000000000 [ 1.220582] R10: 0000000000000000 R11: ffff888100465080 R12: ffff888102745058 [ 1.220582] R13: ffff888102948600 R14: 0000000000000000 R15: ffffffff82a1f548 [ 1.220582] FS: 0000000000000000(0000) GS:ffff88842fc00000(0000) knlGS:0000000000000000 [ 1.220582] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1.220582] CR2: 0000000100000041 CR3: 000000000280c001 CR4: 0000000000370ef0 [ 1.220582] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1.220582] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 And this happens when CONFIG_NVME_MULTIPATH=y configured. Please refere attached log up there :) Thanks!
On 21-01-12 18:22:07, Minwoo Im wrote: > Hello Javier, > > I tested this patch based on nvme-5.11: > > [ 1.219747] BUG: unable to handle page fault for address: 0000000100000041 > [ 1.220518] #PF: supervisor read access in kernel mode > [ 1.220582] #PF: error_code(0x0000) - not-present page > [ 1.220582] PGD 0 P4D 0 > [ 1.220582] Oops: 0000 [#1] SMP PTI > [ 1.220582] CPU: 0 PID: 7 Comm: kworker/u16:0 Not tainted 5.11.0-rc1+ #46 > [ 1.220582] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 > [ 1.220582] Workqueue: nvme-wq nvme_scan_work > [ 1.220582] RIP: 0010:nvme_ns_id_attrs_are_visible+0x10f/0x152 > [ 1.220582] Code: 81 7d d0 80 f9 a1 82 74 0a 48 81 7d d0 a0 f9 a1 82 75 50 48 8b 45 e8 48 89 45 f8 48 8b 45 f8 48 83 e8 60 48 8b 80 60 03 00 00 <48> 8b 40 40 48 3d e0 d1 4d 82 74 07 b8 00 00 00 00 eb 2e 48 8b 45 > [ 1.220582] RSP: 0000:ffffc90000047b70 EFLAGS: 00010282 > [ 1.220582] RAX: 0000000100000001 RBX: ffffffff824ddb20 RCX: 0000000000000124 > [ 1.220582] RDX: ffff8881026eac00 RSI: ffffffff82a1f980 RDI: ffff888102745058 > [ 1.220582] RBP: ffffc90000047ba8 R08: ffff888102948718 R09: 0000000000000000 > [ 1.220582] R10: 0000000000000000 R11: ffff888100465080 R12: ffff888102745058 > [ 1.220582] R13: ffff888102948600 R14: 0000000000000000 R15: ffffffff82a1f548 > [ 1.220582] FS: 0000000000000000(0000) GS:ffff88842fc00000(0000) knlGS:0000000000000000 > [ 1.220582] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 1.220582] CR2: 0000000100000041 CR3: 000000000280c001 CR4: 0000000000370ef0 > [ 1.220582] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > [ 1.220582] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > [ 1.220582] Call Trace: > [ 1.220582] internal_create_group+0xde/0x390 > [ 1.220582] internal_create_groups.part.4+0x3e/0xa0 > [ 1.220582] device_add+0x3cf/0x830 > [ 1.220582] ? cdev_get+0x20/0x20 > [ 1.220582] ? 
cdev_purge+0x60/0x60 > [ 1.220582] cdev_device_add+0x44/0x70 > [ 1.220582] ? cdev_init+0x50/0x60 > [ 1.220582] nvme_alloc_chardev_ns+0x187/0x1eb > [ 1.220582] nvme_alloc_ns+0x367/0x460 > [ 1.220582] nvme_validate_or_alloc_ns+0xe2/0x139 > [ 1.220582] nvme_scan_ns_list+0x113/0x17a > [ 1.220582] nvme_scan_work+0xa5/0x106 > [ 1.220582] process_one_work+0x1dd/0x3e0 > [ 1.220582] worker_thread+0x2d/0x3b0 > [ 1.220582] ? cancel_delayed_work+0x90/0x90 > [ 1.220582] kthread+0x117/0x130 > [ 1.220582] ? kthread_park+0x90/0x90 > [ 1.220582] ret_from_fork+0x22/0x30 > [ 1.220582] Modules linked in: > [ 1.220582] CR2: 0000000100000041 > [ 1.220582] ---[ end trace b1f509a1bbfbc113 ]--- > [ 1.220582] RIP: 0010:nvme_ns_id_attrs_are_visible+0x10f/0x152 > [ 1.220582] Code: 81 7d d0 80 f9 a1 82 74 0a 48 81 7d d0 a0 f9 a1 82 75 50 48 8b 45 e8 48 89 45 f8 48 8b 45 f8 48 83 e8 60 48 8b 80 60 03 00 00 <48> 8b 40 40 48 3d e0 d1 4d 82 74 07 b8 00 00 00 00 eb 2e 48 8b 45 > [ 1.220582] RSP: 0000:ffffc90000047b70 EFLAGS: 00010282 > [ 1.220582] RAX: 0000000100000001 RBX: ffffffff824ddb20 RCX: 0000000000000124 > [ 1.220582] RDX: ffff8881026eac00 RSI: ffffffff82a1f980 RDI: ffff888102745058 > [ 1.220582] RBP: ffffc90000047ba8 R08: ffff888102948718 R09: 0000000000000000 > [ 1.220582] R10: 0000000000000000 R11: ffff888100465080 R12: ffff888102745058 > [ 1.220582] R13: ffff888102948600 R14: 0000000000000000 R15: ffffffff82a1f548 > [ 1.220582] FS: 0000000000000000(0000) GS:ffff88842fc00000(0000) knlGS:0000000000000000 > [ 1.220582] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 1.220582] CR2: 0000000100000041 CR3: 000000000280c001 CR4: 0000000000370ef0 > [ 1.220582] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > [ 1.220582] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > > And this happens when CONFIG_NVME_MULTIPATH=y configured. Please refere > attached log up there :) > > Thanks! 
If this chardev has to have ns_id_attr_group, we need to update nvme_ns_id_attrs_are_visible() so that it takes care of dev_to_disk(dev) in the case of a non-block device. Thanks!
On 12.01.2021 18:22, Minwoo Im wrote: >Hello Javier, > >I tested this patch based on nvme-5.11: > >[ 1.219747] BUG: unable to handle page fault for address: 0000000100000041 >[ 1.220518] #PF: supervisor read access in kernel mode >[ 1.220582] #PF: error_code(0x0000) - not-present page >[ 1.220582] PGD 0 P4D 0 >[ 1.220582] Oops: 0000 [#1] SMP PTI >[ 1.220582] CPU: 0 PID: 7 Comm: kworker/u16:0 Not tainted 5.11.0-rc1+ #46 >[ 1.220582] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 >[ 1.220582] Workqueue: nvme-wq nvme_scan_work >[ 1.220582] RIP: 0010:nvme_ns_id_attrs_are_visible+0x10f/0x152 >[ 1.220582] Code: 81 7d d0 80 f9 a1 82 74 0a 48 81 7d d0 a0 f9 a1 82 75 50 48 8b 45 e8 48 89 45 f8 48 8b 45 f8 48 83 e8 60 48 8b 80 60 03 00 00 <48> 8b 40 40 48 3d e0 d1 4d 82 74 07 b8 00 00 00 00 eb 2e 48 8b 45 >[ 1.220582] RSP: 0000:ffffc90000047b70 EFLAGS: 00010282 >[ 1.220582] RAX: 0000000100000001 RBX: ffffffff824ddb20 RCX: 0000000000000124 >[ 1.220582] RDX: ffff8881026eac00 RSI: ffffffff82a1f980 RDI: ffff888102745058 >[ 1.220582] RBP: ffffc90000047ba8 R08: ffff888102948718 R09: 0000000000000000 >[ 1.220582] R10: 0000000000000000 R11: ffff888100465080 R12: ffff888102745058 >[ 1.220582] R13: ffff888102948600 R14: 0000000000000000 R15: ffffffff82a1f548 >[ 1.220582] FS: 0000000000000000(0000) GS:ffff88842fc00000(0000) knlGS:0000000000000000 >[ 1.220582] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 >[ 1.220582] CR2: 0000000100000041 CR3: 000000000280c001 CR4: 0000000000370ef0 >[ 1.220582] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 >[ 1.220582] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 >[ 1.220582] Call Trace: >[ 1.220582] internal_create_group+0xde/0x390 >[ 1.220582] internal_create_groups.part.4+0x3e/0xa0 >[ 1.220582] device_add+0x3cf/0x830 >[ 1.220582] ? cdev_get+0x20/0x20 >[ 1.220582] ? cdev_purge+0x60/0x60 >[ 1.220582] cdev_device_add+0x44/0x70 >[ 1.220582] ? 
cdev_init+0x50/0x60 >[ 1.220582] nvme_alloc_chardev_ns+0x187/0x1eb >[ 1.220582] nvme_alloc_ns+0x367/0x460 >[ 1.220582] nvme_validate_or_alloc_ns+0xe2/0x139 >[ 1.220582] nvme_scan_ns_list+0x113/0x17a >[ 1.220582] nvme_scan_work+0xa5/0x106 >[ 1.220582] process_one_work+0x1dd/0x3e0 >[ 1.220582] worker_thread+0x2d/0x3b0 >[ 1.220582] ? cancel_delayed_work+0x90/0x90 >[ 1.220582] kthread+0x117/0x130 >[ 1.220582] ? kthread_park+0x90/0x90 >[ 1.220582] ret_from_fork+0x22/0x30 >[ 1.220582] Modules linked in: >[ 1.220582] CR2: 0000000100000041 >[ 1.220582] ---[ end trace b1f509a1bbfbc113 ]--- >[ 1.220582] RIP: 0010:nvme_ns_id_attrs_are_visible+0x10f/0x152 >[ 1.220582] Code: 81 7d d0 80 f9 a1 82 74 0a 48 81 7d d0 a0 f9 a1 82 75 50 48 8b 45 e8 48 89 45 f8 48 8b 45 f8 48 83 e8 60 48 8b 80 60 03 00 00 <48> 8b 40 40 48 3d e0 d1 4d 82 74 07 b8 00 00 00 00 eb 2e 48 8b 45 >[ 1.220582] RSP: 0000:ffffc90000047b70 EFLAGS: 00010282 >[ 1.220582] RAX: 0000000100000001 RBX: ffffffff824ddb20 RCX: 0000000000000124 >[ 1.220582] RDX: ffff8881026eac00 RSI: ffffffff82a1f980 RDI: ffff888102745058 >[ 1.220582] RBP: ffffc90000047ba8 R08: ffff888102948718 R09: 0000000000000000 >[ 1.220582] R10: 0000000000000000 R11: ffff888100465080 R12: ffff888102745058 >[ 1.220582] R13: ffff888102948600 R14: 0000000000000000 R15: ffffffff82a1f548 >[ 1.220582] FS: 0000000000000000(0000) GS:ffff88842fc00000(0000) knlGS:0000000000000000 >[ 1.220582] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 >[ 1.220582] CR2: 0000000100000041 CR3: 000000000280c001 CR4: 0000000000370ef0 >[ 1.220582] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 >[ 1.220582] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > >And this happens when CONFIG_NVME_MULTIPATH=y configured. Please refere >attached log up there :) > >Thanks! I have not implemented multipath support, but it should definitely not crash like this. I'll rebase to 5.11 and test. 
Christoph: Is it OK to send this without multipath support, or should I just send it all together? Javier
On Tue, Jan 12, 2021 at 07:30:55PM +0100, Javier González wrote: > I have not implemented multipath support, but it should definitely not > crash like this. I'll rebase to 5.11 and test. > > Christoph: Is it OK to send this without multipath or should I just send > all together? We're not in a rush at the moment, so I'd prefer working multipath support if you can spare a few cycles.
On 12.01.2021 19:33, Christoph Hellwig wrote: >On Tue, Jan 12, 2021 at 07:30:55PM +0100, Javier González wrote: >> I have not implemented multipath support, but it should definitely not >> crash like this. I'll rebase to 5.11 and test. >> >> Christoph: Is it OK to send this without multipath or should I just send >> all together? > >We're not in a rush at the moment, so I'd prefer working multipath >support if you can spare a few cycles. Sure. I'll work on that and submit all under the same patchset.
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 99f91efe3824..d1ee462f41fa 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -86,7 +86,11 @@ static DEFINE_MUTEX(nvme_subsystems_lock); static DEFINE_IDA(nvme_instance_ida); static dev_t nvme_ctrl_base_chr_devt; + +static DEFINE_IDA(nvme_gen_minor_ida); +static dev_t nvme_ns_base_chr_devt; static struct class *nvme_class; +static struct class *nvme_ns_class; static struct class *nvme_subsys_class; static void nvme_put_subsystem(struct nvme_subsystem *subsys); @@ -537,7 +541,10 @@ static void nvme_free_ns(struct kref *kref) if (ns->ndev) nvme_nvm_unregister(ns); + if (ns->minor) + ida_simple_remove(&nvme_gen_minor_ida, ns->minor - 1); + cdev_device_del(&ns->cdev, &ns->cdev_device); put_disk(ns->disk); nvme_put_ns_head(ns->head); nvme_put_ctrl(ns->ctrl); @@ -1738,15 +1745,15 @@ static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, return ret; } -static int nvme_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) +static int nvme_disk_ioctl(struct gendisk *disk, unsigned int cmd, + unsigned long arg) { struct nvme_ns_head *head = NULL; void __user *argp = (void __user *)arg; struct nvme_ns *ns; int srcu_idx, ret; - ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); + ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx); if (unlikely(!ns)) return -EWOULDBLOCK; @@ -1783,6 +1790,12 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, return ret; } +static int nvme_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + return nvme_disk_ioctl(bdev->bd_disk, cmd, arg); +} + #ifdef CONFIG_COMPAT struct nvme_user_io32 { __u8 opcode; @@ -1824,10 +1837,8 @@ static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, #define nvme_compat_ioctl NULL #endif /* CONFIG_COMPAT */ -static int nvme_open(struct block_device *bdev, fmode_t mode) +static int nvme_ns_open(struct 
nvme_ns *ns) { - struct nvme_ns *ns = bdev->bd_disk->private_data; - #ifdef CONFIG_NVME_MULTIPATH /* should never be called due to GENHD_FL_HIDDEN */ if (WARN_ON_ONCE(ns->head->disk)) @@ -1846,14 +1857,22 @@ static int nvme_open(struct block_device *bdev, fmode_t mode) return -ENXIO; } -static void nvme_release(struct gendisk *disk, fmode_t mode) +static void nvme_ns_release(struct nvme_ns *ns) { - struct nvme_ns *ns = disk->private_data; - module_put(ns->ctrl->ops->module); nvme_put_ns(ns); } +static int nvme_open(struct block_device *bdev, fmode_t mode) +{ + return nvme_ns_open(bdev->bd_disk->private_data); +} + +static void nvme_release(struct gendisk *disk, fmode_t mode) +{ + nvme_ns_release(disk->private_data); +} + static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) { /* some standard values */ @@ -2209,6 +2228,13 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) return 0; out_unfreeze: + /* + * When the device does not support any of the features required by the + * kernel (or viceversa), hide the block device. 
We can still rely on + * the namespace char device for submitting IOCTLs + */ + ns->disk->flags |= GENHD_FL_HIDDEN; + blk_mq_unfreeze_queue(ns->disk->queue); return ret; } @@ -2346,6 +2372,38 @@ static const struct block_device_operations nvme_bdev_ops = { .pr_ops = &nvme_pr_ops, }; +static int nvme_cdev_open(struct inode *inode, struct file *file) +{ + struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev); + + return nvme_ns_open(ns); +} + +static int nvme_cdev_release(struct inode *inode, struct file *file) +{ + struct nvme_ns *ns = container_of(inode->i_cdev, struct nvme_ns, cdev); + + nvme_ns_release(ns); + return 0; +} + +static long nvme_cdev_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct nvme_ns *ns = container_of(file->f_inode->i_cdev, + struct nvme_ns, cdev); + + return nvme_disk_ioctl(ns->disk, cmd, arg); +} + +static const struct file_operations nvme_cdev_fops = { + .owner = THIS_MODULE, + .open = nvme_cdev_open, + .release = nvme_cdev_release, + .unlocked_ioctl = nvme_cdev_ioctl, + .compat_ioctl = compat_ptr_ioctl, +}; + #ifdef CONFIG_NVME_MULTIPATH static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode) { @@ -3343,6 +3401,9 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev) { struct gendisk *disk = dev_to_disk(dev); + if (dev->class == nvme_ns_class) + return nvme_get_ns_from_cdev(dev)->head; + if (disk->fops == &nvme_bdev_ops) return nvme_get_ns_from_dev(dev)->head; else @@ -3474,6 +3535,11 @@ const struct attribute_group *nvme_ns_id_attr_groups[] = { NULL, }; +const struct attribute_group *nvme_ns_char_id_attr_groups[] = { + &nvme_ns_id_attr_group, + NULL, +}; + #define nvme_show_str_function(field) \ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -3866,6 +3932,47 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) } EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU); +static int 
nvme_alloc_chardev_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) +{ + char cdisk_name[DISK_NAME_LEN]; + int ret; + + ret = ida_simple_get(&nvme_gen_minor_ida, 0, 0, GFP_KERNEL); + if (ret < 0) + return ret; + + ns->minor = ret + 1; + device_initialize(&ns->cdev_device); + ns->cdev_device.devt = MKDEV(MAJOR(nvme_ns_base_chr_devt), ret); + ns->cdev_device.class = nvme_ns_class; + ns->cdev_device.parent = ctrl->device; + ns->cdev_device.groups = nvme_ns_char_id_attr_groups; + dev_set_drvdata(&ns->cdev_device, ns); + + sprintf(cdisk_name, "nvme-generic-%dc%dn%d", ctrl->subsys->instance, + ctrl->instance, ns->head->instance); + + ret = dev_set_name(&ns->cdev_device, "%s", cdisk_name); + if (ret) + goto put_ida; + + cdev_init(&ns->cdev, &nvme_cdev_fops); + ns->cdev.owner = ctrl->ops->module; + + ret = cdev_device_add(&ns->cdev, &ns->cdev_device); + if (ret) + goto free_kobj; + + return ret; + +free_kobj: + kfree_const(ns->cdev_device.kobj.name); +put_ida: + ida_simple_remove(&nvme_gen_minor_ida, ns->minor - 1); + ns->minor = 0; + return ret; +} + static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_ns_ids *ids) { @@ -3912,8 +4019,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); ns->disk = disk; - if (nvme_update_ns_info(ns, id)) - goto out_put_disk; + nvme_update_ns_info(ns, id); if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { if (nvme_nvm_register(ns, disk_name, node)) { @@ -3929,9 +4035,14 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, nvme_get_ctrl(ctrl); device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups); - nvme_mpath_add_disk(ns, id); nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name); + + if (nvme_alloc_chardev_ns(ctrl, ns)) + dev_warn(ctrl->device, + "failed to create generic handle for nsid:%d\n", + nsid); + kfree(id); return; @@ -4733,23 +4844,38 @@ static int __init nvme_core_init(void) if (result < 0) 
goto destroy_delete_wq; + result = alloc_chrdev_region(&nvme_ns_base_chr_devt, 0, + NVME_MINORS, "nvmec"); + if (result < 0) + goto unregister_dev_chrdev; + nvme_class = class_create(THIS_MODULE, "nvme"); if (IS_ERR(nvme_class)) { result = PTR_ERR(nvme_class); - goto unregister_chrdev; + goto unregister_ns_chrdev; } nvme_class->dev_uevent = nvme_class_uevent; + nvme_ns_class = class_create(THIS_MODULE, "nvme-ns"); + if (IS_ERR(nvme_ns_class)) { + result = PTR_ERR(nvme_ns_class); + goto destroy_dev_class; + } + nvme_subsys_class = class_create(THIS_MODULE, "nvme-subsystem"); if (IS_ERR(nvme_subsys_class)) { result = PTR_ERR(nvme_subsys_class); - goto destroy_class; + goto destroy_ns_class; } return 0; -destroy_class: +destroy_ns_class: + class_destroy(nvme_ns_class); +destroy_dev_class: class_destroy(nvme_class); -unregister_chrdev: +unregister_ns_chrdev: + unregister_chrdev_region(nvme_ns_base_chr_devt, NVME_MINORS); +unregister_dev_chrdev: unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS); destroy_delete_wq: destroy_workqueue(nvme_delete_wq); @@ -4765,6 +4891,7 @@ static void __exit nvme_core_exit(void) { class_destroy(nvme_subsys_class); class_destroy(nvme_class); + unregister_chrdev_region(nvme_ns_base_chr_devt, NVME_MINORS); unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS); destroy_workqueue(nvme_delete_wq); destroy_workqueue(nvme_reset_wq); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index bfcedfa4b057..1e52f7da82de 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -439,6 +439,10 @@ struct nvme_ns { struct kref kref; struct nvme_ns_head *head; + struct device cdev_device; /* char device */ + struct cdev cdev; + int minor; + int lba_shift; u16 ms; u16 sgs; @@ -818,6 +822,11 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) return dev_to_disk(dev)->private_data; } +static inline struct nvme_ns *nvme_get_ns_from_cdev(struct device *dev) +{ + return dev_get_drvdata(dev); 
+} + #ifdef CONFIG_NVME_HWMON int nvme_hwmon_init(struct nvme_ctrl *ctrl); #else