Message ID | 20230117134942.101112-12-yi.l.liu@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add vfio_device cdev for iommufd support | expand |
> From: Liu, Yi L <yi.l.liu@intel.com> > Sent: Tuesday, January 17, 2023 9:50 PM > > @@ -156,7 +159,11 @@ static void vfio_device_release(struct device *dev) > container_of(dev, struct vfio_device, device); > > vfio_release_device_set(device); > +#if IS_ENABLED(CONFIG_IOMMUFD) > + ida_free(&vfio.device_ida, MINOR(device->device.devt)); > +#else > ida_free(&vfio.device_ida, device->index); > +#endif There are many #if in this patch, leading to bad readability. for this what about letting device->index always storing the minor value? then here it could just be: ida_free(&vfio.device_ida, device->index); > @@ -232,17 +240,25 @@ static int vfio_init_device(struct vfio_device > *device, struct device *dev, > device->device.release = vfio_device_release; > device->device.class = vfio.device_class; > device->device.parent = device->dev; > +#if IS_ENABLED(CONFIG_IOMMUFD) > + device->device.devt = MKDEV(MAJOR(vfio.device_devt), minor); > + cdev_init(&device->cdev, &vfio_device_fops); > + device->cdev.owner = THIS_MODULE; > +#else > + device->index = minor; > +#endif Probably we can have a vfio_init_device_cdev() in iommufd.c and let it be empty if !CONFIG_IOMMUFD. Then here could be: device->index = minor; vfio_init_device_cdev(device, vfio.device_devt, minor); > @@ -257,7 +273,12 @@ static int __vfio_register_dev(struct vfio_device > *device, > if (!device->dev_set) > vfio_assign_device_set(device, device); > > - ret = dev_set_name(&device->device, "vfio%d", device->index); > +#if IS_ENABLED(CONFIG_IOMMUFD) > + minor = MINOR(device->device.devt); > +#else > + minor = device->index; > +#endif then just "minor = device->index" > > +#if IS_ENABLED(CONFIG_IOMMUFD) > + ret = cdev_device_add(&device->cdev, &device->device); > +#else > ret = device_add(&device->device); > +#endif also add a wrapper vfio_register_device_cdev() which does cdev_device_add() if CONFIG_IOMMUFD and device_add() otherwise. > +#if IS_ENABLED(CONFIG_IOMMUFD) > + /* > + * Balances device_add in register path. Putting it as the first > + * operation in unregister to prevent registration refcount from > + * incrementing per cdev open. > + */ > + cdev_device_del(&device->cdev, &device->device); > +#else > + device_del(&device->device); > +#endif ditto > +#if IS_ENABLED(CONFIG_IOMMUFD) > +static int vfio_device_fops_open(struct inode *inode, struct file *filep) > +{ > + struct vfio_device *device = container_of(inode->i_cdev, > + struct vfio_device, cdev); > + struct vfio_device_file *df; > + int ret; > + > + if (!vfio_device_try_get_registration(device)) > + return -ENODEV; > + > + /* > + * device access is blocked until .open_device() is called > + * in BIND_IOMMUFD. > + */ > + df = vfio_allocate_device_file(device, true); > + if (IS_ERR(df)) { > + ret = PTR_ERR(df); > + goto err_put_registration; > + } > + > + filep->private_data = df; > + > + return 0; > + > +err_put_registration: > + vfio_device_put_registration(device); > + return ret; > +} > +#endif move to iommufd.c > +#if IS_ENABLED(CONFIG_IOMMUFD) > +static char *vfio_device_devnode(const struct device *dev, umode_t *mode) > +{ > + return kasprintf(GFP_KERNEL, "vfio/devices/%s", dev_name(dev)); > +} > +#endif ditto > @@ -1543,9 +1617,21 @@ static int __init vfio_init(void) > goto err_dev_class; > } > > +#if IS_ENABLED(CONFIG_IOMMUFD) > + vfio.device_class->devnode = vfio_device_devnode; > + ret = alloc_chrdev_region(&vfio.device_devt, 0, > + MINORMASK + 1, "vfio-dev"); > + if (ret) > + goto err_alloc_dev_chrdev; > +#endif vfio_cdev_init() > static void __exit vfio_cleanup(void) > { > ida_destroy(&vfio.device_ida); > +#if IS_ENABLED(CONFIG_IOMMUFD) > + unregister_chrdev_region(vfio.device_devt, MINORMASK + 1); > +#endif vfio_cdev_cleanup()
On Tue, Jan 17, 2023 at 05:49:40AM -0800, Yi Liu wrote: > This allows user to directly open a vfio device w/o using the legacy > container/group interface, as a prerequisite for supporting new iommu > features like nested translation. > > The device fd opened in this manner doesn't have the capability to access > the device as the fops open() doesn't open the device until the successful > BIND_IOMMUFD which be added in next patch. > > With this patch, devices registered to vfio core have both group and device > interface created. > > - group interface : /dev/vfio/$groupID > - device interface: /dev/vfio/devices/vfioX (X is the minor number and > is unique across devices) > > Given a vfio device the user can identify the matching vfioX by checking > the sysfs path of the device. Take PCI device (0000:6a:01.0) for example, > /sys/bus/pci/devices/0000\:6a\:01.0/vfio-dev/vfio0/dev contains the > major:minor of the matching vfioX. > > Userspace then opens the /dev/vfio/devices/vfioX and checks with fstat > that the major:minor matches. > > The vfio_device cdev logic in this patch: > *) __vfio_register_dev() path ends up doing cdev_device_add() for each > vfio_device; > *) vfio_unregister_group_dev() path does cdev_device_del(); > > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > Signed-off-by: Joao Martins <joao.m.martins@oracle.com> > --- > drivers/vfio/vfio_main.c | 103 ++++++++++++++++++++++++++++++++++++--- > include/linux/vfio.h | 7 ++- > 2 files changed, 102 insertions(+), 8 deletions(-) > > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c > index 78725c28b933..6068ffb7c6b7 100644 > --- a/drivers/vfio/vfio_main.c > +++ b/drivers/vfio/vfio_main.c > @@ -43,6 +43,9 @@ > static struct vfio { > struct class *device_class; > struct ida device_ida; > +#if IS_ENABLED(CONFIG_IOMMUFD) > + dev_t device_devt; > +#endif It is a bit strange to see this called CONFIG_IOMMUFD, maybe we should have a CONFIG_VFIO_DEVICE_FILE that depends on IOMMUFD? Please try to use a plain 'if (IS_ENABLED())' for more of these It probably isn't worth saving a few bytes in memory to complicate all the code, so maybe just always include things like this. > @@ -156,7 +159,11 @@ static void vfio_device_release(struct device *dev) > container_of(dev, struct vfio_device, device); > > vfio_release_device_set(device); > +#if IS_ENABLED(CONFIG_IOMMUFD) > + ida_free(&vfio.device_ida, MINOR(device->device.devt)); > +#else > ida_free(&vfio.device_ida, device->index); > +#endif A vfio_device_get_index() would help this Jason
> From: Tian, Kevin <kevin.tian@intel.com> > Sent: Friday, January 20, 2023 3:27 PM > > > From: Liu, Yi L <yi.l.liu@intel.com> > > Sent: Tuesday, January 17, 2023 9:50 PM > > > > @@ -156,7 +159,11 @@ static void vfio_device_release(struct device *dev) > > container_of(dev, struct vfio_device, device); > > > > vfio_release_device_set(device); > > +#if IS_ENABLED(CONFIG_IOMMUFD) > > + ida_free(&vfio.device_ida, MINOR(device->device.devt)); > > +#else > > ida_free(&vfio.device_ida, device->index); > > +#endif > > There are many #if in this patch, leading to bad readability. > > for this what about letting device->index always storing the minor > value? then here it could just be: > > ida_free(&vfio.device_ida, device->index); Yes. > > @@ -232,17 +240,25 @@ static int vfio_init_device(struct vfio_device > > *device, struct device *dev, > > device->device.release = vfio_device_release; > > device->device.class = vfio.device_class; > > device->device.parent = device->dev; > > +#if IS_ENABLED(CONFIG_IOMMUFD) > > + device->device.devt = MKDEV(MAJOR(vfio.device_devt), minor); > > + cdev_init(&device->cdev, &vfio_device_fops); > > + device->cdev.owner = THIS_MODULE; > > +#else > > + device->index = minor; > > +#endif > > Probably we can have a vfio_init_device_cdev() in iommufd.c and let > it be empty if !CONFIG_IOMMUFD. Then here could be: Yes. Btw. Will adding another device_cdev.c better than reusing iommufd.c? > > device->index = minor; > vfio_init_device_cdev(device, vfio.device_devt, minor); > > > @@ -257,7 +273,12 @@ static int __vfio_register_dev(struct vfio_device > > *device, > > if (!device->dev_set) > > vfio_assign_device_set(device, device); > > > > - ret = dev_set_name(&device->device, "vfio%d", device->index); > > +#if IS_ENABLED(CONFIG_IOMMUFD) > > + minor = MINOR(device->device.devt); > > +#else > > + minor = device->index; > > +#endif > > then just "minor = device->index" Yes. > > > > +#if IS_ENABLED(CONFIG_IOMMUFD) > > + ret = cdev_device_add(&device->cdev, &device->device); > > +#else > > ret = device_add(&device->device); > > +#endif > > also add a wrapper vfio_register_device_cdev() which does > cdev_device_add() if CONFIG_IOMMUFD and device_add() otherwise. Got it. > > > +#if IS_ENABLED(CONFIG_IOMMUFD) > > + /* > > + * Balances device_add in register path. Putting it as the first > > + * operation in unregister to prevent registration refcount from > > + * incrementing per cdev open. > > + */ > > + cdev_device_del(&device->cdev, &device->device); > > +#else > > + device_del(&device->device); > > +#endif > > ditto > > > +#if IS_ENABLED(CONFIG_IOMMUFD) > > +static int vfio_device_fops_open(struct inode *inode, struct file *filep) > > +{ > > + struct vfio_device *device = container_of(inode->i_cdev, > > + struct vfio_device, cdev); > > + struct vfio_device_file *df; > > + int ret; > > + > > + if (!vfio_device_try_get_registration(device)) > > + return -ENODEV; > > + > > + /* > > + * device access is blocked until .open_device() is called > > + * in BIND_IOMMUFD. > > + */ > > + df = vfio_allocate_device_file(device, true); > > + if (IS_ERR(df)) { > > + ret = PTR_ERR(df); > > + goto err_put_registration; > > + } > > + > > + filep->private_data = df; > > + > > + return 0; > > + > > +err_put_registration: > > + vfio_device_put_registration(device); > > + return ret; > > +} > > +#endif > > move to iommufd.c > > > +#if IS_ENABLED(CONFIG_IOMMUFD) > > +static char *vfio_device_devnode(const struct device *dev, umode_t > *mode) > > +{ > > + return kasprintf(GFP_KERNEL, "vfio/devices/%s", dev_name(dev)); > > +} > > +#endif > > ditto > > > @@ -1543,9 +1617,21 @@ static int __init vfio_init(void) > > goto err_dev_class; > > } > > > > +#if IS_ENABLED(CONFIG_IOMMUFD) > > + vfio.device_class->devnode = vfio_device_devnode; > > + ret = alloc_chrdev_region(&vfio.device_devt, 0, > > + MINORMASK + 1, "vfio-dev"); > > + if (ret) > > + goto err_alloc_dev_chrdev; > > +#endif > > vfio_cdev_init() > > > static void __exit vfio_cleanup(void) > > { > > ida_destroy(&vfio.device_ida); > > +#if IS_ENABLED(CONFIG_IOMMUFD) > > + unregister_chrdev_region(vfio.device_devt, MINORMASK + 1); > > +#endif > > vfio_cdev_cleanup() All above comments got. Regards, Yi Liu
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 78725c28b933..6068ffb7c6b7 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -43,6 +43,9 @@ static struct vfio { struct class *device_class; struct ida device_ida; +#if IS_ENABLED(CONFIG_IOMMUFD) + dev_t device_devt; +#endif } vfio; static DEFINE_XARRAY(vfio_device_set_xa); @@ -156,7 +159,11 @@ static void vfio_device_release(struct device *dev) container_of(dev, struct vfio_device, device); vfio_release_device_set(device); +#if IS_ENABLED(CONFIG_IOMMUFD) + ida_free(&vfio.device_ida, MINOR(device->device.devt)); +#else ida_free(&vfio.device_ida, device->index); +#endif if (device->ops->release) device->ops->release(device); @@ -209,15 +216,16 @@ EXPORT_SYMBOL_GPL(_vfio_alloc_device); static int vfio_init_device(struct vfio_device *device, struct device *dev, const struct vfio_device_ops *ops) { + unsigned int minor; int ret; ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL); if (ret < 0) { - dev_dbg(dev, "Error to alloc index\n"); + dev_dbg(dev, "Error to alloc minor\n"); return ret; } - device->index = ret; + minor = ret; init_completion(&device->comp); device->dev = dev; device->ops = ops; @@ -232,17 +240,25 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev, device->device.release = vfio_device_release; device->device.class = vfio.device_class; device->device.parent = device->dev; +#if IS_ENABLED(CONFIG_IOMMUFD) + device->device.devt = MKDEV(MAJOR(vfio.device_devt), minor); + cdev_init(&device->cdev, &vfio_device_fops); + device->cdev.owner = THIS_MODULE; +#else + device->index = minor; +#endif return 0; out_uninit: vfio_release_device_set(device); - ida_free(&vfio.device_ida, device->index); + ida_free(&vfio.device_ida, minor); return ret; } static int __vfio_register_dev(struct vfio_device *device, enum vfio_group_type type) { + unsigned int minor; int ret; if (WARN_ON(device->ops->bind_iommufd && @@ -257,7 +273,12 @@ static int __vfio_register_dev(struct vfio_device *device, if (!device->dev_set) vfio_assign_device_set(device, device); - ret = dev_set_name(&device->device, "vfio%d", device->index); +#if IS_ENABLED(CONFIG_IOMMUFD) + minor = MINOR(device->device.devt); +#else + minor = device->index; +#endif + ret = dev_set_name(&device->device, "vfio%d", minor); if (ret) return ret; @@ -265,7 +286,11 @@ static int __vfio_register_dev(struct vfio_device *device, if (ret) return ret; +#if IS_ENABLED(CONFIG_IOMMUFD) + ret = cdev_device_add(&device->cdev, &device->device); +#else ret = device_add(&device->device); +#endif if (ret) goto err_out; @@ -305,6 +330,17 @@ void vfio_unregister_group_dev(struct vfio_device *device) bool interrupted = false; long rc; +#if IS_ENABLED(CONFIG_IOMMUFD) + /* + * Balances device_add in register path. Putting it as the first + * operation in unregister to prevent registration refcount from + * incrementing per cdev open. + */ + cdev_device_del(&device->cdev, &device->device); +#else + device_del(&device->device); +#endif + vfio_device_put_registration(device); rc = try_wait_for_completion(&device->comp); while (rc <= 0) { @@ -330,9 +366,6 @@ void vfio_unregister_group_dev(struct vfio_device *device) vfio_device_group_unregister(device); - /* Balances device_add in register path */ - device_del(&device->device); - /* Balances vfio_device_set_group in register path */ vfio_device_remove_group(device); } @@ -502,6 +535,37 @@ static inline void vfio_device_pm_runtime_put(struct vfio_device *device) /* * VFIO Device fd */ +#if IS_ENABLED(CONFIG_IOMMUFD) +static int vfio_device_fops_open(struct inode *inode, struct file *filep) +{ + struct vfio_device *device = container_of(inode->i_cdev, + struct vfio_device, cdev); + struct vfio_device_file *df; + int ret; + + if (!vfio_device_try_get_registration(device)) + return -ENODEV; + + /* + * device access is blocked until .open_device() is called + * in BIND_IOMMUFD. + */ + df = vfio_allocate_device_file(device, true); + if (IS_ERR(df)) { + ret = PTR_ERR(df); + goto err_put_registration; + } + + filep->private_data = df; + + return 0; + +err_put_registration: + vfio_device_put_registration(device); + return ret; +} +#endif + static int vfio_device_fops_release(struct inode *inode, struct file *filep) { struct vfio_device_file *df = filep->private_data; @@ -1169,6 +1233,9 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) const struct file_operations vfio_device_fops = { .owner = THIS_MODULE, +#if IS_ENABLED(CONFIG_IOMMUFD) + .open = vfio_device_fops_open, +#endif .release = vfio_device_fops_release, .read = vfio_device_fops_read, .write = vfio_device_fops_write, @@ -1522,6 +1589,13 @@ EXPORT_SYMBOL(vfio_dma_rw); /* * Module/class support */ +#if IS_ENABLED(CONFIG_IOMMUFD) +static char *vfio_device_devnode(const struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "vfio/devices/%s", dev_name(dev)); +} +#endif + static int __init vfio_init(void) { int ret; @@ -1543,9 +1617,21 @@ static int __init vfio_init(void) goto err_dev_class; } +#if IS_ENABLED(CONFIG_IOMMUFD) + vfio.device_class->devnode = vfio_device_devnode; + ret = alloc_chrdev_region(&vfio.device_devt, 0, + MINORMASK + 1, "vfio-dev"); + if (ret) + goto err_alloc_dev_chrdev; +#endif pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); return 0; +#if IS_ENABLED(CONFIG_IOMMUFD) +err_alloc_dev_chrdev: + class_destroy(vfio.device_class); + vfio.device_class = NULL; +#endif err_dev_class: vfio_virqfd_exit(); err_virqfd: @@ -1556,6 +1642,9 @@ static int __init vfio_init(void) static void __exit vfio_cleanup(void) { ida_destroy(&vfio.device_ida); +#if IS_ENABLED(CONFIG_IOMMUFD) + unregister_chrdev_region(vfio.device_devt, MINORMASK + 1); +#endif class_destroy(vfio.device_class); vfio.device_class = NULL; vfio_virqfd_exit(); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 300318f0d448..4a31842ebe0b 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -13,6 +13,7 @@ #include <linux/mm.h> #include <linux/workqueue.h> #include <linux/poll.h> +#include <linux/cdev.h> #include <uapi/linux/vfio.h> #include <linux/iova_bitmap.h> @@ -50,8 +51,12 @@ struct vfio_device { struct kvm *kvm; /* Members below here are private, not for driver use */ - unsigned int index; struct device device; /* device.kref covers object life circle */ +#if IS_ENABLED(CONFIG_IOMMUFD) + struct cdev cdev; +#else + unsigned int index; +#endif refcount_t refcount; /* user count on registered device*/ unsigned int open_count; struct completion comp;