diff mbox series

[v2,15/15] vfio: Add struct device to vfio_device

Message ID 20220901143747.32858-16-kevin.tian@intel.com (mailing list archive)
State New, archived
Headers show
Series Tidy up vfio_device life cycle | expand

Commit Message

Tian, Kevin Sept. 1, 2022, 2:37 p.m. UTC
From: Yi Liu <yi.l.liu@intel.com>

and replace kref. With it a 'vfio-dev/vfioX' node is created under the
sysfs path of the parent, indicating the device is bound to a vfio
driver, e.g.:

/sys/devices/pci0000\:6f/0000\:6f\:01.0/vfio-dev/vfio0

It is also a preparatory step toward adding cdev for supporting future
device-oriented uAPI.

Add Documentation/ABI/testing/sysfs-devices-vfio-dev.

Also take this chance to rename chardev 'vfio' to 'vfio-group' in
/proc/devices.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
---
 .../ABI/testing/sysfs-devices-vfio-dev        |  8 +++
 drivers/vfio/vfio_main.c                      | 67 +++++++++++++++----
 include/linux/vfio.h                          |  6 +-
 3 files changed, 66 insertions(+), 15 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-devices-vfio-dev

Comments

Tian, Kevin Sept. 1, 2022, 7:40 a.m. UTC | #1
> From: Tian, Kevin <kevin.tian@intel.com>
> Sent: Thursday, September 1, 2022 10:38 PM
> 
> diff --git a/Documentation/ABI/testing/sysfs-devices-vfio-dev
> b/Documentation/ABI/testing/sysfs-devices-vfio-dev
> new file mode 100644
> index 000000000000..e21424fd9666
> --- /dev/null
> +++ b/Documentation/ABI/testing/sysfs-devices-vfio-dev
> @@ -0,0 +1,8 @@
> +What:		 /sys/.../<device>/vfio-dev/vfioX/
> +Date:		 September 2022
> +Contact:	 Yi Liu <yi.l.liu@intel.com>
> +Description:
> +		 This directory is created when the device is bound to a
> +		 vfio driver. The layout under this directory matches what
> +		 exists for a standard 'struct device'. 'X' is a unique
> +		 index marking this device in vfio.

This missed an update to the MAINTAINERS file:

diff --git a/MAINTAINERS b/MAINTAINERS
index 589517372408..3fc8c599f4f2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -21310,6 +21310,7 @@ R:      Cornelia Huck <cohuck@redhat.com>
 L:	kvm@vger.kernel.org
 S:	Maintained
 T:	git git://github.com/awilliam/linux-vfio.git
+F:	Documentation/ABI/testing/sysfs-devices-vfio-dev
 F:	Documentation/driver-api/vfio.rst
 F:	drivers/vfio/
 F:	include/linux/vfio.h

Alex, I sent a wrong version w/o fixing two checkpatch warnings (this
and the one in patch12). Please let me know whether you want me to
resend.

Thanks
Kevin
Eric Auger Sept. 8, 2022, 9:06 a.m. UTC | #2
Hi Kevin,

On 9/1/22 16:37, Kevin Tian wrote:
> From: Yi Liu <yi.l.liu@intel.com>
>
> and replace kref. With it a 'vfio-dev/vfioX' node is created under the
> sysfs path of the parent, indicating the device is bound to a vfio
> driver, e.g.:
>
> /sys/devices/pci0000\:6f/0000\:6f\:01.0/vfio-dev/vfio0
>
> It is also a preparatory step toward adding cdev for supporting future
> device-oriented uAPI.
>
> Add Documentation/ABI/testing/sysfs-devices-vfio-dev.
>
> Also take this chance to rename chardev 'vfio' to 'vfio-group' in
> /proc/devices.
>
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Kevin Tian <kevin.tian@intel.com>
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  .../ABI/testing/sysfs-devices-vfio-dev        |  8 +++
>  drivers/vfio/vfio_main.c                      | 67 +++++++++++++++----
>  include/linux/vfio.h                          |  6 +-
>  3 files changed, 66 insertions(+), 15 deletions(-)
>  create mode 100644 Documentation/ABI/testing/sysfs-devices-vfio-dev
>
> diff --git a/Documentation/ABI/testing/sysfs-devices-vfio-dev b/Documentation/ABI/testing/sysfs-devices-vfio-dev
> new file mode 100644
> index 000000000000..e21424fd9666
> --- /dev/null
> +++ b/Documentation/ABI/testing/sysfs-devices-vfio-dev
> @@ -0,0 +1,8 @@
> +What:		 /sys/.../<device>/vfio-dev/vfioX/
> +Date:		 September 2022
> +Contact:	 Yi Liu <yi.l.liu@intel.com>
> +Description:
> +		 This directory is created when the device is bound to a
> +		 vfio driver. The layout under this directory matches what
> +		 exists for a standard 'struct device'. 'X' is a unique
> +		 index marking this device in vfio.
> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> index bfa675d314ab..141f55c3faf5 100644
> --- a/drivers/vfio/vfio_main.c
> +++ b/drivers/vfio/vfio_main.c
> @@ -46,6 +46,8 @@ static struct vfio {
>  	struct mutex			group_lock; /* locks group_list */
>  	struct ida			group_ida;
>  	dev_t				group_devt;
> +	struct class			*device_class;
> +	struct ida			device_ida;
>  } vfio;
>  
>  struct vfio_iommu_driver {
> @@ -483,12 +485,13 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
>   * VFIO driver API
>   */
>  /* Release helper called by vfio_put_device() */
> -void vfio_device_release(struct kref *kref)
> +static void vfio_device_release(struct device *dev)
>  {
>  	struct vfio_device *device =
> -			container_of(kref, struct vfio_device, kref);
> +			container_of(dev, struct vfio_device, device);
>  
>  	vfio_release_device_set(device);
> +	ida_free(&vfio.device_ida, device->index);
>  
>  	/*
>  	 * kvfree() cannot be done here due to a life cycle mess in
> @@ -498,7 +501,6 @@ void vfio_device_release(struct kref *kref)
>  	 */
>  	device->ops->release(device);
>  }
> -EXPORT_SYMBOL_GPL(vfio_device_release);
>  
>  /*
>   * Alloc and initialize vfio_device so it can be registered to vfio
> @@ -546,6 +548,13 @@ int vfio_init_device(struct vfio_device *device, struct device *dev,
>  {
>  	int ret;
>  
> +	ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL);
> +	if (ret < 0) {
> +		dev_dbg(dev, "Error to alloc index\n");
> +		return ret;
> +	}
> +
> +	device->index = ret;
>  	init_completion(&device->comp);
>  	device->dev = dev;
>  	device->ops = ops;
> @@ -556,11 +565,15 @@ int vfio_init_device(struct vfio_device *device, struct device *dev,
>  			goto out_uninit;
>  	}
>  
> -	kref_init(&device->kref);
> +	device_initialize(&device->device);
> +	device->device.release = vfio_device_release;
> +	device->device.class = vfio.device_class;
> +	device->device.parent = device->dev;
>  	return 0;
>  
>  out_uninit:
>  	vfio_release_device_set(device);
> +	ida_free(&vfio.device_ida, device->index);
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(vfio_init_device);
> @@ -657,6 +670,7 @@ static int __vfio_register_dev(struct vfio_device *device,
>  		struct vfio_group *group)
>  {
>  	struct vfio_device *existing_device;
> +	int ret;
>  
>  	if (IS_ERR(group))
>  		return PTR_ERR(group);
> @@ -673,16 +687,21 @@ static int __vfio_register_dev(struct vfio_device *device,
>  		dev_WARN(device->dev, "Device already exists on group %d\n",
>  			 iommu_group_id(group->iommu_group));
>  		vfio_device_put_registration(existing_device);
> -		if (group->type == VFIO_NO_IOMMU ||
> -		    group->type == VFIO_EMULATED_IOMMU)
> -			iommu_group_remove_device(device->dev);
> -		vfio_group_put(group);
> -		return -EBUSY;
> +		ret = -EBUSY;
> +		goto err_out;
>  	}
>  
>  	/* Our reference on group is moved to the device */
>  	device->group = group;
>  
> +	ret = dev_set_name(&device->device, "vfio%d", device->index);
> +	if (ret)
> +		goto err_out;
> +
> +	ret = device_add(&device->device);
> +	if (ret)
> +		goto err_out;
> +
>  	/* Refcounting can't start until the driver calls register */
>  	refcount_set(&device->refcount, 1);
>  
> @@ -692,6 +711,12 @@ static int __vfio_register_dev(struct vfio_device *device,
>  	mutex_unlock(&group->device_lock);
>  
>  	return 0;
> +err_out:
> +	if (group->type == VFIO_NO_IOMMU ||
> +	    group->type == VFIO_EMULATED_IOMMU)
> +		iommu_group_remove_device(device->dev);
> +	vfio_group_put(group);
> +	return ret;
>  }
>  
>  int vfio_register_group_dev(struct vfio_device *device)
> @@ -779,6 +804,9 @@ void vfio_unregister_group_dev(struct vfio_device *device)
>  	group->dev_counter--;
>  	mutex_unlock(&group->device_lock);
>  
> +	/* Balances device_add in register path */
> +	device_del(&device->device);
> +
>  	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
>  		iommu_group_remove_device(device->dev);
>  
> @@ -2145,6 +2173,7 @@ static int __init vfio_init(void)
>  	int ret;
>  
>  	ida_init(&vfio.group_ida);
> +	ida_init(&vfio.device_ida);
>  	mutex_init(&vfio.group_lock);
>  	mutex_init(&vfio.iommu_drivers_lock);
>  	INIT_LIST_HEAD(&vfio.group_list);
> @@ -2160,12 +2189,20 @@ static int __init vfio_init(void)
>  	vfio.class = class_create(THIS_MODULE, "vfio");
>  	if (IS_ERR(vfio.class)) {
>  		ret = PTR_ERR(vfio.class);
> -		goto err_class;
> +		goto err_group_class;
>  	}
>  
>  	vfio.class->devnode = vfio_devnode;
>  
> -	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
> +	/* /sys/class/vfio-dev/vfioX */
> +	vfio.device_class = class_create(THIS_MODULE, "vfio-dev");
> +	if (IS_ERR(vfio.device_class)) {
> +		ret = PTR_ERR(vfio.device_class);
> +		goto err_dev_class;
> +	}
> +
> +	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1,
> +				  "vfio-group");
>  	if (ret)
>  		goto err_alloc_chrdev;
>  
> @@ -2181,9 +2218,12 @@ static int __init vfio_init(void)
>  err_driver_register:
>  	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
>  err_alloc_chrdev:
> +	class_destroy(vfio.device_class);
> +	vfio.device_class = NULL;
> +err_dev_class:
>  	class_destroy(vfio.class);
>  	vfio.class = NULL;
> -err_class:
> +err_group_class:
>  	misc_deregister(&vfio_dev);
>  	return ret;
>  }
> @@ -2195,8 +2235,11 @@ static void __exit vfio_cleanup(void)
>  #ifdef CONFIG_VFIO_NOIOMMU
>  	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
>  #endif
> +	ida_destroy(&vfio.device_ida);
>  	ida_destroy(&vfio.group_ida);
>  	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
> +	class_destroy(vfio.device_class);
> +	vfio.device_class = NULL;
>  	class_destroy(vfio.class);
>  	vfio.class = NULL;
>  	misc_deregister(&vfio_dev);
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index f03447c8774d..5c13f74da1bb 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -45,7 +45,8 @@ struct vfio_device {
>  	struct kvm *kvm;
>  
>  	/* Members below here are private, not for driver use */
> -	struct kref kref;	/* object life cycle */
> +	unsigned int index;
> +	struct device device;	/* device.kref covers object life circle */
>  	refcount_t refcount;	/* user count on registered device*/
>  	unsigned int open_count;
>  	struct completion comp;
I am not totally clear about remaining 'struct device *dev;' in
vfio_device struct. I see it used in some places. Is it supposed to
disappear at some point?
> @@ -154,10 +155,9 @@ struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
>  int vfio_init_device(struct vfio_device *device, struct device *dev,
>  		     const struct vfio_device_ops *ops);
>  void vfio_free_device(struct vfio_device *device);
> -void vfio_device_release(struct kref *kref);
>  static inline void vfio_put_device(struct vfio_device *device)
>  {
> -	kref_put(&device->kref, vfio_device_release);
> +	put_device(&device->device);
>  }
>  
>  int vfio_register_group_dev(struct vfio_device *device);

Thanks

Eric
Yi Liu Sept. 8, 2022, 9:17 a.m. UTC | #3
On 2022/9/8 17:06, Eric Auger wrote:
> Hi Kevin,
> 
> On 9/1/22 16:37, Kevin Tian wrote:
>> From: Yi Liu <yi.l.liu@intel.com>
>>
>> and replace kref. With it a 'vfio-dev/vfioX' node is created under the
>> sysfs path of the parent, indicating the device is bound to a vfio
>> driver, e.g.:
>>
>> /sys/devices/pci0000\:6f/0000\:6f\:01.0/vfio-dev/vfio0
>>
>> It is also a preparatory step toward adding cdev for supporting future
>> device-oriented uAPI.
>>
>> Add Documentation/ABI/testing/sysfs-devices-vfio-dev.
>>
>> Also take this chance to rename chardev 'vfio' to 'vfio-group' in
>> /proc/devices.
>>
>> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Kevin Tian <kevin.tian@intel.com>
>> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
>> ---
>>   .../ABI/testing/sysfs-devices-vfio-dev        |  8 +++
>>   drivers/vfio/vfio_main.c                      | 67 +++++++++++++++----
>>   include/linux/vfio.h                          |  6 +-
>>   3 files changed, 66 insertions(+), 15 deletions(-)
>>   create mode 100644 Documentation/ABI/testing/sysfs-devices-vfio-dev
>>
>> diff --git a/Documentation/ABI/testing/sysfs-devices-vfio-dev b/Documentation/ABI/testing/sysfs-devices-vfio-dev
>> new file mode 100644
>> index 000000000000..e21424fd9666
>> --- /dev/null
>> +++ b/Documentation/ABI/testing/sysfs-devices-vfio-dev
>> @@ -0,0 +1,8 @@
>> +What:		 /sys/.../<device>/vfio-dev/vfioX/
>> +Date:		 September 2022
>> +Contact:	 Yi Liu <yi.l.liu@intel.com>
>> +Description:
>> +		 This directory is created when the device is bound to a
>> +		 vfio driver. The layout under this directory matches what
>> +		 exists for a standard 'struct device'. 'X' is a unique
>> +		 index marking this device in vfio.
>> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
>> index bfa675d314ab..141f55c3faf5 100644
>> --- a/drivers/vfio/vfio_main.c
>> +++ b/drivers/vfio/vfio_main.c
>> @@ -46,6 +46,8 @@ static struct vfio {
>>   	struct mutex			group_lock; /* locks group_list */
>>   	struct ida			group_ida;
>>   	dev_t				group_devt;
>> +	struct class			*device_class;
>> +	struct ida			device_ida;
>>   } vfio;
>>   
>>   struct vfio_iommu_driver {
>> @@ -483,12 +485,13 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
>>    * VFIO driver API
>>    */
>>   /* Release helper called by vfio_put_device() */
>> -void vfio_device_release(struct kref *kref)
>> +static void vfio_device_release(struct device *dev)
>>   {
>>   	struct vfio_device *device =
>> -			container_of(kref, struct vfio_device, kref);
>> +			container_of(dev, struct vfio_device, device);
>>   
>>   	vfio_release_device_set(device);
>> +	ida_free(&vfio.device_ida, device->index);
>>   
>>   	/*
>>   	 * kvfree() cannot be done here due to a life cycle mess in
>> @@ -498,7 +501,6 @@ void vfio_device_release(struct kref *kref)
>>   	 */
>>   	device->ops->release(device);
>>   }
>> -EXPORT_SYMBOL_GPL(vfio_device_release);
>>   
>>   /*
>>    * Alloc and initialize vfio_device so it can be registered to vfio
>> @@ -546,6 +548,13 @@ int vfio_init_device(struct vfio_device *device, struct device *dev,
>>   {
>>   	int ret;
>>   
>> +	ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL);
>> +	if (ret < 0) {
>> +		dev_dbg(dev, "Error to alloc index\n");
>> +		return ret;
>> +	}
>> +
>> +	device->index = ret;
>>   	init_completion(&device->comp);
>>   	device->dev = dev;
>>   	device->ops = ops;
>> @@ -556,11 +565,15 @@ int vfio_init_device(struct vfio_device *device, struct device *dev,
>>   			goto out_uninit;
>>   	}
>>   
>> -	kref_init(&device->kref);
>> +	device_initialize(&device->device);
>> +	device->device.release = vfio_device_release;
>> +	device->device.class = vfio.device_class;
>> +	device->device.parent = device->dev;
>>   	return 0;
>>   
>>   out_uninit:
>>   	vfio_release_device_set(device);
>> +	ida_free(&vfio.device_ida, device->index);
>>   	return ret;
>>   }
>>   EXPORT_SYMBOL_GPL(vfio_init_device);
>> @@ -657,6 +670,7 @@ static int __vfio_register_dev(struct vfio_device *device,
>>   		struct vfio_group *group)
>>   {
>>   	struct vfio_device *existing_device;
>> +	int ret;
>>   
>>   	if (IS_ERR(group))
>>   		return PTR_ERR(group);
>> @@ -673,16 +687,21 @@ static int __vfio_register_dev(struct vfio_device *device,
>>   		dev_WARN(device->dev, "Device already exists on group %d\n",
>>   			 iommu_group_id(group->iommu_group));
>>   		vfio_device_put_registration(existing_device);
>> -		if (group->type == VFIO_NO_IOMMU ||
>> -		    group->type == VFIO_EMULATED_IOMMU)
>> -			iommu_group_remove_device(device->dev);
>> -		vfio_group_put(group);
>> -		return -EBUSY;
>> +		ret = -EBUSY;
>> +		goto err_out;
>>   	}
>>   
>>   	/* Our reference on group is moved to the device */
>>   	device->group = group;
>>   
>> +	ret = dev_set_name(&device->device, "vfio%d", device->index);
>> +	if (ret)
>> +		goto err_out;
>> +
>> +	ret = device_add(&device->device);
>> +	if (ret)
>> +		goto err_out;
>> +
>>   	/* Refcounting can't start until the driver calls register */
>>   	refcount_set(&device->refcount, 1);
>>   
>> @@ -692,6 +711,12 @@ static int __vfio_register_dev(struct vfio_device *device,
>>   	mutex_unlock(&group->device_lock);
>>   
>>   	return 0;
>> +err_out:
>> +	if (group->type == VFIO_NO_IOMMU ||
>> +	    group->type == VFIO_EMULATED_IOMMU)
>> +		iommu_group_remove_device(device->dev);
>> +	vfio_group_put(group);
>> +	return ret;
>>   }
>>   
>>   int vfio_register_group_dev(struct vfio_device *device)
>> @@ -779,6 +804,9 @@ void vfio_unregister_group_dev(struct vfio_device *device)
>>   	group->dev_counter--;
>>   	mutex_unlock(&group->device_lock);
>>   
>> +	/* Balances device_add in register path */
>> +	device_del(&device->device);
>> +
>>   	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
>>   		iommu_group_remove_device(device->dev);
>>   
>> @@ -2145,6 +2173,7 @@ static int __init vfio_init(void)
>>   	int ret;
>>   
>>   	ida_init(&vfio.group_ida);
>> +	ida_init(&vfio.device_ida);
>>   	mutex_init(&vfio.group_lock);
>>   	mutex_init(&vfio.iommu_drivers_lock);
>>   	INIT_LIST_HEAD(&vfio.group_list);
>> @@ -2160,12 +2189,20 @@ static int __init vfio_init(void)
>>   	vfio.class = class_create(THIS_MODULE, "vfio");
>>   	if (IS_ERR(vfio.class)) {
>>   		ret = PTR_ERR(vfio.class);
>> -		goto err_class;
>> +		goto err_group_class;
>>   	}
>>   
>>   	vfio.class->devnode = vfio_devnode;
>>   
>> -	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
>> +	/* /sys/class/vfio-dev/vfioX */
>> +	vfio.device_class = class_create(THIS_MODULE, "vfio-dev");
>> +	if (IS_ERR(vfio.device_class)) {
>> +		ret = PTR_ERR(vfio.device_class);
>> +		goto err_dev_class;
>> +	}
>> +
>> +	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1,
>> +				  "vfio-group");
>>   	if (ret)
>>   		goto err_alloc_chrdev;
>>   
>> @@ -2181,9 +2218,12 @@ static int __init vfio_init(void)
>>   err_driver_register:
>>   	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
>>   err_alloc_chrdev:
>> +	class_destroy(vfio.device_class);
>> +	vfio.device_class = NULL;
>> +err_dev_class:
>>   	class_destroy(vfio.class);
>>   	vfio.class = NULL;
>> -err_class:
>> +err_group_class:
>>   	misc_deregister(&vfio_dev);
>>   	return ret;
>>   }
>> @@ -2195,8 +2235,11 @@ static void __exit vfio_cleanup(void)
>>   #ifdef CONFIG_VFIO_NOIOMMU
>>   	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
>>   #endif
>> +	ida_destroy(&vfio.device_ida);
>>   	ida_destroy(&vfio.group_ida);
>>   	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
>> +	class_destroy(vfio.device_class);
>> +	vfio.device_class = NULL;
>>   	class_destroy(vfio.class);
>>   	vfio.class = NULL;
>>   	misc_deregister(&vfio_dev);
>> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
>> index f03447c8774d..5c13f74da1bb 100644
>> --- a/include/linux/vfio.h
>> +++ b/include/linux/vfio.h
>> @@ -45,7 +45,8 @@ struct vfio_device {
>>   	struct kvm *kvm;
>>   
>>   	/* Members below here are private, not for driver use */
>> -	struct kref kref;	/* object life cycle */
>> +	unsigned int index;
>> +	struct device device;	/* device.kref covers object life circle */
>>   	refcount_t refcount;	/* user count on registered device*/
>>   	unsigned int open_count;
>>   	struct completion comp;
> I am not totally clear about remaining 'struct device *dev;' in
> vfio_device struct. I see it used in some places. Is it supposed to
> disappear at some point?

no, Eric. *dev will not disappear, it stores the dev pointer passed in by
caller of vfio_init_device().

>> @@ -154,10 +155,9 @@ struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
>>   int vfio_init_device(struct vfio_device *device, struct device *dev,
>>   		     const struct vfio_device_ops *ops);
>>   void vfio_free_device(struct vfio_device *device);
>> -void vfio_device_release(struct kref *kref);
>>   static inline void vfio_put_device(struct vfio_device *device)
>>   {
>> -	kref_put(&device->kref, vfio_device_release);
>> +	put_device(&device->device);
>>   }
>>   
>>   int vfio_register_group_dev(struct vfio_device *device);
> 
> Thanks
> 
> Eric
>
Eric Auger Sept. 8, 2022, 9:39 a.m. UTC | #4
On 9/8/22 11:17, Yi Liu wrote:
> On 2022/9/8 17:06, Eric Auger wrote:
>> Hi Kevin,
>>
>> On 9/1/22 16:37, Kevin Tian wrote:
>>> From: Yi Liu <yi.l.liu@intel.com>
>>>
>>> and replace kref. With it a 'vfio-dev/vfioX' node is created under the
>>> sysfs path of the parent, indicating the device is bound to a vfio
>>> driver, e.g.:
>>>
>>> /sys/devices/pci0000\:6f/0000\:6f\:01.0/vfio-dev/vfio0
>>>
>>> It is also a preparatory step toward adding cdev for supporting future
>>> device-oriented uAPI.
>>>
>>> Add Documentation/ABI/testing/sysfs-devices-vfio-dev.
>>>
>>> Also take this chance to rename chardev 'vfio' to 'vfio-group' in
>>> /proc/devices.
>>>
>>> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>> Signed-off-by: Kevin Tian <kevin.tian@intel.com>
>>> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
>>> ---
>>>   .../ABI/testing/sysfs-devices-vfio-dev        |  8 +++
>>>   drivers/vfio/vfio_main.c                      | 67
>>> +++++++++++++++----
>>>   include/linux/vfio.h                          |  6 +-
>>>   3 files changed, 66 insertions(+), 15 deletions(-)
>>>   create mode 100644 Documentation/ABI/testing/sysfs-devices-vfio-dev
>>>
>>> diff --git a/Documentation/ABI/testing/sysfs-devices-vfio-dev
>>> b/Documentation/ABI/testing/sysfs-devices-vfio-dev
>>> new file mode 100644
>>> index 000000000000..e21424fd9666
>>> --- /dev/null
>>> +++ b/Documentation/ABI/testing/sysfs-devices-vfio-dev
>>> @@ -0,0 +1,8 @@
>>> +What:         /sys/.../<device>/vfio-dev/vfioX/
>>> +Date:         September 2022
>>> +Contact:     Yi Liu <yi.l.liu@intel.com>
>>> +Description:
>>> +         This directory is created when the device is bound to a
>>> +         vfio driver. The layout under this directory matches what
>>> +         exists for a standard 'struct device'. 'X' is a unique
>>> +         index marking this device in vfio.
>>> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
>>> index bfa675d314ab..141f55c3faf5 100644
>>> --- a/drivers/vfio/vfio_main.c
>>> +++ b/drivers/vfio/vfio_main.c
>>> @@ -46,6 +46,8 @@ static struct vfio {
>>>       struct mutex            group_lock; /* locks group_list */
>>>       struct ida            group_ida;
>>>       dev_t                group_devt;
>>> +    struct class            *device_class;
>>> +    struct ida            device_ida;
>>>   } vfio;
>>>     struct vfio_iommu_driver {
>>> @@ -483,12 +485,13 @@ static struct vfio_device
>>> *vfio_group_get_device(struct vfio_group *group,
>>>    * VFIO driver API
>>>    */
>>>   /* Release helper called by vfio_put_device() */
>>> -void vfio_device_release(struct kref *kref)
>>> +static void vfio_device_release(struct device *dev)
>>>   {
>>>       struct vfio_device *device =
>>> -            container_of(kref, struct vfio_device, kref);
>>> +            container_of(dev, struct vfio_device, device);
>>>         vfio_release_device_set(device);
>>> +    ida_free(&vfio.device_ida, device->index);
>>>         /*
>>>        * kvfree() cannot be done here due to a life cycle mess in
>>> @@ -498,7 +501,6 @@ void vfio_device_release(struct kref *kref)
>>>        */
>>>       device->ops->release(device);
>>>   }
>>> -EXPORT_SYMBOL_GPL(vfio_device_release);
>>>     /*
>>>    * Alloc and initialize vfio_device so it can be registered to vfio
>>> @@ -546,6 +548,13 @@ int vfio_init_device(struct vfio_device
>>> *device, struct device *dev,
>>>   {
>>>       int ret;
>>>   +    ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL);
>>> +    if (ret < 0) {
>>> +        dev_dbg(dev, "Error to alloc index\n");
>>> +        return ret;
>>> +    }
>>> +
>>> +    device->index = ret;
>>>       init_completion(&device->comp);
>>>       device->dev = dev;
>>>       device->ops = ops;
>>> @@ -556,11 +565,15 @@ int vfio_init_device(struct vfio_device
>>> *device, struct device *dev,
>>>               goto out_uninit;
>>>       }
>>>   -    kref_init(&device->kref);
>>> +    device_initialize(&device->device);
>>> +    device->device.release = vfio_device_release;
>>> +    device->device.class = vfio.device_class;
>>> +    device->device.parent = device->dev;
>>>       return 0;
>>>     out_uninit:
>>>       vfio_release_device_set(device);
>>> +    ida_free(&vfio.device_ida, device->index);
>>>       return ret;
>>>   }
>>>   EXPORT_SYMBOL_GPL(vfio_init_device);
>>> @@ -657,6 +670,7 @@ static int __vfio_register_dev(struct
>>> vfio_device *device,
>>>           struct vfio_group *group)
>>>   {
>>>       struct vfio_device *existing_device;
>>> +    int ret;
>>>         if (IS_ERR(group))
>>>           return PTR_ERR(group);
>>> @@ -673,16 +687,21 @@ static int __vfio_register_dev(struct
>>> vfio_device *device,
>>>           dev_WARN(device->dev, "Device already exists on group %d\n",
>>>                iommu_group_id(group->iommu_group));
>>>           vfio_device_put_registration(existing_device);
>>> -        if (group->type == VFIO_NO_IOMMU ||
>>> -            group->type == VFIO_EMULATED_IOMMU)
>>> -            iommu_group_remove_device(device->dev);
>>> -        vfio_group_put(group);
>>> -        return -EBUSY;
>>> +        ret = -EBUSY;
>>> +        goto err_out;
>>>       }
>>>         /* Our reference on group is moved to the device */
>>>       device->group = group;
>>>   +    ret = dev_set_name(&device->device, "vfio%d", device->index);
>>> +    if (ret)
>>> +        goto err_out;
>>> +
>>> +    ret = device_add(&device->device);
>>> +    if (ret)
>>> +        goto err_out;
>>> +
>>>       /* Refcounting can't start until the driver calls register */
>>>       refcount_set(&device->refcount, 1);
>>>   @@ -692,6 +711,12 @@ static int __vfio_register_dev(struct
>>> vfio_device *device,
>>>       mutex_unlock(&group->device_lock);
>>>         return 0;
>>> +err_out:
>>> +    if (group->type == VFIO_NO_IOMMU ||
>>> +        group->type == VFIO_EMULATED_IOMMU)
>>> +        iommu_group_remove_device(device->dev);
>>> +    vfio_group_put(group);
>>> +    return ret;
>>>   }
>>>     int vfio_register_group_dev(struct vfio_device *device)
>>> @@ -779,6 +804,9 @@ void vfio_unregister_group_dev(struct
>>> vfio_device *device)
>>>       group->dev_counter--;
>>>       mutex_unlock(&group->device_lock);
>>>   +    /* Balances device_add in register path */
>>> +    device_del(&device->device);
>>> +
>>>       if (group->type == VFIO_NO_IOMMU || group->type ==
>>> VFIO_EMULATED_IOMMU)
>>>           iommu_group_remove_device(device->dev);
>>>   @@ -2145,6 +2173,7 @@ static int __init vfio_init(void)
>>>       int ret;
>>>         ida_init(&vfio.group_ida);
>>> +    ida_init(&vfio.device_ida);
>>>       mutex_init(&vfio.group_lock);
>>>       mutex_init(&vfio.iommu_drivers_lock);
>>>       INIT_LIST_HEAD(&vfio.group_list);
>>> @@ -2160,12 +2189,20 @@ static int __init vfio_init(void)
>>>       vfio.class = class_create(THIS_MODULE, "vfio");
>>>       if (IS_ERR(vfio.class)) {
>>>           ret = PTR_ERR(vfio.class);
>>> -        goto err_class;
>>> +        goto err_group_class;
>>>       }
>>>         vfio.class->devnode = vfio_devnode;
>>>   -    ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1,
>>> "vfio");
>>> +    /* /sys/class/vfio-dev/vfioX */
>>> +    vfio.device_class = class_create(THIS_MODULE, "vfio-dev");
>>> +    if (IS_ERR(vfio.device_class)) {
>>> +        ret = PTR_ERR(vfio.device_class);
>>> +        goto err_dev_class;
>>> +    }
>>> +
>>> +    ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1,
>>> +                  "vfio-group");
>>>       if (ret)
>>>           goto err_alloc_chrdev;
>>>   @@ -2181,9 +2218,12 @@ static int __init vfio_init(void)
>>>   err_driver_register:
>>>       unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
>>>   err_alloc_chrdev:
>>> +    class_destroy(vfio.device_class);
>>> +    vfio.device_class = NULL;
>>> +err_dev_class:
>>>       class_destroy(vfio.class);
>>>       vfio.class = NULL;
>>> -err_class:
>>> +err_group_class:
>>>       misc_deregister(&vfio_dev);
>>>       return ret;
>>>   }
>>> @@ -2195,8 +2235,11 @@ static void __exit vfio_cleanup(void)
>>>   #ifdef CONFIG_VFIO_NOIOMMU
>>>       vfio_unregister_iommu_driver(&vfio_noiommu_ops);
>>>   #endif
>>> +    ida_destroy(&vfio.device_ida);
>>>       ida_destroy(&vfio.group_ida);
>>>       unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
>>> +    class_destroy(vfio.device_class);
>>> +    vfio.device_class = NULL;
>>>       class_destroy(vfio.class);
>>>       vfio.class = NULL;
>>>       misc_deregister(&vfio_dev);
>>> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
>>> index f03447c8774d..5c13f74da1bb 100644
>>> --- a/include/linux/vfio.h
>>> +++ b/include/linux/vfio.h
>>> @@ -45,7 +45,8 @@ struct vfio_device {
>>>       struct kvm *kvm;
>>>         /* Members below here are private, not for driver use */
>>> -    struct kref kref;    /* object life cycle */
>>> +    unsigned int index;
>>> +    struct device device;    /* device.kref covers object life
>>> circle */
>>>       refcount_t refcount;    /* user count on registered device*/
>>>       unsigned int open_count;
>>>       struct completion comp;
>> I am not totally clear about remaining 'struct device *dev;' in
>> vfio_device struct. I see it used in some places. Is it supposed to
>> disappear at some point?
>
> no, Eric. *dev will not disappear, it stores the dev pointer passed in by
> caller of vfio_init_device().

yeah I see but you have device->device.parent = device->dev;

Eric
>
>>> @@ -154,10 +155,9 @@ struct vfio_device *_vfio_alloc_device(size_t
>>> size, struct device *dev,
>>>   int vfio_init_device(struct vfio_device *device, struct device *dev,
>>>                const struct vfio_device_ops *ops);
>>>   void vfio_free_device(struct vfio_device *device);
>>> -void vfio_device_release(struct kref *kref);
>>>   static inline void vfio_put_device(struct vfio_device *device)
>>>   {
>>> -    kref_put(&device->kref, vfio_device_release);
>>> +    put_device(&device->device);
>>>   }
>>>     int vfio_register_group_dev(struct vfio_device *device);
>>
>> Thanks
>>
>> Eric
>>
>
Jason Gunthorpe Sept. 8, 2022, 12:37 p.m. UTC | #5
On Thu, Sep 08, 2022 at 11:39:07AM +0200, Eric Auger wrote:

> >> I am not totally clear about remaining 'struct device *dev;' in
> >> vfio_device struct. I see it used in some places. Is it supposed to
> >> disappear at some point?
> >
> > no, Eric. *dev will not disappear, it stores the dev pointer passed in by
> > caller of vfio_init_device().
> 
> yeah I see but you have device->device.parent = device->dev;

IIRC we have a number of these redundancies now, often the drivers
store another copy of the dev too.

A significant use of dev is for printing things, what should be done
here is to create a subsystem wide vfio_warn/etc that takes in the
vfio_device, and then print properly from there. Now that we have a
struct device all the prints should also include the VFIO struct
device name, and then the PCI device perhaps in brackets.

Jason
Tian, Kevin Sept. 9, 2022, 3:09 a.m. UTC | #6
> From: Jason Gunthorpe
> Sent: Thursday, September 8, 2022 8:37 PM
> 
> On Thu, Sep 08, 2022 at 11:39:07AM +0200, Eric Auger wrote:
> 
> > >> I am not totally clear about remaining 'struct device *dev;' in
> > >> vfio_device struct. I see it used in some places. Is it supposed to
> > >> disappear at some point?
> > >
> > > no, Eric. *dev will not disappear, it stores the dev pointer passed in by
> > > caller of vfio_init_device().
> >
> > yeah I see but you have device->device.parent = device->dev;
> 
> IIRC we have a number of these redundancies now, often the drivers
> store another copy of the dev too.
> 
> A significant use of dev is for printing things, what should be done
> here is to create a subsystem wide vfio_warn/etc that takes in the
> vfio_device, and then print properly from there. Now that we have a
> struct device all the prints should also include the VFIO struct
> device name, and then the PCI device perhaps in brackets.
> 

Let me handle it in a separate patch (after this series).
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/sysfs-devices-vfio-dev b/Documentation/ABI/testing/sysfs-devices-vfio-dev
new file mode 100644
index 000000000000..e21424fd9666
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-vfio-dev
@@ -0,0 +1,8 @@ 
+What:		 /sys/.../<device>/vfio-dev/vfioX/
+Date:		 September 2022
+Contact:	 Yi Liu <yi.l.liu@intel.com>
+Description:
+		 This directory is created when the device is bound to a
+		 vfio driver. The layout under this directory matches what
+		 exists for a standard 'struct device'. 'X' is a unique
+		 index marking this device in vfio.
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index bfa675d314ab..141f55c3faf5 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -46,6 +46,8 @@  static struct vfio {
 	struct mutex			group_lock; /* locks group_list */
 	struct ida			group_ida;
 	dev_t				group_devt;
+	struct class			*device_class;
+	struct ida			device_ida;
 } vfio;
 
 struct vfio_iommu_driver {
@@ -483,12 +485,13 @@  static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
  * VFIO driver API
  */
 /* Release helper called by vfio_put_device() */
-void vfio_device_release(struct kref *kref)
+static void vfio_device_release(struct device *dev)
 {
 	struct vfio_device *device =
-			container_of(kref, struct vfio_device, kref);
+			container_of(dev, struct vfio_device, device);
 
 	vfio_release_device_set(device);
+	ida_free(&vfio.device_ida, device->index);
 
 	/*
 	 * kvfree() cannot be done here due to a life cycle mess in
@@ -498,7 +501,6 @@  void vfio_device_release(struct kref *kref)
 	 */
 	device->ops->release(device);
 }
-EXPORT_SYMBOL_GPL(vfio_device_release);
 
 /*
  * Alloc and initialize vfio_device so it can be registered to vfio
@@ -546,6 +548,13 @@  int vfio_init_device(struct vfio_device *device, struct device *dev,
 {
 	int ret;
 
+	ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL);
+	if (ret < 0) {
+		dev_dbg(dev, "Error to alloc index\n");
+		return ret;
+	}
+
+	device->index = ret;
 	init_completion(&device->comp);
 	device->dev = dev;
 	device->ops = ops;
@@ -556,11 +565,15 @@  int vfio_init_device(struct vfio_device *device, struct device *dev,
 			goto out_uninit;
 	}
 
-	kref_init(&device->kref);
+	device_initialize(&device->device);
+	device->device.release = vfio_device_release;
+	device->device.class = vfio.device_class;
+	device->device.parent = device->dev;
 	return 0;
 
 out_uninit:
 	vfio_release_device_set(device);
+	ida_free(&vfio.device_ida, device->index);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(vfio_init_device);
@@ -657,6 +670,7 @@  static int __vfio_register_dev(struct vfio_device *device,
 		struct vfio_group *group)
 {
 	struct vfio_device *existing_device;
+	int ret;
 
 	if (IS_ERR(group))
 		return PTR_ERR(group);
@@ -673,16 +687,21 @@  static int __vfio_register_dev(struct vfio_device *device,
 		dev_WARN(device->dev, "Device already exists on group %d\n",
 			 iommu_group_id(group->iommu_group));
 		vfio_device_put_registration(existing_device);
-		if (group->type == VFIO_NO_IOMMU ||
-		    group->type == VFIO_EMULATED_IOMMU)
-			iommu_group_remove_device(device->dev);
-		vfio_group_put(group);
-		return -EBUSY;
+		ret = -EBUSY;
+		goto err_out;
 	}
 
 	/* Our reference on group is moved to the device */
 	device->group = group;
 
+	ret = dev_set_name(&device->device, "vfio%d", device->index);
+	if (ret)
+		goto err_out;
+
+	ret = device_add(&device->device);
+	if (ret)
+		goto err_out;
+
 	/* Refcounting can't start until the driver calls register */
 	refcount_set(&device->refcount, 1);
 
@@ -692,6 +711,12 @@  static int __vfio_register_dev(struct vfio_device *device,
 	mutex_unlock(&group->device_lock);
 
 	return 0;
+err_out:
+	if (group->type == VFIO_NO_IOMMU ||
+	    group->type == VFIO_EMULATED_IOMMU)
+		iommu_group_remove_device(device->dev);
+	vfio_group_put(group);
+	return ret;
 }
 
 int vfio_register_group_dev(struct vfio_device *device)
@@ -779,6 +804,9 @@  void vfio_unregister_group_dev(struct vfio_device *device)
 	group->dev_counter--;
 	mutex_unlock(&group->device_lock);
 
+	/* Balances device_add in register path */
+	device_del(&device->device);
+
 	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
 		iommu_group_remove_device(device->dev);
 
@@ -2145,6 +2173,7 @@  static int __init vfio_init(void)
 	int ret;
 
 	ida_init(&vfio.group_ida);
+	ida_init(&vfio.device_ida);
 	mutex_init(&vfio.group_lock);
 	mutex_init(&vfio.iommu_drivers_lock);
 	INIT_LIST_HEAD(&vfio.group_list);
@@ -2160,12 +2189,20 @@  static int __init vfio_init(void)
 	vfio.class = class_create(THIS_MODULE, "vfio");
 	if (IS_ERR(vfio.class)) {
 		ret = PTR_ERR(vfio.class);
-		goto err_class;
+		goto err_group_class;
 	}
 
 	vfio.class->devnode = vfio_devnode;
 
-	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
+	/* /sys/class/vfio-dev/vfioX */
+	vfio.device_class = class_create(THIS_MODULE, "vfio-dev");
+	if (IS_ERR(vfio.device_class)) {
+		ret = PTR_ERR(vfio.device_class);
+		goto err_dev_class;
+	}
+
+	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1,
+				  "vfio-group");
 	if (ret)
 		goto err_alloc_chrdev;
 
@@ -2181,9 +2218,12 @@  static int __init vfio_init(void)
 err_driver_register:
 	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
 err_alloc_chrdev:
+	class_destroy(vfio.device_class);
+	vfio.device_class = NULL;
+err_dev_class:
 	class_destroy(vfio.class);
 	vfio.class = NULL;
-err_class:
+err_group_class:
 	misc_deregister(&vfio_dev);
 	return ret;
 }
@@ -2195,8 +2235,11 @@  static void __exit vfio_cleanup(void)
 #ifdef CONFIG_VFIO_NOIOMMU
 	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
 #endif
+	ida_destroy(&vfio.device_ida);
 	ida_destroy(&vfio.group_ida);
 	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
+	class_destroy(vfio.device_class);
+	vfio.device_class = NULL;
 	class_destroy(vfio.class);
 	vfio.class = NULL;
 	misc_deregister(&vfio_dev);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index f03447c8774d..5c13f74da1bb 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -45,7 +45,8 @@  struct vfio_device {
 	struct kvm *kvm;
 
 	/* Members below here are private, not for driver use */
-	struct kref kref;	/* object life cycle */
+	unsigned int index;
+	struct device device;	/* device.kref covers object life cycle */
 	refcount_t refcount;	/* user count on registered device*/
 	unsigned int open_count;
 	struct completion comp;
@@ -154,10 +155,9 @@  struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
 int vfio_init_device(struct vfio_device *device, struct device *dev,
 		     const struct vfio_device_ops *ops);
 void vfio_free_device(struct vfio_device *device);
-void vfio_device_release(struct kref *kref);
 static inline void vfio_put_device(struct vfio_device *device)
 {
-	kref_put(&device->kref, vfio_device_release);
+	put_device(&device->device);
 }
 
 int vfio_register_group_dev(struct vfio_device *device);