diff mbox series

[v2,03/14] vfio: Split creation of a vfio_device into init and register ops

Message ID 3-v2-20d933792272+4ff-vfio1_jgg@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Embed struct vfio_device in all sub-structures | expand

Commit Message

Jason Gunthorpe March 13, 2021, 12:55 a.m. UTC
This makes the struct vfio_pci_device part of the public interface so it
can be used with container_of and so forth, as is typical for a Linux
subystem.

This is the first step to bring some type-safety to the vfio interface by
allowing the replacement of 'void *' and 'struct device *' inputs with a
simple and clear 'struct vfio_pci_device *'

For now the self-allocating vfio_add_group_dev() interface is kept so each
user can be updated as a separate patch.

The expected usage pattern is

  driver core probe() function:
     my_device = kzalloc(sizeof(*mydevice));
     vfio_init_group_dev(&my_device->vdev, dev, ops, mydevice);
     /* other driver specific prep */
     vfio_register_group_dev(&my_device->vdev);
     dev_set_drvdata(my_device);

  driver core remove() function:
     my_device = dev_get_drvdata(dev);
     vfio_unregister_group_dev(&my_device->vdev);
     /* other driver specific tear down */
     kfree(my_device);

Allowing the driver to be able to use the drvdata and vifo_device to go
to/from its own data.

The pattern also makes it clear that vfio_register_group_dev() must be
last in the sequence, as once it is called the core code can immediately
start calling ops. The init/register gap is provided to allow for the
driver to do setup before ops can be called and thus avoid races.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 Documentation/driver-api/vfio.rst |  31 ++++----
 drivers/vfio/vfio.c               | 123 ++++++++++++++++--------------
 include/linux/vfio.h              |  16 ++++
 3 files changed, 98 insertions(+), 72 deletions(-)

Comments

Tian, Kevin March 16, 2021, 7:55 a.m. UTC | #1
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Saturday, March 13, 2021 8:56 AM
> 
> This makes the struct vfio_pci_device part of the public interface so it
> can be used with container_of and so forth, as is typical for a Linux
> subystem.
> 
> This is the first step to bring some type-safety to the vfio interface by
> allowing the replacement of 'void *' and 'struct device *' inputs with a
> simple and clear 'struct vfio_pci_device *'
> 
> For now the self-allocating vfio_add_group_dev() interface is kept so each
> user can be updated as a separate patch.
> 
> The expected usage pattern is
> 
>   driver core probe() function:
>      my_device = kzalloc(sizeof(*mydevice));
>      vfio_init_group_dev(&my_device->vdev, dev, ops, mydevice);
>      /* other driver specific prep */
>      vfio_register_group_dev(&my_device->vdev);
>      dev_set_drvdata(my_device);

dev_set_drvdata(dev, my_device);

> 
>   driver core remove() function:
>      my_device = dev_get_drvdata(dev);
>      vfio_unregister_group_dev(&my_device->vdev);
>      /* other driver specific tear down */
>      kfree(my_device);
> 
> Allowing the driver to be able to use the drvdata and vifo_device to go
> to/from its own data.
> 
> The pattern also makes it clear that vfio_register_group_dev() must be
> last in the sequence, as once it is called the core code can immediately
> start calling ops. The init/register gap is provided to allow for the
> driver to do setup before ops can be called and thus avoid races.
> 
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  Documentation/driver-api/vfio.rst |  31 ++++----
>  drivers/vfio/vfio.c               | 123 ++++++++++++++++--------------
>  include/linux/vfio.h              |  16 ++++
>  3 files changed, 98 insertions(+), 72 deletions(-)
> 
> diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-
> api/vfio.rst
> index f1a4d3c3ba0bb1..d3a02300913a7f 100644
> --- a/Documentation/driver-api/vfio.rst
> +++ b/Documentation/driver-api/vfio.rst
> @@ -249,18 +249,23 @@ VFIO bus driver API
> 
>  VFIO bus drivers, such as vfio-pci make use of only a few interfaces
>  into VFIO core.  When devices are bound and unbound to the driver,
> -the driver should call vfio_add_group_dev() and vfio_del_group_dev()
> -respectively::
> -
> -	extern int vfio_add_group_dev(struct device *dev,
> -				      const struct vfio_device_ops *ops,
> -				      void *device_data);
> -
> -	extern void *vfio_del_group_dev(struct device *dev);
> -
> -vfio_add_group_dev() indicates to the core to begin tracking the
> -iommu_group of the specified dev and register the dev as owned by
> -a VFIO bus driver.  The driver provides an ops structure for callbacks
> +the driver should call vfio_register_group_dev() and
> +vfio_unregister_group_dev() respectively::
> +
> +	void vfio_init_group_dev(struct vfio_device *device,
> +				struct device *dev,
> +				const struct vfio_device_ops *ops,
> +				void *device_data);
> +	int vfio_register_group_dev(struct vfio_device *device);
> +	void vfio_unregister_group_dev(struct vfio_device *device);
> +
> +The driver should embed the vfio_device in its own structure and call
> +vfio_init_group_dev() to pre-configure it before going to registration.
> +vfio_register_group_dev() indicates to the core to begin tracking the
> +iommu_group of the specified dev and register the dev as owned by a VFIO
> bus
> +driver. Once vfio_register_group_dev() returns it is possible for userspace
> to
> +start accessing the driver, thus the driver should ensure it is completely
> +ready before calling it. The driver provides an ops structure for callbacks
>  similar to a file operations structure::
> 
>  	struct vfio_device_ops {
> @@ -276,7 +281,7 @@ similar to a file operations structure::
>  	};
> 
>  Each function is passed the device_data that was originally registered
> -in the vfio_add_group_dev() call above.  This allows the bus driver
> +in the vfio_register_group_dev() call above.  This allows the bus driver
>  an easy place to store its opaque, private data.  The open/release
>  callbacks are issued when a new file descriptor is created for a
>  device (via VFIO_GROUP_GET_DEVICE_FD).  The ioctl interface provides
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 32660e8a69ae20..cfa06ae3b9018b 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -89,16 +89,6 @@ struct vfio_group {
>  	struct blocking_notifier_head	notifier;
>  };
> 
> -struct vfio_device {
> -	refcount_t			refcount;
> -	struct completion		comp;
> -	struct device			*dev;
> -	const struct vfio_device_ops	*ops;
> -	struct vfio_group		*group;
> -	struct list_head		group_next;
> -	void				*device_data;
> -};
> -
>  #ifdef CONFIG_VFIO_NOIOMMU
>  static bool noiommu __read_mostly;
>  module_param_named(enable_unsafe_noiommu_mode,
> @@ -532,35 +522,6 @@ static struct vfio_group
> *vfio_group_get_from_dev(struct device *dev)
>  /**
>   * Device objects - create, release, get, put, search
>   */
> -static
> -struct vfio_device *vfio_group_create_device(struct vfio_group *group,
> -					     struct device *dev,
> -					     const struct vfio_device_ops *ops,
> -					     void *device_data)
> -{
> -	struct vfio_device *device;
> -
> -	device = kzalloc(sizeof(*device), GFP_KERNEL);
> -	if (!device)
> -		return ERR_PTR(-ENOMEM);
> -
> -	refcount_set(&device->refcount, 1);
> -	init_completion(&device->comp);
> -	device->dev = dev;
> -	/* Our reference on group is moved to the device */
> -	device->group = group;
> -	device->ops = ops;
> -	device->device_data = device_data;
> -	dev_set_drvdata(dev, device);
> -
> -	mutex_lock(&group->device_lock);
> -	list_add(&device->group_next, &group->device_list);
> -	group->dev_counter++;
> -	mutex_unlock(&group->device_lock);
> -
> -	return device;
> -}
> -
>  /* Device reference always implies a group reference */
>  void vfio_device_put(struct vfio_device *device)
>  {
> @@ -779,14 +740,23 @@ static int vfio_iommu_group_notifier(struct
> notifier_block *nb,
>  /**
>   * VFIO driver API
>   */
> -int vfio_add_group_dev(struct device *dev,
> -		       const struct vfio_device_ops *ops, void *device_data)
> +void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
> +			 const struct vfio_device_ops *ops, void *device_data)
> +{
> +	init_completion(&device->comp);
> +	device->dev = dev;
> +	device->ops = ops;
> +	device->device_data = device_data;
> +}
> +EXPORT_SYMBOL_GPL(vfio_init_group_dev);
> +
> +int vfio_register_group_dev(struct vfio_device *device)
>  {
> +	struct vfio_device *existing_device;
>  	struct iommu_group *iommu_group;
>  	struct vfio_group *group;
> -	struct vfio_device *device;
> 
> -	iommu_group = iommu_group_get(dev);
> +	iommu_group = iommu_group_get(device->dev);
>  	if (!iommu_group)
>  		return -EINVAL;
> 
> @@ -805,21 +775,50 @@ int vfio_add_group_dev(struct device *dev,
>  		iommu_group_put(iommu_group);
>  	}
> 
> -	device = vfio_group_get_device(group, dev);
> -	if (device) {
> -		dev_WARN(dev, "Device already exists on group %d\n",
> +	existing_device = vfio_group_get_device(group, device->dev);
> +	if (existing_device) {
> +		dev_WARN(device->dev, "Device already exists on
> group %d\n",
>  			 iommu_group_id(iommu_group));
> -		vfio_device_put(device);
> +		vfio_device_put(existing_device);
>  		vfio_group_put(group);
>  		return -EBUSY;
>  	}
> 
> -	device = vfio_group_create_device(group, dev, ops, device_data);
> -	if (IS_ERR(device)) {
> -		vfio_group_put(group);
> -		return PTR_ERR(device);
> -	}
> +	/* Our reference on group is moved to the device */
> +	device->group = group;
> +
> +	/* Refcounting can't start until the driver calls register */
> +	refcount_set(&device->refcount, 1);
> +
> +	mutex_lock(&group->device_lock);
> +	list_add(&device->group_next, &group->device_list);
> +	group->dev_counter++;
> +	mutex_unlock(&group->device_lock);
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(vfio_register_group_dev);
> +
> +int vfio_add_group_dev(struct device *dev, const struct vfio_device_ops
> *ops,
> +		       void *device_data)
> +{
> +	struct vfio_device *device;
> +	int ret;
> +
> +	device = kzalloc(sizeof(*device), GFP_KERNEL);
> +	if (!device)
> +		return -ENOMEM;
> +
> +	vfio_init_group_dev(device, dev, ops, device_data);
> +	ret = vfio_register_group_dev(device);
> +	if (ret)
> +		goto err_kfree;
> +	dev_set_drvdata(dev, device);
>  	return 0;
> +
> +err_kfree:
> +	kfree(device);
> +	return ret;
>  }
>  EXPORT_SYMBOL_GPL(vfio_add_group_dev);
> 
> @@ -887,11 +886,9 @@ EXPORT_SYMBOL_GPL(vfio_device_data);
>  /*
>   * Decrement the device reference count and wait for the device to be
>   * removed.  Open file descriptors for the device... */
> -void *vfio_del_group_dev(struct device *dev)
> +void vfio_unregister_group_dev(struct vfio_device *device)
>  {
> -	struct vfio_device *device = dev_get_drvdata(dev);
>  	struct vfio_group *group = device->group;
> -	void *device_data = device->device_data;
>  	struct vfio_unbound_dev *unbound;
>  	unsigned int i = 0;
>  	bool interrupted = false;
> @@ -908,7 +905,7 @@ void *vfio_del_group_dev(struct device *dev)
>  	 */
>  	unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
>  	if (unbound) {
> -		unbound->dev = dev;
> +		unbound->dev = device->dev;
>  		mutex_lock(&group->unbound_lock);
>  		list_add(&unbound->unbound_next, &group->unbound_list);
>  		mutex_unlock(&group->unbound_lock);
> @@ -919,7 +916,7 @@ void *vfio_del_group_dev(struct device *dev)
>  	rc = try_wait_for_completion(&device->comp);
>  	while (rc <= 0) {
>  		if (device->ops->request)
> -			device->ops->request(device_data, i++);
> +			device->ops->request(device->device_data, i++);
> 
>  		if (interrupted) {
>  			rc = wait_for_completion_timeout(&device->comp,
> @@ -929,7 +926,7 @@ void *vfio_del_group_dev(struct device *dev)
>  				&device->comp, HZ * 10);
>  			if (rc < 0) {
>  				interrupted = true;
> -				dev_warn(dev,
> +				dev_warn(device->dev,
>  					 "Device is currently in use, task"
>  					 " \"%s\" (%d) "
>  					 "blocked until device is released",
> @@ -962,9 +959,17 @@ void *vfio_del_group_dev(struct device *dev)
> 
>  	/* Matches the get in vfio_group_create_device() */
>  	vfio_group_put(group);
> +}
> +EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
> +
> +void *vfio_del_group_dev(struct device *dev)
> +{
> +	struct vfio_device *device = dev_get_drvdata(dev);
> +	void *device_data = device->device_data;
> +
> +	vfio_unregister_group_dev(device);
>  	dev_set_drvdata(dev, NULL);

Move to vfio_unregister_group_dev? In the cover letter you mentioned
that drvdata is managed by the driver but removed from the core. Looks
it's also the rule obeyed by the following patches.

Thanks
Kevin

>  	kfree(device);
> -
>  	return device_data;
>  }
>  EXPORT_SYMBOL_GPL(vfio_del_group_dev);
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index b7e18bde5aa8b3..ad8b579d67d34a 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -15,6 +15,18 @@
>  #include <linux/poll.h>
>  #include <uapi/linux/vfio.h>
> 
> +struct vfio_device {
> +	struct device *dev;
> +	const struct vfio_device_ops *ops;
> +	struct vfio_group *group;
> +
> +	/* Members below here are private, not for driver use */
> +	refcount_t refcount;
> +	struct completion comp;
> +	struct list_head group_next;
> +	void *device_data;
> +};
> +
>  /**
>   * struct vfio_device_ops - VFIO bus driver device callbacks
>   *
> @@ -48,11 +60,15 @@ struct vfio_device_ops {
>  extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
>  extern void vfio_iommu_group_put(struct iommu_group *group, struct
> device *dev);
> 
> +void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
> +			 const struct vfio_device_ops *ops, void
> *device_data);
> +int vfio_register_group_dev(struct vfio_device *device);
>  extern int vfio_add_group_dev(struct device *dev,
>  			      const struct vfio_device_ops *ops,
>  			      void *device_data);
> 
>  extern void *vfio_del_group_dev(struct device *dev);
> +void vfio_unregister_group_dev(struct vfio_device *device);
>  extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
>  extern void vfio_device_put(struct vfio_device *device);
>  extern void *vfio_device_data(struct vfio_device *device);
> --
> 2.30.2
Cornelia Huck March 16, 2021, 12:25 p.m. UTC | #2
On Fri, 12 Mar 2021 20:55:55 -0400
Jason Gunthorpe <jgg@nvidia.com> wrote:

> This makes the struct vfio_pci_device part of the public interface so it
> can be used with container_of and so forth, as is typical for a Linux
> subystem.
> 
> This is the first step to bring some type-safety to the vfio interface by
> allowing the replacement of 'void *' and 'struct device *' inputs with a
> simple and clear 'struct vfio_pci_device *'
> 
> For now the self-allocating vfio_add_group_dev() interface is kept so each
> user can be updated as a separate patch.
> 
> The expected usage pattern is
> 
>   driver core probe() function:
>      my_device = kzalloc(sizeof(*mydevice));
>      vfio_init_group_dev(&my_device->vdev, dev, ops, mydevice);
>      /* other driver specific prep */
>      vfio_register_group_dev(&my_device->vdev);
>      dev_set_drvdata(my_device);
> 
>   driver core remove() function:
>      my_device = dev_get_drvdata(dev);
>      vfio_unregister_group_dev(&my_device->vdev);
>      /* other driver specific tear down */
>      kfree(my_device);
> 
> Allowing the driver to be able to use the drvdata and vifo_device to go

s/vifo_device/vfio_device/

> to/from its own data.
> 
> The pattern also makes it clear that vfio_register_group_dev() must be
> last in the sequence, as once it is called the core code can immediately
> start calling ops. The init/register gap is provided to allow for the
> driver to do setup before ops can be called and thus avoid races.
> 
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  Documentation/driver-api/vfio.rst |  31 ++++----
>  drivers/vfio/vfio.c               | 123 ++++++++++++++++--------------
>  include/linux/vfio.h              |  16 ++++
>  3 files changed, 98 insertions(+), 72 deletions(-)
> 
> diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst
> index f1a4d3c3ba0bb1..d3a02300913a7f 100644
> --- a/Documentation/driver-api/vfio.rst
> +++ b/Documentation/driver-api/vfio.rst
> @@ -249,18 +249,23 @@ VFIO bus driver API
>  
>  VFIO bus drivers, such as vfio-pci make use of only a few interfaces
>  into VFIO core.  When devices are bound and unbound to the driver,
> -the driver should call vfio_add_group_dev() and vfio_del_group_dev()
> -respectively::
> -
> -	extern int vfio_add_group_dev(struct device *dev,
> -				      const struct vfio_device_ops *ops,
> -				      void *device_data);
> -
> -	extern void *vfio_del_group_dev(struct device *dev);
> -
> -vfio_add_group_dev() indicates to the core to begin tracking the
> -iommu_group of the specified dev and register the dev as owned by
> -a VFIO bus driver.  The driver provides an ops structure for callbacks
> +the driver should call vfio_register_group_dev() and
> +vfio_unregister_group_dev() respectively::
> +
> +	void vfio_init_group_dev(struct vfio_device *device,
> +				struct device *dev,
> +				const struct vfio_device_ops *ops,
> +				void *device_data);
> +	int vfio_register_group_dev(struct vfio_device *device);
> +	void vfio_unregister_group_dev(struct vfio_device *device);
> +
> +The driver should embed the vfio_device in its own structure and call
> +vfio_init_group_dev() to pre-configure it before going to registration.

s/it/that structure/ (I guess?)

> +vfio_register_group_dev() indicates to the core to begin tracking the
> +iommu_group of the specified dev and register the dev as owned by a VFIO bus
> +driver. Once vfio_register_group_dev() returns it is possible for userspace to
> +start accessing the driver, thus the driver should ensure it is completely
> +ready before calling it. The driver provides an ops structure for callbacks
>  similar to a file operations structure::
>  
>  	struct vfio_device_ops {

Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Max Gurtovoy March 16, 2021, 12:54 p.m. UTC | #3
On 3/13/2021 2:55 AM, Jason Gunthorpe wrote:
> This makes the struct vfio_pci_device part of the public interface so it
> can be used with container_of and so forth, as is typical for a Linux
> subystem.
>
> This is the first step to bring some type-safety to the vfio interface by
> allowing the replacement of 'void *' and 'struct device *' inputs with a
> simple and clear 'struct vfio_pci_device *'
>
> For now the self-allocating vfio_add_group_dev() interface is kept so each
> user can be updated as a separate patch.
>
> The expected usage pattern is
>
>    driver core probe() function:
>       my_device = kzalloc(sizeof(*mydevice));
>       vfio_init_group_dev(&my_device->vdev, dev, ops, mydevice);
>       /* other driver specific prep */
>       vfio_register_group_dev(&my_device->vdev);
>       dev_set_drvdata(my_device);
>
>    driver core remove() function:
>       my_device = dev_get_drvdata(dev);
>       vfio_unregister_group_dev(&my_device->vdev);
>       /* other driver specific tear down */
>       kfree(my_device);
>
> Allowing the driver to be able to use the drvdata and vifo_device to go
> to/from its own data.
>
> The pattern also makes it clear that vfio_register_group_dev() must be
> last in the sequence, as once it is called the core code can immediately
> start calling ops. The init/register gap is provided to allow for the
> driver to do setup before ops can be called and thus avoid races.
>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>   Documentation/driver-api/vfio.rst |  31 ++++----
>   drivers/vfio/vfio.c               | 123 ++++++++++++++++--------------
>   include/linux/vfio.h              |  16 ++++
>   3 files changed, 98 insertions(+), 72 deletions(-)

With comments from Cornelia and Kevin, looks good.

Reviewed-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Jason Gunthorpe March 16, 2021, 1:34 p.m. UTC | #4
On Tue, Mar 16, 2021 at 07:55:11AM +0000, Tian, Kevin wrote:

> > +void *vfio_del_group_dev(struct device *dev)
> > +{
> > +	struct vfio_device *device = dev_get_drvdata(dev);
> > +	void *device_data = device->device_data;
> > +
> > +	vfio_unregister_group_dev(device);
> >  	dev_set_drvdata(dev, NULL);
> 
> Move to vfio_unregister_group_dev? In the cover letter you mentioned
> that drvdata is managed by the driver but removed from the core. 

"removed from the core" means the core code doesn't touch drvdata at
all.

> Looks it's also the rule obeyed by the following patches.

The dev_set_drvdata(NULL) on remove is mostly cargo-cult nonsense. The
driver core sets it to null immediately after the remove function
returns, so to add another set needs a very strong reason.

It is only left here temporarily, the last patch deletes it.

Jason
Alex Williamson March 16, 2021, 9:13 p.m. UTC | #5
On Tue, 16 Mar 2021 13:25:59 +0100
Cornelia Huck <cohuck@redhat.com> wrote:

> On Fri, 12 Mar 2021 20:55:55 -0400
> Jason Gunthorpe <jgg@nvidia.com> wrote:
> 
> > This makes the struct vfio_pci_device part of the public interface so it

s/_pci//

> > can be used with container_of and so forth, as is typical for a Linux
> > subystem.
> > 
> > This is the first step to bring some type-safety to the vfio interface by
> > allowing the replacement of 'void *' and 'struct device *' inputs with a
> > simple and clear 'struct vfio_pci_device *'

s/_pci//

> > 
> > For now the self-allocating vfio_add_group_dev() interface is kept so each
> > user can be updated as a separate patch.
> > 
> > The expected usage pattern is
> > 
> >   driver core probe() function:
> >      my_device = kzalloc(sizeof(*mydevice));
> >      vfio_init_group_dev(&my_device->vdev, dev, ops, mydevice);
> >      /* other driver specific prep */
> >      vfio_register_group_dev(&my_device->vdev);
> >      dev_set_drvdata(my_device);
> > 
> >   driver core remove() function:
> >      my_device = dev_get_drvdata(dev);
> >      vfio_unregister_group_dev(&my_device->vdev);
> >      /* other driver specific tear down */
> >      kfree(my_device);
> > 
> > Allowing the driver to be able to use the drvdata and vifo_device to go  
> 
> s/vifo_device/vfio_device/
> 
> > to/from its own data.
> > 
> > The pattern also makes it clear that vfio_register_group_dev() must be
> > last in the sequence, as once it is called the core code can immediately
> > start calling ops. The init/register gap is provided to allow for the
> > driver to do setup before ops can be called and thus avoid races.
> > 
> > Reviewed-by: Christoph Hellwig <hch@lst.de>
> > Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > ---
> >  Documentation/driver-api/vfio.rst |  31 ++++----
> >  drivers/vfio/vfio.c               | 123 ++++++++++++++++--------------
> >  include/linux/vfio.h              |  16 ++++
> >  3 files changed, 98 insertions(+), 72 deletions(-)
> > 
> > diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst
> > index f1a4d3c3ba0bb1..d3a02300913a7f 100644
> > --- a/Documentation/driver-api/vfio.rst
> > +++ b/Documentation/driver-api/vfio.rst
> > @@ -249,18 +249,23 @@ VFIO bus driver API
> >  
> >  VFIO bus drivers, such as vfio-pci make use of only a few interfaces
> >  into VFIO core.  When devices are bound and unbound to the driver,
> > -the driver should call vfio_add_group_dev() and vfio_del_group_dev()
> > -respectively::
> > -
> > -	extern int vfio_add_group_dev(struct device *dev,
> > -				      const struct vfio_device_ops *ops,
> > -				      void *device_data);
> > -
> > -	extern void *vfio_del_group_dev(struct device *dev);
> > -
> > -vfio_add_group_dev() indicates to the core to begin tracking the
> > -iommu_group of the specified dev and register the dev as owned by
> > -a VFIO bus driver.  The driver provides an ops structure for callbacks
> > +the driver should call vfio_register_group_dev() and
> > +vfio_unregister_group_dev() respectively::
> > +
> > +	void vfio_init_group_dev(struct vfio_device *device,
> > +				struct device *dev,
> > +				const struct vfio_device_ops *ops,
> > +				void *device_data);
> > +	int vfio_register_group_dev(struct vfio_device *device);
> > +	void vfio_unregister_group_dev(struct vfio_device *device);
> > +
> > +The driver should embed the vfio_device in its own structure and call
> > +vfio_init_group_dev() to pre-configure it before going to registration.  
> 
> s/it/that structure/ (I guess?)

Seems less clear actually, is the object of "that structure" the
"vfio_device" or "its own structure".  Phrasing somewhat suggests the
latter.  s/it/the vfio_device structure/ seems excessively verbose.  I
think "it" is probably sufficient here.  Thanks,

Alex

 
> > +vfio_register_group_dev() indicates to the core to begin tracking the
> > +iommu_group of the specified dev and register the dev as owned by a VFIO bus
> > +driver. Once vfio_register_group_dev() returns it is possible for userspace to
> > +start accessing the driver, thus the driver should ensure it is completely
> > +ready before calling it. The driver provides an ops structure for callbacks
> >  similar to a file operations structure::
> >  
> >  	struct vfio_device_ops {  
> 
> Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Jason Gunthorpe March 16, 2021, 11:12 p.m. UTC | #6
On Tue, Mar 16, 2021 at 03:13:06PM -0600, Alex Williamson wrote:

> > > +	void vfio_init_group_dev(struct vfio_device *device,
> > > +				struct device *dev,
> > > +				const struct vfio_device_ops *ops,
> > > +				void *device_data);
> > > +	int vfio_register_group_dev(struct vfio_device *device);
> > > +	void vfio_unregister_group_dev(struct vfio_device *device);
> > > +
> > > +The driver should embed the vfio_device in its own structure and call
> > > +vfio_init_group_dev() to pre-configure it before going to registration.  
> > 
> > s/it/that structure/ (I guess?)
> 
> Seems less clear actually, is the object of "that structure" the
> "vfio_device" or "its own structure".  Phrasing somewhat suggests the
> latter.  s/it/the vfio_device structure/ seems excessively verbose.  I
> think "it" is probably sufficient here.  Thanks,

Right, it says directly above that vfio_init_group_dev() accepts a
vfio_device so I doubt anyone will be confused for long on what "it"
refers to.

I got the other language fixes thanks

Jason
Tian, Kevin March 17, 2021, 12:55 a.m. UTC | #7
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, March 16, 2021 9:34 PM
> 
> On Tue, Mar 16, 2021 at 07:55:11AM +0000, Tian, Kevin wrote:
> 
> > > +void *vfio_del_group_dev(struct device *dev)
> > > +{
> > > +	struct vfio_device *device = dev_get_drvdata(dev);
> > > +	void *device_data = device->device_data;
> > > +
> > > +	vfio_unregister_group_dev(device);
> > >  	dev_set_drvdata(dev, NULL);
> >
> > Move to vfio_unregister_group_dev? In the cover letter you mentioned
> > that drvdata is managed by the driver but removed from the core.
> 
> "removed from the core" means the core code doesn't touch drvdata at
> all.
> 
> > Looks it's also the rule obeyed by the following patches.
> 
> The dev_set_drvdata(NULL) on remove is mostly cargo-cult nonsense. The
> driver core sets it to null immediately after the remove function
> returns, so to add another set needs a very strong reason.
> 
> It is only left here temporarily, the last patch deletes it.
> 

Ah, I didn't realize dev_set_drvdata(NULL) is nonsense here. Just saw
no place clears it after this series. 

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Eric Auger March 18, 2021, 1:18 p.m. UTC | #8
Hi,
On 3/13/21 1:55 AM, Jason Gunthorpe wrote:
> This makes the struct vfio_pci_device part of the public interface so it
> can be used with container_of and so forth, as is typical for a Linux
> subystem.
> 
> This is the first step to bring some type-safety to the vfio interface by
> allowing the replacement of 'void *' and 'struct device *' inputs with a
> simple and clear 'struct vfio_pci_device *'
> 
> For now the self-allocating vfio_add_group_dev() interface is kept so each
> user can be updated as a separate patch.
> 
> The expected usage pattern is
> 
>   driver core probe() function:
>      my_device = kzalloc(sizeof(*mydevice));
>      vfio_init_group_dev(&my_device->vdev, dev, ops, mydevice);
>      /* other driver specific prep */
>      vfio_register_group_dev(&my_device->vdev);
>      dev_set_drvdata(my_device);
> 
>   driver core remove() function:
>      my_device = dev_get_drvdata(dev);
>      vfio_unregister_group_dev(&my_device->vdev);
>      /* other driver specific tear down */
>      kfree(my_device);
> 
> Allowing the driver to be able to use the drvdata and vifo_device to go
> to/from its own data.
> 
> The pattern also makes it clear that vfio_register_group_dev() must be
> last in the sequence, as once it is called the core code can immediately
> start calling ops. The init/register gap is provided to allow for the
> driver to do setup before ops can be called and thus avoid races.
> 
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
With previously commit msg and comment fixes,

Reviewed-by: Eric Auger <eric.auger@redhat.com>

Thanks

Eric

> ---
>  Documentation/driver-api/vfio.rst |  31 ++++----
>  drivers/vfio/vfio.c               | 123 ++++++++++++++++--------------
>  include/linux/vfio.h              |  16 ++++
>  3 files changed, 98 insertions(+), 72 deletions(-)
> 
> diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst
> index f1a4d3c3ba0bb1..d3a02300913a7f 100644
> --- a/Documentation/driver-api/vfio.rst
> +++ b/Documentation/driver-api/vfio.rst
> @@ -249,18 +249,23 @@ VFIO bus driver API
>  
>  VFIO bus drivers, such as vfio-pci make use of only a few interfaces
>  into VFIO core.  When devices are bound and unbound to the driver,
> -the driver should call vfio_add_group_dev() and vfio_del_group_dev()
> -respectively::
> -
> -	extern int vfio_add_group_dev(struct device *dev,
> -				      const struct vfio_device_ops *ops,
> -				      void *device_data);
> -
> -	extern void *vfio_del_group_dev(struct device *dev);
> -
> -vfio_add_group_dev() indicates to the core to begin tracking the
> -iommu_group of the specified dev and register the dev as owned by
> -a VFIO bus driver.  The driver provides an ops structure for callbacks
> +the driver should call vfio_register_group_dev() and
> +vfio_unregister_group_dev() respectively::
> +
> +	void vfio_init_group_dev(struct vfio_device *device,
> +				struct device *dev,
> +				const struct vfio_device_ops *ops,
> +				void *device_data);
> +	int vfio_register_group_dev(struct vfio_device *device);
> +	void vfio_unregister_group_dev(struct vfio_device *device);
> +
> +The driver should embed the vfio_device in its own structure and call
> +vfio_init_group_dev() to pre-configure it before going to registration.
> +vfio_register_group_dev() indicates to the core to begin tracking the
> +iommu_group of the specified dev and register the dev as owned by a VFIO bus
> +driver. Once vfio_register_group_dev() returns it is possible for userspace to
> +start accessing the driver, thus the driver should ensure it is completely
> +ready before calling it. The driver provides an ops structure for callbacks
>  similar to a file operations structure::
>  
>  	struct vfio_device_ops {
> @@ -276,7 +281,7 @@ similar to a file operations structure::
>  	};
>  
>  Each function is passed the device_data that was originally registered
> -in the vfio_add_group_dev() call above.  This allows the bus driver
> +in the vfio_register_group_dev() call above.  This allows the bus driver
>  an easy place to store its opaque, private data.  The open/release
>  callbacks are issued when a new file descriptor is created for a
>  device (via VFIO_GROUP_GET_DEVICE_FD).  The ioctl interface provides
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 32660e8a69ae20..cfa06ae3b9018b 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -89,16 +89,6 @@ struct vfio_group {
>  	struct blocking_notifier_head	notifier;
>  };
>  
> -struct vfio_device {
> -	refcount_t			refcount;
> -	struct completion		comp;
> -	struct device			*dev;
> -	const struct vfio_device_ops	*ops;
> -	struct vfio_group		*group;
> -	struct list_head		group_next;
> -	void				*device_data;
> -};
> -
>  #ifdef CONFIG_VFIO_NOIOMMU
>  static bool noiommu __read_mostly;
>  module_param_named(enable_unsafe_noiommu_mode,
> @@ -532,35 +522,6 @@ static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
>  /**
>   * Device objects - create, release, get, put, search
>   */
> -static
> -struct vfio_device *vfio_group_create_device(struct vfio_group *group,
> -					     struct device *dev,
> -					     const struct vfio_device_ops *ops,
> -					     void *device_data)
> -{
> -	struct vfio_device *device;
> -
> -	device = kzalloc(sizeof(*device), GFP_KERNEL);
> -	if (!device)
> -		return ERR_PTR(-ENOMEM);
> -
> -	refcount_set(&device->refcount, 1);
> -	init_completion(&device->comp);
> -	device->dev = dev;
> -	/* Our reference on group is moved to the device */
> -	device->group = group;
> -	device->ops = ops;
> -	device->device_data = device_data;
> -	dev_set_drvdata(dev, device);
> -
> -	mutex_lock(&group->device_lock);
> -	list_add(&device->group_next, &group->device_list);
> -	group->dev_counter++;
> -	mutex_unlock(&group->device_lock);
> -
> -	return device;
> -}
> -
>  /* Device reference always implies a group reference */
>  void vfio_device_put(struct vfio_device *device)
>  {
> @@ -779,14 +740,23 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb,
>  /**
>   * VFIO driver API
>   */
> -int vfio_add_group_dev(struct device *dev,
> -		       const struct vfio_device_ops *ops, void *device_data)
> +void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
> +			 const struct vfio_device_ops *ops, void *device_data)
> +{
> +	init_completion(&device->comp);
> +	device->dev = dev;
> +	device->ops = ops;
> +	device->device_data = device_data;
> +}
> +EXPORT_SYMBOL_GPL(vfio_init_group_dev);
> +
> +int vfio_register_group_dev(struct vfio_device *device)
>  {
> +	struct vfio_device *existing_device;
>  	struct iommu_group *iommu_group;
>  	struct vfio_group *group;
> -	struct vfio_device *device;
>  
> -	iommu_group = iommu_group_get(dev);
> +	iommu_group = iommu_group_get(device->dev);
>  	if (!iommu_group)
>  		return -EINVAL;
>  
> @@ -805,21 +775,50 @@ int vfio_add_group_dev(struct device *dev,
>  		iommu_group_put(iommu_group);
>  	}
>  
> -	device = vfio_group_get_device(group, dev);
> -	if (device) {
> -		dev_WARN(dev, "Device already exists on group %d\n",
> +	existing_device = vfio_group_get_device(group, device->dev);
> +	if (existing_device) {
> +		dev_WARN(device->dev, "Device already exists on group %d\n",
>  			 iommu_group_id(iommu_group));
> -		vfio_device_put(device);
> +		vfio_device_put(existing_device);
>  		vfio_group_put(group);
>  		return -EBUSY;
>  	}
>  
> -	device = vfio_group_create_device(group, dev, ops, device_data);
> -	if (IS_ERR(device)) {
> -		vfio_group_put(group);
> -		return PTR_ERR(device);
> -	}
> +	/* Our reference on group is moved to the device */
> +	device->group = group;
> +
> +	/* Refcounting can't start until the driver calls register */
> +	refcount_set(&device->refcount, 1);
> +
> +	mutex_lock(&group->device_lock);
> +	list_add(&device->group_next, &group->device_list);
> +	group->dev_counter++;
> +	mutex_unlock(&group->device_lock);
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(vfio_register_group_dev);
> +
> +int vfio_add_group_dev(struct device *dev, const struct vfio_device_ops *ops,
> +		       void *device_data)
> +{
> +	struct vfio_device *device;
> +	int ret;
> +
> +	device = kzalloc(sizeof(*device), GFP_KERNEL);
> +	if (!device)
> +		return -ENOMEM;
> +
> +	vfio_init_group_dev(device, dev, ops, device_data);
> +	ret = vfio_register_group_dev(device);
> +	if (ret)
> +		goto err_kfree;
> +	dev_set_drvdata(dev, device);
>  	return 0;
> +
> +err_kfree:
> +	kfree(device);
> +	return ret;
>  }
>  EXPORT_SYMBOL_GPL(vfio_add_group_dev);
>  
> @@ -887,11 +886,9 @@ EXPORT_SYMBOL_GPL(vfio_device_data);
>  /*
>   * Decrement the device reference count and wait for the device to be
>   * removed.  Open file descriptors for the device... */
> -void *vfio_del_group_dev(struct device *dev)
> +void vfio_unregister_group_dev(struct vfio_device *device)
>  {
> -	struct vfio_device *device = dev_get_drvdata(dev);
>  	struct vfio_group *group = device->group;
> -	void *device_data = device->device_data;
>  	struct vfio_unbound_dev *unbound;
>  	unsigned int i = 0;
>  	bool interrupted = false;
> @@ -908,7 +905,7 @@ void *vfio_del_group_dev(struct device *dev)
>  	 */
>  	unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
>  	if (unbound) {
> -		unbound->dev = dev;
> +		unbound->dev = device->dev;
>  		mutex_lock(&group->unbound_lock);
>  		list_add(&unbound->unbound_next, &group->unbound_list);
>  		mutex_unlock(&group->unbound_lock);
> @@ -919,7 +916,7 @@ void *vfio_del_group_dev(struct device *dev)
>  	rc = try_wait_for_completion(&device->comp);
>  	while (rc <= 0) {
>  		if (device->ops->request)
> -			device->ops->request(device_data, i++);
> +			device->ops->request(device->device_data, i++);
>  
>  		if (interrupted) {
>  			rc = wait_for_completion_timeout(&device->comp,
> @@ -929,7 +926,7 @@ void *vfio_del_group_dev(struct device *dev)
>  				&device->comp, HZ * 10);
>  			if (rc < 0) {
>  				interrupted = true;
> -				dev_warn(dev,
> +				dev_warn(device->dev,
>  					 "Device is currently in use, task"
>  					 " \"%s\" (%d) "
>  					 "blocked until device is released",
> @@ -962,9 +959,17 @@ void *vfio_del_group_dev(struct device *dev)
>  
>  	/* Matches the get in vfio_group_create_device() */
>  	vfio_group_put(group);
> +}
> +EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
> +
> +void *vfio_del_group_dev(struct device *dev)
> +{
> +	struct vfio_device *device = dev_get_drvdata(dev);
> +	void *device_data = device->device_data;
> +
> +	vfio_unregister_group_dev(device);
>  	dev_set_drvdata(dev, NULL);
>  	kfree(device);
> -
>  	return device_data;
>  }
>  EXPORT_SYMBOL_GPL(vfio_del_group_dev);
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index b7e18bde5aa8b3..ad8b579d67d34a 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -15,6 +15,18 @@
>  #include <linux/poll.h>
>  #include <uapi/linux/vfio.h>
>  
> +struct vfio_device {
> +	struct device *dev;
> +	const struct vfio_device_ops *ops;
> +	struct vfio_group *group;
> +
> +	/* Members below here are private, not for driver use */
> +	refcount_t refcount;
> +	struct completion comp;
> +	struct list_head group_next;
> +	void *device_data;
> +};
> +
>  /**
>   * struct vfio_device_ops - VFIO bus driver device callbacks
>   *
> @@ -48,11 +60,15 @@ struct vfio_device_ops {
>  extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
>  extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev);
>  
> +void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
> +			 const struct vfio_device_ops *ops, void *device_data);
> +int vfio_register_group_dev(struct vfio_device *device);
>  extern int vfio_add_group_dev(struct device *dev,
>  			      const struct vfio_device_ops *ops,
>  			      void *device_data);
>  
>  extern void *vfio_del_group_dev(struct device *dev);
> +void vfio_unregister_group_dev(struct vfio_device *device);
>  extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
>  extern void vfio_device_put(struct vfio_device *device);
>  extern void *vfio_device_data(struct vfio_device *device);
>
diff mbox series

Patch

diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst
index f1a4d3c3ba0bb1..d3a02300913a7f 100644
--- a/Documentation/driver-api/vfio.rst
+++ b/Documentation/driver-api/vfio.rst
@@ -249,18 +249,23 @@  VFIO bus driver API
 
 VFIO bus drivers, such as vfio-pci make use of only a few interfaces
 into VFIO core.  When devices are bound and unbound to the driver,
-the driver should call vfio_add_group_dev() and vfio_del_group_dev()
-respectively::
-
-	extern int vfio_add_group_dev(struct device *dev,
-				      const struct vfio_device_ops *ops,
-				      void *device_data);
-
-	extern void *vfio_del_group_dev(struct device *dev);
-
-vfio_add_group_dev() indicates to the core to begin tracking the
-iommu_group of the specified dev and register the dev as owned by
-a VFIO bus driver.  The driver provides an ops structure for callbacks
+the driver should call vfio_register_group_dev() and
+vfio_unregister_group_dev() respectively::
+
+	void vfio_init_group_dev(struct vfio_device *device,
+				struct device *dev,
+				const struct vfio_device_ops *ops,
+				void *device_data);
+	int vfio_register_group_dev(struct vfio_device *device);
+	void vfio_unregister_group_dev(struct vfio_device *device);
+
+The driver should embed the vfio_device in its own structure and call
+vfio_init_group_dev() to pre-configure it before going to registration.
+vfio_register_group_dev() indicates to the core to begin tracking the
+iommu_group of the specified dev and register the dev as owned by a VFIO bus
+driver. Once vfio_register_group_dev() returns it is possible for userspace to
+start accessing the driver, thus the driver should ensure it is completely
+ready before calling it. The driver provides an ops structure for callbacks
 similar to a file operations structure::
 
 	struct vfio_device_ops {
@@ -276,7 +281,7 @@  similar to a file operations structure::
 	};
 
 Each function is passed the device_data that was originally registered
-in the vfio_add_group_dev() call above.  This allows the bus driver
+in the vfio_register_group_dev() call above.  This allows the bus driver
 an easy place to store its opaque, private data.  The open/release
 callbacks are issued when a new file descriptor is created for a
 device (via VFIO_GROUP_GET_DEVICE_FD).  The ioctl interface provides
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 32660e8a69ae20..cfa06ae3b9018b 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -89,16 +89,6 @@  struct vfio_group {
 	struct blocking_notifier_head	notifier;
 };
 
-struct vfio_device {
-	refcount_t			refcount;
-	struct completion		comp;
-	struct device			*dev;
-	const struct vfio_device_ops	*ops;
-	struct vfio_group		*group;
-	struct list_head		group_next;
-	void				*device_data;
-};
-
 #ifdef CONFIG_VFIO_NOIOMMU
 static bool noiommu __read_mostly;
 module_param_named(enable_unsafe_noiommu_mode,
@@ -532,35 +522,6 @@  static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
 /**
  * Device objects - create, release, get, put, search
  */
-static
-struct vfio_device *vfio_group_create_device(struct vfio_group *group,
-					     struct device *dev,
-					     const struct vfio_device_ops *ops,
-					     void *device_data)
-{
-	struct vfio_device *device;
-
-	device = kzalloc(sizeof(*device), GFP_KERNEL);
-	if (!device)
-		return ERR_PTR(-ENOMEM);
-
-	refcount_set(&device->refcount, 1);
-	init_completion(&device->comp);
-	device->dev = dev;
-	/* Our reference on group is moved to the device */
-	device->group = group;
-	device->ops = ops;
-	device->device_data = device_data;
-	dev_set_drvdata(dev, device);
-
-	mutex_lock(&group->device_lock);
-	list_add(&device->group_next, &group->device_list);
-	group->dev_counter++;
-	mutex_unlock(&group->device_lock);
-
-	return device;
-}
-
 /* Device reference always implies a group reference */
 void vfio_device_put(struct vfio_device *device)
 {
@@ -779,14 +740,23 @@  static int vfio_iommu_group_notifier(struct notifier_block *nb,
 /**
  * VFIO driver API
  */
-int vfio_add_group_dev(struct device *dev,
-		       const struct vfio_device_ops *ops, void *device_data)
+void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
+			 const struct vfio_device_ops *ops, void *device_data)
+{
+	init_completion(&device->comp);
+	device->dev = dev;
+	device->ops = ops;
+	device->device_data = device_data;
+}
+EXPORT_SYMBOL_GPL(vfio_init_group_dev);
+
+int vfio_register_group_dev(struct vfio_device *device)
 {
+	struct vfio_device *existing_device;
 	struct iommu_group *iommu_group;
 	struct vfio_group *group;
-	struct vfio_device *device;
 
-	iommu_group = iommu_group_get(dev);
+	iommu_group = iommu_group_get(device->dev);
 	if (!iommu_group)
 		return -EINVAL;
 
@@ -805,21 +775,50 @@  int vfio_add_group_dev(struct device *dev,
 		iommu_group_put(iommu_group);
 	}
 
-	device = vfio_group_get_device(group, dev);
-	if (device) {
-		dev_WARN(dev, "Device already exists on group %d\n",
+	existing_device = vfio_group_get_device(group, device->dev);
+	if (existing_device) {
+		dev_WARN(device->dev, "Device already exists on group %d\n",
 			 iommu_group_id(iommu_group));
-		vfio_device_put(device);
+		vfio_device_put(existing_device);
 		vfio_group_put(group);
 		return -EBUSY;
 	}
 
-	device = vfio_group_create_device(group, dev, ops, device_data);
-	if (IS_ERR(device)) {
-		vfio_group_put(group);
-		return PTR_ERR(device);
-	}
+	/* Our reference on group is moved to the device */
+	device->group = group;
+
+	/* Refcounting can't start until the driver calls register */
+	refcount_set(&device->refcount, 1);
+
+	mutex_lock(&group->device_lock);
+	list_add(&device->group_next, &group->device_list);
+	group->dev_counter++;
+	mutex_unlock(&group->device_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_register_group_dev);
+
+int vfio_add_group_dev(struct device *dev, const struct vfio_device_ops *ops,
+		       void *device_data)
+{
+	struct vfio_device *device;
+	int ret;
+
+	device = kzalloc(sizeof(*device), GFP_KERNEL);
+	if (!device)
+		return -ENOMEM;
+
+	vfio_init_group_dev(device, dev, ops, device_data);
+	ret = vfio_register_group_dev(device);
+	if (ret)
+		goto err_kfree;
+	dev_set_drvdata(dev, device);
 	return 0;
+
+err_kfree:
+	kfree(device);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(vfio_add_group_dev);
 
@@ -887,11 +886,9 @@  EXPORT_SYMBOL_GPL(vfio_device_data);
 /*
  * Decrement the device reference count and wait for the device to be
  * removed.  Open file descriptors for the device... */
-void *vfio_del_group_dev(struct device *dev)
+void vfio_unregister_group_dev(struct vfio_device *device)
 {
-	struct vfio_device *device = dev_get_drvdata(dev);
 	struct vfio_group *group = device->group;
-	void *device_data = device->device_data;
 	struct vfio_unbound_dev *unbound;
 	unsigned int i = 0;
 	bool interrupted = false;
@@ -908,7 +905,7 @@  void *vfio_del_group_dev(struct device *dev)
 	 */
 	unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
 	if (unbound) {
-		unbound->dev = dev;
+		unbound->dev = device->dev;
 		mutex_lock(&group->unbound_lock);
 		list_add(&unbound->unbound_next, &group->unbound_list);
 		mutex_unlock(&group->unbound_lock);
@@ -919,7 +916,7 @@  void *vfio_del_group_dev(struct device *dev)
 	rc = try_wait_for_completion(&device->comp);
 	while (rc <= 0) {
 		if (device->ops->request)
-			device->ops->request(device_data, i++);
+			device->ops->request(device->device_data, i++);
 
 		if (interrupted) {
 			rc = wait_for_completion_timeout(&device->comp,
@@ -929,7 +926,7 @@  void *vfio_del_group_dev(struct device *dev)
 				&device->comp, HZ * 10);
 			if (rc < 0) {
 				interrupted = true;
-				dev_warn(dev,
+				dev_warn(device->dev,
 					 "Device is currently in use, task"
 					 " \"%s\" (%d) "
 					 "blocked until device is released",
@@ -962,9 +959,17 @@  void *vfio_del_group_dev(struct device *dev)
 
 	/* Matches the get in vfio_group_create_device() */
 	vfio_group_put(group);
+}
+EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
+
+void *vfio_del_group_dev(struct device *dev)
+{
+	struct vfio_device *device = dev_get_drvdata(dev);
+	void *device_data = device->device_data;
+
+	vfio_unregister_group_dev(device);
 	dev_set_drvdata(dev, NULL);
 	kfree(device);
-
 	return device_data;
 }
 EXPORT_SYMBOL_GPL(vfio_del_group_dev);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index b7e18bde5aa8b3..ad8b579d67d34a 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -15,6 +15,18 @@ 
 #include <linux/poll.h>
 #include <uapi/linux/vfio.h>
 
+struct vfio_device {
+	struct device *dev;
+	const struct vfio_device_ops *ops;
+	struct vfio_group *group;
+
+	/* Members below here are private, not for driver use */
+	refcount_t refcount;
+	struct completion comp;
+	struct list_head group_next;
+	void *device_data;
+};
+
 /**
  * struct vfio_device_ops - VFIO bus driver device callbacks
  *
@@ -48,11 +60,15 @@  struct vfio_device_ops {
 extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
 extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev);
 
+void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
+			 const struct vfio_device_ops *ops, void *device_data);
+int vfio_register_group_dev(struct vfio_device *device);
 extern int vfio_add_group_dev(struct device *dev,
 			      const struct vfio_device_ops *ops,
 			      void *device_data);
 
 extern void *vfio_del_group_dev(struct device *dev);
+void vfio_unregister_group_dev(struct vfio_device *device);
 extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
 extern void vfio_device_put(struct vfio_device *device);
 extern void *vfio_device_data(struct vfio_device *device);