diff mbox series

[5/6] vfio: Simplify the life cycle of the group FD

Message ID 5-v1-c1d14aae2e8f+2f4-vfio_group_locking_jgg@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Fully lock the container members of struct vfio_group | expand

Commit Message

Jason Gunthorpe May 6, 2022, 12:25 a.m. UTC
Once userspace opens a group FD it is prevented from opening another
instance of that same group FD until all the prior group FDs and users of
the container are done.

The first is done trivially by checking the group->owned during group FD
open.

However, things get a little weird if userspace creates a device FD and
then closes the group FD. The group FD still cannot be re-opened, but this
time it is because the group->container is still set and container_users
is elevated by the device FD.

Due to this mismatched lifecycle we have the
vfio_group_try_dissolve_container() which tries to auto-free a container
after the group FD is closed but the device FD remains open.

Instead have the device FD hold onto a reference to the single group
FD. This directly prevents vfio_group_fops_release() from being called
when any device FD exists and makes the lifecycle model more
understandable.

vfio_group_try_dissolve_container() is removed as the only place a
container is auto-deleted is during vfio_group_fops_release(). At this
point the container_users is either 1 or 0 since all device FDs must be
closed.

Change group->owner to group->singleton_filep which points to the single
struct file * that is open for the group. If the group->singleton_filep is
NULL then group->container == NULL.

If all device FDs have closed then the group's notifier list must be
empty.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/vfio.c | 49 +++++++++++++++++++--------------------------
 1 file changed, 21 insertions(+), 28 deletions(-)

Comments

Alex Williamson May 10, 2022, 7:59 p.m. UTC | #1
On Thu,  5 May 2022 21:25:05 -0300
Jason Gunthorpe <jgg@nvidia.com> wrote:

> Once userspace opens a group FD it is prevented from opening another
> instance of that same group FD until all the prior group FDs and users of
> the container are done.
> 
> The first is done trivially by checking the group->owned during group FD
> open.
> 
> However, things get a little weird if userspace creates a device FD and
> then closes the group FD. The group FD still cannot be re-opened, but this
> time it is because the group->container is still set and container_users
> is elevated by the device FD.
> 
> Due to this mismatched lifecycle we have the
> vfio_group_try_dissolve_container() which tries to auto-free a container
> after the group FD is closed but the device FD remains open.
> 
> Instead have the device FD hold onto a reference to the single group
> FD. This directly prevents vfio_group_fops_release() from being called
> when any device FD exists and makes the lifecycle model more
> understandable.
> 
> vfio_group_try_dissolve_container() is removed as the only place a
> container is auto-deleted is during vfio_group_fops_release(). At this
> point the container_users is either 1 or 0 since all device FDs must be
> closed.
> 
> Change group->owner to group->singleton_filep which points to the single
> struct file * that is open for the group. If the group->singleton_filep is
> NULL then group->container == NULL.
> 
> If all device FDs have closed then the group's notifier list must be
> empty.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/vfio/vfio.c | 49 +++++++++++++++++++--------------------------
>  1 file changed, 21 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 63f7fa872eae60..94ab415190011d 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -73,12 +73,12 @@ struct vfio_group {
>  	struct mutex			device_lock;
>  	struct list_head		vfio_next;
>  	struct list_head		container_next;
> -	bool				opened;
>  	wait_queue_head_t		container_q;
>  	enum vfio_group_type		type;
>  	unsigned int			dev_counter;
>  	struct rw_semaphore		group_rwsem;
>  	struct kvm			*kvm;
> +	struct file			*singleton_file;

I'm not really a fan of this name, if we have a single struct file
pointer on the group, it's necessarily singleton.  Maybe just
"opened_file"?

>  	struct blocking_notifier_head	notifier;
>  };
>  
> @@ -987,20 +987,6 @@ static int vfio_group_unset_container(struct vfio_group *group)
>  	return 0;
>  }
>  
> -/*
> - * When removing container users, anything that removes the last user
> - * implicitly removes the group from the container.  That is, if the
> - * group file descriptor is closed, as well as any device file descriptors,
> - * the group is free.
> - */
> -static void vfio_group_try_dissolve_container(struct vfio_group *group)
> -{
> -	down_write(&group->group_rwsem);
> -	if (0 == atomic_dec_if_positive(&group->container_users))
> -		__vfio_group_unset_container(group);
> -	up_write(&group->group_rwsem);
> -}
> -
>  static int vfio_group_set_container(struct vfio_group *group, int container_fd)
>  {
>  	struct fd f;
> @@ -1093,10 +1079,19 @@ static int vfio_device_assign_container(struct vfio_device *device)
>  			 current->comm, task_pid_nr(current));
>  	}
>  
> +	get_file(group->singleton_file);
>  	atomic_inc(&group->container_users);
>  	return 0;
>  }
>  
> +static void vfio_device_unassign_container(struct vfio_device *device)
> +{
> +	down_write(&device->group->group_rwsem);
> +	atomic_dec(&device->group->container_users);
> +	fput(device->group->singleton_file);
> +	up_write(&device->group->group_rwsem);
> +}
> +
>  static struct file *vfio_device_open(struct vfio_device *device)
>  {
>  	struct file *filep;
> @@ -1155,7 +1150,7 @@ static struct file *vfio_device_open(struct vfio_device *device)
>  	mutex_unlock(&device->dev_set->lock);
>  	module_put(device->dev->driver->owner);
>  err_unassign_container:
> -	vfio_group_try_dissolve_container(device->group);
> +	vfio_device_unassign_container(device);
>  	return ERR_PTR(ret);
>  }
>  
> @@ -1286,18 +1281,12 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
>  
>  	/*
>  	 * Do we need multiple instances of the group open?  Seems not.
> -	 * Is something still in use from a previous open?
>  	 */
> -	if (group->opened || group->container) {
> +	if (group->singleton_file) {
>  		ret = -EBUSY;
>  		goto err_put;
>  	}
> -	group->opened = true;
> -
> -	/* Warn if previous user didn't cleanup and re-init to drop them */
> -	if (WARN_ON(group->notifier.head))
> -		BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
> -
> +	group->singleton_file = filep;
>  	filep->private_data = group;
>  
>  	up_write(&group->group_rwsem);
> @@ -1315,10 +1304,14 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep)
>  
>  	filep->private_data = NULL;
>  
> -	vfio_group_try_dissolve_container(group);
> -
>  	down_write(&group->group_rwsem);
> -	group->opened = false;
> +	/* All device FDs must be released before the group fd releases. */

This sounds more like a user directive as it's phrased, maybe something
like:

	/*
	 * Device FDs hold a group file reference, therefore the group
	 * release is only called when there are no open devices.
	 */

Thanks,
Alex

> +	WARN_ON(group->notifier.head);
> +	if (group->container) {
> +		WARN_ON(atomic_read(&group->container_users) != 1);
> +		__vfio_group_unset_container(group);
> +	}
> +	group->singleton_file = NULL;
>  	up_write(&group->group_rwsem);
>  
>  	vfio_group_put(group);
> @@ -1350,7 +1343,7 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
>  
>  	module_put(device->dev->driver->owner);
>  
> -	vfio_group_try_dissolve_container(device->group);
> +	vfio_device_unassign_container(device);
>  
>  	vfio_device_put(device);
>
Jason Gunthorpe May 12, 2022, 11:35 p.m. UTC | #2
On Tue, May 10, 2022 at 01:59:59PM -0600, Alex Williamson wrote:
> On Thu,  5 May 2022 21:25:05 -0300
> Jason Gunthorpe <jgg@nvidia.com> wrote:
> 
> > Once userspace opens a group FD it is prevented from opening another
> > instance of that same group FD until all the prior group FDs and users of
> > the container are done.
> > 
> > The first is done trivially by checking the group->owned during group FD
> > open.
> > 
> > However, things get a little weird if userspace creates a device FD and
> > then closes the group FD. The group FD still cannot be re-opened, but this
> > time it is because the group->container is still set and container_users
> > is elevated by the device FD.
> > 
> > Due to this mismatched lifecycle we have the
> > vfio_group_try_dissolve_container() which tries to auto-free a container
> > after the group FD is closed but the device FD remains open.
> > 
> > Instead have the device FD hold onto a reference to the single group
> > FD. This directly prevents vfio_group_fops_release() from being called
> > when any device FD exists and makes the lifecycle model more
> > understandable.
> > 
> > vfio_group_try_dissolve_container() is removed as the only place a
> > container is auto-deleted is during vfio_group_fops_release(). At this
> > point the container_users is either 1 or 0 since all device FDs must be
> > closed.
> > 
> > Change group->owner to group->singleton_filep which points to the single
> > struct file * that is open for the group. If the group->singleton_filep is
> > NULL then group->container == NULL.
> > 
> > If all device FDs have closed then the group's notifier list must be
> > empty.
> > 
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> >  drivers/vfio/vfio.c | 49 +++++++++++++++++++--------------------------
> >  1 file changed, 21 insertions(+), 28 deletions(-)
> > 
> > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> > index 63f7fa872eae60..94ab415190011d 100644
> > +++ b/drivers/vfio/vfio.c
> > @@ -73,12 +73,12 @@ struct vfio_group {
> >  	struct mutex			device_lock;
> >  	struct list_head		vfio_next;
> >  	struct list_head		container_next;
> > -	bool				opened;
> >  	wait_queue_head_t		container_q;
> >  	enum vfio_group_type		type;
> >  	unsigned int			dev_counter;
> >  	struct rw_semaphore		group_rwsem;
> >  	struct kvm			*kvm;
> > +	struct file			*singleton_file;
> 
> I'm not really a fan of this name, if we have a single struct file
> pointer on the group, it's necessarily singleton.  Maybe just
> "opened_file"?

Sure

> > @@ -1315,10 +1304,14 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep)
> >  
> >  	filep->private_data = NULL;
> >  
> > -	vfio_group_try_dissolve_container(group);
> > -
> >  	down_write(&group->group_rwsem);
> > -	group->opened = false;
> > +	/* All device FDs must be released before the group fd releases. */
> 
> This sounds more like a user directive as it's phrased, maybe something
> like:
> 
> 	/*
> 	 * Device FDs hold a group file reference, therefore the group
> 	 * release is only called when there are no open devices.
> 	 */

OK

What do you want to do with this series?

As-posted it requires the iommu series, and Joerg has vanished again
so I don't know what will happen there.

However, it doesn't require it, there are just some textual conflicts.

It does need the KVM series though, which I expect we will just go
ahead with unacked. Oh well.

Do you want to still try to get it in, or just give up for this cycle?
If yes, which base should I use :)

Thanks,
Jason
Tian, Kevin May 13, 2022, 9:57 a.m. UTC | #3
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Friday, May 6, 2022 8:25 AM
> 
> Once userspace opens a group FD it is prevented from opening another
> instance of that same group FD until all the prior group FDs and users of
> the container are done.
> 
> The first is done trivially by checking the group->owned during group FD

s/owned/opened/

> open.
> 
> However, things get a little weird if userspace creates a device FD and
> then closes the group FD. The group FD still cannot be re-opened, but this
> time it is because the group->container is still set and container_users
> is elevated by the device FD.
> 
> Due to this mismatched lifecycle we have the
> vfio_group_try_dissolve_container() which tries to auto-free a container
> after the group FD is closed but the device FD remains open.
> 
> Instead have the device FD hold onto a reference to the single group
> FD. This directly prevents vfio_group_fops_release() from being called
> when any device FD exists and makes the lifecycle model more
> understandable.
> 
> vfio_group_try_dissolve_container() is removed as the only place a
> container is auto-deleted is during vfio_group_fops_release(). At this
> point the container_users is either 1 or 0 since all device FDs must be
> closed.
> 
> Change group->owner to group->singleton_filep which points to the single

s/owner/opened/

> struct file * that is open for the group. If the group->singleton_filep is
> NULL then group->container == NULL.
> 
> If all device FDs have closed then the group's notifier list must be
> empty.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

With the name change suggested by Alex:

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

> ---
>  drivers/vfio/vfio.c | 49 +++++++++++++++++++--------------------------
>  1 file changed, 21 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 63f7fa872eae60..94ab415190011d 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -73,12 +73,12 @@ struct vfio_group {
>  	struct mutex			device_lock;
>  	struct list_head		vfio_next;
>  	struct list_head		container_next;
> -	bool				opened;
>  	wait_queue_head_t		container_q;
>  	enum vfio_group_type		type;
>  	unsigned int			dev_counter;
>  	struct rw_semaphore		group_rwsem;
>  	struct kvm			*kvm;
> +	struct file			*singleton_file;
>  	struct blocking_notifier_head	notifier;
>  };
> 
> @@ -987,20 +987,6 @@ static int vfio_group_unset_container(struct
> vfio_group *group)
>  	return 0;
>  }
> 
> -/*
> - * When removing container users, anything that removes the last user
> - * implicitly removes the group from the container.  That is, if the
> - * group file descriptor is closed, as well as any device file descriptors,
> - * the group is free.
> - */
> -static void vfio_group_try_dissolve_container(struct vfio_group *group)
> -{
> -	down_write(&group->group_rwsem);
> -	if (0 == atomic_dec_if_positive(&group->container_users))
> -		__vfio_group_unset_container(group);
> -	up_write(&group->group_rwsem);
> -}
> -
>  static int vfio_group_set_container(struct vfio_group *group, int
> container_fd)
>  {
>  	struct fd f;
> @@ -1093,10 +1079,19 @@ static int vfio_device_assign_container(struct
> vfio_device *device)
>  			 current->comm, task_pid_nr(current));
>  	}
> 
> +	get_file(group->singleton_file);
>  	atomic_inc(&group->container_users);
>  	return 0;
>  }
> 
> +static void vfio_device_unassign_container(struct vfio_device *device)
> +{
> +	down_write(&device->group->group_rwsem);
> +	atomic_dec(&device->group->container_users);
> +	fput(device->group->singleton_file);
> +	up_write(&device->group->group_rwsem);
> +}
> +
>  static struct file *vfio_device_open(struct vfio_device *device)
>  {
>  	struct file *filep;
> @@ -1155,7 +1150,7 @@ static struct file *vfio_device_open(struct
> vfio_device *device)
>  	mutex_unlock(&device->dev_set->lock);
>  	module_put(device->dev->driver->owner);
>  err_unassign_container:
> -	vfio_group_try_dissolve_container(device->group);
> +	vfio_device_unassign_container(device);
>  	return ERR_PTR(ret);
>  }
> 
> @@ -1286,18 +1281,12 @@ static int vfio_group_fops_open(struct inode
> *inode, struct file *filep)
> 
>  	/*
>  	 * Do we need multiple instances of the group open?  Seems not.
> -	 * Is something still in use from a previous open?
>  	 */
> -	if (group->opened || group->container) {
> +	if (group->singleton_file) {
>  		ret = -EBUSY;
>  		goto err_put;
>  	}
> -	group->opened = true;
> -
> -	/* Warn if previous user didn't cleanup and re-init to drop them */
> -	if (WARN_ON(group->notifier.head))
> -		BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
> -
> +	group->singleton_file = filep;
>  	filep->private_data = group;
> 
>  	up_write(&group->group_rwsem);
> @@ -1315,10 +1304,14 @@ static int vfio_group_fops_release(struct inode
> *inode, struct file *filep)
> 
>  	filep->private_data = NULL;
> 
> -	vfio_group_try_dissolve_container(group);
> -
>  	down_write(&group->group_rwsem);
> -	group->opened = false;
> +	/* All device FDs must be released before the group fd releases. */
> +	WARN_ON(group->notifier.head);
> +	if (group->container) {
> +		WARN_ON(atomic_read(&group->container_users) != 1);
> +		__vfio_group_unset_container(group);
> +	}
> +	group->singleton_file = NULL;
>  	up_write(&group->group_rwsem);
> 
>  	vfio_group_put(group);
> @@ -1350,7 +1343,7 @@ static int vfio_device_fops_release(struct inode
> *inode, struct file *filep)
> 
>  	module_put(device->dev->driver->owner);
> 
> -	vfio_group_try_dissolve_container(device->group);
> +	vfio_device_unassign_container(device);
> 
>  	vfio_device_put(device);
> 
> --
> 2.36.0
diff mbox series

Patch

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 63f7fa872eae60..94ab415190011d 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -73,12 +73,12 @@  struct vfio_group {
 	struct mutex			device_lock;
 	struct list_head		vfio_next;
 	struct list_head		container_next;
-	bool				opened;
 	wait_queue_head_t		container_q;
 	enum vfio_group_type		type;
 	unsigned int			dev_counter;
 	struct rw_semaphore		group_rwsem;
 	struct kvm			*kvm;
+	struct file			*singleton_file;
 	struct blocking_notifier_head	notifier;
 };
 
@@ -987,20 +987,6 @@  static int vfio_group_unset_container(struct vfio_group *group)
 	return 0;
 }
 
-/*
- * When removing container users, anything that removes the last user
- * implicitly removes the group from the container.  That is, if the
- * group file descriptor is closed, as well as any device file descriptors,
- * the group is free.
- */
-static void vfio_group_try_dissolve_container(struct vfio_group *group)
-{
-	down_write(&group->group_rwsem);
-	if (0 == atomic_dec_if_positive(&group->container_users))
-		__vfio_group_unset_container(group);
-	up_write(&group->group_rwsem);
-}
-
 static int vfio_group_set_container(struct vfio_group *group, int container_fd)
 {
 	struct fd f;
@@ -1093,10 +1079,19 @@  static int vfio_device_assign_container(struct vfio_device *device)
 			 current->comm, task_pid_nr(current));
 	}
 
+	get_file(group->singleton_file);
 	atomic_inc(&group->container_users);
 	return 0;
 }
 
+static void vfio_device_unassign_container(struct vfio_device *device)
+{
+	down_write(&device->group->group_rwsem);
+	atomic_dec(&device->group->container_users);
+	fput(device->group->singleton_file);
+	up_write(&device->group->group_rwsem);
+}
+
 static struct file *vfio_device_open(struct vfio_device *device)
 {
 	struct file *filep;
@@ -1155,7 +1150,7 @@  static struct file *vfio_device_open(struct vfio_device *device)
 	mutex_unlock(&device->dev_set->lock);
 	module_put(device->dev->driver->owner);
 err_unassign_container:
-	vfio_group_try_dissolve_container(device->group);
+	vfio_device_unassign_container(device);
 	return ERR_PTR(ret);
 }
 
@@ -1286,18 +1281,12 @@  static int vfio_group_fops_open(struct inode *inode, struct file *filep)
 
 	/*
 	 * Do we need multiple instances of the group open?  Seems not.
-	 * Is something still in use from a previous open?
 	 */
-	if (group->opened || group->container) {
+	if (group->singleton_file) {
 		ret = -EBUSY;
 		goto err_put;
 	}
-	group->opened = true;
-
-	/* Warn if previous user didn't cleanup and re-init to drop them */
-	if (WARN_ON(group->notifier.head))
-		BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
-
+	group->singleton_file = filep;
 	filep->private_data = group;
 
 	up_write(&group->group_rwsem);
@@ -1315,10 +1304,14 @@  static int vfio_group_fops_release(struct inode *inode, struct file *filep)
 
 	filep->private_data = NULL;
 
-	vfio_group_try_dissolve_container(group);
-
 	down_write(&group->group_rwsem);
-	group->opened = false;
+	/* All device FDs must be released before the group fd releases. */
+	WARN_ON(group->notifier.head);
+	if (group->container) {
+		WARN_ON(atomic_read(&group->container_users) != 1);
+		__vfio_group_unset_container(group);
+	}
+	group->singleton_file = NULL;
 	up_write(&group->group_rwsem);
 
 	vfio_group_put(group);
@@ -1350,7 +1343,7 @@  static int vfio_device_fops_release(struct inode *inode, struct file *filep)
 
 	module_put(device->dev->driver->owner);
 
-	vfio_group_try_dissolve_container(device->group);
+	vfio_device_unassign_container(device);
 
 	vfio_device_put(device);