diff mbox series

[9/9] vfio: Remove calls to vfio_group_add_container_user()

Message ID 9-v1-a8faf768d202+125dd-vfio_mdev_no_group_jgg@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Make the rest of the VFIO driver interface use vfio_device | expand

Commit Message

Jason Gunthorpe April 12, 2022, 3:53 p.m. UTC
When the open_device() op is called, container_users is incremented and
remains incremented until close_device(). Thus, so long as drivers call
functions within their open_device()/close_device() region, they do not
need to worry about container_users.

All of these functions can only be called between
open_device()/close_device():

  vfio_pin_pages()
  vfio_unpin_pages()
  vfio_dma_rw()
  vfio_register_notifier()
  vfio_unregister_notifier()

So eliminate the calls to vfio_group_add_container_user() and add a simple
WARN_ON to detect mis-use by drivers.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/vfio.c | 67 +++++++++------------------------------------
 1 file changed, 13 insertions(+), 54 deletions(-)

Comments

Christoph Hellwig April 13, 2022, 6:11 a.m. UTC | #1
On Tue, Apr 12, 2022 at 12:53:36PM -0300, Jason Gunthorpe wrote:
> +	if (WARN_ON(!READ_ONCE(vdev->open_count)))
> +		return -EINVAL;

I think all the WARN_ON()s in this patch need to be WARN_ON_ONCE,
otherwise there will be too many backtraces to be useful if a driver
ever gets the API wrong.

Otherwise looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
Jason Gunthorpe April 13, 2022, 2:03 p.m. UTC | #2
On Wed, Apr 13, 2022 at 08:11:05AM +0200, Christoph Hellwig wrote:
> On Tue, Apr 12, 2022 at 12:53:36PM -0300, Jason Gunthorpe wrote:
> > +	if (WARN_ON(!READ_ONCE(vdev->open_count)))
> > +		return -EINVAL;
> 
> I think all the WARN_ON()s in this patch need to be WARN_ON_ONCE,
> otherwise there will be too many backtraces to be useful if a driver
> ever gets the API wrong.

Sure, I added a wrapper to make that have less overhead and merged it
with the other 'driver is calling this correctly' checks:

@@ -1330,6 +1330,12 @@ static int vfio_group_add_container_user(struct vfio_group *group)
 
 static const struct file_operations vfio_device_fops;
 
+/* true if the vfio_device has open_device() called but not close_device() */
+static bool vfio_assert_device_open(struct vfio_device *device)
+{
+	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
+}
+
 static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
 {
 	struct vfio_device *device;
@@ -1544,6 +1550,7 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
 	struct vfio_device *device = filep->private_data;
 
 	mutex_lock(&device->dev_set->lock);
+	vfio_assert_device_open(device);
 	if (!--device->open_count && device->ops->close_device)
 		device->ops->close_device(device);
 	mutex_unlock(&device->dev_set->lock);
@@ -2112,7 +2119,7 @@ int vfio_pin_pages(struct vfio_device *vdev, unsigned long *user_pfn, int npage,
 	struct vfio_iommu_driver *driver;
 	int ret;
 
-	if (!user_pfn || !phys_pfn || !npage)
+	if (!user_pfn || !phys_pfn || !npage || !vfio_assert_device_open(vdev))
 		return -EINVAL;
 
 	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
@@ -2121,9 +2128,6 @@ int vfio_pin_pages(struct vfio_device *vdev, unsigned long *user_pfn, int npage,
 	if (group->dev_counter > 1)
 		return -EINVAL;
 
-	if (WARN_ON(!READ_ONCE(vdev->open_count)))
-		return -EINVAL;
-
 	container = group->container;
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->pin_pages))
@@ -2153,15 +2157,12 @@ int vfio_unpin_pages(struct vfio_device *vdev, unsigned long *user_pfn,
 	struct vfio_iommu_driver *driver;
 	int ret;
 
-	if (!user_pfn || !npage)
+	if (!user_pfn || !npage || !vfio_assert_device_open(vdev))
 		return -EINVAL;
 
 	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
 		return -E2BIG;
 
-	if (WARN_ON(!READ_ONCE(vdev->open_count)))
-		return -EINVAL;
-
 	container = vdev->group->container;
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->unpin_pages))
@@ -2198,10 +2199,7 @@ int vfio_dma_rw(struct vfio_device *vdev, dma_addr_t user_iova,
 	struct vfio_iommu_driver *driver;
 	int ret = 0;
 
-	if (!data || len <= 0)
-		return -EINVAL;
-
-	if (WARN_ON(!READ_ONCE(vdev->open_count)))
+	if (!data || len <= 0 || !vfio_assert_device_open(vdev))
 		return -EINVAL;
 
 	container = vdev->group->container;
@@ -2294,10 +2292,7 @@ int vfio_register_notifier(struct vfio_device *dev, enum vfio_notify_type type,
 	struct vfio_group *group = dev->group;
 	int ret;
 
-	if (!nb || !events || (*events == 0))
-		return -EINVAL;
-
-	if (WARN_ON(!READ_ONCE(dev->open_count)))
+	if (!nb || !events || (*events == 0) || !vfio_assert_device_open(dev))
 		return -EINVAL;
 
 	switch (type) {
@@ -2321,10 +2316,7 @@ int vfio_unregister_notifier(struct vfio_device *dev,
 	struct vfio_group *group = dev->group;
 	int ret;
 
-	if (!nb)
-		return -EINVAL;
-
-	if (WARN_ON(!READ_ONCE(dev->open_count)))
+	if (!nb || !vfio_assert_device_open(dev))
 		return -EINVAL;
 
 	switch (type) {

Thanks,
Jason
Christoph Hellwig April 13, 2022, 4:07 p.m. UTC | #3
On Wed, Apr 13, 2022 at 11:03:05AM -0300, Jason Gunthorpe wrote:
> On Wed, Apr 13, 2022 at 08:11:05AM +0200, Christoph Hellwig wrote:
> > On Tue, Apr 12, 2022 at 12:53:36PM -0300, Jason Gunthorpe wrote:
> > > +	if (WARN_ON(!READ_ONCE(vdev->open_count)))
> > > +		return -EINVAL;
> > 
> > I think all the WARN_ON()s in this patch need to be WARN_ON_ONCE,
> > otherwise there will be too many backtraces to be useful if a driver
> > ever gets the API wrong.
> 
> Sure, I added a wrapper to make that have less overhead and merged it
> with the other 'driver is calling this correctly' checks:

Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
Matthew Rosato April 14, 2022, 1:51 p.m. UTC | #4
On 4/12/22 11:53 AM, Jason Gunthorpe wrote:
> When the open_device() op is called the container_users is incremented and
> held incremented until close_device(). Thus, so long as drivers call
> functions within their open_device()/close_device() region they do not
> need to worry about the container_users.
> 
> These functions can all only be called between
> open_device()/close_device():
> 
>    vfio_pin_pages()
>    vfio_unpin_pages()
>    vfio_dma_rw()
>    vfio_register_notifier()
>    vfio_unregister_notifier()
> 
> So eliminate the calls to vfio_group_add_container_user() and add a simple
> WARN_ON to detect mis-use by drivers.
> 

vfio_device_fops_release decrements dev->open_count immediately before
calling dev->ops->close_device, which means we could enter close_device
with an open_count of 0.

Maybe vfio_device_fops_release should handle this the same way as
vfio_group_get_device_fd?

	if (device->open_count == 1 && device->ops->close_device)
		device->ops->close_device(device);
	device->open_count--;
Jason Gunthorpe April 14, 2022, 2:22 p.m. UTC | #5
On Thu, Apr 14, 2022 at 09:51:49AM -0400, Matthew Rosato wrote:
> On 4/12/22 11:53 AM, Jason Gunthorpe wrote:
> > When the open_device() op is called the container_users is incremented and
> > held incremented until close_device(). Thus, so long as drivers call
> > functions within their open_device()/close_device() region they do not
> > need to worry about the container_users.
> > 
> > These functions can all only be called between
> > open_device()/close_device():
> > 
> >    vfio_pin_pages()
> >    vfio_unpin_pages()
> >    vfio_dma_rw()
> >    vfio_register_notifier()
> >    vfio_unregister_notifier()
> > 
> > So eliminate the calls to vfio_group_add_container_user() and add a simple
> > WARN_ON to detect mis-use by drivers.
> > 
> 
> vfio_device_fops_release decrements dev->open_count immediately before
> calling dev->ops->close_device, which means we could enter close_device with
> a dev_count of 0.
> 
> Maybe vfio_device_fops_release should handle the same way as
> vfio_group_get_device_fd?
> 
> 	if (device->open_count == 1 && device->ops->close_device)
> 		device->ops->close_device(device);
> 	device->open_count--;

Yes, thanks a lot! I have nothing to test these flows on...

It matches the ordering in the only other place to call close_device.

I folded this into the patch:

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 0f735f9f206002..29761f0cf0a227 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1551,8 +1551,9 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
 
 	mutex_lock(&device->dev_set->lock);
 	vfio_assert_device_open(device);
-	if (!--device->open_count && device->ops->close_device)
+	if (device->open_count == 1 && device->ops->close_device)
 		device->ops->close_device(device);
+	device->open_count--;
 	mutex_unlock(&device->dev_set->lock);
 
 	module_put(device->dev->driver->owner);

Jason
Tian, Kevin April 15, 2022, 2:32 a.m. UTC | #6
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Thursday, April 14, 2022 10:22 PM
> 
> On Thu, Apr 14, 2022 at 09:51:49AM -0400, Matthew Rosato wrote:
> > On 4/12/22 11:53 AM, Jason Gunthorpe wrote:
> > > When the open_device() op is called the container_users is incremented
> and
> > > held incremented until close_device(). Thus, so long as drivers call
> > > functions within their open_device()/close_device() region they do not
> > > need to worry about the container_users.
> > >
> > > These functions can all only be called between
> > > open_device()/close_device():
> > >
> > >    vfio_pin_pages()
> > >    vfio_unpin_pages()
> > >    vfio_dma_rw()
> > >    vfio_register_notifier()
> > >    vfio_unregister_notifier()
> > >
> > > So eliminate the calls to vfio_group_add_container_user() and add a
> simple
> > > WARN_ON to detect mis-use by drivers.
> > >
> >
> > vfio_device_fops_release decrements dev->open_count immediately
> before
> > calling dev->ops->close_device, which means we could enter close_device
> with
> > a dev_count of 0.
> >
> > Maybe vfio_device_fops_release should handle the same way as
> > vfio_group_get_device_fd?
> >
> > 	if (device->open_count == 1 && device->ops->close_device)
> > 		device->ops->close_device(device);
> > 	device->open_count--;
> 
> Yes, thanks alot! I have nothing to test these flows on...
> 
> It matches the ordering in the only other place to call close_device.
> 
> I folded this into the patch:

While it's a welcome fix, is it actually related to this series? The point
of this patch is that those functions are called when container_users
is non-zero. This is true even without this fix, given that container_users
is decremented after calling device->ops->close_device().

IIUC, this might be better sent as a separate fix outside of this series.
Or at least add a note in the commit message that it takes the chance
to fix an unrelated issue, to avoid confusion...

Thanks
Kevin
Jason Gunthorpe April 15, 2022, 12:07 p.m. UTC | #7
On Fri, Apr 15, 2022 at 02:32:08AM +0000, Tian, Kevin wrote:

> While it's a welcomed fix is it actually related to this series? The point
> of this patch is that those functions are called when container_users
> is non-zero. This is true even without this fix given container_users
> is decremented after calling device->ops->close_device().

It isn't: open_count, which the new assertions check, is decremented before
close_device() is called, so it is 0 when the assertions run.

Jason
Tian, Kevin April 15, 2022, 11:45 p.m. UTC | #8
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Friday, April 15, 2022 8:07 PM
> 
> On Fri, Apr 15, 2022 at 02:32:08AM +0000, Tian, Kevin wrote:
> 
> > While it's a welcomed fix is it actually related to this series? The point
> > of this patch is that those functions are called when container_users
> > is non-zero. This is true even without this fix given container_users
> > is decremented after calling device->ops->close_device().
> 
> It isn't, it is decremented before which causes it to be 0 when the
> assertions are called.
> 

right, it's quite obvious when I read it the second time.
diff mbox series

Patch

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 3d75505bf3cc26..ab0c3f5635905c 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -2121,9 +2121,8 @@  int vfio_pin_pages(struct vfio_device *vdev, unsigned long *user_pfn, int npage,
 	if (group->dev_counter > 1)
 		return -EINVAL;
 
-	ret = vfio_group_add_container_user(group);
-	if (ret)
-		return ret;
+	if (WARN_ON(!READ_ONCE(vdev->open_count)))
+		return -EINVAL;
 
 	container = group->container;
 	driver = container->iommu_driver;
@@ -2134,8 +2133,6 @@  int vfio_pin_pages(struct vfio_device *vdev, unsigned long *user_pfn, int npage,
 	else
 		ret = -ENOTTY;
 
-	vfio_group_try_dissolve_container(group);
-
 	return ret;
 }
 EXPORT_SYMBOL(vfio_pin_pages);
@@ -2162,9 +2159,8 @@  int vfio_unpin_pages(struct vfio_device *vdev, unsigned long *user_pfn,
 	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
 		return -E2BIG;
 
-	ret = vfio_group_add_container_user(vdev->group);
-	if (ret)
-		return ret;
+	if (WARN_ON(!READ_ONCE(vdev->open_count)))
+		return -EINVAL;
 
 	container = vdev->group->container;
 	driver = container->iommu_driver;
@@ -2174,8 +2170,6 @@  int vfio_unpin_pages(struct vfio_device *vdev, unsigned long *user_pfn,
 	else
 		ret = -ENOTTY;
 
-	vfio_group_try_dissolve_container(vdev->group);
-
 	return ret;
 }
 EXPORT_SYMBOL(vfio_unpin_pages);
@@ -2207,9 +2201,8 @@  int vfio_dma_rw(struct vfio_device *vdev, dma_addr_t user_iova,
 	if (!data || len <= 0)
 		return -EINVAL;
 
-	ret = vfio_group_add_container_user(vdev->group);
-	if (ret)
-		return ret;
+	if (WARN_ON(!READ_ONCE(vdev->open_count)))
+		return -EINVAL;
 
 	container = vdev->group->container;
 	driver = container->iommu_driver;
@@ -2219,9 +2212,6 @@  int vfio_dma_rw(struct vfio_device *vdev, dma_addr_t user_iova,
 					  user_iova, data, len, write);
 	else
 		ret = -ENOTTY;
-
-	vfio_group_try_dissolve_container(vdev->group);
-
 	return ret;
 }
 EXPORT_SYMBOL(vfio_dma_rw);
@@ -2234,10 +2224,6 @@  static int vfio_register_iommu_notifier(struct vfio_group *group,
 	struct vfio_iommu_driver *driver;
 	int ret;
 
-	ret = vfio_group_add_container_user(group);
-	if (ret)
-		return -EINVAL;
-
 	container = group->container;
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->register_notifier))
@@ -2245,9 +2231,6 @@  static int vfio_register_iommu_notifier(struct vfio_group *group,
 						     events, nb);
 	else
 		ret = -ENOTTY;
-
-	vfio_group_try_dissolve_container(group);
-
 	return ret;
 }
 
@@ -2258,10 +2241,6 @@  static int vfio_unregister_iommu_notifier(struct vfio_group *group,
 	struct vfio_iommu_driver *driver;
 	int ret;
 
-	ret = vfio_group_add_container_user(group);
-	if (ret)
-		return -EINVAL;
-
 	container = group->container;
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->unregister_notifier))
@@ -2269,9 +2248,6 @@  static int vfio_unregister_iommu_notifier(struct vfio_group *group,
 						       nb);
 	else
 		ret = -ENOTTY;
-
-	vfio_group_try_dissolve_container(group);
-
 	return ret;
 }
 
@@ -2300,10 +2276,6 @@  static int vfio_register_group_notifier(struct vfio_group *group,
 	if (*events)
 		return -EINVAL;
 
-	ret = vfio_group_add_container_user(group);
-	if (ret)
-		return -EINVAL;
-
 	ret = blocking_notifier_chain_register(&group->notifier, nb);
 
 	/*
@@ -2313,25 +2285,6 @@  static int vfio_register_group_notifier(struct vfio_group *group,
 	if (!ret && set_kvm && group->kvm)
 		blocking_notifier_call_chain(&group->notifier,
 					VFIO_GROUP_NOTIFY_SET_KVM, group->kvm);
-
-	vfio_group_try_dissolve_container(group);
-
-	return ret;
-}
-
-static int vfio_unregister_group_notifier(struct vfio_group *group,
-					 struct notifier_block *nb)
-{
-	int ret;
-
-	ret = vfio_group_add_container_user(group);
-	if (ret)
-		return -EINVAL;
-
-	ret = blocking_notifier_chain_unregister(&group->notifier, nb);
-
-	vfio_group_try_dissolve_container(group);
-
 	return ret;
 }
 
@@ -2344,6 +2297,9 @@  int vfio_register_notifier(struct vfio_device *dev, enum vfio_notify_type type,
 	if (!nb || !events || (*events == 0))
 		return -EINVAL;
 
+	if (WARN_ON(!READ_ONCE(dev->open_count)))
+		return -EINVAL;
+
 	switch (type) {
 	case VFIO_IOMMU_NOTIFY:
 		ret = vfio_register_iommu_notifier(group, events, nb);
@@ -2368,12 +2324,15 @@  int vfio_unregister_notifier(struct vfio_device *dev,
 	if (!nb)
 		return -EINVAL;
 
+	if (WARN_ON(!READ_ONCE(dev->open_count)))
+		return -EINVAL;
+
 	switch (type) {
 	case VFIO_IOMMU_NOTIFY:
 		ret = vfio_unregister_iommu_notifier(group, nb);
 		break;
 	case VFIO_GROUP_NOTIFY:
-		ret = vfio_unregister_group_notifier(group, nb);
+		ret = blocking_notifier_chain_unregister(&group->notifier, nb);
 		break;
 	default:
 		ret = -EINVAL;