diff mbox series

[2/2] vfio: Replace the iommu notifier with a device list

Message ID 2-v1-896844109f36+a-vfio_unmap_notif_jgg@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Remove the VFIO_IOMMU_NOTIFY_DMA_UNMAP notifier | expand

Commit Message

Jason Gunthorpe June 7, 2022, 12:34 a.m. UTC
Instead of bouncing the function call to the driver op through a blocking
notifier just have the iommu layer call it directly.

Register each device that is being attached to the iommu with the lower
driver which then threads them on a linked list and calls the appropriate
driver op at the right time.

Currently the only use is if dma_unmap() is defined.

Also, fully lock all the debugging tests on the pinning path that a
dma_unmap is registered.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/vfio.c             | 39 ++++-------------
 drivers/vfio/vfio.h             | 14 ++-----
 drivers/vfio/vfio_iommu_type1.c | 74 ++++++++++++++++++++-------------
 include/linux/vfio.h            |  2 +-
 4 files changed, 58 insertions(+), 71 deletions(-)

Comments

Christoph Hellwig June 7, 2022, 5:44 a.m. UTC | #1
On Mon, Jun 06, 2022 at 09:34:36PM -0300, Jason Gunthorpe wrote:
> +			if (!list_empty(&iommu->device_list)) {
> +				mutex_lock(&iommu->device_list_lock);
> +				mutex_unlock(&iommu->lock);
> +
> +				list_for_each_entry(device,
> +						    &iommu->device_list,
> +						    iommu_entry)
> +					device->ops->dma_unmap(
> +						device, dma->iova, dma->size);
> +
> +				mutex_unlock(&iommu->device_list_lock);
> +				mutex_lock(&iommu->lock);
> +			}

I wonder if factoring this into a little helper instead of the
very deep indentation might be a bit better for readability.

> +static void vfio_iommu_type1_register_device(void *iommu_data,
> +					     struct vfio_device *vdev)
>  {
>  	struct vfio_iommu *iommu = iommu_data;
>  
> +	if (!vdev->ops->dma_unmap)
> +		return;
>  
> +	mutex_lock(&iommu->lock);
> +	mutex_lock(&iommu->device_list_lock);
> +	list_add(&vdev->iommu_entry, &iommu->device_list);
> +	mutex_unlock(&iommu->device_list_lock);
> +	mutex_unlock(&iommu->lock);

Why do we need both iommu->lock and the device_list_lock everywhere?
Maybe explain the locking scheme somewhere so that people don't have
to guess, because it seems to me that just using iommu->lock would
be enough right now.
Jason Gunthorpe June 7, 2022, 11:57 a.m. UTC | #2
On Tue, Jun 07, 2022 at 07:44:37AM +0200, Christoph Hellwig wrote:
> On Mon, Jun 06, 2022 at 09:34:36PM -0300, Jason Gunthorpe wrote:
> > +			if (!list_empty(&iommu->device_list)) {
> > +				mutex_lock(&iommu->device_list_lock);
> > +				mutex_unlock(&iommu->lock);
> > +
> > +				list_for_each_entry(device,
> > +						    &iommu->device_list,
> > +						    iommu_entry)
> > +					device->ops->dma_unmap(
> > +						device, dma->iova, dma->size);
> > +
> > +				mutex_unlock(&iommu->device_list_lock);
> > +				mutex_lock(&iommu->lock);
> > +			}
> 
> I wonder if factoring this into a little helper instead of the
> very deep indentation might be a bit better for readability.
> 
> > +static void vfio_iommu_type1_register_device(void *iommu_data,
> > +					     struct vfio_device *vdev)
> >  {
> >  	struct vfio_iommu *iommu = iommu_data;
> >  
> > +	if (!vdev->ops->dma_unmap)
> > +		return;
> >  
> > +	mutex_lock(&iommu->lock);
> > +	mutex_lock(&iommu->device_list_lock);
> > +	list_add(&vdev->iommu_entry, &iommu->device_list);
> > +	mutex_unlock(&iommu->device_list_lock);
> > +	mutex_unlock(&iommu->lock);
> 
> Why do we need both iommu->lock and the device_list_lock everywhere?

Not everwhere, all the readers are using only one of the locks.  The
list empty calls that were previously unlocked are done under the
iommu->lock and only the list iteration was done under the
device_list.

> Maybe explain the locking scheme somewhere so that people don't have
> to guess, because it seems to me that just using iommu->lock would
> be enough right now.

The expectation is that the dma_umap callback will re-enter the type1
driver via vfio_unpin_pages calls and this will recurse back onto the
iommu->lock - so it must be dropped before invoking the callback.

I'll add a note

Jason
diff mbox series

Patch

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index f005b644ab9e69..05623f52e38d32 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1077,17 +1077,6 @@  static void vfio_device_unassign_container(struct vfio_device *device)
 	up_write(&device->group->group_rwsem);
 }
 
-static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action,
-			       void *data)
-{
-	struct vfio_device *vfio_device =
-		container_of(nb, struct vfio_device, iommu_nb);
-	struct vfio_iommu_type1_dma_unmap *unmap = data;
-
-	vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size);
-	return NOTIFY_OK;
-}
-
 static struct file *vfio_device_open(struct vfio_device *device)
 {
 	struct vfio_iommu_driver *iommu_driver;
@@ -1123,15 +1112,9 @@  static struct file *vfio_device_open(struct vfio_device *device)
 		}
 
 		iommu_driver = device->group->container->iommu_driver;
-		if (device->ops->dma_unmap && iommu_driver &&
-		    iommu_driver->ops->register_notifier) {
-			unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
-
-			device->iommu_nb.notifier_call = vfio_iommu_notifier;
-			iommu_driver->ops->register_notifier(
-				device->group->container->iommu_data, &events,
-				&device->iommu_nb);
-		}
+		if (iommu_driver && iommu_driver->ops->register_device)
+			iommu_driver->ops->register_device(
+				device->group->container->iommu_data, device);
 
 		up_read(&device->group->group_rwsem);
 	}
@@ -1171,11 +1154,9 @@  static struct file *vfio_device_open(struct vfio_device *device)
 		device->ops->close_device(device);
 
 		iommu_driver = device->group->container->iommu_driver;
-		if (device->ops->dma_unmap && iommu_driver &&
-		    iommu_driver->ops->register_notifier)
-			iommu_driver->ops->unregister_notifier(
-				device->group->container->iommu_data,
-				&device->iommu_nb);
+		if (iommu_driver && iommu_driver->ops->register_device)
+			iommu_driver->ops->unregister_device(
+				device->group->container->iommu_data, device);
 	}
 err_undo_count:
 	device->open_count--;
@@ -1380,11 +1361,9 @@  static int vfio_device_fops_release(struct inode *inode, struct file *filep)
 		device->ops->close_device(device);
 
 	iommu_driver = device->group->container->iommu_driver;
-	if (device->ops->dma_unmap && iommu_driver &&
-	    iommu_driver->ops->register_notifier)
-		iommu_driver->ops->unregister_notifier(
-			device->group->container->iommu_data,
-			&device->iommu_nb);
+	if (iommu_driver && iommu_driver->ops->unregister_device)
+		iommu_driver->ops->unregister_device(
+			device->group->container->iommu_data, device);
 	up_read(&device->group->group_rwsem);
 	device->open_count--;
 	if (device->open_count == 0)
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index cb2e4e9baa8fe8..4a7db1f3c33e7e 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -33,11 +33,6 @@  enum vfio_iommu_notify_type {
 	VFIO_IOMMU_CONTAINER_CLOSE = 0,
 };
 
-/* events for register_notifier() */
-enum {
-	VFIO_IOMMU_NOTIFY_DMA_UNMAP = 1,
-};
-
 /**
  * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
  */
@@ -60,11 +55,10 @@  struct vfio_iommu_driver_ops {
 				     unsigned long *phys_pfn);
 	int		(*unpin_pages)(void *iommu_data,
 				       unsigned long *user_pfn, int npage);
-	int		(*register_notifier)(void *iommu_data,
-					     unsigned long *events,
-					     struct notifier_block *nb);
-	int		(*unregister_notifier)(void *iommu_data,
-					       struct notifier_block *nb);
+	void		(*register_device)(void *iommu_data,
+					   struct vfio_device *vdev);
+	void		(*unregister_device)(void *iommu_data,
+					     struct vfio_device *vdev);
 	int		(*dma_rw)(void *iommu_data, dma_addr_t user_iova,
 				  void *data, size_t count, bool write);
 	struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c13b9290e35759..7011fdeaf7db08 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -67,7 +67,8 @@  struct vfio_iommu {
 	struct list_head	iova_list;
 	struct mutex		lock;
 	struct rb_root		dma_list;
-	struct blocking_notifier_head notifier;
+	struct list_head	device_list;
+	struct mutex		device_list_lock;
 	unsigned int		dma_avail;
 	unsigned int		vaddr_invalid_count;
 	uint64_t		pgsize_bitmap;
@@ -865,8 +866,8 @@  static int vfio_iommu_type1_pin_pages(void *iommu_data,
 		}
 	}
 
-	/* Fail if notifier list is empty */
-	if (!iommu->notifier.head) {
+	/* Fail if no dma_umap notifier is registered */
+	if (list_empty(&iommu->device_list)) {
 		ret = -EINVAL;
 		goto pin_done;
 	}
@@ -1406,7 +1407,7 @@  static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 		}
 
 		if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
-			struct vfio_iommu_type1_dma_unmap nb_unmap;
+			struct vfio_device *device;
 
 			if (dma_last == dma) {
 				BUG_ON(++retries > 10);
@@ -1415,20 +1416,25 @@  static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 				retries = 0;
 			}
 
-			nb_unmap.iova = dma->iova;
-			nb_unmap.size = dma->size;
-
 			/*
 			 * Notify anyone (mdev vendor drivers) to invalidate and
 			 * unmap iovas within the range we're about to unmap.
 			 * Vendor drivers MUST unpin pages in response to an
 			 * invalidation.
 			 */
-			mutex_unlock(&iommu->lock);
-			blocking_notifier_call_chain(&iommu->notifier,
-						    VFIO_IOMMU_NOTIFY_DMA_UNMAP,
-						    &nb_unmap);
-			mutex_lock(&iommu->lock);
+			if (!list_empty(&iommu->device_list)) {
+				mutex_lock(&iommu->device_list_lock);
+				mutex_unlock(&iommu->lock);
+
+				list_for_each_entry(device,
+						    &iommu->device_list,
+						    iommu_entry)
+					device->ops->dma_unmap(
+						device, dma->iova, dma->size);
+
+				mutex_unlock(&iommu->device_list_lock);
+				mutex_lock(&iommu->lock);
+			}
 			goto again;
 		}
 
@@ -2478,7 +2484,7 @@  static void vfio_iommu_type1_detach_group(void *iommu_data,
 
 		if (list_empty(&iommu->emulated_iommu_groups) &&
 		    list_empty(&iommu->domain_list)) {
-			WARN_ON(iommu->notifier.head);
+			WARN_ON(!list_empty(&iommu->device_list));
 			vfio_iommu_unmap_unpin_all(iommu);
 		}
 		goto detach_group_done;
@@ -2510,7 +2516,8 @@  static void vfio_iommu_type1_detach_group(void *iommu_data,
 		if (list_empty(&domain->group_list)) {
 			if (list_is_singular(&iommu->domain_list)) {
 				if (list_empty(&iommu->emulated_iommu_groups)) {
-					WARN_ON(iommu->notifier.head);
+					WARN_ON(!list_empty(
+						&iommu->device_list));
 					vfio_iommu_unmap_unpin_all(iommu);
 				} else {
 					vfio_iommu_unmap_unpin_reaccount(iommu);
@@ -2571,7 +2578,8 @@  static void *vfio_iommu_type1_open(unsigned long arg)
 	iommu->dma_avail = dma_entry_limit;
 	iommu->container_open = true;
 	mutex_init(&iommu->lock);
-	BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
+	mutex_init(&iommu->device_list_lock);
+	INIT_LIST_HEAD(&iommu->device_list);
 	init_waitqueue_head(&iommu->vaddr_wait);
 	iommu->pgsize_bitmap = PAGE_MASK;
 	INIT_LIST_HEAD(&iommu->emulated_iommu_groups);
@@ -3008,28 +3016,34 @@  static long vfio_iommu_type1_ioctl(void *iommu_data,
 	}
 }
 
-static int vfio_iommu_type1_register_notifier(void *iommu_data,
-					      unsigned long *events,
-					      struct notifier_block *nb)
+static void vfio_iommu_type1_register_device(void *iommu_data,
+					     struct vfio_device *vdev)
 {
 	struct vfio_iommu *iommu = iommu_data;
 
-	/* clear known events */
-	*events &= ~VFIO_IOMMU_NOTIFY_DMA_UNMAP;
-
-	/* refuse to register if still events remaining */
-	if (*events)
-		return -EINVAL;
+	if (!vdev->ops->dma_unmap)
+		return;
 
-	return blocking_notifier_chain_register(&iommu->notifier, nb);
+	mutex_lock(&iommu->lock);
+	mutex_lock(&iommu->device_list_lock);
+	list_add(&vdev->iommu_entry, &iommu->device_list);
+	mutex_unlock(&iommu->device_list_lock);
+	mutex_unlock(&iommu->lock);
 }
 
-static int vfio_iommu_type1_unregister_notifier(void *iommu_data,
-						struct notifier_block *nb)
+static void vfio_iommu_type1_unregister_device(void *iommu_data,
+					       struct vfio_device *vdev)
 {
 	struct vfio_iommu *iommu = iommu_data;
 
-	return blocking_notifier_chain_unregister(&iommu->notifier, nb);
+	if (!vdev->ops->dma_unmap)
+		return;
+
+	mutex_lock(&iommu->lock);
+	mutex_lock(&iommu->device_list_lock);
+	list_del(&vdev->iommu_entry);
+	mutex_unlock(&iommu->device_list_lock);
+	mutex_unlock(&iommu->lock);
 }
 
 static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
@@ -3163,8 +3177,8 @@  static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
 	.detach_group		= vfio_iommu_type1_detach_group,
 	.pin_pages		= vfio_iommu_type1_pin_pages,
 	.unpin_pages		= vfio_iommu_type1_unpin_pages,
-	.register_notifier	= vfio_iommu_type1_register_notifier,
-	.unregister_notifier	= vfio_iommu_type1_unregister_notifier,
+	.register_device	= vfio_iommu_type1_register_device,
+	.unregister_device	= vfio_iommu_type1_unregister_device,
 	.dma_rw			= vfio_iommu_type1_dma_rw,
 	.group_iommu_domain	= vfio_iommu_type1_group_iommu_domain,
 	.notify			= vfio_iommu_type1_notify,
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index b76623e3b92fca..c22d3f1e13b66c 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -44,7 +44,7 @@  struct vfio_device {
 	unsigned int open_count;
 	struct completion comp;
 	struct list_head group_next;
-	struct notifier_block iommu_nb;
+	struct list_head iommu_entry;
 };
 
 /**