diff mbox series

[v3,12/17] iommufd: Add iommufd_device_replace()

Message ID 12-v3-61d41fd9e13e+1f5-iommufd_alloc_jgg@nvidia.com (mailing list archive)
State New
Headers show
Series Add iommufd physical device operations for replace and alloc hwpt | expand

Commit Message

Jason Gunthorpe March 21, 2023, 7:14 p.m. UTC
Replace allows all the devices in a group to move in one step to a new
HWPT. Further, the HWPT move is done without going through a blocking
domain so that the IOMMU driver can implement some level of
non-distruption to ongoing DMA if that has meaning for it (eg for future
special driver domains)

Replace uses a lot of the same logic as normal attach, except the actual
domain change over has different restrictions, and we are careful to
sequence things so that failure is going to leave everything the way it
was, and not get trapped in a blocking domain or something if there is
ENOMEM.

Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/device.c | 96 ++++++++++++++++++++++++++++++++++
 drivers/iommu/iommufd/main.c   |  1 +
 2 files changed, 97 insertions(+)

Comments

Tian, Kevin March 23, 2023, 7:31 a.m. UTC | #1
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Wednesday, March 22, 2023 3:15 AM
> +
> +	mutex_lock(&idev->igroup->lock);
> +
> +	if (igroup->hwpt == NULL) {
> +		rc = -EINVAL;
> +		goto err_unlock;
> +	}
> +
> +	if (hwpt == igroup->hwpt) {
> +		mutex_unlock(&idev->igroup->lock);
> +		return NULL;
> +	}

goto err_unlock;

> +
> +	/* Move the refcounts held by the device_list to the new hwpt */
> +	refcount_add(num_devices, &hwpt->obj.users);
> +	if (num_devices > 1)
> +		WARN_ON(refcount_sub_and_test(num_devices - 1,
> +					      &old_hwpt->obj.users));

A comment is welcomed to match "caller must destroy old_hwpt".

Otherwise looks good.

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Jason Gunthorpe March 23, 2023, 2:30 p.m. UTC | #2
On Thu, Mar 23, 2023 at 07:31:02AM +0000, Tian, Kevin wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Wednesday, March 22, 2023 3:15 AM
> > +
> > +	mutex_lock(&idev->igroup->lock);
> > +
> > +	if (igroup->hwpt == NULL) {
> > +		rc = -EINVAL;
> > +		goto err_unlock;
> > +	}
> > +
> > +	if (hwpt == igroup->hwpt) {
> > +		mutex_unlock(&idev->igroup->lock);
> > +		return NULL;
> > +	}
> 
> goto err_unlock;

No, this is a success path, it should not jumpt to an err label or use

  return ERR_PTR(0)

> > +	/* Move the refcounts held by the device_list to the new hwpt */
> > +	refcount_add(num_devices, &hwpt->obj.users);
> > +	if (num_devices > 1)
> > +		WARN_ON(refcount_sub_and_test(num_devices - 1,
> > +					      &old_hwpt->obj.users));
> 
> A comment is welcomed to match "caller must destroy old_hwpt".

??

Jason
Tian, Kevin March 24, 2023, 1:42 a.m. UTC | #3
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Thursday, March 23, 2023 10:31 PM
> 
> On Thu, Mar 23, 2023 at 07:31:02AM +0000, Tian, Kevin wrote:
> > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > Sent: Wednesday, March 22, 2023 3:15 AM
> > > +
> > > +	mutex_lock(&idev->igroup->lock);
> > > +
> > > +	if (igroup->hwpt == NULL) {
> > > +		rc = -EINVAL;
> > > +		goto err_unlock;
> > > +	}
> > > +
> > > +	if (hwpt == igroup->hwpt) {
> > > +		mutex_unlock(&idev->igroup->lock);
> > > +		return NULL;
> > > +	}
> >
> > goto err_unlock;
> 
> No, this is a success path, it should not jumpt to an err label or use
> 
>   return ERR_PTR(0)

My bad.

> 
> > > +	/* Move the refcounts held by the device_list to the new hwpt */
> > > +	refcount_add(num_devices, &hwpt->obj.users);
> > > +	if (num_devices > 1)
> > > +		WARN_ON(refcount_sub_and_test(num_devices - 1,
> > > +					      &old_hwpt->obj.users));
> >
> > A comment is welcomed to match "caller must destroy old_hwpt".
> 
> ??
> 

It's not intuitive when moving the refcnt why we subtract num_devices
from the old_hwpt only when it's greater than 1. It's really about
the destroy must be done by the caller.
Jason Gunthorpe March 24, 2023, 3:03 p.m. UTC | #4
On Fri, Mar 24, 2023 at 01:42:58AM +0000, Tian, Kevin wrote:

> > > > +	/* Move the refcounts held by the device_list to the new hwpt */
> > > > +	refcount_add(num_devices, &hwpt->obj.users);
> > > > +	if (num_devices > 1)
> > > > +		WARN_ON(refcount_sub_and_test(num_devices - 1,
> > > > +					      &old_hwpt->obj.users));
> > >
> > > A comment is welcomed to match "caller must destroy old_hwpt".
> > 
> > ??
> > 
> 
> It's not intuitive when moving the refcnt why we subtract num_devices
> from the old_hwpt only when it's greater than 1. It's really about
> the destroy must be done by the caller.


	/*
	 * Move the refcounts held by the device_list to the new hwpt. Retain a
	 * refcount for this thread as the caller will free it.
	 */


Jason
diff mbox series

Patch

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 40466152b68132..5b5c2745b4a088 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -4,6 +4,7 @@ 
 #include <linux/iommufd.h>
 #include <linux/slab.h>
 #include <linux/iommu.h>
+#include "../iommu-priv.h"
 
 #include "io_pagetable.h"
 #include "iommufd_private.h"
@@ -360,6 +361,81 @@  iommufd_device_do_attach(struct iommufd_device *idev,
 	return NULL;
 }
 
+static struct iommufd_hw_pagetable *
+iommufd_device_do_replace(struct iommufd_device *idev,
+			  struct iommufd_hw_pagetable *hwpt)
+{
+	struct iommufd_group *igroup = idev->igroup;
+	struct iommufd_hw_pagetable *old_hwpt;
+	unsigned int num_devices = 0;
+	struct iommufd_device *cur;
+	int rc;
+
+	mutex_lock(&idev->igroup->lock);
+
+	if (igroup->hwpt == NULL) {
+		rc = -EINVAL;
+		goto err_unlock;
+	}
+
+	if (hwpt == igroup->hwpt) {
+		mutex_unlock(&idev->igroup->lock);
+		return NULL;
+	}
+
+	/* Try to upgrade the domain we have */
+	list_for_each_entry(cur, &igroup->device_list, group_item) {
+		num_devices++;
+		if (cur->enforce_cache_coherency) {
+			rc = iommufd_hw_pagetable_enforce_cc(hwpt);
+			if (rc)
+				goto err_unlock;
+		}
+	}
+
+	old_hwpt = igroup->hwpt;
+	if (hwpt->ioas != old_hwpt->ioas) {
+		list_for_each_entry(cur, &igroup->device_list, group_item) {
+			rc = iopt_table_enforce_dev_resv_regions(
+				&hwpt->ioas->iopt, cur->dev, NULL);
+			if (rc)
+				goto err_unresv;
+		}
+	}
+
+	rc = iommufd_group_setup_msi(idev->igroup, hwpt);
+	if (rc)
+		goto err_unresv;
+
+	rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
+	if (rc)
+		goto err_unresv;
+
+	if (hwpt->ioas != old_hwpt->ioas) {
+		list_for_each_entry(cur, &igroup->device_list, group_item)
+			iopt_remove_reserved_iova(&old_hwpt->ioas->iopt,
+						  cur->dev);
+	}
+
+	igroup->hwpt = hwpt;
+
+	/* Move the refcounts held by the device_list to the new hwpt */
+	refcount_add(num_devices, &hwpt->obj.users);
+	if (num_devices > 1)
+		WARN_ON(refcount_sub_and_test(num_devices - 1,
+					      &old_hwpt->obj.users));
+	mutex_unlock(&idev->igroup->lock);
+
+	/* Caller must destroy old_hwpt */
+	return old_hwpt;
+err_unresv:
+	list_for_each_entry(cur, &igroup->device_list, group_item)
+		iopt_remove_reserved_iova(&hwpt->ioas->iopt, cur->dev);
+err_unlock:
+	mutex_unlock(&idev->igroup->lock);
+	return ERR_PTR(rc);
+}
+
 typedef struct iommufd_hw_pagetable *(*attach_fn)(
 	struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt);
 
@@ -518,6 +594,26 @@  int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
 
+/**
+ * iommufd_device_replace - Change the device's iommu_domain
+ * @idev: device to change
+ * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE
+ *         Output the IOMMUFD_OBJ_HW_PAGETABLE ID
+ *
+ * This is the same as
+ *   iommufd_device_detach();
+ *   iommufd_device_attach();
+ * If it fails then no change is made to the attachment. The iommu driver may
+ * implement this so there is no disruption in translation. This can only be
+ * called if iommufd_device_attach() has already succeeded.
+ */
+int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id)
+{
+	return iommufd_device_change_pt(idev, pt_id,
+					&iommufd_device_do_replace);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD);
+
 /**
  * iommufd_device_detach - Disconnect a device to an iommu_domain
  * @idev: device to detach
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index e5ed5dfa91a0b5..8597f2fb88da3a 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -461,5 +461,6 @@  module_exit(iommufd_exit);
 MODULE_ALIAS_MISCDEV(VFIO_MINOR);
 MODULE_ALIAS("devname:vfio/vfio");
 #endif
+MODULE_IMPORT_NS(IOMMUFD_INTERNAL);
 MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices");
 MODULE_LICENSE("GPL");