
[v12,14/24] iommufd/device: Add iommufd_access_detach() API

Message ID: 20230602121653.80017-15-yi.l.liu@intel.com
State: New, archived
Series: Add vfio_device cdev for iommufd support

Commit Message

Yi Liu June 2, 2023, 12:16 p.m. UTC
From: Nicolin Chen <nicolinc@nvidia.com>

Previously, the detach routine was only done as part of destroy(), which
is called by vfio_iommufd_emulated_unbind() when the device is closed, so
all the mappings in the iopt had already been cleaned up by the time the
call trace reached this detach() routine.

Now there is a need for a detach uAPI, which requires not only a new
iommufd_access_detach() API but also an access->ops->unmap() call for
cleanup. So add one.

However, leaving that unprotected opens a potential race against the
pin_/unpin_pages() calls, which reference access->ioas->iopt. So add an
ioas_lock to protect the iopt references.

Also, to keep iommufd_access_unpin_pages() working when it is called
back via this unmap(), add an ioas_unpin pointer so the unpin routine is
not affected by the "access->ioas = NULL" trick.

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Tested-by: Terrence Xu <terrence.xu@intel.com>
Tested-by: Yanting Jiang <yanting.jiang@intel.com>
Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/iommu/iommufd/device.c          | 76 +++++++++++++++++++++++--
 drivers/iommu/iommufd/iommufd_private.h |  2 +
 include/linux/iommufd.h                 |  1 +
 3 files changed, 74 insertions(+), 5 deletions(-)
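
For orientation, here is a rough caller-side sketch of how a driver might
use the access API together with the new detach (not part of the patch;
the my_*() names are invented for illustration, and the unmap() callback
prototype is inferred from the access->ops->unmap(access->data, 0,
ULONG_MAX) call in the patch):

#include <linux/err.h>
#include <linux/iommufd.h>

/* Called back by iommufd (e.g. from detach) to drop driver-side pins. */
static void my_unmap(void *data, unsigned long iova, unsigned long length)
{
	/* iommufd_access_unpin_pages() anything still pinned in the range */
}

static const struct iommufd_access_ops my_access_ops = {
	.unmap = my_unmap,
};

static int my_bind_and_attach(struct iommufd_ctx *ictx, void *priv,
			      u32 ioas_id, struct iommufd_access **out)
{
	struct iommufd_access *access;
	u32 id;
	int rc;

	access = iommufd_access_create(ictx, &my_access_ops, priv, &id);
	if (IS_ERR(access))
		return PTR_ERR(access);

	rc = iommufd_access_attach(access, ioas_id);
	if (rc) {
		iommufd_access_destroy(access);
		return rc;
	}
	*out = access;
	return 0;
}

With this patch, the driver can later call iommufd_access_detach(access)
on its detach path and keep the access object around for a subsequent
iommufd_access_attach(), instead of having to destroy it.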

Comments

Jason Gunthorpe June 23, 2023, 2:15 p.m. UTC | #1
On Fri, Jun 02, 2023 at 05:16:43AM -0700, Yi Liu wrote:
> From: Nicolin Chen <nicolinc@nvidia.com>
> 
> Previously, the detach routine was only done as part of destroy(), which
> is called by vfio_iommufd_emulated_unbind() when the device is closed, so
> all the mappings in the iopt had already been cleaned up by the time the
> call trace reached this detach() routine.
> 
> Now there is a need for a detach uAPI, which requires not only a new
> iommufd_access_detach() API but also an access->ops->unmap() call for
> cleanup. So add one.
> 
> However, leaving that unprotected opens a potential race against the
> pin_/unpin_pages() calls, which reference access->ioas->iopt. So add an
> ioas_lock to protect the iopt references.
> 
> Also, to keep iommufd_access_unpin_pages() working when it is called
> back via this unmap(), add an ioas_unpin pointer so the unpin routine is
> not affected by the "access->ioas = NULL" trick.
> 
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> Tested-by: Terrence Xu <terrence.xu@intel.com>
> Tested-by: Yanting Jiang <yanting.jiang@intel.com>
> Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> ---
>  drivers/iommu/iommufd/device.c          | 76 +++++++++++++++++++++++--
>  drivers/iommu/iommufd/iommufd_private.h |  2 +
>  include/linux/iommufd.h                 |  1 +
>  3 files changed, 74 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> index 96d4281bfa7c..6b4ff635c15e 100644
> --- a/drivers/iommu/iommufd/device.c
> +++ b/drivers/iommu/iommufd/device.c
> @@ -486,6 +486,7 @@ iommufd_access_create(struct iommufd_ctx *ictx,
>  	iommufd_ctx_get(ictx);
>  	iommufd_object_finalize(ictx, &access->obj);
>  	*id = access->obj.id;
> +	mutex_init(&access->ioas_lock);
>  	return access;
>  }
>  EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);
> @@ -505,26 +506,66 @@ void iommufd_access_destroy(struct iommufd_access *access)
>  }
>  EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
>  
> +static void __iommufd_access_detach(struct iommufd_access *access)
> +{
> +	struct iommufd_ioas *cur_ioas = access->ioas;
> +
> +	lockdep_assert_held(&access->ioas_lock);
> +	/*
> +	 * Set ioas to NULL to block any further iommufd_access_pin_pages().
> +	 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
> +	 */
> +	access->ioas = NULL;
> +
> +	if (access->ops->unmap) {
> +		mutex_unlock(&access->ioas_lock);
> +		access->ops->unmap(access->data, 0, ULONG_MAX);
> +		mutex_lock(&access->ioas_lock);
> +	}
> +	iopt_remove_access(&cur_ioas->iopt, access);
> +	refcount_dec(&cur_ioas->obj.users);
> +}
> +
> +void iommufd_access_detach(struct iommufd_access *access)
> +{
> +	mutex_lock(&access->ioas_lock);
> +	if (WARN_ON(!access->ioas))
> +		goto out;
> +	__iommufd_access_detach(access);
> +out:
> +	access->ioas_unpin = NULL;
> +	mutex_unlock(&access->ioas_lock);
> +}
> +EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD);

There is not really any benefit to making this two functions.

> int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
> {
[..]
> 	if (access->ioas) {

if (access->ioas || access->ioas_unpin) {

But I wonder if it should be a WARN_ON? Does VFIO protect against
the userspace racing detach and attach, or do we expect to do it here?
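
i.e. if VFIO already serializes these, something like (untested):

int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
{
	struct iommufd_ioas *new_ioas;
	int rc = 0;

	mutex_lock(&access->ioas_lock);
	/* Also catches a detach that hasn't cleared ioas_unpin yet */
	if (WARN_ON(access->ioas || access->ioas_unpin)) {
		mutex_unlock(&access->ioas_lock);
		return -EINVAL;
	}
[..]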

> @@ -579,8 +620,8 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
>  void iommufd_access_unpin_pages(struct iommufd_access *access,
>  				unsigned long iova, unsigned long length)
>  {
> -	struct io_pagetable *iopt = &access->ioas->iopt;
>  	struct iopt_area_contig_iter iter;
> +	struct io_pagetable *iopt;
>  	unsigned long last_iova;
>  	struct iopt_area *area;
>  
> @@ -588,6 +629,13 @@ void iommufd_access_unpin_pages(struct iommufd_access *access,
>  	    WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
>  		return;
>  
> +	mutex_lock(&access->ioas_lock);
> +	if (!access->ioas_unpin) {

This should be WARN_ON(), the driver has done something wrong if we
call this after the access has been detached.

Jason
Nicolin Chen June 25, 2023, 6:26 p.m. UTC | #2
On Fri, Jun 23, 2023 at 11:15:40AM -0300, Jason Gunthorpe wrote:

> > +static void __iommufd_access_detach(struct iommufd_access *access)
> > +{
> > +	struct iommufd_ioas *cur_ioas = access->ioas;
> > +
> > +	lockdep_assert_held(&access->ioas_lock);
> > +	/*
> > +	 * Set ioas to NULL to block any further iommufd_access_pin_pages().
> > +	 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
> > +	 */
> > +	access->ioas = NULL;
> > +
> > +	if (access->ops->unmap) {
> > +		mutex_unlock(&access->ioas_lock);
> > +		access->ops->unmap(access->data, 0, ULONG_MAX);
> > +		mutex_lock(&access->ioas_lock);
> > +	}
> > +	iopt_remove_access(&cur_ioas->iopt, access);
> > +	refcount_dec(&cur_ioas->obj.users);
> > +}
> > +
> > +void iommufd_access_detach(struct iommufd_access *access)
> > +{
> > +	mutex_lock(&access->ioas_lock);
> > +	if (WARN_ON(!access->ioas))
> > +		goto out;
> > +	__iommufd_access_detach(access);
> > +out:
> > +	access->ioas_unpin = NULL;
> > +	mutex_unlock(&access->ioas_lock);
> > +}
> > +EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD);
> 
> There is not really any benefit to making this two functions.

__iommufd_access_detach() will be used by replace() in the following
series. Still, let's merge them here for now, and I'll add
__iommufd_access_detach() back in the replace series.
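
Merged, it would look roughly like this (sketch, to be retested):

void iommufd_access_detach(struct iommufd_access *access)
{
	struct iommufd_ioas *cur_ioas;

	mutex_lock(&access->ioas_lock);
	cur_ioas = access->ioas;
	if (WARN_ON(!cur_ioas))
		goto out;
	/*
	 * Set ioas to NULL to block any further iommufd_access_pin_pages().
	 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
	 */
	access->ioas = NULL;

	if (access->ops->unmap) {
		mutex_unlock(&access->ioas_lock);
		access->ops->unmap(access->data, 0, ULONG_MAX);
		mutex_lock(&access->ioas_lock);
	}
	iopt_remove_access(&cur_ioas->iopt, access);
	refcount_dec(&cur_ioas->obj.users);
out:
	access->ioas_unpin = NULL;
	mutex_unlock(&access->ioas_lock);
}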

> > int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
> > {
> [..]
> > 	if (access->ioas) {
> 
> if (access->ioas || access->ioas_unpin) {

Ack.

> But I wonder if it should be a WARN_ON? Does VFIO protect against
> the userspace racing detach and attach, or do we expect to do it here?

VFIO has a vdev->iommufd_attached flag to prevent a double call of this
function, and the detach and attach paths there are also protected by a
mutex. So it should be a WARN_ON here, I think.
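
Roughly, on the VFIO side it is shaped like this (simplified sketch from
memory, not verbatim; "vdev_lock" stands in for whatever lock the
attach/detach paths actually hold):

	/* attach path */
	mutex_lock(&vdev_lock);			/* serializes attach vs. detach */
	if (vdev->iommufd_attached) {
		rc = -EBUSY;			/* blocks a double attach */
	} else {
		rc = iommufd_access_attach(vdev->iommufd_access, ioas_id);
		if (!rc)
			vdev->iommufd_attached = true;
	}
	mutex_unlock(&vdev_lock);

	/* detach path */
	mutex_lock(&vdev_lock);
	if (vdev->iommufd_attached) {
		iommufd_access_detach(vdev->iommufd_access);
		vdev->iommufd_attached = false;
	}
	mutex_unlock(&vdev_lock);

So iommufd_access_attach() should never see a stale access->ioas from a
racing detach, and a WARN_ON there would only fire on a driver bug.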

> > @@ -579,8 +620,8 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
> >  void iommufd_access_unpin_pages(struct iommufd_access *access,
> >  				unsigned long iova, unsigned long length)
> >  {
> > -	struct io_pagetable *iopt = &access->ioas->iopt;
> >  	struct iopt_area_contig_iter iter;
> > +	struct io_pagetable *iopt;
> >  	unsigned long last_iova;
> >  	struct iopt_area *area;
> >  
> > @@ -588,6 +629,13 @@ void iommufd_access_unpin_pages(struct iommufd_access *access,
> >  	    WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
> >  		return;
> >  
> > +	mutex_lock(&access->ioas_lock);
> > +	if (!access->ioas_unpin) {
> 
> This should be WARN_ON(), the driver has done something wrong if we
> call this after the access has been detached.

Ack. Also adding a comment for that:
+       /*
+        * The driver must be doing something wrong if it calls this before an
+        * iommufd_access_attach() or after an iommufd_access_detach().
+        */
+       if (WARN_ON(!access->ioas_unpin)) {

Thanks
Nic

Patch

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 96d4281bfa7c..6b4ff635c15e 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -486,6 +486,7 @@  iommufd_access_create(struct iommufd_ctx *ictx,
 	iommufd_ctx_get(ictx);
 	iommufd_object_finalize(ictx, &access->obj);
 	*id = access->obj.id;
+	mutex_init(&access->ioas_lock);
 	return access;
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);
@@ -505,26 +506,66 @@  void iommufd_access_destroy(struct iommufd_access *access)
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
 
+static void __iommufd_access_detach(struct iommufd_access *access)
+{
+	struct iommufd_ioas *cur_ioas = access->ioas;
+
+	lockdep_assert_held(&access->ioas_lock);
+	/*
+	 * Set ioas to NULL to block any further iommufd_access_pin_pages().
+	 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
+	 */
+	access->ioas = NULL;
+
+	if (access->ops->unmap) {
+		mutex_unlock(&access->ioas_lock);
+		access->ops->unmap(access->data, 0, ULONG_MAX);
+		mutex_lock(&access->ioas_lock);
+	}
+	iopt_remove_access(&cur_ioas->iopt, access);
+	refcount_dec(&cur_ioas->obj.users);
+}
+
+void iommufd_access_detach(struct iommufd_access *access)
+{
+	mutex_lock(&access->ioas_lock);
+	if (WARN_ON(!access->ioas))
+		goto out;
+	__iommufd_access_detach(access);
+out:
+	access->ioas_unpin = NULL;
+	mutex_unlock(&access->ioas_lock);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD);
+
 int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
 {
 	struct iommufd_ioas *new_ioas;
 	int rc = 0;
 
-	if (access->ioas)
+	mutex_lock(&access->ioas_lock);
+	if (access->ioas) {
+		mutex_unlock(&access->ioas_lock);
 		return -EINVAL;
+	}
 
 	new_ioas = iommufd_get_ioas(access->ictx, ioas_id);
-	if (IS_ERR(new_ioas))
+	if (IS_ERR(new_ioas)) {
+		mutex_unlock(&access->ioas_lock);
 		return PTR_ERR(new_ioas);
+	}
 
 	rc = iopt_add_access(&new_ioas->iopt, access);
 	if (rc) {
+		mutex_unlock(&access->ioas_lock);
 		iommufd_put_object(&new_ioas->obj);
 		return rc;
 	}
 	iommufd_ref_to_users(&new_ioas->obj);
 
 	access->ioas = new_ioas;
+	access->ioas_unpin = new_ioas;
+	mutex_unlock(&access->ioas_lock);
 	return 0;
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD);
@@ -579,8 +620,8 @@  void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
 void iommufd_access_unpin_pages(struct iommufd_access *access,
 				unsigned long iova, unsigned long length)
 {
-	struct io_pagetable *iopt = &access->ioas->iopt;
 	struct iopt_area_contig_iter iter;
+	struct io_pagetable *iopt;
 	unsigned long last_iova;
 	struct iopt_area *area;
 
@@ -588,6 +629,13 @@  void iommufd_access_unpin_pages(struct iommufd_access *access,
 	    WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
 		return;
 
+	mutex_lock(&access->ioas_lock);
+	if (!access->ioas_unpin) {
+		mutex_unlock(&access->ioas_lock);
+		return;
+	}
+	iopt = &access->ioas_unpin->iopt;
+
 	down_read(&iopt->iova_rwsem);
 	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
 		iopt_area_remove_access(
@@ -597,6 +645,7 @@  void iommufd_access_unpin_pages(struct iommufd_access *access,
 				min(last_iova, iopt_area_last_iova(area))));
 	up_read(&iopt->iova_rwsem);
 	WARN_ON(!iopt_area_contig_done(&iter));
+	mutex_unlock(&access->ioas_lock);
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD);
 
@@ -642,8 +691,8 @@  int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
 			     unsigned long length, struct page **out_pages,
 			     unsigned int flags)
 {
-	struct io_pagetable *iopt = &access->ioas->iopt;
 	struct iopt_area_contig_iter iter;
+	struct io_pagetable *iopt;
 	unsigned long last_iova;
 	struct iopt_area *area;
 	int rc;
@@ -658,6 +707,13 @@  int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
 	if (check_add_overflow(iova, length - 1, &last_iova))
 		return -EOVERFLOW;
 
+	mutex_lock(&access->ioas_lock);
+	if (!access->ioas) {
+		mutex_unlock(&access->ioas_lock);
+		return -ENOENT;
+	}
+	iopt = &access->ioas->iopt;
+
 	down_read(&iopt->iova_rwsem);
 	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
 		unsigned long last = min(last_iova, iopt_area_last_iova(area));
@@ -688,6 +744,7 @@  int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
 	}
 
 	up_read(&iopt->iova_rwsem);
+	mutex_unlock(&access->ioas_lock);
 	return 0;
 
 err_remove:
@@ -702,6 +759,7 @@  int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
 						  iopt_area_last_iova(area))));
 	}
 	up_read(&iopt->iova_rwsem);
+	mutex_unlock(&access->ioas_lock);
 	return rc;
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);
@@ -721,8 +779,8 @@  EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);
 int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
 		      void *data, size_t length, unsigned int flags)
 {
-	struct io_pagetable *iopt = &access->ioas->iopt;
 	struct iopt_area_contig_iter iter;
+	struct io_pagetable *iopt;
 	struct iopt_area *area;
 	unsigned long last_iova;
 	int rc;
@@ -732,6 +790,13 @@  int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
 	if (check_add_overflow(iova, length - 1, &last_iova))
 		return -EOVERFLOW;
 
+	mutex_lock(&access->ioas_lock);
+	if (!access->ioas) {
+		mutex_unlock(&access->ioas_lock);
+		return -ENOENT;
+	}
+	iopt = &access->ioas->iopt;
+
 	down_read(&iopt->iova_rwsem);
 	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
 		unsigned long last = min(last_iova, iopt_area_last_iova(area));
@@ -758,6 +823,7 @@  int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
 		rc = -ENOENT;
 err_out:
 	up_read(&iopt->iova_rwsem);
+	mutex_unlock(&access->ioas_lock);
 	return rc;
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD);
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index b38e67d1988b..3dcaf86aab97 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -285,6 +285,8 @@  struct iommufd_access {
 	struct iommufd_object obj;
 	struct iommufd_ctx *ictx;
 	struct iommufd_ioas *ioas;
+	struct iommufd_ioas *ioas_unpin;
+	struct mutex ioas_lock;
 	const struct iommufd_access_ops *ops;
 	void *data;
 	unsigned long iova_alignment;
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 33933b0f95fc..c8508daf9bd9 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -48,6 +48,7 @@  iommufd_access_create(struct iommufd_ctx *ictx,
 		      const struct iommufd_access_ops *ops, void *data, u32 *id);
 void iommufd_access_destroy(struct iommufd_access *access);
 int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id);
+void iommufd_access_detach(struct iommufd_access *access);
 
 void iommufd_ctx_get(struct iommufd_ctx *ictx);