diff mbox series

[2/4] vfio: Move the sanity check of the group to vfio_create_group()

Message ID 2-v1-ef00ffecea52+2cb-iommu_group_lifetime_jgg@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Fix splats releated to using the iommu_group after destroying devices | expand

Commit Message

Jason Gunthorpe Sept. 8, 2022, 6:44 p.m. UTC
__vfio_register_dev() has a bit of code to sanity check if an (existing)
group is not corrupted by having two copies of the same struct device in
it. This should be impossible.

It then has some complicated error unwind to uncreate the group.

Instead check if the existing group is sane at the same time we locate
it. If a bug is found then there is no error unwind, just simply fail
allocation.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/vfio_main.c | 79 ++++++++++++++++++----------------------
 1 file changed, 36 insertions(+), 43 deletions(-)

Comments

Alex Williamson Sept. 22, 2022, 7:10 p.m. UTC | #1
On Thu,  8 Sep 2022 15:44:59 -0300
Jason Gunthorpe <jgg@nvidia.com> wrote:

> __vfio_register_dev() has a bit of code to sanity check if an (existing)
> group is not corrupted by having two copies of the same struct device in
> it. This should be impossible.
> 
> It then has some complicated error unwind to uncreate the group.
> 
> Instead check if the existing group is sane at the same time we locate
> it. If a bug is found then there is no error unwind, just simply fail
> allocation.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/vfio/vfio_main.c | 79 ++++++++++++++++++----------------------
>  1 file changed, 36 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> index 4ab13808b536e1..ba8b6bed12c7e7 100644
> --- a/drivers/vfio/vfio_main.c
> +++ b/drivers/vfio/vfio_main.c
> @@ -306,15 +306,15 @@ static void vfio_container_put(struct vfio_container *container)
>   * Group objects - create, release, get, put, search
>   */
>  static struct vfio_group *
> -__vfio_group_get_from_iommu(struct iommu_group *iommu_group)
> +vfio_group_find_from_iommu(struct iommu_group *iommu_group)
>  {
>  	struct vfio_group *group;
>  
> +	lockdep_assert_held(&vfio.group_lock);
> +
>  	list_for_each_entry(group, &vfio.group_list, vfio_next) {
> -		if (group->iommu_group == iommu_group) {
> -			vfio_group_get(group);
> +		if (group->iommu_group == iommu_group)
>  			return group;
> -		}
>  	}
>  	return NULL;
>  }
> @@ -365,11 +365,27 @@ static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
>  	return group;
>  }
>  
> +static bool vfio_group_has_device(struct vfio_group *group, struct device *dev)
> +{
> +	struct vfio_device *device;
> +
> +	mutex_lock(&group->device_lock);
> +	list_for_each_entry(device, &group->device_list, group_next) {
> +		if (device->dev == dev) {
> +			mutex_unlock(&group->device_lock);
> +			return true;
> +		}
> +	}
> +	mutex_unlock(&group->device_lock);
> +	return false;
> +}
> +
>  /*
>   * Return a struct vfio_group * for the given iommu_group. If no vfio_group
>   * already exists then create a new one.
>   */
> -static struct vfio_group *vfio_get_group(struct iommu_group *iommu_group,
> +static struct vfio_group *vfio_get_group(struct device *dev,
> +					 struct iommu_group *iommu_group,
>  					 enum vfio_group_type type)
>  {
>  	struct vfio_group *group;
> @@ -378,13 +394,20 @@ static struct vfio_group *vfio_get_group(struct iommu_group *iommu_group,
>  
>  	mutex_lock(&vfio.group_lock);
>  
> -	ret = __vfio_group_get_from_iommu(iommu_group);
> -	if (ret)
> -		goto err_unlock;
> +	ret = vfio_group_find_from_iommu(iommu_group);
> +	if (ret) {
> +		if (WARN_ON(vfio_group_has_device(ret, dev))) {
> +			ret = ERR_PTR(-EINVAL);
> +			goto out_unlock;
> +		}

This still looks racy.  We only know within vfio_group_has_device() that
the device is not present in the group, what prevents a race between
here and when we finally do add it to group->device_list?  We can't
make any guarantees if we drop group->device_lock between test and add.

The semantics of vfio_get_group() are also rather strange, 'return a
vfio_group for this iommu_group, but make sure it doesn't include this
device' :-\  Thanks,

Alex
Jason Gunthorpe Sept. 22, 2022, 7:36 p.m. UTC | #2
On Thu, Sep 22, 2022 at 01:10:50PM -0600, Alex Williamson wrote:
> > @@ -378,13 +394,20 @@ static struct vfio_group *vfio_get_group(struct iommu_group *iommu_group,
> >  
> >  	mutex_lock(&vfio.group_lock);
> >  
> > -	ret = __vfio_group_get_from_iommu(iommu_group);
> > -	if (ret)
> > -		goto err_unlock;
> > +	ret = vfio_group_find_from_iommu(iommu_group);
> > +	if (ret) {
> > +		if (WARN_ON(vfio_group_has_device(ret, dev))) {
> > +			ret = ERR_PTR(-EINVAL);
> > +			goto out_unlock;
> > +		}
> 
> This still looks racy.  We only know within vfio_group_has_device() that
> the device is not present in the group, what prevents a race between
> here and when we finally do add it to group->device_list?

This is a condition which is defined to be impossible and by
auditing I've checked it can't happen.

There is no race in the sense that this can't actually happen, if it
does happen it means the group is corrupted. At that point reasoning
about locks and such goes out the window too - eg it might be
corrupted because of bad locking.

When it comes to self-debugging tests we often have these
"races", eg in the destroy path we do:

	WARN_ON(!list_empty(&group->device_list));

Which doesn't hold the appropriate locks either.

The point is just to detect that group has been corrupted at a point
in time in hopes of guarding against a future kernel bug.

> The semantics of vfio_get_group() are also rather strange, 'return a
> vfio_group for this iommu_group, but make sure it doesn't include this
> device' :-\  Thanks,

I think of it as "return a group and also do sanity checks that the
returned group has not been corrupted"

I don't like the name of this function but couldn't figure a better
one. It is something like "find or create a group for a device which
we know doesn't already have a group"

Jason
Alex Williamson Sept. 22, 2022, 9:23 p.m. UTC | #3
On Thu, 22 Sep 2022 16:36:14 -0300
Jason Gunthorpe <jgg@nvidia.com> wrote:

> On Thu, Sep 22, 2022 at 01:10:50PM -0600, Alex Williamson wrote:
> > The semantics of vfio_get_group() are also rather strange, 'return a
> > vfio_group for this iommu_group, but make sure it doesn't include this
> > device' :-\  Thanks,  
> 
> I think of it as "return a group and also do sanity checks that the
> returned group has not been corrupted"
> 
> I don't like the name of this function but couldn't figure a better
> one. It is something like "find or create a group for a device which
> we know doesn't already have a group"

Well, we don't really need to have this behavior, we could choose to
implement the first two patches with the caller holding the
group_lock.  Only one of the callers needs the duplicate test, no-iommu
creates its own iommu_group and therefore cannot have an existing
device.  I think patches 1 & 2 would look like below*, with patch 3
simply moving the change from vfio_group_get() to refcount_inc() into
the equivalent place in vfio_group_find_of_alloc().  Thanks,

Alex

*only compile tested

---
 drivers/vfio/vfio_main.c |   33 ++++++++++-----------------------
 1 file changed, 10 insertions(+), 23 deletions(-)

diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index f9d10dbcf3e6..aa33944cb759 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -310,10 +310,12 @@ static void vfio_container_put(struct vfio_container *container)
  * Group objects - create, release, get, put, search
  */
 static struct vfio_group *
-__vfio_group_get_from_iommu(struct iommu_group *iommu_group)
+vfio_group_get_from_iommu(struct iommu_group *iommu_group)
 {
 	struct vfio_group *group;
 
+	lockdep_assert_held(&vfio.group_lock);
+
 	list_for_each_entry(group, &vfio.group_list, vfio_next) {
 		if (group->iommu_group == iommu_group) {
 			vfio_group_get(group);
@@ -323,17 +325,6 @@ __vfio_group_get_from_iommu(struct iommu_group *iommu_group)
 	return NULL;
 }
 
-static struct vfio_group *
-vfio_group_get_from_iommu(struct iommu_group *iommu_group)
-{
-	struct vfio_group *group;
-
-	mutex_lock(&vfio.group_lock);
-	group = __vfio_group_get_from_iommu(iommu_group);
-	mutex_unlock(&vfio.group_lock);
-	return group;
-}
-
 static void vfio_group_release(struct device *dev)
 {
 	struct vfio_group *group = container_of(dev, struct vfio_group, dev);
@@ -387,6 +378,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
 	struct vfio_group *ret;
 	int err;
 
+	lockdep_assert_held(&vfio.group_lock);
+
 	group = vfio_group_alloc(iommu_group, type);
 	if (IS_ERR(group))
 		return group;
@@ -399,26 +392,16 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
 		goto err_put;
 	}
 
-	mutex_lock(&vfio.group_lock);
-
-	/* Did we race creating this group? */
-	ret = __vfio_group_get_from_iommu(iommu_group);
-	if (ret)
-		goto err_unlock;
-
 	err = cdev_device_add(&group->cdev, &group->dev);
 	if (err) {
 		ret = ERR_PTR(err);
-		goto err_unlock;
+		goto err_put;
 	}
 
 	list_add(&group->vfio_next, &vfio.group_list);
 
-	mutex_unlock(&vfio.group_lock);
 	return group;
 
-err_unlock:
-	mutex_unlock(&vfio.group_lock);
 err_put:
 	put_device(&group->dev);
 	return ret;
@@ -609,7 +592,9 @@ static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
 	if (ret)
 		goto out_put_group;
 
+	mutex_lock(&vfio.group_lock);
 	group = vfio_create_group(iommu_group, type);
+	mutex_unlock(&vfio.group_lock);
 	if (IS_ERR(group)) {
 		ret = PTR_ERR(group);
 		goto out_remove_device;
@@ -659,9 +644,11 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 		return ERR_PTR(-EINVAL);
 	}
 
+	mutex_lock(&vfio.group_lock);
 	group = vfio_group_get_from_iommu(iommu_group);
 	if (!group)
 		group = vfio_create_group(iommu_group, VFIO_IOMMU);
+	mutex_unlock(&vfio.group_lock);
 
 	/* The vfio_group holds a reference to the iommu_group */
 	iommu_group_put(iommu_group);


---
 drivers/vfio/vfio_main.c |   58 ++++++++++++++++++++--------------------------
 1 file changed, 25 insertions(+), 33 deletions(-)

diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index aa33944cb759..4692493d386a 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -310,17 +310,15 @@ static void vfio_container_put(struct vfio_container *container)
  * Group objects - create, release, get, put, search
  */
 static struct vfio_group *
-vfio_group_get_from_iommu(struct iommu_group *iommu_group)
+vfio_group_find_from_iommu(struct iommu_group *iommu_group)
 {
 	struct vfio_group *group;
 
 	lockdep_assert_held(&vfio.group_lock);
 
 	list_for_each_entry(group, &vfio.group_list, vfio_next) {
-		if (group->iommu_group == iommu_group) {
-			vfio_group_get(group);
+		if (group->iommu_group == iommu_group)
 			return group;
-		}
 	}
 	return NULL;
 }
@@ -449,23 +447,6 @@ static bool vfio_device_try_get_registration(struct vfio_device *device)
 	return refcount_inc_not_zero(&device->refcount);
 }
 
-static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
-						 struct device *dev)
-{
-	struct vfio_device *device;
-
-	mutex_lock(&group->device_lock);
-	list_for_each_entry(device, &group->device_list, group_next) {
-		if (device->dev == dev &&
-		    vfio_device_try_get_registration(device)) {
-			mutex_unlock(&group->device_lock);
-			return device;
-		}
-	}
-	mutex_unlock(&group->device_lock);
-	return NULL;
-}
-
 /*
  * VFIO driver API
  */
@@ -609,6 +590,21 @@ static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
 	return ERR_PTR(ret);
 }
 
+static bool vfio_group_has_device(struct vfio_group *group, struct device *dev)
+{
+	struct vfio_device *device;
+
+	mutex_lock(&group->device_lock);
+	list_for_each_entry(device, &group->device_list, group_next) {
+		if (device->dev == dev) {
+			mutex_unlock(&group->device_lock);
+			return true;
+		}
+	}
+	mutex_unlock(&group->device_lock);
+	return false;
+}
+
 static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 {
 	struct iommu_group *iommu_group;
@@ -645,9 +641,15 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 	}
 
 	mutex_lock(&vfio.group_lock);
-	group = vfio_group_get_from_iommu(iommu_group);
-	if (!group)
+	group = vfio_group_find_from_iommu(iommu_group);
+	if (group) {
+		if (WARN_ON(vfio_group_has_device(group, dev)))
+			group = ERR_PTR(-EINVAL);
+		else
+			vfio_group_get(group);
+	} else {
 		group = vfio_create_group(iommu_group, VFIO_IOMMU);
+	}
 	mutex_unlock(&vfio.group_lock);
 
 	/* The vfio_group holds a reference to the iommu_group */
@@ -658,7 +660,6 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 static int __vfio_register_dev(struct vfio_device *device,
 		struct vfio_group *group)
 {
-	struct vfio_device *existing_device;
 	int ret;
 
 	if (IS_ERR(group))
@@ -671,15 +672,6 @@ static int __vfio_register_dev(struct vfio_device *device,
 	if (!device->dev_set)
 		vfio_assign_device_set(device, device);
 
-	existing_device = vfio_group_get_device(group, device->dev);
-	if (existing_device) {
-		dev_WARN(device->dev, "Device already exists on group %d\n",
-			 iommu_group_id(group->iommu_group));
-		vfio_device_put_registration(existing_device);
-		ret = -EBUSY;
-		goto err_out;
-	}
-
 	/* Our reference on group is moved to the device */
 	device->group = group;
Jason Gunthorpe Sept. 22, 2022, 11:12 p.m. UTC | #4
On Thu, Sep 22, 2022 at 03:23:38PM -0600, Alex Williamson wrote:

> Well, we don't really need to have this behavior, we could choose to
> implement the first two patches with the caller holding the
> group_lock.  Only one of the callers needs the duplicate test, no-iommu
> creates its own iommu_group and therefore cannot have an existing
> device.  I think patches 1 & 2 would look like below*, with patch 3
> simply moving the change from vfio_group_get() to refcount_inc() into
> the equivalent place in vfio_group_find_of_alloc().  Thanks,

Yeah, this is nice

I just rebased it onto Kevins series and the reason I did this patch
has evaporated so let me check it again, maybe we don't need it.

Jason
diff mbox series

Patch

diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 4ab13808b536e1..ba8b6bed12c7e7 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -306,15 +306,15 @@  static void vfio_container_put(struct vfio_container *container)
  * Group objects - create, release, get, put, search
  */
 static struct vfio_group *
-__vfio_group_get_from_iommu(struct iommu_group *iommu_group)
+vfio_group_find_from_iommu(struct iommu_group *iommu_group)
 {
 	struct vfio_group *group;
 
+	lockdep_assert_held(&vfio.group_lock);
+
 	list_for_each_entry(group, &vfio.group_list, vfio_next) {
-		if (group->iommu_group == iommu_group) {
-			vfio_group_get(group);
+		if (group->iommu_group == iommu_group)
 			return group;
-		}
 	}
 	return NULL;
 }
@@ -365,11 +365,27 @@  static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
 	return group;
 }
 
+static bool vfio_group_has_device(struct vfio_group *group, struct device *dev)
+{
+	struct vfio_device *device;
+
+	mutex_lock(&group->device_lock);
+	list_for_each_entry(device, &group->device_list, group_next) {
+		if (device->dev == dev) {
+			mutex_unlock(&group->device_lock);
+			return true;
+		}
+	}
+	mutex_unlock(&group->device_lock);
+	return false;
+}
+
 /*
  * Return a struct vfio_group * for the given iommu_group. If no vfio_group
  * already exists then create a new one.
  */
-static struct vfio_group *vfio_get_group(struct iommu_group *iommu_group,
+static struct vfio_group *vfio_get_group(struct device *dev,
+					 struct iommu_group *iommu_group,
 					 enum vfio_group_type type)
 {
 	struct vfio_group *group;
@@ -378,13 +394,20 @@  static struct vfio_group *vfio_get_group(struct iommu_group *iommu_group,
 
 	mutex_lock(&vfio.group_lock);
 
-	ret = __vfio_group_get_from_iommu(iommu_group);
-	if (ret)
-		goto err_unlock;
+	ret = vfio_group_find_from_iommu(iommu_group);
+	if (ret) {
+		if (WARN_ON(vfio_group_has_device(ret, dev))) {
+			ret = ERR_PTR(-EINVAL);
+			goto out_unlock;
+		}
+		/* Found an existing group */
+		vfio_group_get(ret);
+		goto out_unlock;
+	}
 
 	group = ret = vfio_group_alloc(iommu_group, type);
 	if (IS_ERR(ret))
-		goto err_unlock;
+		goto out_unlock;
 
 	err = dev_set_name(&group->dev, "%s%d",
 			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
@@ -397,7 +420,7 @@  static struct vfio_group *vfio_get_group(struct iommu_group *iommu_group,
 	err = cdev_device_add(&group->cdev, &group->dev);
 	if (err) {
 		ret = ERR_PTR(err);
-		goto err_unlock;
+		goto out_unlock;
 	}
 
 	list_add(&group->vfio_next, &vfio.group_list);
@@ -407,7 +430,7 @@  static struct vfio_group *vfio_get_group(struct iommu_group *iommu_group,
 
 err_put:
 	put_device(&group->dev);
-err_unlock:
+out_unlock:
 	mutex_unlock(&vfio.group_lock);
 	return ret;
 }
@@ -454,22 +477,6 @@  static bool vfio_device_try_get(struct vfio_device *device)
 	return refcount_inc_not_zero(&device->refcount);
 }
 
-static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
-						 struct device *dev)
-{
-	struct vfio_device *device;
-
-	mutex_lock(&group->device_lock);
-	list_for_each_entry(device, &group->device_list, group_next) {
-		if (device->dev == dev && vfio_device_try_get(device)) {
-			mutex_unlock(&group->device_lock);
-			return device;
-		}
-	}
-	mutex_unlock(&group->device_lock);
-	return NULL;
-}
-
 /*
  * VFIO driver API
  */
@@ -506,7 +513,7 @@  static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
 	if (ret)
 		goto out_put_group;
 
-	group = vfio_get_group(iommu_group, type);
+	group = vfio_get_group(dev, iommu_group, type);
 	if (IS_ERR(group)) {
 		ret = PTR_ERR(group);
 		goto out_remove_device;
@@ -556,7 +563,7 @@  static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 		return ERR_PTR(-EINVAL);
 	}
 
-	group = vfio_get_group(iommu_group, VFIO_IOMMU);
+	group = vfio_get_group(dev, iommu_group, VFIO_IOMMU);
 
 	/* The vfio_group holds a reference to the iommu_group */
 	iommu_group_put(iommu_group);
@@ -566,8 +573,6 @@  static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 static int __vfio_register_dev(struct vfio_device *device,
 		struct vfio_group *group)
 {
-	struct vfio_device *existing_device;
-
 	if (IS_ERR(group))
 		return PTR_ERR(group);
 
@@ -578,18 +583,6 @@  static int __vfio_register_dev(struct vfio_device *device,
 	if (!device->dev_set)
 		vfio_assign_device_set(device, device);
 
-	existing_device = vfio_group_get_device(group, device->dev);
-	if (existing_device) {
-		dev_WARN(device->dev, "Device already exists on group %d\n",
-			 iommu_group_id(group->iommu_group));
-		vfio_device_put(existing_device);
-		if (group->type == VFIO_NO_IOMMU ||
-		    group->type == VFIO_EMULATED_IOMMU)
-			iommu_group_remove_device(device->dev);
-		vfio_group_put(group);
-		return -EBUSY;
-	}
-
 	/* Our reference on group is moved to the device */
 	device->group = group;