diff mbox series

vfio/pci: Fix vf_token mechanism when device-specific VF drivers are used

Message ID 0-v1-466f18ca49f5+26f-vfio_vf_token_jgg@nvidia.com (mailing list archive)
State New, archived
Headers show
Series vfio/pci: Fix vf_token mechanism when device-specific VF drivers are used | expand

Commit Message

Jason Gunthorpe April 8, 2022, 3:10 p.m. UTC
get_pf_vdev() tries to check if a PF is a VFIO PF by looking at the driver:

       if (pci_dev_driver(physfn) != pci_dev_driver(vdev->pdev)) {

However now that we have multiple VF and PF drivers this is no longer
reliable.

This means that security tests realted to vf_token can be skipped by
mixing and matching different VFIO PCI drivers.

Instead of trying to use the driver core to find the PF devices maintain a
linked list of all PF vfio_pci_core_device's that we have called
pci_enable_sriov() on.

When registering a VF just search the list to see if the PF is present and
record the match permanently in the struct. PCI core locking prevents a PF
from passing pci_disable_sriov() while VF drivers are attached so the VFIO
owned PF becomes a static property of the VF.

In common cases where vfio does not own the PF the global list remains
empty and the VF's pointer is statically NULL.

This also fixes a lockdep splat from recursive locking of the
vfio_group::device_lock between vfio_device_get_from_name() and
vfio_device_get_from_dev(). If the VF and PF share the same group this
would deadlock.

Fixes: ff53edf6d6ab ("vfio/pci: Split the pci_driver code out of vfio_pci_core.c")
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/pci/vfio_pci_core.c | 109 ++++++++++++++++---------------
 include/linux/vfio_pci_core.h    |   2 +
 2 files changed, 60 insertions(+), 51 deletions(-)

This is probably for the rc cycle since it only became a problem when the
migration drivers were merged.


base-commit: 3123109284176b1532874591f7c81f3837bbdc17

Comments

Alex Williamson April 8, 2022, 4:53 p.m. UTC | #1
On Fri,  8 Apr 2022 12:10:15 -0300
Jason Gunthorpe <jgg@nvidia.com> wrote:

> get_pf_vdev() tries to check if a PF is a VFIO PF by looking at the driver:
> 
>        if (pci_dev_driver(physfn) != pci_dev_driver(vdev->pdev)) {
> 
> However now that we have multiple VF and PF drivers this is no longer
> reliable.
> 
> This means that security tests realted to vf_token can be skipped by
> mixing and matching different VFIO PCI drivers.
> 
> Instead of trying to use the driver core to find the PF devices maintain a
> linked list of all PF vfio_pci_core_device's that we have called
> pci_enable_sriov() on.
> 
> When registering a VF just search the list to see if the PF is present and
> record the match permanently in the struct. PCI core locking prevents a PF
> from passing pci_disable_sriov() while VF drivers are attached so the VFIO
> owned PF becomes a static property of the VF.
> 
> In common cases where vfio does not own the PF the global list remains
> empty and the VF's pointer is statically NULL.
> 
> This also fixes a lockdep splat from recursive locking of the
> vfio_group::device_lock between vfio_device_get_from_name() and
> vfio_device_get_from_dev(). If the VF and PF share the same group this
> would deadlock.
> 
> Fixes: ff53edf6d6ab ("vfio/pci: Split the pci_driver code out of vfio_pci_core.c")
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/vfio/pci/vfio_pci_core.c | 109 ++++++++++++++++---------------
>  include/linux/vfio_pci_core.h    |   2 +
>  2 files changed, 60 insertions(+), 51 deletions(-)
> 
> This is probably for the rc cycle since it only became a problem when the
> migration drivers were merged.
...  
> @@ -1942,14 +1935,28 @@ int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
>  	if (!device)
>  		return -ENODEV;
>  
> -	if (nr_virtfn == 0)
> -		pci_disable_sriov(pdev);
> -	else
> +	vdev = container_of(device, struct vfio_pci_core_device, vdev);
> +
> +	if (nr_virtfn) {
> +		mutex_lock(&vfio_pci_sriov_pfs_mutex);
> +		list_add_tail(&vdev->sriov_pfs_item, &vfio_pci_sriov_pfs);
> +		mutex_unlock(&vfio_pci_sriov_pfs_mutex);
>  		ret = pci_enable_sriov(pdev, nr_virtfn);
> +		if (ret)
> +			goto out_del;
> +		ret = nr_virtfn;
> +		goto out_put;
> +	}

If a user were to do:

	# echo 1 > sriov_numvfs
	# echo 2 > sriov_numvfs

Don't we have a problem that we've botched the list and the PF still
exists with 1 VF?  Thanks,

Alex

>  
> +	pci_disable_sriov(pdev);
> +
> +out_del:
> +	mutex_lock(&vfio_pci_sriov_pfs_mutex);
> +	list_del_init(&vdev->sriov_pfs_item);
> +	mutex_unlock(&vfio_pci_sriov_pfs_mutex);
> +out_put:
>  	vfio_device_put(device);
> -
> -	return ret < 0 ? ret : nr_virtfn;
> +	return ret;
>  }
>  EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure);
Jason Gunthorpe April 8, 2022, 5:08 p.m. UTC | #2
On Fri, Apr 08, 2022 at 10:53:05AM -0600, Alex Williamson wrote:
> On Fri,  8 Apr 2022 12:10:15 -0300
> Jason Gunthorpe <jgg@nvidia.com> wrote:
> 
> > get_pf_vdev() tries to check if a PF is a VFIO PF by looking at the driver:
> > 
> >        if (pci_dev_driver(physfn) != pci_dev_driver(vdev->pdev)) {
> > 
> > However now that we have multiple VF and PF drivers this is no longer
> > reliable.
> > 
> > This means that security tests realted to vf_token can be skipped by
> > mixing and matching different VFIO PCI drivers.
> > 
> > Instead of trying to use the driver core to find the PF devices maintain a
> > linked list of all PF vfio_pci_core_device's that we have called
> > pci_enable_sriov() on.
> > 
> > When registering a VF just search the list to see if the PF is present and
> > record the match permanently in the struct. PCI core locking prevents a PF
> > from passing pci_disable_sriov() while VF drivers are attached so the VFIO
> > owned PF becomes a static property of the VF.
> > 
> > In common cases where vfio does not own the PF the global list remains
> > empty and the VF's pointer is statically NULL.
> > 
> > This also fixes a lockdep splat from recursive locking of the
> > vfio_group::device_lock between vfio_device_get_from_name() and
> > vfio_device_get_from_dev(). If the VF and PF share the same group this
> > would deadlock.
> > 
> > Fixes: ff53edf6d6ab ("vfio/pci: Split the pci_driver code out of vfio_pci_core.c")
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> >  drivers/vfio/pci/vfio_pci_core.c | 109 ++++++++++++++++---------------
> >  include/linux/vfio_pci_core.h    |   2 +
> >  2 files changed, 60 insertions(+), 51 deletions(-)
> > 
> > This is probably for the rc cycle since it only became a problem when the
> > migration drivers were merged.
> ...  
> > @@ -1942,14 +1935,28 @@ int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
> >  	if (!device)
> >  		return -ENODEV;
> >  
> > -	if (nr_virtfn == 0)
> > -		pci_disable_sriov(pdev);
> > -	else
> > +	vdev = container_of(device, struct vfio_pci_core_device, vdev);
> > +
> > +	if (nr_virtfn) {
> > +		mutex_lock(&vfio_pci_sriov_pfs_mutex);
> > +		list_add_tail(&vdev->sriov_pfs_item, &vfio_pci_sriov_pfs);
> > +		mutex_unlock(&vfio_pci_sriov_pfs_mutex);
> >  		ret = pci_enable_sriov(pdev, nr_virtfn);
> > +		if (ret)
> > +			goto out_del;
> > +		ret = nr_virtfn;
> > +		goto out_put;
> > +	}
> 
> If a user were to do:
> 
> 	# echo 1 > sriov_numvfs
> 	# echo 2 > sriov_numvfs
> 
> Don't we have a problem that we've botched the list and the PF still
> exists with 1 VF?  Thanks,

Yes, that is a mistake. We need to do the list_add before the
pci_enable_sriov because the probe() will inspect the
vfio_pci_sriov_pfs list.

But since pci_enable_sriov can only be called once we can just gaurd
directly against that.

I fixed it like this:

		mutex_lock(&vfio_pci_sriov_pfs_mutex);
		/*
		 * The thread that adds the vdev to the list is the only thread
		 * that gets to call pci_enable_sriov() and we will only allow
		 * it to be called once without going through
		 * pci_disable_sriov()
		 */
		if (!list_empty(&vdev->sriov_pfs_item)) {
			ret = -EINVAL;
			goto out_unlock;
		}
		list_add_tail(&vdev->sriov_pfs_item, &vfio_pci_sriov_pfs);
		mutex_unlock(&vfio_pci_sriov_pfs_mutex);
		ret = pci_enable_sriov(pdev, nr_virtfn);
		if (ret)
			goto out_del;

Let me know if you have any other notes and I will fix them before
resending

Thanks,
Jason
Alex Williamson April 8, 2022, 5:17 p.m. UTC | #3
On Fri, 8 Apr 2022 14:08:39 -0300
Jason Gunthorpe <jgg@nvidia.com> wrote:

> On Fri, Apr 08, 2022 at 10:53:05AM -0600, Alex Williamson wrote:
> > On Fri,  8 Apr 2022 12:10:15 -0300
> > Jason Gunthorpe <jgg@nvidia.com> wrote:
> >   
> > > get_pf_vdev() tries to check if a PF is a VFIO PF by looking at the driver:
> > > 
> > >        if (pci_dev_driver(physfn) != pci_dev_driver(vdev->pdev)) {
> > > 
> > > However now that we have multiple VF and PF drivers this is no longer
> > > reliable.
> > > 
> > > This means that security tests realted to vf_token can be skipped by
> > > mixing and matching different VFIO PCI drivers.
> > > 
> > > Instead of trying to use the driver core to find the PF devices maintain a
> > > linked list of all PF vfio_pci_core_device's that we have called
> > > pci_enable_sriov() on.
> > > 
> > > When registering a VF just search the list to see if the PF is present and
> > > record the match permanently in the struct. PCI core locking prevents a PF
> > > from passing pci_disable_sriov() while VF drivers are attached so the VFIO
> > > owned PF becomes a static property of the VF.
> > > 
> > > In common cases where vfio does not own the PF the global list remains
> > > empty and the VF's pointer is statically NULL.
> > > 
> > > This also fixes a lockdep splat from recursive locking of the
> > > vfio_group::device_lock between vfio_device_get_from_name() and
> > > vfio_device_get_from_dev(). If the VF and PF share the same group this
> > > would deadlock.
> > > 
> > > Fixes: ff53edf6d6ab ("vfio/pci: Split the pci_driver code out of vfio_pci_core.c")
> > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > >  drivers/vfio/pci/vfio_pci_core.c | 109 ++++++++++++++++---------------
> > >  include/linux/vfio_pci_core.h    |   2 +
> > >  2 files changed, 60 insertions(+), 51 deletions(-)
> > > 
> > > This is probably for the rc cycle since it only became a problem when the
> > > migration drivers were merged.  
> > ...    
> > > @@ -1942,14 +1935,28 @@ int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
> > >  	if (!device)
> > >  		return -ENODEV;
> > >  
> > > -	if (nr_virtfn == 0)
> > > -		pci_disable_sriov(pdev);
> > > -	else
> > > +	vdev = container_of(device, struct vfio_pci_core_device, vdev);
> > > +
> > > +	if (nr_virtfn) {
> > > +		mutex_lock(&vfio_pci_sriov_pfs_mutex);
> > > +		list_add_tail(&vdev->sriov_pfs_item, &vfio_pci_sriov_pfs);
> > > +		mutex_unlock(&vfio_pci_sriov_pfs_mutex);
> > >  		ret = pci_enable_sriov(pdev, nr_virtfn);
> > > +		if (ret)
> > > +			goto out_del;
> > > +		ret = nr_virtfn;
> > > +		goto out_put;
> > > +	}  
> > 
> > If a user were to do:
> > 
> > 	# echo 1 > sriov_numvfs
> > 	# echo 2 > sriov_numvfs
> > 
> > Don't we have a problem that we've botched the list and the PF still
> > exists with 1 VF?  Thanks,  
> 
> Yes, that is a mistake. We need to do the list_add before the
> pci_enable_sriov because the probe() will inspect the
> vfio_pci_sriov_pfs list.
> 
> But since pci_enable_sriov can only be called once we can just gaurd
> directly against that.
> 
> I fixed it like this:
> 
> 		mutex_lock(&vfio_pci_sriov_pfs_mutex);
> 		/*
> 		 * The thread that adds the vdev to the list is the only thread
> 		 * that gets to call pci_enable_sriov() and we will only allow
> 		 * it to be called once without going through
> 		 * pci_disable_sriov()
> 		 */
> 		if (!list_empty(&vdev->sriov_pfs_item)) {
> 			ret = -EINVAL;
> 			goto out_unlock;
> 		}
> 		list_add_tail(&vdev->sriov_pfs_item, &vfio_pci_sriov_pfs);
> 		mutex_unlock(&vfio_pci_sriov_pfs_mutex);
> 		ret = pci_enable_sriov(pdev, nr_virtfn);
> 		if (ret)
> 			goto out_del;
> 
> Let me know if you have any other notes and I will fix them before
> resending

Nope, that's all I spotted.  Looks like a reasonable fix.  Thanks,

Alex
diff mbox series

Patch

diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index b7bb16f92ac628..156efe33427e13 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -36,6 +36,10 @@  static bool nointxmask;
 static bool disable_vga;
 static bool disable_idle_d3;
 
+/* List of PF's that vfio_pci_core_sriov_configure() has been called on */
+static DEFINE_MUTEX(vfio_pci_sriov_pfs_mutex);
+static LIST_HEAD(vfio_pci_sriov_pfs);
+
 static inline bool vfio_vga_disabled(void)
 {
 #ifdef CONFIG_VFIO_PCI_VGA
@@ -434,47 +438,17 @@  void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
 }
 EXPORT_SYMBOL_GPL(vfio_pci_core_disable);
 
-static struct vfio_pci_core_device *get_pf_vdev(struct vfio_pci_core_device *vdev)
-{
-	struct pci_dev *physfn = pci_physfn(vdev->pdev);
-	struct vfio_device *pf_dev;
-
-	if (!vdev->pdev->is_virtfn)
-		return NULL;
-
-	pf_dev = vfio_device_get_from_dev(&physfn->dev);
-	if (!pf_dev)
-		return NULL;
-
-	if (pci_dev_driver(physfn) != pci_dev_driver(vdev->pdev)) {
-		vfio_device_put(pf_dev);
-		return NULL;
-	}
-
-	return container_of(pf_dev, struct vfio_pci_core_device, vdev);
-}
-
-static void vfio_pci_vf_token_user_add(struct vfio_pci_core_device *vdev, int val)
-{
-	struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev);
-
-	if (!pf_vdev)
-		return;
-
-	mutex_lock(&pf_vdev->vf_token->lock);
-	pf_vdev->vf_token->users += val;
-	WARN_ON(pf_vdev->vf_token->users < 0);
-	mutex_unlock(&pf_vdev->vf_token->lock);
-
-	vfio_device_put(&pf_vdev->vdev);
-}
-
 void vfio_pci_core_close_device(struct vfio_device *core_vdev)
 {
 	struct vfio_pci_core_device *vdev =
 		container_of(core_vdev, struct vfio_pci_core_device, vdev);
 
-	vfio_pci_vf_token_user_add(vdev, -1);
+	if (vdev->sriov_pf_core_dev) {
+		mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock);
+		WARN_ON(!vdev->sriov_pf_core_dev->vf_token->users);
+		vdev->sriov_pf_core_dev->vf_token->users--;
+		mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock);
+	}
 	vfio_spapr_pci_eeh_release(vdev->pdev);
 	vfio_pci_core_disable(vdev);
 
@@ -495,7 +469,12 @@  void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev)
 {
 	vfio_pci_probe_mmaps(vdev);
 	vfio_spapr_pci_eeh_open(vdev->pdev);
-	vfio_pci_vf_token_user_add(vdev, 1);
+
+	if (vdev->sriov_pf_core_dev) {
+		mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock);
+		vdev->sriov_pf_core_dev->vf_token->users++;
+		mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock);
+	}
 }
 EXPORT_SYMBOL_GPL(vfio_pci_core_finish_enable);
 
@@ -1583,11 +1562,8 @@  static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev,
 	 *
 	 * If the VF token is provided but unused, an error is generated.
 	 */
-	if (!vdev->pdev->is_virtfn && !vdev->vf_token && !vf_token)
-		return 0; /* No VF token provided or required */
-
 	if (vdev->pdev->is_virtfn) {
-		struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev);
+		struct vfio_pci_core_device *pf_vdev = vdev->sriov_pf_core_dev;
 		bool match;
 
 		if (!pf_vdev) {
@@ -1600,7 +1576,6 @@  static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev,
 		}
 
 		if (!vf_token) {
-			vfio_device_put(&pf_vdev->vdev);
 			pci_info_ratelimited(vdev->pdev,
 				"VF token required to access device\n");
 			return -EACCES;
@@ -1610,8 +1585,6 @@  static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev,
 		match = uuid_equal(uuid, &pf_vdev->vf_token->uuid);
 		mutex_unlock(&pf_vdev->vf_token->lock);
 
-		vfio_device_put(&pf_vdev->vdev);
-
 		if (!match) {
 			pci_info_ratelimited(vdev->pdev,
 				"Incorrect VF token provided for device\n");
@@ -1732,10 +1705,28 @@  static int vfio_pci_bus_notifier(struct notifier_block *nb,
 static int vfio_pci_vf_init(struct vfio_pci_core_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
+	struct vfio_pci_core_device *cur;
+	struct pci_dev *physfn;
 	int ret;
 
-	if (!pdev->is_physfn)
+	if (!pdev->is_physfn) {
+		/*
+		 * If this VF was created by our vfio_pci_core_sriov_configure()
+		 * then we can find the PF vfio_pci_core_device now, and due to
+		 * the locking in pci_disable_sriov() it cannot change until
+		 * this VF device driver is removed.
+		 */
+		physfn = pci_physfn(vdev->pdev);
+		mutex_lock(&vfio_pci_sriov_pfs_mutex);
+		list_for_each_entry (cur, &vfio_pci_sriov_pfs, sriov_pfs_item) {
+			if (cur->pdev == physfn) {
+				vdev->sriov_pf_core_dev = cur;
+				break;
+			}
+		}
+		mutex_unlock(&vfio_pci_sriov_pfs_mutex);
 		return 0;
+	}
 
 	vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);
 	if (!vdev->vf_token)
@@ -1805,6 +1796,7 @@  void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev,
 	INIT_LIST_HEAD(&vdev->ioeventfds_list);
 	mutex_init(&vdev->vma_lock);
 	INIT_LIST_HEAD(&vdev->vma_list);
+	INIT_LIST_HEAD(&vdev->sriov_pfs_item);
 	init_rwsem(&vdev->memory_lock);
 }
 EXPORT_SYMBOL_GPL(vfio_pci_core_init_device);
@@ -1896,7 +1888,7 @@  void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
 
-	pci_disable_sriov(pdev);
+	vfio_pci_core_sriov_configure(pdev, 0);
 
 	vfio_unregister_group_dev(&vdev->vdev);
 
@@ -1935,6 +1927,7 @@  EXPORT_SYMBOL_GPL(vfio_pci_core_aer_err_detected);
 
 int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
 {
+	struct vfio_pci_core_device *vdev;
 	struct vfio_device *device;
 	int ret = 0;
 
@@ -1942,14 +1935,28 @@  int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
 	if (!device)
 		return -ENODEV;
 
-	if (nr_virtfn == 0)
-		pci_disable_sriov(pdev);
-	else
+	vdev = container_of(device, struct vfio_pci_core_device, vdev);
+
+	if (nr_virtfn) {
+		mutex_lock(&vfio_pci_sriov_pfs_mutex);
+		list_add_tail(&vdev->sriov_pfs_item, &vfio_pci_sriov_pfs);
+		mutex_unlock(&vfio_pci_sriov_pfs_mutex);
 		ret = pci_enable_sriov(pdev, nr_virtfn);
+		if (ret)
+			goto out_del;
+		ret = nr_virtfn;
+		goto out_put;
+	}
 
+	pci_disable_sriov(pdev);
+
+out_del:
+	mutex_lock(&vfio_pci_sriov_pfs_mutex);
+	list_del_init(&vdev->sriov_pfs_item);
+	mutex_unlock(&vfio_pci_sriov_pfs_mutex);
+out_put:
 	vfio_device_put(device);
-
-	return ret < 0 ? ret : nr_virtfn;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure);
 
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 74a4a0f17b28bd..48f2dd3c568c83 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -133,6 +133,8 @@  struct vfio_pci_core_device {
 	struct mutex		ioeventfds_lock;
 	struct list_head	ioeventfds_list;
 	struct vfio_pci_vf_token	*vf_token;
+	struct list_head		sriov_pfs_item;
+	struct vfio_pci_core_device	*sriov_pf_core_dev;
 	struct notifier_block	nb;
 	struct mutex		vma_lock;
 	struct list_head	vma_list;