diff mbox series

[05/10] vfio/pci: Use vfio_init/register/unregister_group_dev

Message ID 5-v1-7355d38b9344+17481-vfio1_jgg@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Embed struct vfio_device in all sub-structures | expand

Commit Message

Jason Gunthorpe March 9, 2021, 9:38 p.m. UTC
pci already allocates a struct vfio_pci_device with exactly the same
lifetime as vfio_device, switch to the new API and embed vfio_device in
vfio_pci_device.

Add a note the probe ordering is racy.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/pci/vfio_pci.c         | 17 +++++++++++------
 drivers/vfio/pci/vfio_pci_private.h |  1 +
 2 files changed, 12 insertions(+), 6 deletions(-)

Comments

Christoph Hellwig March 10, 2021, 7:31 a.m. UTC | #1
> -	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
> +	/*
> +	 * FIXME: vfio_register_group_dev() allows VFIO_GROUP_GET_DEVICE_FD to
> +	 * immediately return the device to userspace, but we haven't finished
> +	 * setting it up yet.
> +	 */
> +	ret = vfio_register_group_dev(&vdev->vdev);

Again, why not include a patch in the series to fix this up as well?

The patch itself looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
Liu, Yi L March 12, 2021, 12:53 p.m. UTC | #2
Hi Jason,

> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Wednesday, March 10, 2021 5:39 AM
> 
> pci already allocates a struct vfio_pci_device with exactly the same
> lifetime as vfio_device, switch to the new API and embed vfio_device in
> vfio_pci_device.
> 
> Add a note the probe ordering is racy.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
>
> ---
>  drivers/vfio/pci/vfio_pci.c         | 17 +++++++++++------
>  drivers/vfio/pci/vfio_pci_private.h |  1 +
>  2 files changed, 12 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index 65e7e6b44578c2..fae573c6f86bdf 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -1957,6 +1957,7 @@ static int vfio_pci_probe(struct pci_dev *pdev,
> const struct pci_device_id *id)
>  		goto out_group_put;
>  	}
> 
> +	vfio_init_group_dev(&vdev->vdev, &pdev->dev, &vfio_pci_ops,
> vdev);
>  	vdev->pdev = pdev;
>  	vdev->irq_type = VFIO_PCI_NUM_IRQS;
>  	mutex_init(&vdev->igate);
> @@ -1968,7 +1969,12 @@ static int vfio_pci_probe(struct pci_dev *pdev,
> const struct pci_device_id *id)
>  	INIT_LIST_HEAD(&vdev->vma_list);
>  	init_rwsem(&vdev->memory_lock);
> 
> -	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
> +	/*
> +	 * FIXME: vfio_register_group_dev() allows VFIO_GROUP_GET_DEVICE_FD to
> +	 * immediately return the device to userspace, but we haven't finished
> +	 * setting it up yet.
> +	 */

this patch looks good to me.

Reviewed-by: Liu Yi L <yi.l.liu@intel.com>

But I have a question on the FIXME comment here. I checked the code below.
Even after vfio_register_group_dev(), userspace is not able to get DEVICE_FD
until the group has been added to a container. So perhaps you can give more
details behind this FIXME.

static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
        struct vfio_device *device;
        struct file *filep;
        int ret;

        if (0 == atomic_read(&group->container_users) ||
            !group->container->iommu_driver || !vfio_group_viable(group))
                return -EINVAL;

...
}

Regards,
Yi Liu

> +	ret = vfio_register_group_dev(&vdev->vdev);
>  	if (ret)
>  		goto out_free;
> 
> @@ -2014,6 +2020,7 @@ static int vfio_pci_probe(struct pci_dev *pdev,
> const struct pci_device_id *id)
>  		vfio_pci_set_power_state(vdev, PCI_D3hot);
>  	}
> 
> +	dev_set_drvdata(&pdev->dev, vdev);
>  	return ret;
> 
>  out_vf_token:
> @@ -2021,7 +2028,7 @@ static int vfio_pci_probe(struct pci_dev *pdev,
> const struct pci_device_id *id)
>  out_reflck:
>  	vfio_pci_reflck_put(vdev->reflck);
>  out_del_group_dev:
> -	vfio_del_group_dev(&pdev->dev);
> +	vfio_unregister_group_dev(&vdev->vdev);
>  out_free:
>  	kfree(vdev);
>  out_group_put:
> @@ -2031,13 +2038,11 @@ static int vfio_pci_probe(struct pci_dev *pdev,
> const struct pci_device_id *id)
> 
>  static void vfio_pci_remove(struct pci_dev *pdev)
>  {
> -	struct vfio_pci_device *vdev;
> +	struct vfio_pci_device *vdev = dev_get_drvdata(&pdev->dev);
> 
>  	pci_disable_sriov(pdev);
> 
> -	vdev = vfio_del_group_dev(&pdev->dev);
> -	if (!vdev)
> -		return;
> +	vfio_unregister_group_dev(&vdev->vdev);
> 
>  	if (vdev->vf_token) {
>  		WARN_ON(vdev->vf_token->users);
> diff --git a/drivers/vfio/pci/vfio_pci_private.h
> b/drivers/vfio/pci/vfio_pci_private.h
> index 9cd1882a05af69..8755a0febd054a 100644
> --- a/drivers/vfio/pci/vfio_pci_private.h
> +++ b/drivers/vfio/pci/vfio_pci_private.h
> @@ -100,6 +100,7 @@ struct vfio_pci_mmap_vma {
>  };
> 
>  struct vfio_pci_device {
> +	struct vfio_device	vdev;
>  	struct pci_dev		*pdev;
>  	void __iomem		*barmap[PCI_STD_NUM_BARS];
>  	bool
> 	bar_mmap_supported[PCI_STD_NUM_BARS];
> --
> 2.30.1
Jason Gunthorpe March 12, 2021, 1:58 p.m. UTC | #3
On Fri, Mar 12, 2021 at 12:53:18PM +0000, Liu, Yi L wrote:

> But I have a question on the FIXME comment here. I checked the code below.
> Even after vfio_register_group_dev(), userspace is not able to get DEVICE_FD
> until the group has been added to a container. 

The race here is between VFIO_GROUP_GET_DEVICE_FD and bind

Userspace would have already done VFIO_GROUP_SET_CONTAINER before it
does the VFIO_GROUP_GET_DEVICE_FD.

Jason
diff mbox series

Patch

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 65e7e6b44578c2..fae573c6f86bdf 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1957,6 +1957,7 @@  static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto out_group_put;
 	}
 
+	vfio_init_group_dev(&vdev->vdev, &pdev->dev, &vfio_pci_ops, vdev);
 	vdev->pdev = pdev;
 	vdev->irq_type = VFIO_PCI_NUM_IRQS;
 	mutex_init(&vdev->igate);
@@ -1968,7 +1969,12 @@  static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	INIT_LIST_HEAD(&vdev->vma_list);
 	init_rwsem(&vdev->memory_lock);
 
-	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
+	/*
+	 * FIXME: vfio_register_group_dev() allows VFIO_GROUP_GET_DEVICE_FD to
+	 * immediately return the device to userspace, but we haven't finished
+	 * setting it up yet.
+	 */
+	ret = vfio_register_group_dev(&vdev->vdev);
 	if (ret)
 		goto out_free;
 
@@ -2014,6 +2020,7 @@  static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		vfio_pci_set_power_state(vdev, PCI_D3hot);
 	}
 
+	dev_set_drvdata(&pdev->dev, vdev);
 	return ret;
 
 out_vf_token:
@@ -2021,7 +2028,7 @@  static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 out_reflck:
 	vfio_pci_reflck_put(vdev->reflck);
 out_del_group_dev:
-	vfio_del_group_dev(&pdev->dev);
+	vfio_unregister_group_dev(&vdev->vdev);
 out_free:
 	kfree(vdev);
 out_group_put:
@@ -2031,13 +2038,11 @@  static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 static void vfio_pci_remove(struct pci_dev *pdev)
 {
-	struct vfio_pci_device *vdev;
+	struct vfio_pci_device *vdev = dev_get_drvdata(&pdev->dev);
 
 	pci_disable_sriov(pdev);
 
-	vdev = vfio_del_group_dev(&pdev->dev);
-	if (!vdev)
-		return;
+	vfio_unregister_group_dev(&vdev->vdev);
 
 	if (vdev->vf_token) {
 		WARN_ON(vdev->vf_token->users);
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 9cd1882a05af69..8755a0febd054a 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -100,6 +100,7 @@  struct vfio_pci_mmap_vma {
 };
 
 struct vfio_pci_device {
+	struct vfio_device	vdev;
 	struct pci_dev		*pdev;
 	void __iomem		*barmap[PCI_STD_NUM_BARS];
 	bool			bar_mmap_supported[PCI_STD_NUM_BARS];