diff mbox series

[v2,08/14] vfio/pci: Re-order vfio_pci_probe()

Message ID 8-v2-20d933792272+4ff-vfio1_jgg@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Embed struct vfio_device in all sub-structures | expand

Commit Message

Jason Gunthorpe March 13, 2021, 12:56 a.m. UTC
vfio_add_group_dev() must be called only after all of the private data in
vdev is fully setup and ready, otherwise there could be races with user
space instantiating a device file descriptor and starting to call ops.

For instance vfio_pci_reflck_attach() sets vdev->reflck and
vfio_pci_open(), called by fops open, unconditionally derefs it, which
will crash if things get out of order.

Fixes: cc20d7999000 ("vfio/pci: Introduce VF token")
Fixes: e309df5b0c9e ("vfio/pci: Parallelize device open and release")
Fixes: 6eb7018705de ("vfio-pci: Move idle devices to D3hot power state")
Fixes: ecaa1f6a0154 ("vfio-pci: Add VGA arbiter client")
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/pci/vfio_pci.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

Comments

Christoph Hellwig March 15, 2021, 8:46 a.m. UTC | #1
On Fri, Mar 12, 2021 at 08:56:00PM -0400, Jason Gunthorpe wrote:
> vfio_add_group_dev() must be called only after all of the private data in
> vdev is fully setup and ready, otherwise there could be races with user
> space instantiating a device file descriptor and starting to call ops.
> 
> For instance vfio_pci_reflck_attach() sets vdev->reflck and
> vfio_pci_open(), called by fops open, unconditionally derefs it, which
> will crash if things get out of order.
> 
> Fixes: cc20d7999000 ("vfio/pci: Introduce VF token")
> Fixes: e309df5b0c9e ("vfio/pci: Parallelize device open and release")
> Fixes: 6eb7018705de ("vfio-pci: Move idle devices to D3hot power state")
> Fixes: ecaa1f6a0154 ("vfio-pci: Add VGA arbiter client")
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>
Tian, Kevin March 16, 2021, 8:04 a.m. UTC | #2
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Saturday, March 13, 2021 8:56 AM
> 
> vfio_add_group_dev() must be called only after all of the private data in
> vdev is fully setup and ready, otherwise there could be races with user
> space instantiating a device file descriptor and starting to call ops.
> 
> For instance vfio_pci_reflck_attach() sets vdev->reflck and
> vfio_pci_open(), called by fops open, unconditionally derefs it, which
> will crash if things get out of order.
> 
> Fixes: cc20d7999000 ("vfio/pci: Introduce VF token")
> Fixes: e309df5b0c9e ("vfio/pci: Parallelize device open and release")
> Fixes: 6eb7018705de ("vfio-pci: Move idle devices to D3hot power state")
> Fixes: ecaa1f6a0154 ("vfio-pci: Add VGA arbiter client")
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/vfio/pci/vfio_pci.c | 17 +++++++++--------
>  1 file changed, 9 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index f95b58376156a0..0e7682e7a0b478 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -2030,13 +2030,9 @@ static int vfio_pci_probe(struct pci_dev *pdev,
> const struct pci_device_id *id)
>  	INIT_LIST_HEAD(&vdev->vma_list);
>  	init_rwsem(&vdev->memory_lock);
> 
> -	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
> -	if (ret)
> -		goto out_free;
> -
>  	ret = vfio_pci_reflck_attach(vdev);
>  	if (ret)
> -		goto out_del_group_dev;
> +		goto out_free;
>  	ret = vfio_pci_vf_init(vdev);
>  	if (ret)
>  		goto out_reflck;
> @@ -2060,15 +2056,20 @@ static int vfio_pci_probe(struct pci_dev *pdev,
> const struct pci_device_id *id)
>  		vfio_pci_set_power_state(vdev, PCI_D3hot);
>  	}
> 
> -	return ret;
> +	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
> +	if (ret)
> +		goto out_power;
> +	return 0;
> 
> +out_power:
> +	if (!disable_idle_d3)
> +		vfio_pci_set_power_state(vdev, PCI_D0);

Just curious whether the power state must be recovered upon failure here.
From the comment several lines above, the power state is set to an unknown
state before doing D3 transaction. From this point it looks fine if leaving the
device in D3 since there is no expected state to be recovered?

>  out_vf:
>  	vfio_pci_vf_uninit(vdev);
>  out_reflck:
>  	vfio_pci_reflck_put(vdev->reflck);
> -out_del_group_dev:
> -	vfio_del_group_dev(&pdev->dev);
>  out_free:
> +	kfree(vdev->pm_save);
>  	kfree(vdev);
>  out_group_put:
>  	vfio_iommu_group_put(group, &pdev->dev);
> --
> 2.30.2
Max Gurtovoy March 16, 2021, 11:28 a.m. UTC | #3
On 3/13/2021 2:56 AM, Jason Gunthorpe wrote:
> vfio_add_group_dev() must be called only after all of the private data in
> vdev is fully setup and ready, otherwise there could be races with user
> space instantiating a device file descriptor and starting to call ops.
>
> For instance vfio_pci_reflck_attach() sets vdev->reflck and
> vfio_pci_open(), called by fops open, unconditionally derefs it, which
> will crash if things get out of order.
>
> Fixes: cc20d7999000 ("vfio/pci: Introduce VF token")
> Fixes: e309df5b0c9e ("vfio/pci: Parallelize device open and release")
> Fixes: 6eb7018705de ("vfio-pci: Move idle devices to D3hot power state")
> Fixes: ecaa1f6a0154 ("vfio-pci: Add VGA arbiter client")
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>   drivers/vfio/pci/vfio_pci.c | 17 +++++++++--------
>   1 file changed, 9 insertions(+), 8 deletions(-)

Looks good,

Reviewed-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Jason Gunthorpe March 16, 2021, 1:20 p.m. UTC | #4
On Tue, Mar 16, 2021 at 08:04:55AM +0000, Tian, Kevin wrote:
> > @@ -2060,15 +2056,20 @@ static int vfio_pci_probe(struct pci_dev *pdev,
> > const struct pci_device_id *id)
> >  		vfio_pci_set_power_state(vdev, PCI_D3hot);
> >  	}
> > 
> > -	return ret;
> > +	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
> > +	if (ret)
> > +		goto out_power;
> > +	return 0;
> > 
> > +out_power:
> > +	if (!disable_idle_d3)
> > +		vfio_pci_set_power_state(vdev, PCI_D0);
> 
> Just curious whether the power state must be recovered upon failure here.
> From the comment several lines above, the power state is set to an unknown
> state before doing D3 transaction. From this point it looks fine if leaving the
> device in D3 since there is no expected state to be recovered?

I don't know, this is what the remove function does, so I can't see a
reason why remove should do it but not here.

Jason
Alex Williamson March 16, 2021, 10:27 p.m. UTC | #5
On Tue, 16 Mar 2021 10:20:58 -0300
Jason Gunthorpe <jgg@nvidia.com> wrote:

> On Tue, Mar 16, 2021 at 08:04:55AM +0000, Tian, Kevin wrote:
> > > @@ -2060,15 +2056,20 @@ static int vfio_pci_probe(struct pci_dev *pdev,
> > > const struct pci_device_id *id)
> > >  		vfio_pci_set_power_state(vdev, PCI_D3hot);
> > >  	}
> > > 
> > > -	return ret;
> > > +	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
> > > +	if (ret)
> > > +		goto out_power;
> > > +	return 0;
> > > 
> > > +out_power:
> > > +	if (!disable_idle_d3)
> > > +		vfio_pci_set_power_state(vdev, PCI_D0);  
> > 
> > Just curious whether the power state must be recovered upon failure here.
> > From the comment several lines above, the power state is set to an unknown
> > state before doing D3 transaction. From this point it looks fine if leaving the
> > device in D3 since there is no expected state to be recovered?  
> 
> I don't know, this is what the remove function does, so I can't see a
> reason why remove should do it but not here.

I'm not sure it matters in either case, we're just trying to be most
similar to expected driver behavior.  pci_enable_device() puts the
device in D0 but pci_disable_device() doesn't touch the power state, so
the device would typically be released from a PCI driver in D0 afaict.
Thanks,

Alex
Tian, Kevin March 17, 2021, 12:56 a.m. UTC | #6
> From: Alex Williamson <alex.williamson@redhat.com>
> Sent: Wednesday, March 17, 2021 6:27 AM
> 
> On Tue, 16 Mar 2021 10:20:58 -0300
> Jason Gunthorpe <jgg@nvidia.com> wrote:
> 
> > On Tue, Mar 16, 2021 at 08:04:55AM +0000, Tian, Kevin wrote:
> > > > @@ -2060,15 +2056,20 @@ static int vfio_pci_probe(struct pci_dev
> *pdev,
> > > > const struct pci_device_id *id)
> > > >  		vfio_pci_set_power_state(vdev, PCI_D3hot);
> > > >  	}
> > > >
> > > > -	return ret;
> > > > +	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
> > > > +	if (ret)
> > > > +		goto out_power;
> > > > +	return 0;
> > > >
> > > > +out_power:
> > > > +	if (!disable_idle_d3)
> > > > +		vfio_pci_set_power_state(vdev, PCI_D0);
> > >
> > > Just curious whether the power state must be recovered upon failure
> here.
> > > From the comment several lines above, the power state is set to an
> unknown
> > > state before doing D3 transaction. From this point it looks fine if leaving
> the
> > > device in D3 since there is no expected state to be recovered?
> >
> > I don't know, this is what the remove function does, so I can't see a
> > reason why remove should do it but not here.
> 
> I'm not sure it matters in either case, we're just trying to be most
> similar to expected driver behavior.  pci_enable_device() puts the
> device in D0 but pci_disable_device() doesn't touch the power state, so
> the device would typically be released from a PCI driver in D0 afaict.
> Thanks,
> 

OK. Then,

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Cornelia Huck March 17, 2021, 10:32 a.m. UTC | #7
On Fri, 12 Mar 2021 20:56:00 -0400
Jason Gunthorpe <jgg@nvidia.com> wrote:

> vfio_add_group_dev() must be called only after all of the private data in
> vdev is fully setup and ready, otherwise there could be races with user
> space instantiating a device file descriptor and starting to call ops.
> 
> For instance vfio_pci_reflck_attach() sets vdev->reflck and
> vfio_pci_open(), called by fops open, unconditionally derefs it, which
> will crash if things get out of order.
> 
> Fixes: cc20d7999000 ("vfio/pci: Introduce VF token")
> Fixes: e309df5b0c9e ("vfio/pci: Parallelize device open and release")
> Fixes: 6eb7018705de ("vfio-pci: Move idle devices to D3hot power state")
> Fixes: ecaa1f6a0154 ("vfio-pci: Add VGA arbiter client")
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/vfio/pci/vfio_pci.c | 17 +++++++++--------
>  1 file changed, 9 insertions(+), 8 deletions(-)

Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Eric Auger March 18, 2021, 4:50 p.m. UTC | #8
Hi Jason,

On 3/13/21 1:56 AM, Jason Gunthorpe wrote:
> vfio_add_group_dev() must be called only after all of the private data in
> vdev is fully setup and ready, otherwise there could be races with user
> space instantiating a device file descriptor and starting to call ops.
> 
> For instance vfio_pci_reflck_attach() sets vdev->reflck and
> vfio_pci_open(), called by fops open, unconditionally derefs it, which
> will crash if things get out of order.>
> Fixes: cc20d7999000 ("vfio/pci: Introduce VF token")
> Fixes: e309df5b0c9e ("vfio/pci: Parallelize device open and release")
> Fixes: 6eb7018705de ("vfio-pci: Move idle devices to D3hot power state")
> Fixes: ecaa1f6a0154 ("vfio-pci: Add VGA arbiter client")
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>

Thanks

Eric
> ---
>  drivers/vfio/pci/vfio_pci.c | 17 +++++++++--------
>  1 file changed, 9 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index f95b58376156a0..0e7682e7a0b478 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -2030,13 +2030,9 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	INIT_LIST_HEAD(&vdev->vma_list);
>  	init_rwsem(&vdev->memory_lock);
>  
> -	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
> -	if (ret)
> -		goto out_free;
> -
>  	ret = vfio_pci_reflck_attach(vdev);
>  	if (ret)
> -		goto out_del_group_dev;
> +		goto out_free;
>  	ret = vfio_pci_vf_init(vdev);
>  	if (ret)
>  		goto out_reflck;
> @@ -2060,15 +2056,20 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  		vfio_pci_set_power_state(vdev, PCI_D3hot);
>  	}
>  
> -	return ret;
> +	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
> +	if (ret)
> +		goto out_power;
> +	return 0;
>  
> +out_power:
> +	if (!disable_idle_d3)
> +		vfio_pci_set_power_state(vdev, PCI_D0);
>  out_vf:
>  	vfio_pci_vf_uninit(vdev);
>  out_reflck:
>  	vfio_pci_reflck_put(vdev->reflck);
> -out_del_group_dev:
> -	vfio_del_group_dev(&pdev->dev);
>  out_free:
> +	kfree(vdev->pm_save);
>  	kfree(vdev);
>  out_group_put:
>  	vfio_iommu_group_put(group, &pdev->dev);
>
diff mbox series

Patch

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index f95b58376156a0..0e7682e7a0b478 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -2030,13 +2030,9 @@  static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	INIT_LIST_HEAD(&vdev->vma_list);
 	init_rwsem(&vdev->memory_lock);
 
-	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
-	if (ret)
-		goto out_free;
-
 	ret = vfio_pci_reflck_attach(vdev);
 	if (ret)
-		goto out_del_group_dev;
+		goto out_free;
 	ret = vfio_pci_vf_init(vdev);
 	if (ret)
 		goto out_reflck;
@@ -2060,15 +2056,20 @@  static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		vfio_pci_set_power_state(vdev, PCI_D3hot);
 	}
 
-	return ret;
+	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
+	if (ret)
+		goto out_power;
+	return 0;
 
+out_power:
+	if (!disable_idle_d3)
+		vfio_pci_set_power_state(vdev, PCI_D0);
 out_vf:
 	vfio_pci_vf_uninit(vdev);
 out_reflck:
 	vfio_pci_reflck_put(vdev->reflck);
-out_del_group_dev:
-	vfio_del_group_dev(&pdev->dev);
 out_free:
+	kfree(vdev->pm_save);
 	kfree(vdev);
 out_group_put:
 	vfio_iommu_group_put(group, &pdev->dev);