diff mbox

[v2] Enable SR-IOV instantiation through /sys file

Message ID 20171208214758.6959-1-jeffrey.t.kirsher@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Kirsher, Jeffrey T Dec. 8, 2017, 9:47 p.m. UTC
From: Liang-Min Wang <liang-min.wang@intel.com>

When a SR-IOV capable device is bound with vfio-pci, the
device loses capability of creating SR-IOV instances through /sy/bus/
pci/devices/.../sriov_numvfs. This patch re-activates this capability
for a PCIe device that is SR-IOV capable and is bound with vfio-pci.ko.
This patch also disables drivers_autoprobe attribute of SR-IOV devices
created from vfio-pci bound device by default, so user-space PF device
can coordinate the bring-up of SR-IOV devices

Signed-off-by: Liang-Min Wang <liang-min.wang@intel.com>
---
 drivers/pci/pci-driver.c    | 12 ++++++++++++
 drivers/vfio/pci/vfio_pci.c | 22 ++++++++++++++++++++++
 include/linux/pci.h         |  1 +
 3 files changed, 35 insertions(+)

Comments

Alex Williamson Dec. 8, 2017, 10:58 p.m. UTC | #1
On Fri,  8 Dec 2017 13:47:58 -0800
Jeff Kirsher <jeffrey.t.kirsher@intel.com> wrote:

> From: Liang-Min Wang <liang-min.wang@intel.com>
> 
> When a SR-IOV capable device is bound with vfio-pci, the
> device loses capability of creating SR-IOV instances through /sy/bus/
> pci/devices/.../sriov_numvfs. This patch re-activates this capability
> for a PCIe device that is SR-IOV capable and is bound with vfio-pci.ko.
> This patch also disables drivers_autoprobe attribute of SR-IOV devices
> created from vfio-pci bound device by default, so user-space PF device
> can coordinate the bring-up of SR-IOV devices
> 
> Signed-off-by: Liang-Min Wang <liang-min.wang@intel.com>
> ---
>  drivers/pci/pci-driver.c    | 12 ++++++++++++
>  drivers/vfio/pci/vfio_pci.c | 22 ++++++++++++++++++++++
>  include/linux/pci.h         |  1 +
>  3 files changed, 35 insertions(+)
> 
> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> index 7f47bb7..19522fe 100644
> --- a/drivers/pci/pci-driver.c
> +++ b/drivers/pci/pci-driver.c
> @@ -1467,6 +1467,18 @@ void pci_dev_put(struct pci_dev *dev)
>  }
>  EXPORT_SYMBOL(pci_dev_put);
>  
> +/**
> + * pci_dev_sriov_autoprobe_set - set device sriov driver autoprobe
> + * @dev: device with which sriov autoprobe will be set
> + *
> + */
> +void pci_dev_sriov_autoprobe_set(struct pci_dev *dev, bool autoprobe)
> +{
> +	if (dev && dev->sriov)
> +		dev->sriov->drivers_autoprobe = autoprobe;
> +}
> +EXPORT_SYMBOL(pci_dev_sriov_autoprobe_set);

_GPL?

It'd also be best to separate the pci and vfio changes into different
patches.  Bjorn would need to at least ack this PCI interface.

> +
>  static int pci_uevent(struct device *dev, struct kobj_uevent_env *env)
>  {
>  	struct pci_dev *pdev;
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index f041b1a..004836c 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -1213,6 +1213,8 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  		return -ENOMEM;
>  	}
>  
> +	/* disable sriov automatic driver attachment */
> +	pci_dev_sriov_autoprobe_set(pdev, false);

This looks stateful, VF autoprobe is not restored on release.  Also,
how would we know the initial state to restore it to?

>  	vdev->pdev = pdev;
>  	vdev->irq_type = VFIO_PCI_NUM_IRQS;
>  	mutex_init(&vdev->igate);
> @@ -1256,6 +1258,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
>  	if (!vdev)
>  		return;
>  
> +	pci_disable_sriov(pdev);
>  	vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
>  	kfree(vdev->region);
>  	kfree(vdev);
> @@ -1303,12 +1306,31 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
>  	.error_detected = vfio_pci_aer_err_detected,
>  };
>  
> +static int vfio_sriov_configure(struct pci_dev *pdev, int num_vfs)
> +{
> +	int status;
> +
> +	if (!num_vfs) {
> +		pci_disable_sriov(pdev);
> +		return 0;
> +	}
> +
> +	status = pci_enable_sriov(pdev, num_vfs);
> +	if (!status) {
> +		pr_crit("Created %d SR-IOV from a user-space driver based upon vfio-pci\n", num_vfs);
> +		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> +	}

This probably deserves more explanation, in comment and commit log.
Probably a dev_crit() is also justified, knowing the PF would be
useful.  Thanks,

Alex

> +
> +	return status;
> +}
> +
>  static struct pci_driver vfio_pci_driver = {
>  	.name		= "vfio-pci",
>  	.id_table	= NULL, /* only dynamic ids */
>  	.probe		= vfio_pci_probe,
>  	.remove		= vfio_pci_remove,
>  	.err_handler	= &vfio_err_handlers,
> +	.sriov_configure = vfio_sriov_configure,
>  };
>  
>  struct vfio_devices {
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 0403894..e04b69d 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -921,6 +921,7 @@ struct resource *pci_find_parent_resource(const struct pci_dev *dev,
>  struct pci_dev *pci_dev_get(struct pci_dev *dev);
>  void pci_dev_put(struct pci_dev *dev);
>  void pci_remove_bus(struct pci_bus *b);
> +void pci_dev_sriov_autoprobe_set(struct pci_dev *dev, bool autoprobe);
>  void pci_stop_and_remove_bus_device(struct pci_dev *dev);
>  void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev);
>  void pci_stop_root_bus(struct pci_bus *bus);
Alexander Duyck Dec. 8, 2017, 11:19 p.m. UTC | #2
On Fri, Dec 8, 2017 at 2:58 PM, Alex Williamson
<alex.williamson@redhat.com> wrote:
> On Fri,  8 Dec 2017 13:47:58 -0800
> Jeff Kirsher <jeffrey.t.kirsher@intel.com> wrote:
>
>> From: Liang-Min Wang <liang-min.wang@intel.com>
>>
>> When a SR-IOV capable device is bound with vfio-pci, the
>> device loses capability of creating SR-IOV instances through /sy/bus/
>> pci/devices/.../sriov_numvfs. This patch re-activates this capability
>> for a PCIe device that is SR-IOV capable and is bound with vfio-pci.ko.
>> This patch also disables drivers_autoprobe attribute of SR-IOV devices
>> created from vfio-pci bound device by default, so user-space PF device
>> can coordinate the bring-up of SR-IOV devices
>>
>> Signed-off-by: Liang-Min Wang <liang-min.wang@intel.com>
>> ---
>>  drivers/pci/pci-driver.c    | 12 ++++++++++++
>>  drivers/vfio/pci/vfio_pci.c | 22 ++++++++++++++++++++++
>>  include/linux/pci.h         |  1 +
>>  3 files changed, 35 insertions(+)
>>
>> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
>> index 7f47bb7..19522fe 100644
>> --- a/drivers/pci/pci-driver.c
>> +++ b/drivers/pci/pci-driver.c
>> @@ -1467,6 +1467,18 @@ void pci_dev_put(struct pci_dev *dev)
>>  }
>>  EXPORT_SYMBOL(pci_dev_put);
>>
>> +/**
>> + * pci_dev_sriov_autoprobe_set - set device sriov driver autoprobe
>> + * @dev: device with which sriov autoprobe will be set
>> + *
>> + */
>> +void pci_dev_sriov_autoprobe_set(struct pci_dev *dev, bool autoprobe)
>> +{
>> +     if (dev && dev->sriov)
>> +             dev->sriov->drivers_autoprobe = autoprobe;
>> +}
>> +EXPORT_SYMBOL(pci_dev_sriov_autoprobe_set);
>
> _GPL?
>
> It'd also be best to separate the pci and vfio changes into different
> patches.  Bjorn would need to at least ack this PCI interface.
>
>> +
>>  static int pci_uevent(struct device *dev, struct kobj_uevent_env *env)
>>  {
>>       struct pci_dev *pdev;
>> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
>> index f041b1a..004836c 100644
>> --- a/drivers/vfio/pci/vfio_pci.c
>> +++ b/drivers/vfio/pci/vfio_pci.c
>> @@ -1213,6 +1213,8 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>               return -ENOMEM;
>>       }
>>
>> +     /* disable sriov automatic driver attachment */
>> +     pci_dev_sriov_autoprobe_set(pdev, false);
>
> This looks stateful, VF autoprobe is not restored on release.  Also,
> how would we know the initial state to restore it to?

The initial state is whatever the user set it to. It is something that
can be toggled on and off via sysfs, and it defaults to true at
initialization. In this case we are opting to toggle it off when VFIO
is attached to the device. Restoring it after unloading the driver
might be even more confusing since it is something the user could
toggle at any time so a restore would end up overwriting that.

>>       vdev->pdev = pdev;
>>       vdev->irq_type = VFIO_PCI_NUM_IRQS;
>>       mutex_init(&vdev->igate);
>> @@ -1256,6 +1258,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
>>       if (!vdev)
>>               return;
>>
>> +     pci_disable_sriov(pdev);
>>       vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
>>       kfree(vdev->region);
>>       kfree(vdev);
>> @@ -1303,12 +1306,31 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
>>       .error_detected = vfio_pci_aer_err_detected,
>>  };
>>
>> +static int vfio_sriov_configure(struct pci_dev *pdev, int num_vfs)
>> +{
>> +     int status;
>> +
>> +     if (!num_vfs) {
>> +             pci_disable_sriov(pdev);
>> +             return 0;
>> +     }
>> +
>> +     status = pci_enable_sriov(pdev, num_vfs);
>> +     if (!status) {
>> +             pr_crit("Created %d SR-IOV from a user-space driver based upon vfio-pci\n", num_vfs);
>> +             add_taint(TAINT_USER, LOCKDEP_STILL_OK);
>> +     }
>
> This probably deserves more explanation, in comment and commit log.
> Probably a dev_crit() is also justified, knowing the PF would be
> useful.  Thanks,
>
> Alex
>
>> +
>> +     return status;
>> +}
>> +
>>  static struct pci_driver vfio_pci_driver = {
>>       .name           = "vfio-pci",
>>       .id_table       = NULL, /* only dynamic ids */
>>       .probe          = vfio_pci_probe,
>>       .remove         = vfio_pci_remove,
>>       .err_handler    = &vfio_err_handlers,
>> +     .sriov_configure = vfio_sriov_configure,
>>  };
>>
>>  struct vfio_devices {
>> diff --git a/include/linux/pci.h b/include/linux/pci.h
>> index 0403894..e04b69d 100644
>> --- a/include/linux/pci.h
>> +++ b/include/linux/pci.h
>> @@ -921,6 +921,7 @@ struct resource *pci_find_parent_resource(const struct pci_dev *dev,
>>  struct pci_dev *pci_dev_get(struct pci_dev *dev);
>>  void pci_dev_put(struct pci_dev *dev);
>>  void pci_remove_bus(struct pci_bus *b);
>> +void pci_dev_sriov_autoprobe_set(struct pci_dev *dev, bool autoprobe);
>>  void pci_stop_and_remove_bus_device(struct pci_dev *dev);
>>  void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev);
>>  void pci_stop_root_bus(struct pci_bus *bus);
>
Alex Williamson Dec. 8, 2017, 11:34 p.m. UTC | #3
On Fri, 8 Dec 2017 15:19:18 -0800
Alexander Duyck <alexander.duyck@gmail.com> wrote:

> On Fri, Dec 8, 2017 at 2:58 PM, Alex Williamson
> <alex.williamson@redhat.com> wrote:
> > On Fri,  8 Dec 2017 13:47:58 -0800
> > Jeff Kirsher <jeffrey.t.kirsher@intel.com> wrote:
> >  
> >> From: Liang-Min Wang <liang-min.wang@intel.com>
> >>
> >> When a SR-IOV capable device is bound with vfio-pci, the
> >> device loses capability of creating SR-IOV instances through /sy/bus/
> >> pci/devices/.../sriov_numvfs. This patch re-activates this capability
> >> for a PCIe device that is SR-IOV capable and is bound with vfio-pci.ko.
> >> This patch also disables drivers_autoprobe attribute of SR-IOV devices
> >> created from vfio-pci bound device by default, so user-space PF device
> >> can coordinate the bring-up of SR-IOV devices
> >>
> >> Signed-off-by: Liang-Min Wang <liang-min.wang@intel.com>
> >> ---
> >>  drivers/pci/pci-driver.c    | 12 ++++++++++++
> >>  drivers/vfio/pci/vfio_pci.c | 22 ++++++++++++++++++++++
> >>  include/linux/pci.h         |  1 +
> >>  3 files changed, 35 insertions(+)
> >>
> >> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> >> index 7f47bb7..19522fe 100644
> >> --- a/drivers/pci/pci-driver.c
> >> +++ b/drivers/pci/pci-driver.c
> >> @@ -1467,6 +1467,18 @@ void pci_dev_put(struct pci_dev *dev)
> >>  }
> >>  EXPORT_SYMBOL(pci_dev_put);
> >>
> >> +/**
> >> + * pci_dev_sriov_autoprobe_set - set device sriov driver autoprobe
> >> + * @dev: device with which sriov autoprobe will be set
> >> + *
> >> + */
> >> +void pci_dev_sriov_autoprobe_set(struct pci_dev *dev, bool autoprobe)
> >> +{
> >> +     if (dev && dev->sriov)
> >> +             dev->sriov->drivers_autoprobe = autoprobe;
> >> +}
> >> +EXPORT_SYMBOL(pci_dev_sriov_autoprobe_set);  
> >
> > _GPL?
> >
> > It'd also be best to separate the pci and vfio changes into different
> > patches.  Bjorn would need to at least ack this PCI interface.
> >  
> >> +
> >>  static int pci_uevent(struct device *dev, struct kobj_uevent_env *env)
> >>  {
> >>       struct pci_dev *pdev;
> >> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> >> index f041b1a..004836c 100644
> >> --- a/drivers/vfio/pci/vfio_pci.c
> >> +++ b/drivers/vfio/pci/vfio_pci.c
> >> @@ -1213,6 +1213,8 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >>               return -ENOMEM;
> >>       }
> >>
> >> +     /* disable sriov automatic driver attachment */
> >> +     pci_dev_sriov_autoprobe_set(pdev, false);  
> >
> > This looks stateful, VF autoprobe is not restored on release.  Also,
> > how would we know the initial state to restore it to?  
> 
> The initial state is whatever the user set it to. It is something that
> can be toggled on and off via sysfs, and it defaults to true at
> initialization. In this case we are opting to toggle it off when VFIO
> is attached to the device. Restoring it after unloading the driver
> might be even more confusing since it is something the user could
> toggle at any time so a restore would end up overwriting that.

I'm not really willing to sign up for the inevitable bug reports when
users can't figure out how to make their VFs work again in the host
after they've used the PF for userspace drivers with vfio-pci.  I
agree, both options are confusing, how do we make it not confusing?
Can PCI core reset the autoprobe attribute to the default at some
obvious point?  Thanks,

Alex
Wang, Liang-min Dec. 11, 2017, 2:22 p.m. UTC | #4
> -----Original Message-----
> From: Alex Williamson [mailto:alex.williamson@redhat.com]
> Sent: Friday, December 08, 2017 6:35 PM
> To: Alexander Duyck <alexander.duyck@gmail.com>
> Cc: Kirsher, Jeffrey T <jeffrey.t.kirsher@intel.com>; Wang, Liang-min <liang-
> min.wang@intel.com>; kvm@vger.kernel.org; linux-pci@vger.kernel.org; linux-
> kernel@vger.kernel.org; Bjorn Helgaas <bhelgaas@google.com>; Duyck,
> Alexander H <alexander.h.duyck@intel.com>
> Subject: Re: [PATCH v2] Enable SR-IOV instantiation through /sys file
> 
> On Fri, 8 Dec 2017 15:19:18 -0800
> Alexander Duyck <alexander.duyck@gmail.com> wrote:
> 
> > On Fri, Dec 8, 2017 at 2:58 PM, Alex Williamson
> > <alex.williamson@redhat.com> wrote:
> > > On Fri,  8 Dec 2017 13:47:58 -0800
> > > Jeff Kirsher <jeffrey.t.kirsher@intel.com> wrote:
> > >
> > >> From: Liang-Min Wang <liang-min.wang@intel.com>
> > >>
> > >> When a SR-IOV capable device is bound with vfio-pci, the
> > >> device loses capability of creating SR-IOV instances through /sy/bus/
> > >> pci/devices/.../sriov_numvfs. This patch re-activates this capability
> > >> for a PCIe device that is SR-IOV capable and is bound with vfio-pci.ko.
> > >> This patch also disables drivers_autoprobe attribute of SR-IOV devices
> > >> created from vfio-pci bound device by default, so user-space PF device
> > >> can coordinate the bring-up of SR-IOV devices
> > >>
> > >> Signed-off-by: Liang-Min Wang <liang-min.wang@intel.com>
> > >> ---
> > >>  drivers/pci/pci-driver.c    | 12 ++++++++++++
> > >>  drivers/vfio/pci/vfio_pci.c | 22 ++++++++++++++++++++++
> > >>  include/linux/pci.h         |  1 +
> > >>  3 files changed, 35 insertions(+)
> > >>
> > >> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> > >> index 7f47bb7..19522fe 100644
> > >> --- a/drivers/pci/pci-driver.c
> > >> +++ b/drivers/pci/pci-driver.c
> > >> @@ -1467,6 +1467,18 @@ void pci_dev_put(struct pci_dev *dev)
> > >>  }
> > >>  EXPORT_SYMBOL(pci_dev_put);
> > >>
> > >> +/**
> > >> + * pci_dev_sriov_autoprobe_set - set device sriov driver autoprobe
> > >> + * @dev: device with which sriov autoprobe will be set
> > >> + *
> > >> + */
> > >> +void pci_dev_sriov_autoprobe_set(struct pci_dev *dev, bool autoprobe)
> > >> +{
> > >> +     if (dev && dev->sriov)
> > >> +             dev->sriov->drivers_autoprobe = autoprobe;
> > >> +}
> > >> +EXPORT_SYMBOL(pci_dev_sriov_autoprobe_set);
> > >
> > > _GPL?
> > >
> > > It'd also be best to separate the pci and vfio changes into different
> > > patches.  Bjorn would need to at least ack this PCI interface.
> > >
> > >> +
> > >>  static int pci_uevent(struct device *dev, struct kobj_uevent_env *env)
> > >>  {
> > >>       struct pci_dev *pdev;
> > >> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> > >> index f041b1a..004836c 100644
> > >> --- a/drivers/vfio/pci/vfio_pci.c
> > >> +++ b/drivers/vfio/pci/vfio_pci.c
> > >> @@ -1213,6 +1213,8 @@ static int vfio_pci_probe(struct pci_dev *pdev,
> const struct pci_device_id *id)
> > >>               return -ENOMEM;
> > >>       }
> > >>
> > >> +     /* disable sriov automatic driver attachment */
> > >> +     pci_dev_sriov_autoprobe_set(pdev, false);
> > >
> > > This looks stateful, VF autoprobe is not restored on release.  Also,
> > > how would we know the initial state to restore it to?
> >
> > The initial state is whatever the user set it to. It is something that
> > can be toggled on and off via sysfs, and it defaults to true at
> > initialization. In this case we are opting to toggle it off when VFIO
> > is attached to the device. Restoring it after unloading the driver
> > might be even more confusing since it is something the user could
> > toggle at any time so a restore would end up overwriting that.
> 
> I'm not really willing to sign up for the inevitable bug reports when
> users can't figure out how to make their VFs work again in the host
> after they've used the PF for userspace drivers with vfio-pci.  I
> agree, both options are confusing, how do we make it not confusing?
> Can PCI core reset the autoprobe attribute to the default at some
> obvious point?  Thanks,
> 

I would like to confirm the scenario discussed here is to unload PF driver, right?
Since users need to release all SR-IOV from PF driver first before PF driver is
released, does it make sense to restore autoprobe when VFs are released?

> Alex
Duyck, Alexander H Dec. 11, 2017, 4:06 p.m. UTC | #5
We could do that. It shouldn't be an issue as long as we disable SR-IOV first.

It would just be a matter of recording the state of kernel_pf_autoprobe vs user_pf_autoprobe. It might be useful to make that distinction in the comments somewhere in the code and in the patch description.

- Alex

> -----Original Message-----
> From: Wang, Liang-min
> Sent: Monday, December 11, 2017 6:23 AM
> To: Alex Williamson <alex.williamson@redhat.com>; Alexander Duyck
> <alexander.duyck@gmail.com>
> Cc: Kirsher, Jeffrey T <jeffrey.t.kirsher@intel.com>; kvm@vger.kernel.org; linux-
> pci@vger.kernel.org; linux-kernel@vger.kernel.org; Bjorn Helgaas
> <bhelgaas@google.com>; Duyck, Alexander H <alexander.h.duyck@intel.com>
> Subject: RE: [PATCH v2] Enable SR-IOV instantiation through /sys file
> 
> 
> 
> > -----Original Message-----
> > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > Sent: Friday, December 08, 2017 6:35 PM
> > To: Alexander Duyck <alexander.duyck@gmail.com>
> > Cc: Kirsher, Jeffrey T <jeffrey.t.kirsher@intel.com>; Wang, Liang-min
> > <liang- min.wang@intel.com>; kvm@vger.kernel.org;
> > linux-pci@vger.kernel.org; linux- kernel@vger.kernel.org; Bjorn
> > Helgaas <bhelgaas@google.com>; Duyck, Alexander H
> > <alexander.h.duyck@intel.com>
> > Subject: Re: [PATCH v2] Enable SR-IOV instantiation through /sys file
> >
> > On Fri, 8 Dec 2017 15:19:18 -0800
> > Alexander Duyck <alexander.duyck@gmail.com> wrote:
> >
> > > On Fri, Dec 8, 2017 at 2:58 PM, Alex Williamson
> > > <alex.williamson@redhat.com> wrote:
> > > > On Fri,  8 Dec 2017 13:47:58 -0800 Jeff Kirsher
> > > > <jeffrey.t.kirsher@intel.com> wrote:
> > > >
> > > >> From: Liang-Min Wang <liang-min.wang@intel.com>
> > > >>
> > > >> When a SR-IOV capable device is bound with vfio-pci, the device
> > > >> loses capability of creating SR-IOV instances through /sy/bus/
> > > >> pci/devices/.../sriov_numvfs. This patch re-activates this
> > > >> capability for a PCIe device that is SR-IOV capable and is bound with vfio-
> pci.ko.
> > > >> This patch also disables drivers_autoprobe attribute of SR-IOV
> > > >> devices created from vfio-pci bound device by default, so
> > > >> user-space PF device can coordinate the bring-up of SR-IOV
> > > >> devices
> > > >>
> > > >> Signed-off-by: Liang-Min Wang <liang-min.wang@intel.com>
> > > >> ---
> > > >>  drivers/pci/pci-driver.c    | 12 ++++++++++++
> > > >>  drivers/vfio/pci/vfio_pci.c | 22 ++++++++++++++++++++++
> > > >>  include/linux/pci.h         |  1 +
> > > >>  3 files changed, 35 insertions(+)
> > > >>
> > > >> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> > > >> index 7f47bb7..19522fe 100644
> > > >> --- a/drivers/pci/pci-driver.c
> > > >> +++ b/drivers/pci/pci-driver.c
> > > >> @@ -1467,6 +1467,18 @@ void pci_dev_put(struct pci_dev *dev)  }
> > > >> EXPORT_SYMBOL(pci_dev_put);
> > > >>
> > > >> +/**
> > > >> + * pci_dev_sriov_autoprobe_set - set device sriov driver
> > > >> +autoprobe
> > > >> + * @dev: device with which sriov autoprobe will be set
> > > >> + *
> > > >> + */
> > > >> +void pci_dev_sriov_autoprobe_set(struct pci_dev *dev, bool
> > > >> +autoprobe) {
> > > >> +     if (dev && dev->sriov)
> > > >> +             dev->sriov->drivers_autoprobe = autoprobe; }
> > > >> +EXPORT_SYMBOL(pci_dev_sriov_autoprobe_set);
> > > >
> > > > _GPL?
> > > >
> > > > It'd also be best to separate the pci and vfio changes into
> > > > different patches.  Bjorn would need to at least ack this PCI interface.
> > > >
> > > >> +
> > > >>  static int pci_uevent(struct device *dev, struct kobj_uevent_env
> > > >> *env)  {
> > > >>       struct pci_dev *pdev;
> > > >> diff --git a/drivers/vfio/pci/vfio_pci.c
> > > >> b/drivers/vfio/pci/vfio_pci.c index f041b1a..004836c 100644
> > > >> --- a/drivers/vfio/pci/vfio_pci.c
> > > >> +++ b/drivers/vfio/pci/vfio_pci.c
> > > >> @@ -1213,6 +1213,8 @@ static int vfio_pci_probe(struct pci_dev
> > > >> *pdev,
> > const struct pci_device_id *id)
> > > >>               return -ENOMEM;
> > > >>       }
> > > >>
> > > >> +     /* disable sriov automatic driver attachment */
> > > >> +     pci_dev_sriov_autoprobe_set(pdev, false);
> > > >
> > > > This looks stateful, VF autoprobe is not restored on release.
> > > > Also, how would we know the initial state to restore it to?
> > >
> > > The initial state is whatever the user set it to. It is something
> > > that can be toggled on and off via sysfs, and it defaults to true at
> > > initialization. In this case we are opting to toggle it off when
> > > VFIO is attached to the device. Restoring it after unloading the
> > > driver might be even more confusing since it is something the user
> > > could toggle at any time so a restore would end up overwriting that.
> >
> > I'm not really willing to sign up for the inevitable bug reports when
> > users can't figure out how to make their VFs work again in the host
> > after they've used the PF for userspace drivers with vfio-pci.  I
> > agree, both options are confusing, how do we make it not confusing?
> > Can PCI core reset the autoprobe attribute to the default at some
> > obvious point?  Thanks,
> >
> 
> I would like to confirm the scenario discussed here is to unload PF driver, right?
> Since users need to release all SR-IOV from PF driver first before PF driver is
> released, does it make sense to restore autoprobe when VFs are released?
> 
> > Alex
Alex Williamson Dec. 11, 2017, 6:13 p.m. UTC | #6
On Mon, 11 Dec 2017 14:22:30 +0000
"Wang, Liang-min" <liang-min.wang@intel.com> wrote:

> > -----Original Message-----
> > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > Sent: Friday, December 08, 2017 6:35 PM
> > To: Alexander Duyck <alexander.duyck@gmail.com>
> > Cc: Kirsher, Jeffrey T <jeffrey.t.kirsher@intel.com>; Wang, Liang-min <liang-  
> > min.wang@intel.com>; kvm@vger.kernel.org; linux-pci@vger.kernel.org; linux-  
> > kernel@vger.kernel.org; Bjorn Helgaas <bhelgaas@google.com>; Duyck,
> > Alexander H <alexander.h.duyck@intel.com>
> > Subject: Re: [PATCH v2] Enable SR-IOV instantiation through /sys file
> > 
> > On Fri, 8 Dec 2017 15:19:18 -0800
> > Alexander Duyck <alexander.duyck@gmail.com> wrote:
> >   
> > > On Fri, Dec 8, 2017 at 2:58 PM, Alex Williamson
> > > <alex.williamson@redhat.com> wrote:  
> > > > On Fri,  8 Dec 2017 13:47:58 -0800
> > > > Jeff Kirsher <jeffrey.t.kirsher@intel.com> wrote:
> > > >  
> > > >> From: Liang-Min Wang <liang-min.wang@intel.com>
> > > >>
> > > >> When a SR-IOV capable device is bound with vfio-pci, the
> > > >> device loses capability of creating SR-IOV instances through /sy/bus/
> > > >> pci/devices/.../sriov_numvfs. This patch re-activates this capability
> > > >> for a PCIe device that is SR-IOV capable and is bound with vfio-pci.ko.
> > > >> This patch also disables drivers_autoprobe attribute of SR-IOV devices
> > > >> created from vfio-pci bound device by default, so user-space PF device
> > > >> can coordinate the bring-up of SR-IOV devices
> > > >>
> > > >> Signed-off-by: Liang-Min Wang <liang-min.wang@intel.com>
> > > >> ---
> > > >>  drivers/pci/pci-driver.c    | 12 ++++++++++++
> > > >>  drivers/vfio/pci/vfio_pci.c | 22 ++++++++++++++++++++++
> > > >>  include/linux/pci.h         |  1 +
> > > >>  3 files changed, 35 insertions(+)
> > > >>
> > > >> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> > > >> index 7f47bb7..19522fe 100644
> > > >> --- a/drivers/pci/pci-driver.c
> > > >> +++ b/drivers/pci/pci-driver.c
> > > >> @@ -1467,6 +1467,18 @@ void pci_dev_put(struct pci_dev *dev)
> > > >>  }
> > > >>  EXPORT_SYMBOL(pci_dev_put);
> > > >>
> > > >> +/**
> > > >> + * pci_dev_sriov_autoprobe_set - set device sriov driver autoprobe
> > > >> + * @dev: device with which sriov autoprobe will be set
> > > >> + *
> > > >> + */
> > > >> +void pci_dev_sriov_autoprobe_set(struct pci_dev *dev, bool autoprobe)
> > > >> +{
> > > >> +     if (dev && dev->sriov)
> > > >> +             dev->sriov->drivers_autoprobe = autoprobe;
> > > >> +}
> > > >> +EXPORT_SYMBOL(pci_dev_sriov_autoprobe_set);  
> > > >
> > > > _GPL?
> > > >
> > > > It'd also be best to separate the pci and vfio changes into different
> > > > patches.  Bjorn would need to at least ack this PCI interface.
> > > >  
> > > >> +
> > > >>  static int pci_uevent(struct device *dev, struct kobj_uevent_env *env)
> > > >>  {
> > > >>       struct pci_dev *pdev;
> > > >> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> > > >> index f041b1a..004836c 100644
> > > >> --- a/drivers/vfio/pci/vfio_pci.c
> > > >> +++ b/drivers/vfio/pci/vfio_pci.c
> > > >> @@ -1213,6 +1213,8 @@ static int vfio_pci_probe(struct pci_dev *pdev,  
> > const struct pci_device_id *id)  
> > > >>               return -ENOMEM;
> > > >>       }
> > > >>
> > > >> +     /* disable sriov automatic driver attachment */
> > > >> +     pci_dev_sriov_autoprobe_set(pdev, false);  
> > > >
> > > > This looks stateful, VF autoprobe is not restored on release.  Also,
> > > > how would we know the initial state to restore it to?  
> > >
> > > The initial state is whatever the user set it to. It is something that
> > > can be toggled on and off via sysfs, and it defaults to true at
> > > initialization. In this case we are opting to toggle it off when VFIO
> > > is attached to the device. Restoring it after unloading the driver
> > > might be even more confusing since it is something the user could
> > > toggle at any time so a restore would end up overwriting that.  
> > 
> > I'm not really willing to sign up for the inevitable bug reports when
> > users can't figure out how to make their VFs work again in the host
> > after they've used the PF for userspace drivers with vfio-pci.  I
> > agree, both options are confusing, how do we make it not confusing?
> > Can PCI core reset the autoprobe attribute to the default at some
> > obvious point?  Thanks,
> >   
> 
> I would like to confirm the scenario discussed here is to unload PF driver, right?
> Since users need to release all SR-IOV from PF driver first before PF driver is
> released, does it make sense to restore autoprobe when VFs are released?

We don't know that VFs will be created, but perhaps you mean the point
at which SR-IOV would be disabled in the PF unbind process.  Still, I
think you're left with userspace and kernel-space both trying to use
the same control bit and they're going to step on each other and create
corner cases that are inconsistent.  It's not necessarily even clear
how using separate trackers solves it since the kernel isn't
necessarily behaving in the way the user directed.  Maybe the user
interface to autoprobe needs to be locked out if the PF driver disables
it.

Didn't we also discuss whether or not it's safe for vfio to enable
SR-IOV while the PF is in use by a user?  Does that all fall into the
"Dr. it hurts when I do this..." category and contributes to tainting
the kernel when enabled?  Definitely worth some comment words to
describe the various considerations contributing to kernel tainting.
Thanks,

Alex
Wang, Liang-min Dec. 11, 2017, 6:58 p.m. UTC | #7
> -----Original Message-----
> From: Alex Williamson [mailto:alex.williamson@redhat.com]
> Sent: Monday, December 11, 2017 1:13 PM
> To: Wang, Liang-min <liang-min.wang@intel.com>
> Cc: Alexander Duyck <alexander.duyck@gmail.com>; Kirsher, Jeffrey T
> <jeffrey.t.kirsher@intel.com>; kvm@vger.kernel.org; linux-
> pci@vger.kernel.org; linux-kernel@vger.kernel.org; Bjorn Helgaas
> <bhelgaas@google.com>; Duyck, Alexander H <alexander.h.duyck@intel.com>
> Subject: Re: [PATCH v2] Enable SR-IOV instantiation through /sys file
> 
> On Mon, 11 Dec 2017 14:22:30 +0000
> "Wang, Liang-min" <liang-min.wang@intel.com> wrote:
> 
> > > -----Original Message-----
> > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > Sent: Friday, December 08, 2017 6:35 PM
> > > To: Alexander Duyck <alexander.duyck@gmail.com>
> > > Cc: Kirsher, Jeffrey T <jeffrey.t.kirsher@intel.com>; Wang, Liang-min
> <liang-
> > > min.wang@intel.com>; kvm@vger.kernel.org; linux-pci@vger.kernel.org;
> linux-
> > > kernel@vger.kernel.org; Bjorn Helgaas <bhelgaas@google.com>; Duyck,
> > > Alexander H <alexander.h.duyck@intel.com>
> > > Subject: Re: [PATCH v2] Enable SR-IOV instantiation through /sys file
> > >
> > > On Fri, 8 Dec 2017 15:19:18 -0800
> > > Alexander Duyck <alexander.duyck@gmail.com> wrote:
> > >
> > > > On Fri, Dec 8, 2017 at 2:58 PM, Alex Williamson
> > > > <alex.williamson@redhat.com> wrote:
> > > > > On Fri,  8 Dec 2017 13:47:58 -0800
> > > > > Jeff Kirsher <jeffrey.t.kirsher@intel.com> wrote:
> > > > >
> > > > >> From: Liang-Min Wang <liang-min.wang@intel.com>
> > > > >>
> > > > >> When a SR-IOV capable device is bound with vfio-pci, the
> > > > >> device loses capability of creating SR-IOV instances through /sy/bus/
> > > > >> pci/devices/.../sriov_numvfs. This patch re-activates this capability
> > > > >> for a PCIe device that is SR-IOV capable and is bound with vfio-pci.ko.
> > > > >> This patch also disables drivers_autoprobe attribute of SR-IOV devices
> > > > >> created from vfio-pci bound device by default, so user-space PF device
> > > > >> can coordinate the bring-up of SR-IOV devices
> > > > >>
> > > > >> Signed-off-by: Liang-Min Wang <liang-min.wang@intel.com>
> > > > >> ---
> > > > >>  drivers/pci/pci-driver.c    | 12 ++++++++++++
> > > > >>  drivers/vfio/pci/vfio_pci.c | 22 ++++++++++++++++++++++
> > > > >>  include/linux/pci.h         |  1 +
> > > > >>  3 files changed, 35 insertions(+)
> > > > >>
> > > > >> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> > > > >> index 7f47bb7..19522fe 100644
> > > > >> --- a/drivers/pci/pci-driver.c
> > > > >> +++ b/drivers/pci/pci-driver.c
> > > > >> @@ -1467,6 +1467,18 @@ void pci_dev_put(struct pci_dev *dev)
> > > > >>  }
> > > > >>  EXPORT_SYMBOL(pci_dev_put);
> > > > >>
> > > > >> +/**
> > > > >> + * pci_dev_sriov_autoprobe_set - set device sriov driver autoprobe
> > > > >> + * @dev: device with which sriov autoprobe will be set
> > > > >> + *
> > > > >> + */
> > > > >> +void pci_dev_sriov_autoprobe_set(struct pci_dev *dev, bool
> autoprobe)
> > > > >> +{
> > > > >> +     if (dev && dev->sriov)
> > > > >> +             dev->sriov->drivers_autoprobe = autoprobe;
> > > > >> +}
> > > > >> +EXPORT_SYMBOL(pci_dev_sriov_autoprobe_set);
> > > > >
> > > > > _GPL?
> > > > >
> > > > > It'd also be best to separate the pci and vfio changes into different
> > > > > patches.  Bjorn would need to at least ack this PCI interface.
> > > > >
> > > > >> +
> > > > >>  static int pci_uevent(struct device *dev, struct kobj_uevent_env *env)
> > > > >>  {
> > > > >>       struct pci_dev *pdev;
> > > > >> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> > > > >> index f041b1a..004836c 100644
> > > > >> --- a/drivers/vfio/pci/vfio_pci.c
> > > > >> +++ b/drivers/vfio/pci/vfio_pci.c
> > > > >> @@ -1213,6 +1213,8 @@ static int vfio_pci_probe(struct pci_dev
> *pdev,
> > > const struct pci_device_id *id)
> > > > >>               return -ENOMEM;
> > > > >>       }
> > > > >>
> > > > >> +     /* disable sriov automatic driver attachment */
> > > > >> +     pci_dev_sriov_autoprobe_set(pdev, false);
> > > > >
> > > > > This looks stateful, VF autoprobe is not restored on release.  Also,
> > > > > how would we know the initial state to restore it to?
> > > >
> > > > The initial state is whatever the user set it to. It is something that
> > > > can be toggled on and off via sysfs, and it defaults to true at
> > > > initialization. In this case we are opting to toggle it off when VFIO
> > > > is attached to the device. Restoring it after unloading the driver
> > > > might be even more confusing since it is something the user could
> > > > toggle at any time so a restore would end up overwriting that.
> > >
> > > I'm not really willing to sign up for the inevitable bug reports when
> > > users can't figure out how to make their VFs work again in the host
> > > after they've used the PF for userspace drivers with vfio-pci.  I
> > > agree, both options are confusing, how do we make it not confusing?
> > > Can PCI core reset the autoprobe attribute to the default at some
> > > obvious point?  Thanks,
> > >
> >
> > I would like to confirm the scenario discussed here is to unload PF driver,
> right?
> > Since users need to release all SR-IOV from PF driver first before PF driver is
> > released, does it make sense to restore autoprobe when VFs are released?
> 
> We don't know that VFs will be created, but perhaps you mean the point
> at which SR-IOV would be disabled in the PF unbind process.  Still, I
> think you're left with userspace and kernel-space both trying to use
> the same control bit and they're going to step on each other and create
> corner cases that are inconsistent.  It's not necessarily even clear
> how using separate trackers solves it since the kernel isn't
> necessarily behaving in the way the user directed.  Maybe the user
> interface to autoprobe needs to be locked out if the PF driver disables
> it.
 
To ensure there is no overlap between user-space applications to modify
autoprobe bit while kernel is changing its value. One approach is to move
the overwrite and restore into vfio_sriov_configure like below:

static int vfio_sriov_configure(struct pci_dev *pdev, int num_vfs)
{
	int status;
	bool old_autoprobe;

	if (!num_vfs) {		
		pci_disable_sriov(pdev);
		return 0;
	}

	/*
	 To avoid newly created VF to be bound with driver when
	 user-space PF may not be ready, disable the SR-IOV
	 instance driver automatic attachment, and save the original
	 setting and restore the setting after SR-IOV are created.
	 */
	old_autoprobe = pci_dev_sriov_autoprobe_set(pdev, vdev->sriov_driver_autoprobe);

	status = pci_enable_sriov(pdev, num_vfs);
	if (!status) {
		dev_crit("Created %d SR-IOV from a user-space driver based upon vfio-pci\n"
			"These VFs are not bound with driver when they are created.\n"
			"User needs to bring up user-space PF driver first,"
			" then bind new VFs with respective driver to"
			" ensure there is a PF driver to respond any VF request\n", num_vfs);
		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
	}
	pci_dev_sriov_autoprobe_set(pdev, old_autoprobe);

Would that resolve the issue as described? If not, could there be an example to illustrate the scenario that was described.

Larry

> 
> Didn't we also discuss whether or not it's safe for vfio to enable
> SR-IOV while the PF is in use by a user?  Does that all fall into the
> "Dr. it hurts when I do this..." category and contributes to tainting
> the kernel when enabled?  Definitely worth some comment words to
> describe the various considerations contributing to kernel tainting.
> Thanks,
> 
> Alex
Bjorn Helgaas Jan. 10, 2018, 11:30 p.m. UTC | #8
On Fri, Dec 08, 2017 at 01:47:58PM -0800, Jeff Kirsher wrote:
> From: Liang-Min Wang <liang-min.wang@intel.com>
> 
> When a SR-IOV capable device is bound with vfio-pci, the
> device loses capability of creating SR-IOV instances through /sy/bus/
> pci/devices/.../sriov_numvfs. This patch re-activates this capability
> for a PCIe device that is SR-IOV capable and is bound with vfio-pci.ko.
> This patch also disables drivers_autoprobe attribute of SR-IOV devices
> created from vfio-pci bound device by default, so user-space PF device
> can coordinate the bring-up of SR-IOV devices

Seems like the discussion here petered out without a real conclusion,
so I'm going to drop this one for now and wait for another try.

> Signed-off-by: Liang-Min Wang <liang-min.wang@intel.com>
> ---
>  drivers/pci/pci-driver.c    | 12 ++++++++++++
>  drivers/vfio/pci/vfio_pci.c | 22 ++++++++++++++++++++++
>  include/linux/pci.h         |  1 +
>  3 files changed, 35 insertions(+)
> 
> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> index 7f47bb7..19522fe 100644
> --- a/drivers/pci/pci-driver.c
> +++ b/drivers/pci/pci-driver.c
> @@ -1467,6 +1467,18 @@ void pci_dev_put(struct pci_dev *dev)
>  }
>  EXPORT_SYMBOL(pci_dev_put);
>  
> +/**
> + * pci_dev_sriov_autoprobe_set - set device sriov driver autoprobe
> + * @dev: device with which sriov autoprobe will be set
> + *
> + */
> +void pci_dev_sriov_autoprobe_set(struct pci_dev *dev, bool autoprobe)
> +{
> +	if (dev && dev->sriov)
> +		dev->sriov->drivers_autoprobe = autoprobe;
> +}
> +EXPORT_SYMBOL(pci_dev_sriov_autoprobe_set);
> +
>  static int pci_uevent(struct device *dev, struct kobj_uevent_env *env)
>  {
>  	struct pci_dev *pdev;
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index f041b1a..004836c 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -1213,6 +1213,8 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  		return -ENOMEM;
>  	}
>  
> +	/* disable sriov automatic driver attachment */
> +	pci_dev_sriov_autoprobe_set(pdev, false);
>  	vdev->pdev = pdev;
>  	vdev->irq_type = VFIO_PCI_NUM_IRQS;
>  	mutex_init(&vdev->igate);
> @@ -1256,6 +1258,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
>  	if (!vdev)
>  		return;
>  
> +	pci_disable_sriov(pdev);
>  	vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
>  	kfree(vdev->region);
>  	kfree(vdev);
> @@ -1303,12 +1306,31 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
>  	.error_detected = vfio_pci_aer_err_detected,
>  };
>  
> +static int vfio_sriov_configure(struct pci_dev *pdev, int num_vfs)
> +{
> +	int status;
> +
> +	if (!num_vfs) {
> +		pci_disable_sriov(pdev);
> +		return 0;
> +	}
> +
> +	status = pci_enable_sriov(pdev, num_vfs);
> +	if (!status) {
> +		pr_crit("Created %d SR-IOV from a user-space driver based upon vfio-pci\n", num_vfs);
> +		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> +	}
> +
> +	return status;
> +}
> +
>  static struct pci_driver vfio_pci_driver = {
>  	.name		= "vfio-pci",
>  	.id_table	= NULL, /* only dynamic ids */
>  	.probe		= vfio_pci_probe,
>  	.remove		= vfio_pci_remove,
>  	.err_handler	= &vfio_err_handlers,
> +	.sriov_configure = vfio_sriov_configure,
>  };
>  
>  struct vfio_devices {
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 0403894..e04b69d 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -921,6 +921,7 @@ struct resource *pci_find_parent_resource(const struct pci_dev *dev,
>  struct pci_dev *pci_dev_get(struct pci_dev *dev);
>  void pci_dev_put(struct pci_dev *dev);
>  void pci_remove_bus(struct pci_bus *b);
> +void pci_dev_sriov_autoprobe_set(struct pci_dev *dev, bool autoprobe);
>  void pci_stop_and_remove_bus_device(struct pci_dev *dev);
>  void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev);
>  void pci_stop_root_bus(struct pci_bus *bus);
> -- 
> 1.9.1
>
diff mbox

Patch

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 7f47bb7..19522fe 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -1467,6 +1467,18 @@  void pci_dev_put(struct pci_dev *dev)
 }
 EXPORT_SYMBOL(pci_dev_put);
 
+/**
+ * pci_dev_sriov_autoprobe_set - set device sriov driver autoprobe
+ * @dev: device with which sriov autoprobe will be set
+ *
+ */
+void pci_dev_sriov_autoprobe_set(struct pci_dev *dev, bool autoprobe)
+{
+	if (dev && dev->sriov)
+		dev->sriov->drivers_autoprobe = autoprobe;
+}
+EXPORT_SYMBOL(pci_dev_sriov_autoprobe_set);
+
 static int pci_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	struct pci_dev *pdev;
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index f041b1a..004836c 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1213,6 +1213,8 @@  static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return -ENOMEM;
 	}
 
+	/* disable sriov automatic driver attachment */
+	pci_dev_sriov_autoprobe_set(pdev, false);
 	vdev->pdev = pdev;
 	vdev->irq_type = VFIO_PCI_NUM_IRQS;
 	mutex_init(&vdev->igate);
@@ -1256,6 +1258,7 @@  static void vfio_pci_remove(struct pci_dev *pdev)
 	if (!vdev)
 		return;
 
+	pci_disable_sriov(pdev);
 	vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
 	kfree(vdev->region);
 	kfree(vdev);
@@ -1303,12 +1306,31 @@  static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
 	.error_detected = vfio_pci_aer_err_detected,
 };
 
+static int vfio_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+	int status;
+
+	if (!num_vfs) {
+		pci_disable_sriov(pdev);
+		return 0;
+	}
+
+	status = pci_enable_sriov(pdev, num_vfs);
+	if (!status) {
+		pr_crit("Created %d SR-IOV from a user-space driver based upon vfio-pci\n", num_vfs);
+		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+	}
+
+	return status;
+}
+
 static struct pci_driver vfio_pci_driver = {
 	.name		= "vfio-pci",
 	.id_table	= NULL, /* only dynamic ids */
 	.probe		= vfio_pci_probe,
 	.remove		= vfio_pci_remove,
 	.err_handler	= &vfio_err_handlers,
+	.sriov_configure = vfio_sriov_configure,
 };
 
 struct vfio_devices {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0403894..e04b69d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -921,6 +921,7 @@  struct resource *pci_find_parent_resource(const struct pci_dev *dev,
 struct pci_dev *pci_dev_get(struct pci_dev *dev);
 void pci_dev_put(struct pci_dev *dev);
 void pci_remove_bus(struct pci_bus *b);
+void pci_dev_sriov_autoprobe_set(struct pci_dev *dev, bool autoprobe);
 void pci_stop_and_remove_bus_device(struct pci_dev *dev);
 void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev);
 void pci_stop_root_bus(struct pci_bus *bus);