diff mbox series

[1/2] PCI: Add VF reset notification to PF's VFIO user mode driver

Message ID 20240205071538.2665628-1-Emily.Deng@amd.com (mailing list archive)
State New, archived
Headers show
Series [1/2] PCI: Add VF reset notification to PF's VFIO user mode driver | expand

Commit Message

Emily Deng Feb. 5, 2024, 7:15 a.m. UTC
VF doesn't have the ability to reset itself completely which will cause the
hardware in unstable state. So notify PF driver when the VF has been reset
to let the PF resets the VF completely, and remove the VF out of schedule.

How to implement this?
Add the reset callback function in pci_driver

Implement the callback functin in VFIO_PCI driver.

Add the VF RESET IRQ for user mode driver to let the user mode driver
know the VF has been reset.

Signed-off-by: Emily Deng <Emily.Deng@amd.com>
---
 drivers/pci/pci.c   | 8 ++++++++
 include/linux/pci.h | 1 +
 2 files changed, 9 insertions(+)

Comments

Leon Romanovsky Feb. 5, 2024, 9:04 a.m. UTC | #1
On Mon, Feb 05, 2024 at 03:15:37PM +0800, Emily Deng wrote:
> VF doesn't have the ability to reset itself completely which will cause the
> hardware in unstable state. So notify PF driver when the VF has been reset
> to let the PF resets the VF completely, and remove the VF out of schedule.


I'm sorry but this explanation is not different from the previous
version. Please provide a better explanation of the problem, why it is
needed, which VFs need and can't reset themselves, how and why it worked
before e.t.c.

In addition, please follow kernel submission guidelines, write
changelong, add versions, cover letter e.t.c.

Thanks

> 
> How to implement this?
> Add the reset callback function in pci_driver
> 
> Implement the callback functin in VFIO_PCI driver.
> 
> Add the VF RESET IRQ for user mode driver to let the user mode driver
> know the VF has been reset.
> 
> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
> ---
>  drivers/pci/pci.c   | 8 ++++++++
>  include/linux/pci.h | 1 +
>  2 files changed, 9 insertions(+)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 60230da957e0..aca937b05531 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -4780,6 +4780,14 @@ EXPORT_SYMBOL_GPL(pcie_flr);
>   */
>  int pcie_reset_flr(struct pci_dev *dev, bool probe)
>  {
> +	struct pci_dev *pf_dev;
> +
> +	if (dev->is_virtfn) {
> +		pf_dev = dev->physfn;
> +		if (pf_dev->driver->sriov_vf_reset_notification)
> +			pf_dev->driver->sriov_vf_reset_notification(pf_dev, dev);
> +	}
> +
>  	if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET)
>  		return -ENOTTY;
>  
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index c69a2cc1f412..4fa31d9b0aa7 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -926,6 +926,7 @@ struct pci_driver {
>  	int  (*sriov_configure)(struct pci_dev *dev, int num_vfs); /* On PF */
>  	int  (*sriov_set_msix_vec_count)(struct pci_dev *vf, int msix_vec_count); /* On PF */
>  	u32  (*sriov_get_vf_total_msix)(struct pci_dev *pf);
> +	void  (*sriov_vf_reset_notification)(struct pci_dev *pf, struct pci_dev *vf);
>  	const struct pci_error_handlers *err_handler;
>  	const struct attribute_group **groups;
>  	const struct attribute_group **dev_groups;
> -- 
> 2.36.1
> 
>
Alex Williamson Feb. 5, 2024, 4:43 p.m. UTC | #2
On Mon, 5 Feb 2024 15:15:37 +0800
Emily Deng <Emily.Deng@amd.com> wrote:

> VF doesn't have the ability to reset itself completely which will cause the
> hardware in unstable state. So notify PF driver when the VF has been reset
> to let the PF resets the VF completely, and remove the VF out of schedule.
> 
> How to implement this?
> Add the reset callback function in pci_driver
> 
> Implement the callback functin in VFIO_PCI driver.
> 
> Add the VF RESET IRQ for user mode driver to let the user mode driver
> know the VF has been reset.

The solution that already exists for this sort of issue is a vfio-pci
variant driver for the VF which communicates with an in-kernel PF
driver to coordinate the VF FLR with the PF driver.  This can be done
by intercepting the userspace access to the VF FLR config space region.

This solution of involving PCI-core and extending the vfio-pci interface
only exists for userspace PF drivers.  I don't see that facilitating
vendors to implement their PF drivers in userspace to avoid upstreaming
is a compelling reason to extend the vfio-pci interface.  Thanks,

Alex
 
> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
> ---
>  drivers/pci/pci.c   | 8 ++++++++
>  include/linux/pci.h | 1 +
>  2 files changed, 9 insertions(+)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 60230da957e0..aca937b05531 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -4780,6 +4780,14 @@ EXPORT_SYMBOL_GPL(pcie_flr);
>   */
>  int pcie_reset_flr(struct pci_dev *dev, bool probe)
>  {
> +	struct pci_dev *pf_dev;
> +
> +	if (dev->is_virtfn) {
> +		pf_dev = dev->physfn;
> +		if (pf_dev->driver->sriov_vf_reset_notification)
> +			pf_dev->driver->sriov_vf_reset_notification(pf_dev, dev);
> +	}
> +
>  	if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET)
>  		return -ENOTTY;
>  
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index c69a2cc1f412..4fa31d9b0aa7 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -926,6 +926,7 @@ struct pci_driver {
>  	int  (*sriov_configure)(struct pci_dev *dev, int num_vfs); /* On PF */
>  	int  (*sriov_set_msix_vec_count)(struct pci_dev *vf, int msix_vec_count); /* On PF */
>  	u32  (*sriov_get_vf_total_msix)(struct pci_dev *pf);
> +	void  (*sriov_vf_reset_notification)(struct pci_dev *pf, struct pci_dev *vf);
>  	const struct pci_error_handlers *err_handler;
>  	const struct attribute_group **groups;
>  	const struct attribute_group **dev_groups;
Alex Williamson Feb. 6, 2024, 7:38 p.m. UTC | #3
On Tue, 6 Feb 2024 04:03:46 +0000
"Liu, Monk" <Monk.Liu@amd.com> wrote:

> [AMD Official Use Only - General]
> 
> Hi Alex
> 
> Thanks for your comment, I’m still not quite following you.
> 
> >>This can be done by intercepting the userspace access to the VF FLR config space region.  
> 
> Would you mind let us know to do that ?

See basically any of the vfio-pci variant drivers in drivers/vfio/pci/*/

For instance the virtio-vfio-pci driver is emulating a PCI IO BAR in
config space and accesses through that BAR interact with the in-kernel
PF driver.

> our scenario is that:
> 1, vf already pass-throughed to qemu
> 2, a user mode driver on PF’s vfio arch is running there, and it want
> to receive VF’s reset request (by Qemu through vfio interface), and
> do some hw/fw sequences to achieve the real Vf reset goal
> 
> >>.  I don't see that facilitating vendors to implement their PF
> >>drivers in userspace to avoid upstreaming  
> is a compelling reason to extend the vfio-pci interface.
> 
> Some background here (our user mode PF driver is not given the
> purpose to avoid upstream): We don’t see value to upstream a user
> mode pf driver that only benefit AMD device, as it must be out of
> kernel-tree so we don’t see where the appropriate repo for it is to
> upstream to … (we cannot make it in Qemu right ? otherwise Qemu will
> be over designed if it knows hw/fw details of vendors)

An in-kernel driver within the mainline kernel is the right place for
it.  I'm not asking to upstream a user mode driver, you're right that
there is no repo for that, I'm questioning the motivation for making it
a user mode PF driver in the first place.

All device drivers are essentially meant to benefit the device vendor,
but in-kernel drivers also offer a benefit to the community and users of
those devices for ongoing support and development.

Let me turn the question around, what benefit does it provide this
PF driver to exist in userspace?

Without having seen it, I'd venture that there's nothing this userspace
PF driver could do that couldn't also be done via a kernel driver,
either in-tree or out-of-tree, but the userspace driver avoids the
upstreaming work of an in-tree driver and the tainting of an
out-of-tree driver.
 
> Isn’t VFIO arch intentionally to give user mode driver freedom and
> ability to manipulate the HW ?

VFIO is not intended to provide an alternative means to implement
kernel drivers.  VFIO is intended to provide secure, isolated access to
devices for userspace drivers.

What's the isolation relative to the VF if this PF driver needs to be
involved in reset?  How much VF data does the PF device have access to?

This is the reason that vfio-pci introduced the vf-token barrier with
SR-IOV support.  The intention of this vf-token is to indicate a gap in
trust.  Only another userspace process knowing the vf-token configured
by the PF userspace driver can access the device.  We should not
normalize vf-tokens.

The requirement of a vf-token for a driver not strictly developed
alongside the PF userspace driver should effectively be considered a
tainted device.

Therefore, what does this userspace PF driver offer that isn't better
reflected using the existing vfio-pci-core code split to implement a
vfio-pci variant driver, which has no need for the extension proposed
here?  Thanks,

Alex

> From: Alex Williamson <alex.williamson@redhat.com>
> Date: Tuesday, February 6, 2024 at 00:43
> To: Deng, Emily <Emily.Deng@amd.com>
> Cc: bhelgaas@google.com <bhelgaas@google.com>,
> linux-pci@vger.kernel.org <linux-pci@vger.kernel.org>,
> linux-kernel@vger.kernel.org <linux-kernel@vger.kernel.org>,
> kvm@vger.kernel.org <kvm@vger.kernel.org>, Jiang, Jerry (SW)
> <Jerry.Jiang@amd.com>, Zhang, Andy <Andy.Zhang@amd.com>, Chang,
> HaiJun <HaiJun.Chang@amd.com>, Liu, Monk <Monk.Liu@amd.com>, Chen,
> Horace <Horace.Chen@amd.com>, Yin, ZhenGuo (Chris)
> <ZhenGuo.Yin@amd.com> Subject: Re: [PATCH 1/2] PCI: Add VF reset
> notification to PF's VFIO user mode driver On Mon, 5 Feb 2024
> 15:15:37 +0800 Emily Deng <Emily.Deng@amd.com> wrote:
> 
> > VF doesn't have the ability to reset itself completely which will
> > cause the hardware in unstable state. So notify PF driver when the
> > VF has been reset to let the PF resets the VF completely, and
> > remove the VF out of schedule.
> >
> > How to implement this?
> > Add the reset callback function in pci_driver
> >
> > Implement the callback functin in VFIO_PCI driver.
> >
> > Add the VF RESET IRQ for user mode driver to let the user mode
> > driver know the VF has been reset.  
> 
> The solution that already exists for this sort of issue is a vfio-pci
> variant driver for the VF which communicates with an in-kernel PF
> driver to coordinate the VF FLR with the PF driver.  This can be done
> by intercepting the userspace access to the VF FLR config space
> region.
> 
> This solution of involving PCI-core and extending the vfio-pci
> interface only exists for userspace PF drivers.  I don't see that
> facilitating vendors to implement their PF drivers in userspace to
> avoid upstreaming is a compelling reason to extend the vfio-pci
> interface.  Thanks,
> 
> Alex
> 
> > Signed-off-by: Emily Deng <Emily.Deng@amd.com>
> > ---
> >  drivers/pci/pci.c   | 8 ++++++++
> >  include/linux/pci.h | 1 +
> >  2 files changed, 9 insertions(+)
> >
> > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> > index 60230da957e0..aca937b05531 100644
> > --- a/drivers/pci/pci.c
> > +++ b/drivers/pci/pci.c
> > @@ -4780,6 +4780,14 @@ EXPORT_SYMBOL_GPL(pcie_flr);
> >   */
> >  int pcie_reset_flr(struct pci_dev *dev, bool probe)
> >  {
> > +     struct pci_dev *pf_dev;
> > +
> > +     if (dev->is_virtfn) {
> > +             pf_dev = dev->physfn;
> > +             if (pf_dev->driver->sriov_vf_reset_notification)
> > +
> > pf_dev->driver->sriov_vf_reset_notification(pf_dev, dev);
> > +     }
> > +
> >        if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET)
> >                return -ENOTTY;
> >
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index c69a2cc1f412..4fa31d9b0aa7 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -926,6 +926,7 @@ struct pci_driver {
> >        int  (*sriov_configure)(struct pci_dev *dev, int num_vfs);
> > /* On PF */ int  (*sriov_set_msix_vec_count)(struct pci_dev *vf,
> > int msix_vec_count); /* On PF */ u32
> > (*sriov_get_vf_total_msix)(struct pci_dev *pf);
> > +     void  (*sriov_vf_reset_notification)(struct pci_dev *pf,
> > struct pci_dev *vf); const struct pci_error_handlers *err_handler;
> >        const struct attribute_group **groups;
> >        const struct attribute_group **dev_groups;
Alex Williamson Feb. 6, 2024, 8:06 p.m. UTC | #4
On Tue, 6 Feb 2024 04:08:18 +0000
"Liu, Monk" <Monk.Liu@amd.com> wrote:

> [AMD Official Use Only - General]
> 
> Hi Leon
> 
> The thing is when qemu reset a VM it calls vfio’s reset ioctl to the
> given VF device, and in kernel the VFIO-pci module will do the reset
> to that VF device via its PCI config space register, but
> unfortunately our VF GPU isnot designed to support those
> “reset”/”flr” commands … not supported by the VF, (and even many PF
> cannot handle those commands well)

PFs are not required to implement FLR, VFs are.

SR-IOV spec, rev. 1.1:

	2.2.2. FLR That Targets a VF

	VFs must support Function Level Reset (FLR).

> 
> So the idea we can cook up is to move those Vf’s reset notification
> to our PF driver (which is a user mode driver running on PF’s VFIO
> arch), and our user mode driver can program HW and do the reset for
> that VF.

The PF driver being able to arbitrarily reset a VF device provided to
another userspace process doesn't sound like separate, isolated
devices.  What else can the PF access?

As noted in my other reply, vf-tokens should not be normalized and
users of VFs provided by third party userspace PF drivers should
consider the device no less tainted than if it were provided by an
out-of-tree kernel driver.

The idea to virtualize FLR on the VF to resolve the hardware defect is a
good one, but it should be done in the context of a vfio-pci variant
driver.  Thanks,

Alex


> From: Leon Romanovsky <leon@kernel.org>
> Date: Monday, February 5, 2024 at 17:04
> To: Deng, Emily <Emily.Deng@amd.com>
> Cc: bhelgaas@google.com <bhelgaas@google.com>,
> alex.williamson@redhat.com <alex.williamson@redhat.com>,
> linux-pci@vger.kernel.org <linux-pci@vger.kernel.org>,
> linux-kernel@vger.kernel.org <linux-kernel@vger.kernel.org>,
> kvm@vger.kernel.org <kvm@vger.kernel.org>, Jiang, Jerry (SW)
> <Jerry.Jiang@amd.com>, Zhang, Andy <Andy.Zhang@amd.com>, Chang,
> HaiJun <HaiJun.Chang@amd.com>, Liu, Monk <Monk.Liu@amd.com>, Chen,
> Horace <Horace.Chen@amd.com>, Yin, ZhenGuo (Chris)
> <ZhenGuo.Yin@amd.com> Subject: Re: [PATCH 1/2] PCI: Add VF reset
> notification to PF's VFIO user mode driver On Mon, Feb 05, 2024 at
> 03:15:37PM +0800, Emily Deng wrote:
> > VF doesn't have the ability to reset itself completely which will
> > cause the hardware in unstable state. So notify PF driver when the
> > VF has been reset to let the PF resets the VF completely, and
> > remove the VF out of schedule.  
> 
> 
> I'm sorry but this explanation is not different from the previous
> version. Please provide a better explanation of the problem, why it is
> needed, which VFs need and can't reset themselves, how and why it
> worked before e.t.c.
> 
> In addition, please follow kernel submission guidelines, write
> changelong, add versions, cover letter e.t.c.
> 
> Thanks
> 
> >
> > How to implement this?
> > Add the reset callback function in pci_driver
> >
> > Implement the callback functin in VFIO_PCI driver.
> >
> > Add the VF RESET IRQ for user mode driver to let the user mode
> > driver know the VF has been reset.
> >
> > Signed-off-by: Emily Deng <Emily.Deng@amd.com>
> > ---
> >  drivers/pci/pci.c   | 8 ++++++++
> >  include/linux/pci.h | 1 +
> >  2 files changed, 9 insertions(+)
> >
> > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> > index 60230da957e0..aca937b05531 100644
> > --- a/drivers/pci/pci.c
> > +++ b/drivers/pci/pci.c
> > @@ -4780,6 +4780,14 @@ EXPORT_SYMBOL_GPL(pcie_flr);
> >   */
> >  int pcie_reset_flr(struct pci_dev *dev, bool probe)
> >  {
> > +     struct pci_dev *pf_dev;
> > +
> > +     if (dev->is_virtfn) {
> > +             pf_dev = dev->physfn;
> > +             if (pf_dev->driver->sriov_vf_reset_notification)
> > +
> > pf_dev->driver->sriov_vf_reset_notification(pf_dev, dev);
> > +     }
> > +
> >        if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET)
> >                return -ENOTTY;
> >
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index c69a2cc1f412..4fa31d9b0aa7 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -926,6 +926,7 @@ struct pci_driver {
> >        int  (*sriov_configure)(struct pci_dev *dev, int num_vfs);
> > /* On PF */ int  (*sriov_set_msix_vec_count)(struct pci_dev *vf,
> > int msix_vec_count); /* On PF */ u32
> > (*sriov_get_vf_total_msix)(struct pci_dev *pf);
> > +     void  (*sriov_vf_reset_notification)(struct pci_dev *pf,
> > struct pci_dev *vf); const struct pci_error_handlers *err_handler;
> >        const struct attribute_group **groups;
> >        const struct attribute_group **dev_groups;
> > --
> > 2.36.1
> >
> >
diff mbox series

Patch

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 60230da957e0..aca937b05531 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4780,6 +4780,14 @@  EXPORT_SYMBOL_GPL(pcie_flr);
  */
 int pcie_reset_flr(struct pci_dev *dev, bool probe)
 {
+	struct pci_dev *pf_dev;
+
+	if (dev->is_virtfn) {
+		pf_dev = dev->physfn;
+		if (pf_dev->driver->sriov_vf_reset_notification)
+			pf_dev->driver->sriov_vf_reset_notification(pf_dev, dev);
+	}
+
 	if (dev->dev_flags & PCI_DEV_FLAGS_NO_FLR_RESET)
 		return -ENOTTY;
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c69a2cc1f412..4fa31d9b0aa7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -926,6 +926,7 @@  struct pci_driver {
 	int  (*sriov_configure)(struct pci_dev *dev, int num_vfs); /* On PF */
 	int  (*sriov_set_msix_vec_count)(struct pci_dev *vf, int msix_vec_count); /* On PF */
 	u32  (*sriov_get_vf_total_msix)(struct pci_dev *pf);
+	void  (*sriov_vf_reset_notification)(struct pci_dev *pf, struct pci_dev *vf);
 	const struct pci_error_handlers *err_handler;
 	const struct attribute_group **groups;
 	const struct attribute_group **dev_groups;