diff mbox series

[v2] vfio-pci: Accept a non-zero open_count on reset

Message ID 20221026194245.1769-1-ajderossi@gmail.com (mailing list archive)
State Superseded
Headers show
Series [v2] vfio-pci: Accept a non-zero open_count on reset | expand

Commit Message

Anthony DeRossi Oct. 26, 2022, 7:42 p.m. UTC
The implementation of close_device() for vfio-pci inspects the
open_count of every device in the device set to determine whether a
reset is needed. This count is always non-zero for the device being
closed, effectively disabling the reset logic.

After commit 2cd8b14aaa66 ("vfio/pci: Move to the device set
infrastructure"), failure to create a new file for a device would cause
the reset to be skipped due to open_count being decremented after
calling close_device() in the error path.

After commit eadd86f835c6 ("vfio: Remove calls to
vfio_group_add_container_user()"), releasing a device would always skip
the reset due to an ordering change in vfio_device_fops_release().

Failing to reset the device leaves it in an unknown state, potentially
causing errors when it is bound to a different driver.

This issue was observed with a Radeon RX Vega 56 [1002:687f] (rev c3)
assigned to a Windows guest. After shutting down the guest, unbinding
the device from vfio-pci, and binding the device to amdgpu:

[  548.007102] [drm:psp_hw_start [amdgpu]] *ERROR* PSP create ring failed!
[  548.027174] [drm:psp_hw_init [amdgpu]] *ERROR* PSP firmware loading failed
[  548.027242] [drm:amdgpu_device_fw_loading [amdgpu]] *ERROR* hw_init of IP block <psp> failed -22
[  548.027306] amdgpu 0000:0a:00.0: amdgpu: amdgpu_device_ip_init failed
[  548.027308] amdgpu 0000:0a:00.0: amdgpu: Fatal error during GPU init

Fixes: 2cd8b14aaa66 ("vfio/pci: Move to the device set infrastructure")
Fixes: eadd86f835c6 ("vfio: Remove calls to vfio_group_add_container_user()")
Signed-off-by: Anthony DeRossi <ajderossi@gmail.com>
---
v1 -> v2:
- Changed reset behavior instead of open_count ordering
- Retitled from "vfio: Decrement open_count before close_device()"
v1: https://lore.kernel.org/kvm/20221025193820.4412-1-ajderossi@gmail.com/

 drivers/vfio/pci/vfio_pci_core.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

Comments

Tian, Kevin Nov. 1, 2022, 8:49 a.m. UTC | #1
> From: Anthony DeRossi <ajderossi@gmail.com>
> Sent: Thursday, October 27, 2022 3:43 AM
> 
> -static bool vfio_pci_dev_set_needs_reset(struct vfio_device_set *dev_set)
> +static bool vfio_pci_core_needs_reset(struct vfio_pci_core_device *vdev)
>  {
> +	struct vfio_device_set *dev_set = vdev->vdev.dev_set;
>  	struct vfio_pci_core_device *cur;
>  	bool needs_reset = false;
> 
> +	if (vdev->vdev.open_count > 1)
> +		return false;

WARN_ON()

> +
>  	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
> -		/* No VFIO device in the set can have an open device FD */
> -		if (cur->vdev.open_count)
> +		/* Only the VFIO device being reset can have an open FD */
> +		if (cur != vdev && cur->vdev.open_count)
>  			return false;

not caused by this patch but while at it...

open_count is defined not for driver use:

	/* Members below here are private, not for driver use */
	unsigned int index;
	struct device device;   /* device.kref covers object life circle */
	refcount_t refcount;    /* user count on registered device*/
	unsigned int open_count;
	struct completion comp;
	struct list_head group_next;
	struct list_head iommu_entry;

prefer to a wrapper or move it to the public section of vfio_device.
Jason Gunthorpe Nov. 1, 2022, 11:38 a.m. UTC | #2
On Tue, Nov 01, 2022 at 08:49:28AM +0000, Tian, Kevin wrote:
> > From: Anthony DeRossi <ajderossi@gmail.com>
> > Sent: Thursday, October 27, 2022 3:43 AM
> > 
> > -static bool vfio_pci_dev_set_needs_reset(struct vfio_device_set *dev_set)
> > +static bool vfio_pci_core_needs_reset(struct vfio_pci_core_device *vdev)
> >  {
> > +	struct vfio_device_set *dev_set = vdev->vdev.dev_set;
> >  	struct vfio_pci_core_device *cur;
> >  	bool needs_reset = false;
> > 
> > +	if (vdev->vdev.open_count > 1)
> > +		return false;
> 
> WARN_ON()
> 
> > +
> >  	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
> > -		/* No VFIO device in the set can have an open device FD */
> > -		if (cur->vdev.open_count)
> > +		/* Only the VFIO device being reset can have an open FD */
> > +		if (cur != vdev && cur->vdev.open_count)
> >  			return false;
> 
> not caused by this patch but while at it...
> 
> open_count is defined not for driver use:
> 
> 	/* Members below here are private, not for driver use */
> 	unsigned int index;
> 	struct device device;   /* device.kref covers object life circle */
> 	refcount_t refcount;    /* user count on registered device*/
> 	unsigned int open_count;
> 	struct completion comp;
> 	struct list_head group_next;
> 	struct list_head iommu_entry;
> 
> prefer to a wrapper or move it to the public section of vfio_device.

I've been meaning to take a deeper look, but I'm thinking vfio_pci
doesn't need open_count at all any more.

open_count was from before we changed the core code to call
open_device only once. If we are only a call chain from
open_device/close_device then we know that the open_count must be 1.

Jason
Anthony DeRossi Nov. 2, 2022, 6:08 a.m. UTC | #3
On Tue, Nov 1, 2022 at 8:49 AM Tian, Kevin <kevin.tian@intel.com> wrote:
> WARN_ON()

I sent an updated patch with this change. Thanks.

v3: https://lore.kernel.org/kvm/20221102055732.2110-1-ajderossi@gmail.com/

Anthony
Anthony DeRossi Nov. 2, 2022, 6:14 a.m. UTC | #4
On Tue, Nov 1, 2022 at 11:38 AM Jason Gunthorpe <jgg@ziepe.ca> wrote:
> I've been meaning to take a deeper look, but I'm thinking vfio_pci
> doesn't need open_count at all any more.

I spent some time looking at it, but I'm not very familiar with this
area.

None of the fields in vfio_pci_core_device look usable as a substitute
for open_count, but calling pci_is_enabled() on the PCI device might be
sufficient. pci_enable_device()/pci_disable_device() appear to be called
in the right locations in vfio_pci_core.

I'm happy to submit a separate patch to check pci_is_enabled() if the
PCI device life cycle makes sense.

Anthony
Alex Williamson Nov. 2, 2022, 9:45 p.m. UTC | #5
On Tue, 1 Nov 2022 08:38:21 -0300
Jason Gunthorpe <jgg@ziepe.ca> wrote:

> On Tue, Nov 01, 2022 at 08:49:28AM +0000, Tian, Kevin wrote:
> > > From: Anthony DeRossi <ajderossi@gmail.com>
> > > Sent: Thursday, October 27, 2022 3:43 AM
> > > 
> > > -static bool vfio_pci_dev_set_needs_reset(struct vfio_device_set *dev_set)
> > > +static bool vfio_pci_core_needs_reset(struct vfio_pci_core_device *vdev)
> > >  {
> > > +	struct vfio_device_set *dev_set = vdev->vdev.dev_set;
> > >  	struct vfio_pci_core_device *cur;
> > >  	bool needs_reset = false;
> > > 
> > > +	if (vdev->vdev.open_count > 1)
> > > +		return false;  
> > 
> > WARN_ON()
> >   
> > > +
> > >  	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
> > > -		/* No VFIO device in the set can have an open device FD */
> > > -		if (cur->vdev.open_count)
> > > +		/* Only the VFIO device being reset can have an open FD */
> > > +		if (cur != vdev && cur->vdev.open_count)
> > >  			return false;  
> > 
> > not caused by this patch but while at it...
> > 
> > open_count is defined not for driver use:
> > 
> > 	/* Members below here are private, not for driver use */
> > 	unsigned int index;
> > 	struct device device;   /* device.kref covers object life circle */
> > 	refcount_t refcount;    /* user count on registered device*/
> > 	unsigned int open_count;
> > 	struct completion comp;
> > 	struct list_head group_next;
> > 	struct list_head iommu_entry;
> > 
> > prefer to a wrapper or move it to the public section of vfio_device.  
> 
> I've been meaning to take a deeper look, but I'm thinking vfio_pci
> doesn't need open_count at all any more.
> 
> open_count was from before we changed the core code to call
> open_device only once. If we are only a call chain from
> open_device/close_device then we know that the open_count must be 1.

That accounts for the first test, we don't need to test open_count on
the calling device in this path, but the idea here is that we want to
test whether we're the last close_device among the set.  Not sure how
we'd do that w/o some visibility to open_count.  Maybe we need a
vfio_device_set_open_count() that when 1 we know we're the first open
or last close?  Thanks,

Alex
Alex Williamson Nov. 2, 2022, 9:58 p.m. UTC | #6
On Wed, 2 Nov 2022 06:14:24 +0000
Anthony DeRossi <ajderossi@gmail.com> wrote:

> On Tue, Nov 1, 2022 at 11:38 AM Jason Gunthorpe <jgg@ziepe.ca> wrote:
> > I've been meaning to take a deeper look, but I'm thinking vfio_pci
> > doesn't need open_count at all any more.  
> 
> I spent some time looking at it, but I'm not very familiar with this
> area.
> 
> None of the fields in vfio_pci_core_device look usable as a substitute
> for open_count, but calling pci_is_enabled() on the PCI device might be
> sufficient. pci_enable_device()/pci_disable_device() appear to be called
> in the right locations in vfio_pci_core.

I think that could work too, but of course it's PCI specific.  If we
had a vfio core helper to get the open count for the device set, that
would make it more universally available.  Thanks,

Alex
Jason Gunthorpe Nov. 3, 2022, 10:25 p.m. UTC | #7
On Wed, Nov 02, 2022 at 03:45:27PM -0600, Alex Williamson wrote:
> > open_count was from before we changed the core code to call
> > open_device only once. If we are only a call chain from
> > open_device/close_device then we know that the open_count must be 1.
> 
> That accounts for the first test, we don't need to test open_count on
> the calling device in this path, but the idea here is that we want to
> test whether we're the last close_device among the set.  Not sure how
> we'd do that w/o some visibility to open_count.  Maybe we need a
> vfio_device_set_open_count() that when 1 we know we're the first open
> or last close?  Thanks,

Right, we are checking the open count on all the devices in the set,
so I think that just this hunk is fine:

> > > > -		if (cur->vdev.open_count)
> > > > +		if (cur != vdev && cur->vdev.open_count)
> > > >  			return false;  

Because vfio_pci_dev_set_needs_reset() is always called from
open/close (which is how it picks up the devset lock), so we never
need to consider the current device by definition, it is always "just
being closed"

A little comment to explain this and that should be it?

Jason
Alex Williamson Nov. 3, 2022, 10:30 p.m. UTC | #8
On Thu, 3 Nov 2022 19:25:06 -0300
Jason Gunthorpe <jgg@ziepe.ca> wrote:

> On Wed, Nov 02, 2022 at 03:45:27PM -0600, Alex Williamson wrote:
> > > open_count was from before we changed the core code to call
> > > open_device only once. If we are only a call chain from
> > > open_device/close_device then we know that the open_count must be 1.  
> > 
> > That accounts for the first test, we don't need to test open_count on
> > the calling device in this path, but the idea here is that we want to
> > test whether we're the last close_device among the set.  Not sure how
> > we'd do that w/o some visibility to open_count.  Maybe we need a
> > vfio_device_set_open_count() that when 1 we know we're the first open
> > or last close?  Thanks,  
> 
> Right, we are checking the open count on all the devices in the set,
> so I think that just this hunk is fine:
> 
> > > > > -		if (cur->vdev.open_count)
> > > > > +		if (cur != vdev && cur->vdev.open_count)
> > > > >  			return false;    
> 
> Because vfio_pci_dev_set_needs_reset() is always called from
> open/close (which is how it picks up the devset lock), so we never
> need to consider the current device by definition, it is always "just
> being closed"
> 
> A little comment to explain this and that should be it?

Yes, but the open question Kevin raised was that open_count is listed
as private in the header, so we ought not to be looking at it at all
from vfio-pci-core unless we want to change that or provide an alternate
interface to it.  Thanks,

Alex
Anthony DeRossi Nov. 4, 2022, 8:06 p.m. UTC | #9
On Wed, Nov 2, 2022 at 9:59 PM Alex Williamson
<alex.williamson@redhat.com> wrote:
> I think that could work too, but of course it's PCI specific.  If we
> had a vfio core helper to get the open count for the device set, that
> would make it more universally available.

Thanks for the suggestion. I sent a new series with this change.

v4: https://lore.kernel.org/kvm/20221104195727.4629-1-ajderossi@gmail.com/

Anthony
diff mbox series

Patch

diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index badc9d828cac..bd50525b8a13 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -174,7 +174,7 @@  static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)
 }
 
 struct vfio_pci_group_info;
-static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
+static void vfio_pci_core_try_reset(struct vfio_pci_core_device *vdev);
 static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
 				      struct vfio_pci_group_info *groups);
 
@@ -667,7 +667,7 @@  void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
 out:
 	pci_disable_device(pdev);
 
-	vfio_pci_dev_set_try_reset(vdev->vdev.dev_set);
+	vfio_pci_core_try_reset(vdev);
 
 	/* Put the pm-runtime usage counter acquired during enable */
 	if (!disable_idle_d3)
@@ -2483,14 +2483,18 @@  static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
 	return ret;
 }
 
-static bool vfio_pci_dev_set_needs_reset(struct vfio_device_set *dev_set)
+static bool vfio_pci_core_needs_reset(struct vfio_pci_core_device *vdev)
 {
+	struct vfio_device_set *dev_set = vdev->vdev.dev_set;
 	struct vfio_pci_core_device *cur;
 	bool needs_reset = false;
 
+	if (vdev->vdev.open_count > 1)
+		return false;
+
 	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
-		/* No VFIO device in the set can have an open device FD */
-		if (cur->vdev.open_count)
+		/* Only the VFIO device being reset can have an open FD */
+		if (cur != vdev && cur->vdev.open_count)
 			return false;
 		needs_reset |= cur->needs_reset;
 	}
@@ -2498,19 +2502,20 @@  static bool vfio_pci_dev_set_needs_reset(struct vfio_device_set *dev_set)
 }
 
 /*
- * If a bus or slot reset is available for the provided dev_set and:
+ * If a bus or slot reset is available for the provided device and:
  *  - All of the devices affected by that bus or slot reset are unused
  *  - At least one of the affected devices is marked dirty via
  *    needs_reset (such as by lack of FLR support)
  * Then attempt to perform that bus or slot reset.
  */
-static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set)
+static void vfio_pci_core_try_reset(struct vfio_pci_core_device *vdev)
 {
+	struct vfio_device_set *dev_set = vdev->vdev.dev_set;
 	struct vfio_pci_core_device *cur;
 	struct pci_dev *pdev;
 	bool reset_done = false;
 
-	if (!vfio_pci_dev_set_needs_reset(dev_set))
+	if (!vfio_pci_core_needs_reset(vdev))
 		return;
 
 	pdev = vfio_pci_dev_set_resettable(dev_set);