Message ID | 20240412082121.33382-3-yi.l.liu@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio-pci support pasid attach/detach | expand |
> From: Liu, Yi L <yi.l.liu@intel.com> > Sent: Friday, April 12, 2024 4:21 PM > > void vfio_iommufd_physical_unbind(struct vfio_device *vdev) > { > + int pasid = 0; > + > lockdep_assert_held(&vdev->dev_set->lock); > > + while (!ida_is_empty(&vdev->pasids)) { > + pasid = ida_get_lowest(&vdev->pasids, pasid, INT_MAX); > + if (pasid < 0) > + break; WARN_ON as this shouldn't happen when ida is not empty. > > +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev, > + u32 pasid, u32 *pt_id) the name is too long. What about removing 'physical' as there is no plan (unlikely) to support pasid on mdev? > +{ > + int rc; > + > + lockdep_assert_held(&vdev->dev_set->lock); > + > + if (WARN_ON(!vdev->iommufd_device)) > + return -EINVAL; > + > + rc = ida_get_lowest(&vdev->pasids, pasid, pasid); > + if (rc == pasid) > + return iommufd_device_pasid_replace(vdev- > >iommufd_device, > + pasid, pt_id); > + > + rc = iommufd_device_pasid_attach(vdev->iommufd_device, pasid, > pt_id); > + if (rc) > + return rc; > + > + rc = ida_alloc_range(&vdev->pasids, pasid, pasid, GFP_KERNEL); > + if (rc < 0) { > + iommufd_device_pasid_detach(vdev->iommufd_device, > pasid); > + return rc; > + } I'd do simple operation (ida_alloc_range()) first before doing attach.
On 2024/4/16 17:01, Tian, Kevin wrote: >> From: Liu, Yi L <yi.l.liu@intel.com> >> Sent: Friday, April 12, 2024 4:21 PM >> >> void vfio_iommufd_physical_unbind(struct vfio_device *vdev) >> { >> + int pasid = 0; >> + >> lockdep_assert_held(&vdev->dev_set->lock); >> >> + while (!ida_is_empty(&vdev->pasids)) { >> + pasid = ida_get_lowest(&vdev->pasids, pasid, INT_MAX); >> + if (pasid < 0) >> + break; > > WARN_ON as this shouldn't happen when ida is not empty. ok. >> >> +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev, >> + u32 pasid, u32 *pt_id) > > the name is too long. What about removing 'physical' as there is no > plan (unlikely) to support pasid on mdev? I'm ok to do it. >> +{ >> + int rc; >> + >> + lockdep_assert_held(&vdev->dev_set->lock); >> + >> + if (WARN_ON(!vdev->iommufd_device)) >> + return -EINVAL; >> + >> + rc = ida_get_lowest(&vdev->pasids, pasid, pasid); >> + if (rc == pasid) >> + return iommufd_device_pasid_replace(vdev- >>> iommufd_device, >> + pasid, pt_id); >> + >> + rc = iommufd_device_pasid_attach(vdev->iommufd_device, pasid, >> pt_id); >> + if (rc) >> + return rc; >> + >> + rc = ida_alloc_range(&vdev->pasids, pasid, pasid, GFP_KERNEL); >> + if (rc < 0) { >> + iommufd_device_pasid_detach(vdev->iommufd_device, >> pasid); >> + return rc; >> + } > > I'd do simple operation (ida_alloc_range()) first before doing attach. > But that means we rely on the ida_alloc_range() to return -ENOSPC to indicate the pasid is allocated, hence this attach is actually a replacement. This is easy to be broken if ida_alloc_range() returns -ENOSPC for other reasons in future.
> From: Liu, Yi L <yi.l.liu@intel.com> > Sent: Tuesday, April 16, 2024 5:25 PM > > On 2024/4/16 17:01, Tian, Kevin wrote: > >> From: Liu, Yi L <yi.l.liu@intel.com> > >> Sent: Friday, April 12, 2024 4:21 PM > >> > >> + > >> + rc = ida_get_lowest(&vdev->pasids, pasid, pasid); > >> + if (rc == pasid) > >> + return iommufd_device_pasid_replace(vdev- > >>> iommufd_device, > >> + pasid, pt_id); > >> + > >> + rc = iommufd_device_pasid_attach(vdev->iommufd_device, pasid, > >> pt_id); > >> + if (rc) > >> + return rc; > >> + > >> + rc = ida_alloc_range(&vdev->pasids, pasid, pasid, GFP_KERNEL); > >> + if (rc < 0) { > >> + iommufd_device_pasid_detach(vdev->iommufd_device, > >> pasid); > >> + return rc; > >> + } > > > > I'd do simple operation (ida_alloc_range()) first before doing attach. > > > > But that means we rely on the ida_alloc_range() to return -ENOSPC to > indicate the pasid is allocated, hence this attach is actually a > replacement. This is easy to be broken if ida_alloc_range() returns > -ENOSPC for other reasons in future. > ida_alloc_range() could fail for other reasons e.g. -ENOMEM. in case I didn't make it clear I just meant to swap the order between iommufd_device_pasid_attach() and ida_alloc_range(). replacement is still checked against ida_get_lowest().
On 2024/4/16 17:47, Tian, Kevin wrote: >> From: Liu, Yi L <yi.l.liu@intel.com> >> Sent: Tuesday, April 16, 2024 5:25 PM >> >> On 2024/4/16 17:01, Tian, Kevin wrote: >>>> From: Liu, Yi L <yi.l.liu@intel.com> >>>> Sent: Friday, April 12, 2024 4:21 PM >>>> >>>> + >>>> + rc = ida_get_lowest(&vdev->pasids, pasid, pasid); >>>> + if (rc == pasid) >>>> + return iommufd_device_pasid_replace(vdev- >>>>> iommufd_device, >>>> + pasid, pt_id); >>>> + >>>> + rc = iommufd_device_pasid_attach(vdev->iommufd_device, pasid, >>>> pt_id); >>>> + if (rc) >>>> + return rc; >>>> + >>>> + rc = ida_alloc_range(&vdev->pasids, pasid, pasid, GFP_KERNEL); >>>> + if (rc < 0) { >>>> + iommufd_device_pasid_detach(vdev->iommufd_device, >>>> pasid); >>>> + return rc; >>>> + } >>> >>> I'd do simple operation (ida_alloc_range()) first before doing attach. >>> >> >> But that means we rely on the ida_alloc_range() to return -ENOSPC to >> indicate the pasid is allocated, hence this attach is actually a >> replacement. This is easy to be broken if ida_alloc_range() returns >> -ENOSPC for other reasons in future. >> > > ida_alloc_range() could fail for other reasons e.g. -ENOMEM. > > in case I didn't make it clear I just meant to swap the order > between iommufd_device_pasid_attach() and ida_alloc_range(). > > replacement is still checked against ida_get_lowest(). aha, I see.
On Fri, Apr 12, 2024 at 01:21:19AM -0700, Yi Liu wrote: > +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev, > + u32 pasid, u32 *pt_id) > +{ > + int rc; > + > + lockdep_assert_held(&vdev->dev_set->lock); > + > + if (WARN_ON(!vdev->iommufd_device)) > + return -EINVAL; > + > + rc = ida_get_lowest(&vdev->pasids, pasid, pasid); A helper inline bool ida_is_allocate(&ida, id) Would be nicer for that > diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c > index cb5b7f865d58..e0198851ffd2 100644 > --- a/drivers/vfio/pci/vfio_pci.c > +++ b/drivers/vfio/pci/vfio_pci.c > @@ -142,6 +142,8 @@ static const struct vfio_device_ops vfio_pci_ops = { > .unbind_iommufd = vfio_iommufd_physical_unbind, > .attach_ioas = vfio_iommufd_physical_attach_ioas, > .detach_ioas = vfio_iommufd_physical_detach_ioas, > + .pasid_attach_ioas = vfio_iommufd_physical_pasid_attach_ioas, > + .pasid_detach_ioas = vfio_iommufd_physical_pasid_detach_ioas, > }; This should be copied into mlx5 and nvgrace-gpu at least as well Jason
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Tuesday, April 23, 2024 8:44 PM > > On Fri, Apr 12, 2024 at 01:21:19AM -0700, Yi Liu wrote: > > +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev, > > + u32 pasid, u32 *pt_id) > > +{ > > + int rc; > > + > > + lockdep_assert_held(&vdev->dev_set->lock); > > + > > + if (WARN_ON(!vdev->iommufd_device)) > > + return -EINVAL; > > + > > + rc = ida_get_lowest(&vdev->pasids, pasid, pasid); > > A helper inline > > bool ida_is_allocate(&ida, id) > > Would be nicer for that > > > diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c > > index cb5b7f865d58..e0198851ffd2 100644 > > --- a/drivers/vfio/pci/vfio_pci.c > > +++ b/drivers/vfio/pci/vfio_pci.c > > @@ -142,6 +142,8 @@ static const struct vfio_device_ops vfio_pci_ops = { > > .unbind_iommufd = vfio_iommufd_physical_unbind, > > .attach_ioas = vfio_iommufd_physical_attach_ioas, > > .detach_ioas = vfio_iommufd_physical_detach_ioas, > > + .pasid_attach_ioas = vfio_iommufd_physical_pasid_attach_ioas, > > + .pasid_detach_ioas = vfio_iommufd_physical_pasid_detach_ioas, > > }; > > This should be copied into mlx5 and nvgrace-gpu at least as well > I'd prefer that the driver owners add them separately. They know their hardware and can do proper testing.
On 2024/4/23 20:43, Jason Gunthorpe wrote: > On Fri, Apr 12, 2024 at 01:21:19AM -0700, Yi Liu wrote: >> +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev, >> + u32 pasid, u32 *pt_id) >> +{ >> + int rc; >> + >> + lockdep_assert_held(&vdev->dev_set->lock); >> + >> + if (WARN_ON(!vdev->iommufd_device)) >> + return -EINVAL; >> + >> + rc = ida_get_lowest(&vdev->pasids, pasid, pasid); > > A helper inline > > bool ida_is_allocate(&ida, id) > > Would be nicer for that ok. >> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c >> index cb5b7f865d58..e0198851ffd2 100644 >> --- a/drivers/vfio/pci/vfio_pci.c >> +++ b/drivers/vfio/pci/vfio_pci.c >> @@ -142,6 +142,8 @@ static const struct vfio_device_ops vfio_pci_ops = { >> .unbind_iommufd = vfio_iommufd_physical_unbind, >> .attach_ioas = vfio_iommufd_physical_attach_ioas, >> .detach_ioas = vfio_iommufd_physical_detach_ioas, >> + .pasid_attach_ioas = vfio_iommufd_physical_pasid_attach_ioas, >> + .pasid_detach_ioas = vfio_iommufd_physical_pasid_detach_ioas, >> }; > > This should be copied into mlx5 and nvgrace-gpu at least as well looks like Kevin has a different idea on it.
diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c index 82eba6966fa5..fc533416c75d 100644 --- a/drivers/vfio/iommufd.c +++ b/drivers/vfio/iommufd.c @@ -119,14 +119,26 @@ int vfio_iommufd_physical_bind(struct vfio_device *vdev, if (IS_ERR(idev)) return PTR_ERR(idev); vdev->iommufd_device = idev; + ida_init(&vdev->pasids); return 0; } EXPORT_SYMBOL_GPL(vfio_iommufd_physical_bind); void vfio_iommufd_physical_unbind(struct vfio_device *vdev) { + int pasid = 0; + lockdep_assert_held(&vdev->dev_set->lock); + while (!ida_is_empty(&vdev->pasids)) { + pasid = ida_get_lowest(&vdev->pasids, pasid, INT_MAX); + if (pasid < 0) + break; + + iommufd_device_pasid_detach(vdev->iommufd_device, pasid); + ida_free(&vdev->pasids, pasid); + } + if (vdev->iommufd_attached) { iommufd_device_detach(vdev->iommufd_device); vdev->iommufd_attached = false; @@ -168,6 +180,54 @@ void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev) } EXPORT_SYMBOL_GPL(vfio_iommufd_physical_detach_ioas); +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev, + u32 pasid, u32 *pt_id) +{ + int rc; + + lockdep_assert_held(&vdev->dev_set->lock); + + if (WARN_ON(!vdev->iommufd_device)) + return -EINVAL; + + rc = ida_get_lowest(&vdev->pasids, pasid, pasid); + if (rc == pasid) + return iommufd_device_pasid_replace(vdev->iommufd_device, + pasid, pt_id); + + rc = iommufd_device_pasid_attach(vdev->iommufd_device, pasid, pt_id); + if (rc) + return rc; + + rc = ida_alloc_range(&vdev->pasids, pasid, pasid, GFP_KERNEL); + if (rc < 0) { + iommufd_device_pasid_detach(vdev->iommufd_device, pasid); + return rc; + } + + return 0; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_physical_pasid_attach_ioas); + +void vfio_iommufd_physical_pasid_detach_ioas(struct vfio_device *vdev, + u32 pasid) +{ + int rc; + + lockdep_assert_held(&vdev->dev_set->lock); + + if (WARN_ON(!vdev->iommufd_device)) + return; + + rc = ida_get_lowest(&vdev->pasids, pasid, pasid); + if (rc < 0) + return; + + 
iommufd_device_pasid_detach(vdev->iommufd_device, pasid); + ida_free(&vdev->pasids, pasid); +} +EXPORT_SYMBOL_GPL(vfio_iommufd_physical_pasid_detach_ioas); + /* * The emulated standard ops mean that vfio_device is going to use the * "mdev path" and will call vfio_pin_pages()/vfio_dma_rw(). Drivers using this diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index cb5b7f865d58..e0198851ffd2 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -142,6 +142,8 @@ static const struct vfio_device_ops vfio_pci_ops = { .unbind_iommufd = vfio_iommufd_physical_unbind, .attach_ioas = vfio_iommufd_physical_attach_ioas, .detach_ioas = vfio_iommufd_physical_detach_ioas, + .pasid_attach_ioas = vfio_iommufd_physical_pasid_attach_ioas, + .pasid_detach_ioas = vfio_iommufd_physical_pasid_detach_ioas, }; static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 8b1a29820409..8fd1db173e84 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -66,6 +66,7 @@ struct vfio_device { void (*put_kvm)(struct kvm *kvm); #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; + struct ida pasids; u8 iommufd_attached:1; #endif u8 cdev_opened:1; @@ -90,6 +91,8 @@ struct vfio_device { * bound iommufd. Undo in unbind_iommufd if @detach_ioas is not * called. 
* @detach_ioas: Opposite of attach_ioas + * @pasid_attach_ioas: The pasid variation of attach_ioas + * @pasid_detach_ioas: Opposite of pasid_attach_ioas * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor @@ -114,6 +117,8 @@ struct vfio_device_ops { void (*unbind_iommufd)(struct vfio_device *vdev); int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id); void (*detach_ioas)(struct vfio_device *vdev); + int (*pasid_attach_ioas)(struct vfio_device *vdev, u32 pasid, u32 *pt_id); + void (*pasid_detach_ioas)(struct vfio_device *vdev, u32 pasid); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -138,6 +143,8 @@ int vfio_iommufd_physical_bind(struct vfio_device *vdev, void vfio_iommufd_physical_unbind(struct vfio_device *vdev); int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id); void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev); +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev, u32 pasid, u32 *pt_id); +void vfio_iommufd_physical_pasid_detach_ioas(struct vfio_device *vdev, u32 pasid); int vfio_iommufd_emulated_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_emulated_unbind(struct vfio_device *vdev); @@ -165,6 +172,10 @@ vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) #define vfio_iommufd_physical_detach_ioas \ ((void (*)(struct vfio_device *vdev)) NULL) +#define vfio_iommufd_physical_pasid_attach_ioas \ + ((int (*)(struct vfio_device *vdev, u32 pasid, u32 *pt_id)) NULL) +#define vfio_iommufd_physical_pasid_detach_ioas \ + ((void (*)(struct vfio_device *vdev, u32 pasid)) NULL) #define vfio_iommufd_emulated_bind \ ((int (*)(struct vfio_device *vdev, struct 
iommufd_ctx *ictx, \ u32 *out_device_id)) NULL)