diff mbox series

[v2,2/4] vfio-iommufd: Support pasid [at|de]tach for physical VFIO devices

Message ID 20240412082121.33382-3-yi.l.liu@intel.com (mailing list archive)
State New
Headers show
Series vfio-pci support pasid attach/detach | expand

Commit Message

Yi Liu April 12, 2024, 8:21 a.m. UTC
This adds pasid_[at|de]tach_ioas ops for attaching hwpt to pasid of a
device and the helpers for it. For now, only vfio-pci supports pasid
attach/detach.

Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/vfio/iommufd.c      | 60 +++++++++++++++++++++++++++++++++++++
 drivers/vfio/pci/vfio_pci.c |  2 ++
 include/linux/vfio.h        | 11 +++++++
 3 files changed, 73 insertions(+)

Comments

Tian, Kevin April 16, 2024, 9:01 a.m. UTC | #1
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Friday, April 12, 2024 4:21 PM
> 
>  void vfio_iommufd_physical_unbind(struct vfio_device *vdev)
>  {
> +	int pasid = 0;
> +
>  	lockdep_assert_held(&vdev->dev_set->lock);
> 
> +	while (!ida_is_empty(&vdev->pasids)) {
> +		pasid = ida_get_lowest(&vdev->pasids, pasid, INT_MAX);
> +		if (pasid < 0)
> +			break;

WARN_ON as this shouldn't happen when ida is not empty.

> 
> +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev,
> +					    u32 pasid, u32 *pt_id)

the name is too long. What about removing 'physical' as there is no
plan (unlikely) to support pasid on mdev?

> +{
> +	int rc;
> +
> +	lockdep_assert_held(&vdev->dev_set->lock);
> +
> +	if (WARN_ON(!vdev->iommufd_device))
> +		return -EINVAL;
> +
> +	rc = ida_get_lowest(&vdev->pasids, pasid, pasid);
> +	if (rc == pasid)
> +		return iommufd_device_pasid_replace(vdev->iommufd_device,
> +						    pasid, pt_id);
> +
> +	rc = iommufd_device_pasid_attach(vdev->iommufd_device, pasid, pt_id);
> +	if (rc)
> +		return rc;
> +
> +	rc = ida_alloc_range(&vdev->pasids, pasid, pasid, GFP_KERNEL);
> +	if (rc < 0) {
> +		iommufd_device_pasid_detach(vdev->iommufd_device, pasid);
> +		return rc;
> +	}

I'd do simple operation (ida_alloc_range()) first before doing attach.
Yi Liu April 16, 2024, 9:24 a.m. UTC | #2
On 2024/4/16 17:01, Tian, Kevin wrote:
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Sent: Friday, April 12, 2024 4:21 PM
>>
>>   void vfio_iommufd_physical_unbind(struct vfio_device *vdev)
>>   {
>> +	int pasid = 0;
>> +
>>   	lockdep_assert_held(&vdev->dev_set->lock);
>>
>> +	while (!ida_is_empty(&vdev->pasids)) {
>> +		pasid = ida_get_lowest(&vdev->pasids, pasid, INT_MAX);
>> +		if (pasid < 0)
>> +			break;
> 
> WARN_ON as this shouldn't happen when ida is not empty.

ok.

>>
>> +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev,
>> +					    u32 pasid, u32 *pt_id)
> 
> the name is too long. What about removing 'physical' as there is no
> plan (unlikely) to support pasid on mdev?

I'm ok to do it.

>> +{
>> +	int rc;
>> +
>> +	lockdep_assert_held(&vdev->dev_set->lock);
>> +
>> +	if (WARN_ON(!vdev->iommufd_device))
>> +		return -EINVAL;
>> +
>> +	rc = ida_get_lowest(&vdev->pasids, pasid, pasid);
>> +	if (rc == pasid)
>> +		return iommufd_device_pasid_replace(vdev->iommufd_device,
>> +						    pasid, pt_id);
>> +
>> +	rc = iommufd_device_pasid_attach(vdev->iommufd_device, pasid, pt_id);
>> +	if (rc)
>> +		return rc;
>> +
>> +	rc = ida_alloc_range(&vdev->pasids, pasid, pasid, GFP_KERNEL);
>> +	if (rc < 0) {
>> +		iommufd_device_pasid_detach(vdev->iommufd_device, pasid);
>> +		return rc;
>> +	}
> 
> I'd do simple operation (ida_alloc_range()) first before doing attach.
> 

But that means we rely on the ida_alloc_range() to return -ENOSPC to
indicate the pasid is allocated, hence this attach is actually a
replacement. This is easy to be broken if ida_alloc_range() returns
-ENOSPC for other reasons in future.
Tian, Kevin April 16, 2024, 9:47 a.m. UTC | #3
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Tuesday, April 16, 2024 5:25 PM
> 
> On 2024/4/16 17:01, Tian, Kevin wrote:
> >> From: Liu, Yi L <yi.l.liu@intel.com>
> >> Sent: Friday, April 12, 2024 4:21 PM
> >>
> >> +
> >> +	rc = ida_get_lowest(&vdev->pasids, pasid, pasid);
> >> +	if (rc == pasid)
> >> +		return iommufd_device_pasid_replace(vdev->iommufd_device,
> >> +						    pasid, pt_id);
> >> +
> >> +	rc = iommufd_device_pasid_attach(vdev->iommufd_device, pasid, pt_id);
> >> +	if (rc)
> >> +		return rc;
> >> +
> >> +	rc = ida_alloc_range(&vdev->pasids, pasid, pasid, GFP_KERNEL);
> >> +	if (rc < 0) {
> >> +		iommufd_device_pasid_detach(vdev->iommufd_device, pasid);
> >> +		return rc;
> >> +	}
> >
> > I'd do simple operation (ida_alloc_range()) first before doing attach.
> >
> 
> But that means we rely on the ida_alloc_range() to return -ENOSPC to
> indicate the pasid is allocated, hence this attach is actually a
> replacement. This is easy to be broken if ida_alloc_range() returns
> -ENOSPC for other reasons in future.
> 

ida_alloc_range() could fail for other reasons e.g. -ENOMEM.

in case I didn't make it clear I just meant to swap the order
between iommufd_device_pasid_attach() and ida_alloc_range().

replacement is still checked against ida_get_lowest().
Yi Liu April 18, 2024, 7:04 a.m. UTC | #4
On 2024/4/16 17:47, Tian, Kevin wrote:
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Sent: Tuesday, April 16, 2024 5:25 PM
>>
>> On 2024/4/16 17:01, Tian, Kevin wrote:
>>>> From: Liu, Yi L <yi.l.liu@intel.com>
>>>> Sent: Friday, April 12, 2024 4:21 PM
>>>>
>>>> +
>>>> +	rc = ida_get_lowest(&vdev->pasids, pasid, pasid);
>>>> +	if (rc == pasid)
>>>> +		return iommufd_device_pasid_replace(vdev->iommufd_device,
>>>> +						    pasid, pt_id);
>>>> +
>>>> +	rc = iommufd_device_pasid_attach(vdev->iommufd_device, pasid, pt_id);
>>>> +	if (rc)
>>>> +		return rc;
>>>> +
>>>> +	rc = ida_alloc_range(&vdev->pasids, pasid, pasid, GFP_KERNEL);
>>>> +	if (rc < 0) {
>>>> +		iommufd_device_pasid_detach(vdev->iommufd_device, pasid);
>>>> +		return rc;
>>>> +	}
>>>
>>> I'd do simple operation (ida_alloc_range()) first before doing attach.
>>>
>>
>> But that means we rely on the ida_alloc_range() to return -ENOSPC to
>> indicate the pasid is allocated, hence this attach is actually a
>> replacement. This is easy to be broken if ida_alloc_range() returns
>> -ENOSPC for other reasons in future.
>>
> 
> ida_alloc_range() could fail for other reasons e.g. -ENOMEM.
> 
> in case I didn't make it clear I just meant to swap the order
> between iommufd_device_pasid_attach() and ida_alloc_range().
> 
> replacement is still checked against ida_get_lowest().

aha, I see.
Jason Gunthorpe April 23, 2024, 12:43 p.m. UTC | #5
On Fri, Apr 12, 2024 at 01:21:19AM -0700, Yi Liu wrote:
> +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev,
> +					    u32 pasid, u32 *pt_id)
> +{
> +	int rc;
> +
> +	lockdep_assert_held(&vdev->dev_set->lock);
> +
> +	if (WARN_ON(!vdev->iommufd_device))
> +		return -EINVAL;
> +
> +	rc = ida_get_lowest(&vdev->pasids, pasid, pasid);

A helper inline

    bool ida_is_allocate(&ida, id)

Would be nicer for that

> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index cb5b7f865d58..e0198851ffd2 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -142,6 +142,8 @@ static const struct vfio_device_ops vfio_pci_ops = {
>  	.unbind_iommufd	= vfio_iommufd_physical_unbind,
>  	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
>  	.detach_ioas	= vfio_iommufd_physical_detach_ioas,
> +	.pasid_attach_ioas	= vfio_iommufd_physical_pasid_attach_ioas,
> +	.pasid_detach_ioas	= vfio_iommufd_physical_pasid_detach_ioas,
>  };

This should be copied into mlx5 and nvgrace-gpu at least as well

Jason
Tian, Kevin April 24, 2024, 12:33 a.m. UTC | #6
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, April 23, 2024 8:44 PM
> 
> On Fri, Apr 12, 2024 at 01:21:19AM -0700, Yi Liu wrote:
> > +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev,
> > +					    u32 pasid, u32 *pt_id)
> > +{
> > +	int rc;
> > +
> > +	lockdep_assert_held(&vdev->dev_set->lock);
> > +
> > +	if (WARN_ON(!vdev->iommufd_device))
> > +		return -EINVAL;
> > +
> > +	rc = ida_get_lowest(&vdev->pasids, pasid, pasid);
> 
> A helper inline
> 
>     bool ida_is_allocate(&ida, id)
> 
> Would be nicer for that
> 
> > diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> > index cb5b7f865d58..e0198851ffd2 100644
> > --- a/drivers/vfio/pci/vfio_pci.c
> > +++ b/drivers/vfio/pci/vfio_pci.c
> > @@ -142,6 +142,8 @@ static const struct vfio_device_ops vfio_pci_ops = {
> >  	.unbind_iommufd	= vfio_iommufd_physical_unbind,
> >  	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
> >  	.detach_ioas	= vfio_iommufd_physical_detach_ioas,
> > +	.pasid_attach_ioas	= vfio_iommufd_physical_pasid_attach_ioas,
> > +	.pasid_detach_ioas	= vfio_iommufd_physical_pasid_detach_ioas,
> >  };
> 
> This should be copied into mlx5 and nvgrace-gpu at least as well
> 

I'd prefer that the driver owners add them separately. They know
their hardware and can do proper testing.
Yi Liu April 24, 2024, 4:48 a.m. UTC | #7
On 2024/4/23 20:43, Jason Gunthorpe wrote:
> On Fri, Apr 12, 2024 at 01:21:19AM -0700, Yi Liu wrote:
>> +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev,
>> +					    u32 pasid, u32 *pt_id)
>> +{
>> +	int rc;
>> +
>> +	lockdep_assert_held(&vdev->dev_set->lock);
>> +
>> +	if (WARN_ON(!vdev->iommufd_device))
>> +		return -EINVAL;
>> +
>> +	rc = ida_get_lowest(&vdev->pasids, pasid, pasid);
> 
> A helper inline
> 
>      bool ida_is_allocate(&ida, id)
> 
> Would be nicer for that

ok.

>> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
>> index cb5b7f865d58..e0198851ffd2 100644
>> --- a/drivers/vfio/pci/vfio_pci.c
>> +++ b/drivers/vfio/pci/vfio_pci.c
>> @@ -142,6 +142,8 @@ static const struct vfio_device_ops vfio_pci_ops = {
>>   	.unbind_iommufd	= vfio_iommufd_physical_unbind,
>>   	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
>>   	.detach_ioas	= vfio_iommufd_physical_detach_ioas,
>> +	.pasid_attach_ioas	= vfio_iommufd_physical_pasid_attach_ioas,
>> +	.pasid_detach_ioas	= vfio_iommufd_physical_pasid_detach_ioas,
>>   };
> 
> This should be copied into mlx5 and nvgrace-gpu at least as well

looks like Kevin has a different idea on it.
diff mbox series

Patch

diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
index 82eba6966fa5..fc533416c75d 100644
--- a/drivers/vfio/iommufd.c
+++ b/drivers/vfio/iommufd.c
@@ -119,14 +119,26 @@  int vfio_iommufd_physical_bind(struct vfio_device *vdev,
 	if (IS_ERR(idev))
 		return PTR_ERR(idev);
 	vdev->iommufd_device = idev;
+	ida_init(&vdev->pasids);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(vfio_iommufd_physical_bind);
 
 void vfio_iommufd_physical_unbind(struct vfio_device *vdev)
 {
+	int pasid = 0;
+
 	lockdep_assert_held(&vdev->dev_set->lock);
 
+	while (!ida_is_empty(&vdev->pasids)) {
+		pasid = ida_get_lowest(&vdev->pasids, pasid, INT_MAX);
+		if (pasid < 0)
+			break;
+
+		iommufd_device_pasid_detach(vdev->iommufd_device, pasid);
+		ida_free(&vdev->pasids, pasid);
+	}
+
 	if (vdev->iommufd_attached) {
 		iommufd_device_detach(vdev->iommufd_device);
 		vdev->iommufd_attached = false;
@@ -168,6 +180,54 @@  void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev)
 }
 EXPORT_SYMBOL_GPL(vfio_iommufd_physical_detach_ioas);
 
+int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev,
+					    u32 pasid, u32 *pt_id)
+{
+	int rc;
+
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	if (WARN_ON(!vdev->iommufd_device))
+		return -EINVAL;
+
+	rc = ida_get_lowest(&vdev->pasids, pasid, pasid);
+	if (rc == pasid)
+		return iommufd_device_pasid_replace(vdev->iommufd_device,
+						    pasid, pt_id);
+
+	rc = iommufd_device_pasid_attach(vdev->iommufd_device, pasid, pt_id);
+	if (rc)
+		return rc;
+
+	rc = ida_alloc_range(&vdev->pasids, pasid, pasid, GFP_KERNEL);
+	if (rc < 0) {
+		iommufd_device_pasid_detach(vdev->iommufd_device, pasid);
+		return rc;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_pasid_attach_ioas);
+
+void vfio_iommufd_physical_pasid_detach_ioas(struct vfio_device *vdev,
+					     u32 pasid)
+{
+	int rc;
+
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	if (WARN_ON(!vdev->iommufd_device))
+		return;
+
+	rc = ida_get_lowest(&vdev->pasids, pasid, pasid);
+	if (rc < 0)
+		return;
+
+	iommufd_device_pasid_detach(vdev->iommufd_device, pasid);
+	ida_free(&vdev->pasids, pasid);
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_pasid_detach_ioas);
+
 /*
  * The emulated standard ops mean that vfio_device is going to use the
  * "mdev path" and will call vfio_pin_pages()/vfio_dma_rw(). Drivers using this
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index cb5b7f865d58..e0198851ffd2 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -142,6 +142,8 @@  static const struct vfio_device_ops vfio_pci_ops = {
 	.unbind_iommufd	= vfio_iommufd_physical_unbind,
 	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
 	.detach_ioas	= vfio_iommufd_physical_detach_ioas,
+	.pasid_attach_ioas	= vfio_iommufd_physical_pasid_attach_ioas,
+	.pasid_detach_ioas	= vfio_iommufd_physical_pasid_detach_ioas,
 };
 
 static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 8b1a29820409..8fd1db173e84 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -66,6 +66,7 @@  struct vfio_device {
 	void (*put_kvm)(struct kvm *kvm);
 #if IS_ENABLED(CONFIG_IOMMUFD)
 	struct iommufd_device *iommufd_device;
+	struct ida pasids;
 	u8 iommufd_attached:1;
 #endif
 	u8 cdev_opened:1;
@@ -90,6 +91,8 @@  struct vfio_device {
  *		 bound iommufd. Undo in unbind_iommufd if @detach_ioas is not
  *		 called.
  * @detach_ioas: Opposite of attach_ioas
+ * @pasid_attach_ioas: The pasid variation of attach_ioas
+ * @pasid_detach_ioas: Opposite of pasid_attach_ioas
  * @open_device: Called when the first file descriptor is opened for this device
  * @close_device: Opposite of open_device
  * @read: Perform read(2) on device file descriptor
@@ -114,6 +117,8 @@  struct vfio_device_ops {
 	void	(*unbind_iommufd)(struct vfio_device *vdev);
 	int	(*attach_ioas)(struct vfio_device *vdev, u32 *pt_id);
 	void	(*detach_ioas)(struct vfio_device *vdev);
+	int	(*pasid_attach_ioas)(struct vfio_device *vdev, u32 pasid, u32 *pt_id);
+	void	(*pasid_detach_ioas)(struct vfio_device *vdev, u32 pasid);
 	int	(*open_device)(struct vfio_device *vdev);
 	void	(*close_device)(struct vfio_device *vdev);
 	ssize_t	(*read)(struct vfio_device *vdev, char __user *buf,
@@ -138,6 +143,8 @@  int vfio_iommufd_physical_bind(struct vfio_device *vdev,
 void vfio_iommufd_physical_unbind(struct vfio_device *vdev);
 int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
 void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev);
+int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev, u32 pasid, u32 *pt_id);
+void vfio_iommufd_physical_pasid_detach_ioas(struct vfio_device *vdev, u32 pasid);
 int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
 			       struct iommufd_ctx *ictx, u32 *out_device_id);
 void vfio_iommufd_emulated_unbind(struct vfio_device *vdev);
@@ -165,6 +172,10 @@  vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx)
 	((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
 #define vfio_iommufd_physical_detach_ioas \
 	((void (*)(struct vfio_device *vdev)) NULL)
+#define vfio_iommufd_physical_pasid_attach_ioas \
+	((int (*)(struct vfio_device *vdev, u32 pasid, u32 *pt_id)) NULL)
+#define vfio_iommufd_physical_pasid_detach_ioas \
+	((void (*)(struct vfio_device *vdev, u32 pasid)) NULL)
 #define vfio_iommufd_emulated_bind                                      \
 	((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx,   \
 		  u32 *out_device_id)) NULL)