diff mbox series

[v2,07/11] vfio-iommufd: Support iommufd for physical VFIO devices

Message ID 7-v2-65016290f146+33e-vfio_iommufd_jgg@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Connect VFIO to IOMMUFD | expand

Commit Message

Jason Gunthorpe Nov. 8, 2022, 12:52 a.m. UTC
This creates the iommufd_device for the physical VFIO drivers. These are
all the drivers that are calling vfio_register_group_dev() and expect the
type1 code to set up a real iommu_domain against their parent struct
device.

The design gives the driver a choice in how it gets connected to iommufd
by providing bind_iommufd/unbind_iommufd/attach_ioas callbacks to
implement as required. The core code provides three default callbacks for
physical mode using a real iommu_domain. These defaults are suitable for
drivers using vfio_register_group_dev().

Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/Makefile                         |  1 +
 drivers/vfio/fsl-mc/vfio_fsl_mc.c             |  3 +
 drivers/vfio/iommufd.c                        | 99 +++++++++++++++++++
 .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c    |  6 ++
 drivers/vfio/pci/mlx5/main.c                  |  3 +
 drivers/vfio/pci/vfio_pci.c                   |  3 +
 drivers/vfio/platform/vfio_amba.c             |  3 +
 drivers/vfio/platform/vfio_platform.c         |  3 +
 drivers/vfio/vfio.h                           | 15 +++
 drivers/vfio/vfio_main.c                      | 13 ++-
 include/linux/vfio.h                          | 25 +++++
 11 files changed, 172 insertions(+), 2 deletions(-)
 create mode 100644 drivers/vfio/iommufd.c

Comments

Nicolin Chen Nov. 8, 2022, 6:10 a.m. UTC | #1
On Mon, Nov 07, 2022 at 08:52:51PM -0400, Jason Gunthorpe wrote:

> @@ -795,6 +800,10 @@ static int vfio_device_first_open(struct vfio_device *device)
>  		ret = vfio_group_use_container(device->group);
>  		if (ret)
>  			goto err_module_put;
> +	} else if (device->group->iommufd) {
> +		ret = vfio_iommufd_bind(device, device->group->iommufd);

Here we check device->group->iommufd...

> +		if (ret)
> +			goto err_module_put;
>  	}
>  
>  	device->kvm = device->group->kvm;
> @@ -812,6 +821,7 @@ static int vfio_device_first_open(struct vfio_device *device)
>  	device->kvm = NULL;
>  	if (device->group->container)
>  		vfio_group_unuse_container(device->group);
> +	vfio_iommufd_unbind(device);

...yet the check is missing here, which could result in a kernel oops.

Should probably add something similar:
+	if (device->group->iommufd)
+		vfio_iommufd_unbind(device);

Or we should check !vdev->iommufd_device inside the ->unbind.

>  err_module_put:
>  	mutex_unlock(&device->group->group_lock);
>  	module_put(device->dev->driver->owner);
> @@ -830,6 +840,7 @@ static void vfio_device_last_close(struct vfio_device *device)
>  	device->kvm = NULL;
>  	if (device->group->container)
>  		vfio_group_unuse_container(device->group);
> +	vfio_iommufd_unbind(device);

Ditto
Yi Liu Nov. 8, 2022, 7:41 a.m. UTC | #2
On 2022/11/8 14:10, Nicolin Chen wrote:
> On Mon, Nov 07, 2022 at 08:52:51PM -0400, Jason Gunthorpe wrote:
> 
>> @@ -795,6 +800,10 @@ static int vfio_device_first_open(struct vfio_device *device)
>>   		ret = vfio_group_use_container(device->group);
>>   		if (ret)
>>   			goto err_module_put;
>> +	} else if (device->group->iommufd) {
>> +		ret = vfio_iommufd_bind(device, device->group->iommufd);
> 
> Here we check device->group->iommufd...
> 
>> +		if (ret)
>> +			goto err_module_put;
>>   	}
>>   
>>   	device->kvm = device->group->kvm;
>> @@ -812,6 +821,7 @@ static int vfio_device_first_open(struct vfio_device *device)
>>   	device->kvm = NULL;
>>   	if (device->group->container)
>>   		vfio_group_unuse_container(device->group);
>> +	vfio_iommufd_unbind(device);
> 
> ...yet, missing here, which could result in kernel oops.
> 
> Should probably add something similar:
> +	if (device->group->iommufd)
> +		vfio_iommufd_unbind(device);
> 
> Or should check !vdev->iommufd_device inside the ->unbind.

This check was in the prior version, but was removed in this version. Any
special reason, Jason?

> 
>>   err_module_put:
>>   	mutex_unlock(&device->group->group_lock);
>>   	module_put(device->dev->driver->owner);
>> @@ -830,6 +840,7 @@ static void vfio_device_last_close(struct vfio_device *device)
>>   	device->kvm = NULL;
>>   	if (device->group->container)
>>   		vfio_group_unuse_container(device->group);
>> +	vfio_iommufd_unbind(device);
> 
> Ditto
Jason Gunthorpe Nov. 8, 2022, 5:48 p.m. UTC | #3
On Mon, Nov 07, 2022 at 10:10:59PM -0800, Nicolin Chen wrote:

> > @@ -812,6 +821,7 @@ static int vfio_device_first_open(struct vfio_device *device)
> >  	device->kvm = NULL;
> >  	if (device->group->container)
> >  		vfio_group_unuse_container(device->group);
> > +	vfio_iommufd_unbind(device);
> 
> ...yet, missing here, which could result in kernel oops.
> 
> Should probably add something similar:
> +	if (device->group->iommufd)
> +		vfio_iommufd_unbind(device);
> 
> Or should check !vdev->iommufd_device inside the ->unbind.

Let's keep it symmetric since the container is checked:

@@ -821,7 +821,8 @@ static int vfio_device_first_open(struct vfio_device *device)
        device->kvm = NULL;
        if (device->group->container)
                vfio_group_unuse_container(device->group);
-       vfio_iommufd_unbind(device);
+       else if (device->group->iommufd)
+               vfio_iommufd_unbind(device);
 err_module_put:
        mutex_unlock(&device->group->group_lock);
        module_put(device->dev->driver->owner);
@@ -840,7 +841,8 @@ static void vfio_device_last_close(struct vfio_device *device)
        device->kvm = NULL;
        if (device->group->container)
                vfio_group_unuse_container(device->group);
-       vfio_iommufd_unbind(device);
+       else if (device->group->iommufd)
+               vfio_iommufd_unbind(device);
        mutex_unlock(&device->group->group_lock);
        module_put(device->dev->driver->owner);

Thanks,
Jason
Jason Gunthorpe Nov. 8, 2022, 5:51 p.m. UTC | #4
On Tue, Nov 08, 2022 at 03:41:25PM +0800, Yi Liu wrote:
> On 2022/11/8 14:10, Nicolin Chen wrote:
> > On Mon, Nov 07, 2022 at 08:52:51PM -0400, Jason Gunthorpe wrote:
> > 
> > > @@ -795,6 +800,10 @@ static int vfio_device_first_open(struct vfio_device *device)
> > >   		ret = vfio_group_use_container(device->group);
> > >   		if (ret)
> > >   			goto err_module_put;
> > > +	} else if (device->group->iommufd) {
> > > +		ret = vfio_iommufd_bind(device, device->group->iommufd);
> > 
> > Here we check device->group->iommufd...
> > 
> > > +		if (ret)
> > > +			goto err_module_put;
> > >   	}
> > >   	device->kvm = device->group->kvm;
> > > @@ -812,6 +821,7 @@ static int vfio_device_first_open(struct vfio_device *device)
> > >   	device->kvm = NULL;
> > >   	if (device->group->container)
> > >   		vfio_group_unuse_container(device->group);
> > > +	vfio_iommufd_unbind(device);
> > 
> > ...yet, missing here, which could result in kernel oops.
> > 
> > Should probably add something similar:
> > +	if (device->group->iommufd)
> > +		vfio_iommufd_unbind(device);
> > 
> > Or should check !vdev->iommufd_device inside the ->unbind.
> 
> this check was in prior version, but removed in this version. any
> special reason? Jason?

Oooh, this makes more sense - Kevin pointed out the check was wrong:

> > +void vfio_iommufd_unbind(struct vfio_device *vdev)
> > +{
> > +	lockdep_assert_held(&vdev->dev_set->lock);
> > +
> > +	if (!vdev->iommufd_device)
> > +		return;

> there is no iommufd_device in the emulated path...

And he is right, so I dropped it. But really the check was just
misspelled, it was supposed to be "device->group->iommufd" because the
caller assumed it.

Still, I think the right way to fix it is to lift the check into the
callers, as we don't touch group->iommufd in iommufd.c

Thanks,
Jason
Tian, Kevin Nov. 10, 2022, 3:11 a.m. UTC | #5
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, November 8, 2022 8:53 AM
> 
> +
> +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
> +{
> +	u32 ioas_id;
> +	u32 device_id;
> +	int ret;
> +
> +	lockdep_assert_held(&vdev->dev_set->lock);
> +
> +	/*
> +	 * If the driver doesn't provide this op then it means the device does
> +	 * not do DMA at all. So nothing to do.
> +	 */
> +	if (!vdev->ops->bind_iommufd)
> +		return 0;
> +
> +	ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id);
> +	if (ret)
> +		return ret;
> +
> +	ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id);
> +	if (ret)
> +		goto err_unbind;
> +	ret = vdev->ops->attach_ioas(vdev, &ioas_id);
> +	if (ret)
> +		goto err_unbind;

with our discussion in v1:

https://lore.kernel.org/all/Y2mgJqz8fvm54C+f@nvidia.com/

I got the rationale on the iommufd part: it doesn't have the concept
of a container, hence it doesn't necessarily need to impose a restriction
on when a user can change a compat ioas.

But from the vfio side I wonder whether we should cache the compat
ioas id when it's attached by the first device and then use it all the
way for other device attachments coming after, implying that IOAS_SET
only affects containers which haven't been attached.

In concept, a container should be aliased to only one compat ioas
in its lifetime.
Tian, Kevin Nov. 10, 2022, 3:12 a.m. UTC | #6
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Wednesday, November 9, 2022 1:52 AM
> 
> On Tue, Nov 08, 2022 at 03:41:25PM +0800, Yi Liu wrote:
> > On 2022/11/8 14:10, Nicolin Chen wrote:
> > > On Mon, Nov 07, 2022 at 08:52:51PM -0400, Jason Gunthorpe wrote:
> > >
> > > > @@ -795,6 +800,10 @@ static int vfio_device_first_open(struct
> vfio_device *device)
> > > >   		ret = vfio_group_use_container(device->group);
> > > >   		if (ret)
> > > >   			goto err_module_put;
> > > > +	} else if (device->group->iommufd) {
> > > > +		ret = vfio_iommufd_bind(device, device->group->iommufd);
> > >
> > > Here we check device->group->iommufd...
> > >
> > > > +		if (ret)
> > > > +			goto err_module_put;
> > > >   	}
> > > >   	device->kvm = device->group->kvm;
> > > > @@ -812,6 +821,7 @@ static int vfio_device_first_open(struct
> vfio_device *device)
> > > >   	device->kvm = NULL;
> > > >   	if (device->group->container)
> > > >   		vfio_group_unuse_container(device->group);
> > > > +	vfio_iommufd_unbind(device);
> > >
> > > ...yet, missing here, which could result in kernel oops.
> > >
> > > Should probably add something similar:
> > > +	if (device->group->iommufd)
> > > +		vfio_iommufd_unbind(device);
> > >
> > > Or should check !vdev->iommufd_device inside the ->unbind.
> >
> > this check was in prior version, but removed in this version. any
> > special reason? Jason?
> 
> Oooh, this makes more sense - Kevin pointed out the check was wrong:
> 
> > > +void vfio_iommufd_unbind(struct vfio_device *vdev)
> > > +{
> > > +	lockdep_assert_held(&vdev->dev_set->lock);
> > > +
> > > +	if (!vdev->iommufd_device)
> > > +		return;
> 
> > there is no iommufd_device in the emulated path...
> 
> And he is right, so I dropped it. But really the check was just
> misspelled, it was supposed to be "device->group->iommufd" because the
> caller assumed it.
> 
> Still, I think the right way to fix it is to lift the check as we
> don't touch group->iommufd in iommufd.c
> 

Yes, this is the right fix.
Jason Gunthorpe Nov. 10, 2022, 5:20 p.m. UTC | #7
On Thu, Nov 10, 2022 at 03:11:16AM +0000, Tian, Kevin wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Tuesday, November 8, 2022 8:53 AM
> > 
> > +
> > +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
> > +{
> > +	u32 ioas_id;
> > +	u32 device_id;
> > +	int ret;
> > +
> > +	lockdep_assert_held(&vdev->dev_set->lock);
> > +
> > +	/*
> > +	 * If the driver doesn't provide this op then it means the device does
> > +	 * not do DMA at all. So nothing to do.
> > +	 */
> > +	if (!vdev->ops->bind_iommufd)
> > +		return 0;
> > +
> > +	ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id);
> > +	if (ret)
> > +		return ret;
> > +
> > +	ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id);
> > +	if (ret)
> > +		goto err_unbind;
> > +	ret = vdev->ops->attach_ioas(vdev, &ioas_id);
> > +	if (ret)
> > +		goto err_unbind;
> 
> with our discussion in v1:
> 
> https://lore.kernel.org/all/Y2mgJqz8fvm54C+f@nvidia.com/
> 
> I got the rationale on iommufd part which doesn't have the concept
> of container hence not necessarily to impose restriction on when
> an user can change a compat ioas.
> 
> But from vfio side I wonder whether we should cache the compat
> ioas id when it's attached by the first device and then use it all the
> way for other device attachments coming after. implying IOAS_SET
> only affects containers which haven't been attached.

I can't see a reason to do this. IOAS_SET is a new ioctl and it has
new semantics beyond what original vfio container could do. In this
case having an impact on the next vfio_device that is opened.

This seems generally useful enough I wouldn't want to block it.

In any case, we can't *really* change this because the vfio layer is
working on IDs and the IDs can be destroyed/recreated from under
it. So if we try to hold the ID we could still end up getting it
changed anyhow.

Jason
Tian, Kevin Nov. 10, 2022, 11:58 p.m. UTC | #8
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Friday, November 11, 2022 1:21 AM
> 
> On Thu, Nov 10, 2022 at 03:11:16AM +0000, Tian, Kevin wrote:
> > > From: Jason Gunthorpe <jgg@nvidia.com>
> > > Sent: Tuesday, November 8, 2022 8:53 AM
> > >
> > > +
> > > +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx
> *ictx)
> > > +{
> > > +	u32 ioas_id;
> > > +	u32 device_id;
> > > +	int ret;
> > > +
> > > +	lockdep_assert_held(&vdev->dev_set->lock);
> > > +
> > > +	/*
> > > +	 * If the driver doesn't provide this op then it means the device does
> > > +	 * not do DMA at all. So nothing to do.
> > > +	 */
> > > +	if (!vdev->ops->bind_iommufd)
> > > +		return 0;
> > > +
> > > +	ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id);
> > > +	if (ret)
> > > +		goto err_unbind;
> > > +	ret = vdev->ops->attach_ioas(vdev, &ioas_id);
> > > +	if (ret)
> > > +		goto err_unbind;
> >
> > with our discussion in v1:
> >
> > https://lore.kernel.org/all/Y2mgJqz8fvm54C+f@nvidia.com/
> >
> > I got the rationale on iommufd part which doesn't have the concept
> > of container hence not necessarily to impose restriction on when
> > an user can change a compat ioas.
> >
> > But from vfio side I wonder whether we should cache the compat
> > ioas id when it's attached by the first device and then use it all the
> > way for other device attachments coming after. implying IOAS_SET
> > only affects containers which haven't been attached.
> 
> I can't see a reason to do this. IOAS_SET is a new ioctl and it has
> new semantics beyond what original vfio container could do. In this
> case having an impact on the next vfio_device that is opened.
> 
> This seems generally useful enough I wouldn't want to block it.
> 
> In any case, we can't *really* change this because the vfio layer is
> working on IDs and the IDs can be destroyed/recreated from under
> it. So if we try to hold the ID we could still end up getting it
> changed anyhow.
> 

OK, this is a valid point.

So a legacy vfio application doesn't use IOAS_SET, and therefore
backward compatibility is guaranteed.

An iommufd-native application will use cdev, where IOAS_SET and the
compat ioas are irrelevant.

Here we just allow an interesting usage where a user is allowed
to do more funny things with IOAS_SET on vfio-compat. Not sure
how useful it is, but it's not something we want to prohibit.
Yi Liu Nov. 11, 2022, 4:12 a.m. UTC | #9
On 2022/11/8 08:52, Jason Gunthorpe wrote:
> This creates the iommufd_device for the physical VFIO drivers. These are
> all the drivers that are calling vfio_register_group_dev() and expect the
> type1 code to setup a real iommu_domain against their parent struct
> device.
> 
> The design gives the driver a choice in how it gets connected to iommufd
> by providing bind_iommufd/unbind_iommufd/attach_ioas callbacks to
> implement as required. The core code provides three default callbacks for
> physical mode using a real iommu_domain. This is suitable for drivers
> using vfio_register_group_dev()
> 
> Tested-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>   drivers/vfio/Makefile                         |  1 +
>   drivers/vfio/fsl-mc/vfio_fsl_mc.c             |  3 +
>   drivers/vfio/iommufd.c                        | 99 +++++++++++++++++++
>   .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c    |  6 ++
>   drivers/vfio/pci/mlx5/main.c                  |  3 +
>   drivers/vfio/pci/vfio_pci.c                   |  3 +
>   drivers/vfio/platform/vfio_amba.c             |  3 +
>   drivers/vfio/platform/vfio_platform.c         |  3 +
>   drivers/vfio/vfio.h                           | 15 +++
>   drivers/vfio/vfio_main.c                      | 13 ++-
>   include/linux/vfio.h                          | 25 +++++
>   11 files changed, 172 insertions(+), 2 deletions(-)
>   create mode 100644 drivers/vfio/iommufd.c
> 
> diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
> index b693a1169286f8..3863922529ef20 100644
> --- a/drivers/vfio/Makefile
> +++ b/drivers/vfio/Makefile
> @@ -6,6 +6,7 @@ obj-$(CONFIG_VFIO) += vfio.o
>   vfio-y += vfio_main.o \
>   	  iova_bitmap.o \
>   	  container.o
> +vfio-$(CONFIG_IOMMUFD) += iommufd.o
>   
>   obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o
>   obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
> index b16874e913e4f5..5cd4bb47644039 100644
> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
> @@ -592,6 +592,9 @@ static const struct vfio_device_ops vfio_fsl_mc_ops = {
>   	.read		= vfio_fsl_mc_read,
>   	.write		= vfio_fsl_mc_write,
>   	.mmap		= vfio_fsl_mc_mmap,
> +	.bind_iommufd	= vfio_iommufd_physical_bind,
> +	.unbind_iommufd	= vfio_iommufd_physical_unbind,
> +	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
>   };
>   
>   static struct fsl_mc_driver vfio_fsl_mc_driver = {
> diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
> new file mode 100644
> index 00000000000000..bf755d0f375c5d
> --- /dev/null
> +++ b/drivers/vfio/iommufd.c
> @@ -0,0 +1,99 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
> + */
> +#include <linux/vfio.h>
> +#include <linux/iommufd.h>
> +
> +#include "vfio.h"
> +
> +MODULE_IMPORT_NS(IOMMUFD);
> +MODULE_IMPORT_NS(IOMMUFD_VFIO);
> +
> +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
> +{
> +	u32 ioas_id;
> +	u32 device_id;
> +	int ret;
> +
> +	lockdep_assert_held(&vdev->dev_set->lock);
> +
> +	/*
> +	 * If the driver doesn't provide this op then it means the device does
> +	 * not do DMA at all. So nothing to do.
> +	 */
> +	if (!vdev->ops->bind_iommufd)
> +		return 0;
> +
> +	ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id);
> +	if (ret)
> +		return ret;
> +
> +	ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id);
> +	if (ret)
> +		goto err_unbind;
> +	ret = vdev->ops->attach_ioas(vdev, &ioas_id);
> +	if (ret)
> +		goto err_unbind;
> +	vdev->iommufd_attached = true;

Would it be better to set this bool in vfio_iommufd_physical_attach_ioas(),
as the emulated devices use iommufd_access instead? Or do you mean for this
flag to cover both cases?
  --
Regards,
Yi Liu
Jason Gunthorpe Nov. 14, 2022, 2:47 p.m. UTC | #10
On Fri, Nov 11, 2022 at 12:12:36PM +0800, Yi Liu wrote:

> > +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
> > +{
> > +	u32 ioas_id;
> > +	u32 device_id;
> > +	int ret;
> > +
> > +	lockdep_assert_held(&vdev->dev_set->lock);
> > +
> > +	/*
> > +	 * If the driver doesn't provide this op then it means the device does
> > +	 * not do DMA at all. So nothing to do.
> > +	 */
> > +	if (!vdev->ops->bind_iommufd)
> > +		return 0;
> > +
> > +	ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id);
> > +	if (ret)
> > +		return ret;
> > +
> > +	ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id);
> > +	if (ret)
> > +		goto err_unbind;
> > +	ret = vdev->ops->attach_ioas(vdev, &ioas_id);
> > +	if (ret)
> > +		goto err_unbind;
> > +	vdev->iommufd_attached = true;
> 
> it's better to set this bool in vfio_iommufd_physical_attach_ioas() as
> the emulated devices uses iommufd_access instead. is it? or you mean this
> flag to cover both cases?

Yes, that is probably clearer:

@@ -50,7 +50,6 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
 	ret = vdev->ops->attach_ioas(vdev, &ioas_id);
 	if (ret)
 		goto err_unbind;
-	vdev->iommufd_attached = true;
 
 	/*
 	 * The legacy path has no way to return the device id or the selected
@@ -110,10 +109,15 @@ EXPORT_SYMBOL_GPL(vfio_iommufd_physical_unbind);
 int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id)
 {
 	unsigned int flags = 0;
+	int rc;
 
 	if (vfio_allow_unsafe_interrupts)
 		flags |= IOMMUFD_ATTACH_FLAGS_ALLOW_UNSAFE_INTERRUPT;
-	return iommufd_device_attach(vdev->iommufd_device, pt_id, flags);
+	rc = iommufd_device_attach(vdev->iommufd_device, pt_id, flags);
+	if (rc)
+		return rc;
+	vdev->iommufd_attached = true;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(vfio_iommufd_physical_attach_ioas);
 
Thanks,
Jason
diff mbox series

Patch

diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index b693a1169286f8..3863922529ef20 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -6,6 +6,7 @@  obj-$(CONFIG_VFIO) += vfio.o
 vfio-y += vfio_main.o \
 	  iova_bitmap.o \
 	  container.o
+vfio-$(CONFIG_IOMMUFD) += iommufd.o
 
 obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index b16874e913e4f5..5cd4bb47644039 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -592,6 +592,9 @@  static const struct vfio_device_ops vfio_fsl_mc_ops = {
 	.read		= vfio_fsl_mc_read,
 	.write		= vfio_fsl_mc_write,
 	.mmap		= vfio_fsl_mc_mmap,
+	.bind_iommufd	= vfio_iommufd_physical_bind,
+	.unbind_iommufd	= vfio_iommufd_physical_unbind,
+	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
 };
 
 static struct fsl_mc_driver vfio_fsl_mc_driver = {
diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
new file mode 100644
index 00000000000000..bf755d0f375c5d
--- /dev/null
+++ b/drivers/vfio/iommufd.c
@@ -0,0 +1,99 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ */
+#include <linux/vfio.h>
+#include <linux/iommufd.h>
+
+#include "vfio.h"
+
+MODULE_IMPORT_NS(IOMMUFD);
+MODULE_IMPORT_NS(IOMMUFD_VFIO);
+
+int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
+{
+	u32 ioas_id;
+	u32 device_id;
+	int ret;
+
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	/*
+	 * If the driver doesn't provide this op then it means the device does
+	 * not do DMA at all. So nothing to do.
+	 */
+	if (!vdev->ops->bind_iommufd)
+		return 0;
+
+	ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id);
+	if (ret)
+		return ret;
+
+	ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id);
+	if (ret)
+		goto err_unbind;
+	ret = vdev->ops->attach_ioas(vdev, &ioas_id);
+	if (ret)
+		goto err_unbind;
+	vdev->iommufd_attached = true;
+
+	/*
+	 * The legacy path has no way to return the device id or the selected
+	 * pt_id
+	 */
+	return 0;
+
+err_unbind:
+	if (vdev->ops->unbind_iommufd)
+		vdev->ops->unbind_iommufd(vdev);
+	return ret;
+}
+
+void vfio_iommufd_unbind(struct vfio_device *vdev)
+{
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	if (vdev->ops->unbind_iommufd)
+		vdev->ops->unbind_iommufd(vdev);
+}
+
+/*
+ * The physical standard ops mean that the iommufd_device is bound to the
+ * physical device vdev->dev that was provided to vfio_init_group_dev(). Drivers
+ * using this ops set should call vfio_register_group_dev()
+ */
+int vfio_iommufd_physical_bind(struct vfio_device *vdev,
+			       struct iommufd_ctx *ictx, u32 *out_device_id)
+{
+	struct iommufd_device *idev;
+
+	idev = iommufd_device_bind(ictx, vdev->dev, out_device_id);
+	if (IS_ERR(idev))
+		return PTR_ERR(idev);
+	vdev->iommufd_device = idev;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_bind);
+
+void vfio_iommufd_physical_unbind(struct vfio_device *vdev)
+{
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	if (vdev->iommufd_attached) {
+		iommufd_device_detach(vdev->iommufd_device);
+		vdev->iommufd_attached = false;
+	}
+	iommufd_device_unbind(vdev->iommufd_device);
+	vdev->iommufd_device = NULL;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_unbind);
+
+int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id)
+{
+	unsigned int flags = 0;
+
+	if (vfio_allow_unsafe_interrupts)
+		flags |= IOMMUFD_ATTACH_FLAGS_ALLOW_UNSAFE_INTERRUPT;
+	return iommufd_device_attach(vdev->iommufd_device, pt_id, flags);
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_attach_ioas);
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 39eeca18a0f7c8..40019b11c5a969 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -1246,6 +1246,9 @@  static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = {
 	.mmap = hisi_acc_vfio_pci_mmap,
 	.request = vfio_pci_core_request,
 	.match = vfio_pci_core_match,
+	.bind_iommufd = vfio_iommufd_physical_bind,
+	.unbind_iommufd = vfio_iommufd_physical_unbind,
+	.attach_ioas = vfio_iommufd_physical_attach_ioas,
 };
 
 static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
@@ -1261,6 +1264,9 @@  static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
 	.mmap = vfio_pci_core_mmap,
 	.request = vfio_pci_core_request,
 	.match = vfio_pci_core_match,
+	.bind_iommufd = vfio_iommufd_physical_bind,
+	.unbind_iommufd = vfio_iommufd_physical_unbind,
+	.attach_ioas = vfio_iommufd_physical_attach_ioas,
 };
 
 static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index fd6ccb8454a24a..32d1f38d351e7e 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -623,6 +623,9 @@  static const struct vfio_device_ops mlx5vf_pci_ops = {
 	.mmap = vfio_pci_core_mmap,
 	.request = vfio_pci_core_request,
 	.match = vfio_pci_core_match,
+	.bind_iommufd = vfio_iommufd_physical_bind,
+	.unbind_iommufd = vfio_iommufd_physical_unbind,
+	.attach_ioas = vfio_iommufd_physical_attach_ioas,
 };
 
 static int mlx5vf_pci_probe(struct pci_dev *pdev,
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 1d4919edfbde48..29091ee2e9849b 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -138,6 +138,9 @@  static const struct vfio_device_ops vfio_pci_ops = {
 	.mmap		= vfio_pci_core_mmap,
 	.request	= vfio_pci_core_request,
 	.match		= vfio_pci_core_match,
+	.bind_iommufd	= vfio_iommufd_physical_bind,
+	.unbind_iommufd	= vfio_iommufd_physical_unbind,
+	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
 };
 
 static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c
index eaea63e5294c58..5a046098d0bdf4 100644
--- a/drivers/vfio/platform/vfio_amba.c
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -117,6 +117,9 @@  static const struct vfio_device_ops vfio_amba_ops = {
 	.read		= vfio_platform_read,
 	.write		= vfio_platform_write,
 	.mmap		= vfio_platform_mmap,
+	.bind_iommufd	= vfio_iommufd_physical_bind,
+	.unbind_iommufd	= vfio_iommufd_physical_unbind,
+	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
 };
 
 static const struct amba_id pl330_ids[] = {
diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c
index 82cedcebfd9022..b87c3b70878341 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -106,6 +106,9 @@  static const struct vfio_device_ops vfio_platform_ops = {
 	.read		= vfio_platform_read,
 	.write		= vfio_platform_write,
 	.mmap		= vfio_platform_mmap,
+	.bind_iommufd	= vfio_iommufd_physical_bind,
+	.unbind_iommufd	= vfio_iommufd_physical_unbind,
+	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
 };
 
 static struct platform_driver vfio_platform_driver = {
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index 985e13d52989ca..809f2e8523968e 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -124,6 +124,21 @@  void vfio_device_container_unregister(struct vfio_device *device);
 int __init vfio_container_init(void);
 void vfio_container_cleanup(void);
 
+#if IS_ENABLED(CONFIG_IOMMUFD)
+int vfio_iommufd_bind(struct vfio_device *device, struct iommufd_ctx *ictx);
+void vfio_iommufd_unbind(struct vfio_device *device);
+#else
+static inline int vfio_iommufd_bind(struct vfio_device *device,
+				    struct iommufd_ctx *ictx)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void vfio_iommufd_unbind(struct vfio_device *device)
+{
+}
+#endif
+
 #ifdef CONFIG_VFIO_NOIOMMU
 extern bool vfio_noiommu __read_mostly;
 #else
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 8c124290ce9f0d..2f3e35d2f2083d 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -528,6 +528,11 @@  static int __vfio_register_dev(struct vfio_device *device,
 	if (IS_ERR(group))
 		return PTR_ERR(group);
 
+	if (WARN_ON(device->ops->bind_iommufd &&
+		    (!device->ops->unbind_iommufd ||
+		     !device->ops->attach_ioas)))
+		return -EINVAL;
+
 	/*
 	 * If the driver doesn't specify a set then the device is added to a
 	 * singleton set just for itself.
@@ -795,6 +800,10 @@  static int vfio_device_first_open(struct vfio_device *device)
 		ret = vfio_group_use_container(device->group);
 		if (ret)
 			goto err_module_put;
+	} else if (device->group->iommufd) {
+		ret = vfio_iommufd_bind(device, device->group->iommufd);
+		if (ret)
+			goto err_module_put;
 	}
 
 	device->kvm = device->group->kvm;
@@ -812,6 +821,7 @@  static int vfio_device_first_open(struct vfio_device *device)
 	device->kvm = NULL;
 	if (device->group->container)
 		vfio_group_unuse_container(device->group);
+	vfio_iommufd_unbind(device);
 err_module_put:
 	mutex_unlock(&device->group->group_lock);
 	module_put(device->dev->driver->owner);
@@ -830,6 +840,7 @@  static void vfio_device_last_close(struct vfio_device *device)
 	device->kvm = NULL;
 	if (device->group->container)
 		vfio_group_unuse_container(device->group);
+	vfio_iommufd_unbind(device);
 	mutex_unlock(&device->group->group_lock);
 	module_put(device->dev->driver->owner);
 }
@@ -1937,8 +1948,6 @@  static void __exit vfio_cleanup(void)
 module_init(vfio_init);
 module_exit(vfio_cleanup);
 
-MODULE_IMPORT_NS(IOMMUFD);
-MODULE_IMPORT_NS(IOMMUFD_VFIO);
 MODULE_VERSION(DRIVER_VERSION);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR(DRIVER_AUTHOR);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index e7cebeb875dd1a..a7fc4d747dc226 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -17,6 +17,8 @@ 
 #include <linux/iova_bitmap.h>
 
 struct kvm;
+struct iommufd_ctx;
+struct iommufd_device;
 
 /*
  * VFIO devices can be placed in a set, this allows all devices to share this
@@ -54,6 +56,10 @@  struct vfio_device {
 	struct completion comp;
 	struct list_head group_next;
 	struct list_head iommu_entry;
+#if IS_ENABLED(CONFIG_IOMMUFD)
+	struct iommufd_device *iommufd_device;
+	bool iommufd_attached;
+#endif
 };
 
 /**
@@ -80,6 +86,10 @@  struct vfio_device_ops {
 	char	*name;
 	int	(*init)(struct vfio_device *vdev);
 	void	(*release)(struct vfio_device *vdev);
+	int	(*bind_iommufd)(struct vfio_device *vdev,
+				struct iommufd_ctx *ictx, u32 *out_device_id);
+	void	(*unbind_iommufd)(struct vfio_device *vdev);
+	int	(*attach_ioas)(struct vfio_device *vdev, u32 *pt_id);
 	int	(*open_device)(struct vfio_device *vdev);
 	void	(*close_device)(struct vfio_device *vdev);
 	ssize_t	(*read)(struct vfio_device *vdev, char __user *buf,
@@ -96,6 +106,21 @@  struct vfio_device_ops {
 				  void __user *arg, size_t argsz);
 };
 
+#if IS_ENABLED(CONFIG_IOMMUFD)
+int vfio_iommufd_physical_bind(struct vfio_device *vdev,
+			       struct iommufd_ctx *ictx, u32 *out_device_id);
+void vfio_iommufd_physical_unbind(struct vfio_device *vdev);
+int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
+#else
+#define vfio_iommufd_physical_bind                                      \
+	((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx,   \
+		  u32 *out_device_id)) NULL)
+#define vfio_iommufd_physical_unbind \
+	((void (*)(struct vfio_device *vdev)) NULL)
+#define vfio_iommufd_physical_attach_ioas \
+	((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
+#endif
+
 /**
  * @migration_set_state: Optional callback to change the migration state for
  *         devices that support migration. It's mandatory for