Message ID | 7-v2-65016290f146+33e-vfio_iommufd_jgg@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Connect VFIO to IOMMUFD | expand |
On Mon, Nov 07, 2022 at 08:52:51PM -0400, Jason Gunthorpe wrote: > @@ -795,6 +800,10 @@ static int vfio_device_first_open(struct vfio_device *device) > ret = vfio_group_use_container(device->group); > if (ret) > goto err_module_put; > + } else if (device->group->iommufd) { > + ret = vfio_iommufd_bind(device, device->group->iommufd); Here we check device->group->iommufd... > + if (ret) > + goto err_module_put; > } > > device->kvm = device->group->kvm; > @@ -812,6 +821,7 @@ static int vfio_device_first_open(struct vfio_device *device) > device->kvm = NULL; > if (device->group->container) > vfio_group_unuse_container(device->group); > + vfio_iommufd_unbind(device); ...yet, missing here, which could result in kernel oops. Should probably add something similar: + if (device->group->iommufd) + vfio_iommufd_unbind(device); Or should check !vdev->iommufd_device inside the ->unbind. > err_module_put: > mutex_unlock(&device->group->group_lock); > module_put(device->dev->driver->owner); > @@ -830,6 +840,7 @@ static void vfio_device_last_close(struct vfio_device *device) > device->kvm = NULL; > if (device->group->container) > vfio_group_unuse_container(device->group); > + vfio_iommufd_unbind(device); Ditto
On 2022/11/8 14:10, Nicolin Chen wrote: > On Mon, Nov 07, 2022 at 08:52:51PM -0400, Jason Gunthorpe wrote: > >> @@ -795,6 +800,10 @@ static int vfio_device_first_open(struct vfio_device *device) >> ret = vfio_group_use_container(device->group); >> if (ret) >> goto err_module_put; >> + } else if (device->group->iommufd) { >> + ret = vfio_iommufd_bind(device, device->group->iommufd); > > Here we check device->group->iommufd... > >> + if (ret) >> + goto err_module_put; >> } >> >> device->kvm = device->group->kvm; >> @@ -812,6 +821,7 @@ static int vfio_device_first_open(struct vfio_device *device) >> device->kvm = NULL; >> if (device->group->container) >> vfio_group_unuse_container(device->group); >> + vfio_iommufd_unbind(device); > > ...yet, missing here, which could result in kernel oops. > > Should probably add something similar: > + if (device->group->iommufd) > + vfio_iommufd_unbind(device); > > Or should check !vdev->iommufd_device inside the ->unbind. this check was in prior version, but removed in this version. any special reason? Jason? > >> err_module_put: >> mutex_unlock(&device->group->group_lock); >> module_put(device->dev->driver->owner); >> @@ -830,6 +840,7 @@ static void vfio_device_last_close(struct vfio_device *device) >> device->kvm = NULL; >> if (device->group->container) >> vfio_group_unuse_container(device->group); >> + vfio_iommufd_unbind(device); > > Ditto
On Mon, Nov 07, 2022 at 10:10:59PM -0800, Nicolin Chen wrote: > > @@ -812,6 +821,7 @@ static int vfio_device_first_open(struct vfio_device *device) > > device->kvm = NULL; > > if (device->group->container) > > vfio_group_unuse_container(device->group); > > + vfio_iommufd_unbind(device); > > ...yet, missing here, which could result in kernel oops. > > Should probably add something similar: > + if (device->group->iommufd) > + vfio_iommufd_unbind(device); > > Or should check !vdev->iommufd_device inside the ->unbind. Let's keep it symmetric since the container is checked: @@ -821,7 +821,8 @@ static int vfio_device_first_open(struct vfio_device *device) device->kvm = NULL; if (device->group->container) vfio_group_unuse_container(device->group); - vfio_iommufd_unbind(device); + else if (device->group->iommufd) + vfio_iommufd_unbind(device); err_module_put: mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); @@ -840,7 +841,8 @@ static void vfio_device_last_close(struct vfio_device *device) device->kvm = NULL; if (device->group->container) vfio_group_unuse_container(device->group); - vfio_iommufd_unbind(device); + else if (device->group->iommufd) + vfio_iommufd_unbind(device); mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); Thanks, Jason
On Tue, Nov 08, 2022 at 03:41:25PM +0800, Yi Liu wrote: > On 2022/11/8 14:10, Nicolin Chen wrote: > > On Mon, Nov 07, 2022 at 08:52:51PM -0400, Jason Gunthorpe wrote: > > > > > @@ -795,6 +800,10 @@ static int vfio_device_first_open(struct vfio_device *device) > > > ret = vfio_group_use_container(device->group); > > > if (ret) > > > goto err_module_put; > > > + } else if (device->group->iommufd) { > > > + ret = vfio_iommufd_bind(device, device->group->iommufd); > > > > Here we check device->group->iommufd... > > > > > + if (ret) > > > + goto err_module_put; > > > } > > > device->kvm = device->group->kvm; > > > @@ -812,6 +821,7 @@ static int vfio_device_first_open(struct vfio_device *device) > > > device->kvm = NULL; > > > if (device->group->container) > > > vfio_group_unuse_container(device->group); > > > + vfio_iommufd_unbind(device); > > > > ...yet, missing here, which could result in kernel oops. > > > > Should probably add something similar: > > + if (device->group->iommufd) > > + vfio_iommufd_unbind(device); > > > > Or should check !vdev->iommufd_device inside the ->unbind. > > this check was in prior version, but removed in this version. any > special reason? Jason? Oooh, this makes more sense - Kevin pointed out the check was wrong: > > +void vfio_iommufd_unbind(struct vfio_device *vdev) > > +{ > > + lockdep_assert_held(&vdev->dev_set->lock); > > + > > + if (!vdev->iommufd_device) > > + return; > there is no iommufd_device in the emulated path... And he is right, so I dropped it. But really the check was just misspelled, it was supposed to be "device->group->iommufd" because the caller assumed it. Still, I think the right way to fix it is to lift the check as we don't touch group->iommufd in iommufd.c Thanks, Jason
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Tuesday, November 8, 2022 8:53 AM > > + > +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) > +{ > + u32 ioas_id; > + u32 device_id; > + int ret; > + > + lockdep_assert_held(&vdev->dev_set->lock); > + > + /* > + * If the driver doesn't provide this op then it means the device does > + * not do DMA at all. So nothing to do. > + */ > + if (!vdev->ops->bind_iommufd) > + return 0; > + > + ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id); > + if (ret) > + return ret; > + > + ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id); > + if (ret) > + goto err_unbind; > + ret = vdev->ops->attach_ioas(vdev, &ioas_id); > + if (ret) > + goto err_unbind; With our discussion in v1: https://lore.kernel.org/all/Y2mgJqz8fvm54C+f@nvidia.com/ I got the rationale on the iommufd part, which doesn't have the concept of a container and hence does not necessarily need to impose a restriction on when a user can change a compat ioas. But from the vfio side I wonder whether we should cache the compat ioas id when it's attached by the first device and then use it all the way for other device attachments coming after, implying IOAS_SET only affects containers which haven't been attached. In concept a container should be only aliased to one compat ioas in its lifetime.
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Wednesday, November 9, 2022 1:52 AM > > On Tue, Nov 08, 2022 at 03:41:25PM +0800, Yi Liu wrote: > > On 2022/11/8 14:10, Nicolin Chen wrote: > > > On Mon, Nov 07, 2022 at 08:52:51PM -0400, Jason Gunthorpe wrote: > > > > > > > @@ -795,6 +800,10 @@ static int vfio_device_first_open(struct > vfio_device *device) > > > > ret = vfio_group_use_container(device->group); > > > > if (ret) > > > > goto err_module_put; > > > > + } else if (device->group->iommufd) { > > > > + ret = vfio_iommufd_bind(device, device->group->iommufd); > > > > > > Here we check device->group->iommufd... > > > > > > > + if (ret) > > > > + goto err_module_put; > > > > } > > > > device->kvm = device->group->kvm; > > > > @@ -812,6 +821,7 @@ static int vfio_device_first_open(struct > vfio_device *device) > > > > device->kvm = NULL; > > > > if (device->group->container) > > > > vfio_group_unuse_container(device->group); > > > > + vfio_iommufd_unbind(device); > > > > > > ...yet, missing here, which could result in kernel oops. > > > > > > Should probably add something similar: > > > + if (device->group->iommufd) > > > + vfio_iommufd_unbind(device); > > > > > > Or should check !vdev->iommufd_device inside the ->unbind. > > > > this check was in prior version, but removed in this version. any > > special reason? Jason? > > Oooh, this makes more sense - Kevin pointed out the check was wrong: > > > > +void vfio_iommufd_unbind(struct vfio_device *vdev) > > > +{ > > > + lockdep_assert_held(&vdev->dev_set->lock); > > > + > > > + if (!vdev->iommufd_device) > > > + return; > > > there is no iommufd_device in the emulated path... > > And he is right, so I dropped it. But really the check was just > misspelled, it was supposed to be "device->group->iommufd" because the > caller assumed it. > > Still, I think the right way to fix it is to lift the check as we > don't touch group->iommufd in iommufd.c > yes this is the right fix.
On Thu, Nov 10, 2022 at 03:11:16AM +0000, Tian, Kevin wrote: > > From: Jason Gunthorpe <jgg@nvidia.com> > > Sent: Tuesday, November 8, 2022 8:53 AM > > > > + > > +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) > > +{ > > + u32 ioas_id; > > + u32 device_id; > > + int ret; > > + > > + lockdep_assert_held(&vdev->dev_set->lock); > > + > > + /* > > + * If the driver doesn't provide this op then it means the device does > > + * not do DMA at all. So nothing to do. > > + */ > > + if (!vdev->ops->bind_iommufd) > > + return 0; > > + > > + ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id); > > + if (ret) > > + return ret; > > + > > + ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id); > > + if (ret) > > + goto err_unbind; > > + ret = vdev->ops->attach_ioas(vdev, &ioas_id); > > + if (ret) > > + goto err_unbind; > > with our discussion in v1: > > https://lore.kernel.org/all/Y2mgJqz8fvm54C+f@nvidia.com/ > > I got the rationale on iommufd part which doesn't have the concept > of container hence not necessarily to impose restriction on when > an user can change a compat ioas. > > But from vfio side I wonder whether we should cache the compat > ioas id when it's attached by the first device and then use it all the > way for other device attachments coming after. implying IOAS_SET > only affects containers which haven't been attached. I can't see a reason to do this. IOAS_SET is a new ioctl and it has new semantics beyond what original vfio container could do. In this case having an impact on the next vfio_device that is opened. This seems generally useful enough I wouldn't want to block it. In any case, we can't *really* change this because the vfio layer is working on IDs and the IDs can be destroyed/recreated from under it. So if we try to hold the ID we could still end up getting it changed anyhow. Jason
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Friday, November 11, 2022 1:21 AM > > On Thu, Nov 10, 2022 at 03:11:16AM +0000, Tian, Kevin wrote: > > > From: Jason Gunthorpe <jgg@nvidia.com> > > > Sent: Tuesday, November 8, 2022 8:53 AM > > > > > > + > > > +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx > *ictx) > > > +{ > > > + u32 ioas_id; > > > + u32 device_id; > > > + int ret; > > > + > > > + lockdep_assert_held(&vdev->dev_set->lock); > > > + > > > + /* > > > + * If the driver doesn't provide this op then it means the device does > > > + * not do DMA at all. So nothing to do. > > > + */ > > > + if (!vdev->ops->bind_iommufd) > > > + return 0; > > > + > > > + ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id); > > > + if (ret) > > > + return ret; > > > + > > > + ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id); > > > + if (ret) > > > + goto err_unbind; > > > + ret = vdev->ops->attach_ioas(vdev, &ioas_id); > > > + if (ret) > > > + goto err_unbind; > > > > with our discussion in v1: > > > > https://lore.kernel.org/all/Y2mgJqz8fvm54C+f@nvidia.com/ > > > > I got the rationale on iommufd part which doesn't have the concept > > of container hence not necessarily to impose restriction on when > > an user can change a compat ioas. > > > > But from vfio side I wonder whether we should cache the compat > > ioas id when it's attached by the first device and then use it all the > > way for other device attachments coming after. implying IOAS_SET > > only affects containers which haven't been attached. > > I can't see a reason to do this. IOAS_SET is a new ioctl and it has > new semantics beyond what original vfio container could do. In this > case having an impact on the next vfio_device that is opened. > > This seems generally useful enough I wouldn't want to block it. > > In any case, we can't *really* change this because the vfio layer is > working on IDs and the IDs can be destroyed/recreated from under > it. 
So if we try to hold the ID we could still end up getting it > changed anyhow. > OK, this is a valid point. A legacy vfio application doesn't use IOAS_SET, so backward compatibility is guaranteed. An iommufd-native application will use cdev, where IOAS_SET and the compat ioas are irrelevant. Here we just allow an interesting usage where a user is allowed to do more funny things with IOAS_SET on vfio-compat. Not sure how useful it is, but it's not something we want to prohibit.
On 2022/11/8 08:52, Jason Gunthorpe wrote: > This creates the iommufd_device for the physical VFIO drivers. These are > all the drivers that are calling vfio_register_group_dev() and expect the > type1 code to setup a real iommu_domain against their parent struct > device. > > The design gives the driver a choice in how it gets connected to iommufd > by providing bind_iommufd/unbind_iommufd/attach_ioas callbacks to > implement as required. The core code provides three default callbacks for > physical mode using a real iommu_domain. This is suitable for drivers > using vfio_register_group_dev() > > Tested-by: Nicolin Chen <nicolinc@nvidia.com> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > --- > drivers/vfio/Makefile | 1 + > drivers/vfio/fsl-mc/vfio_fsl_mc.c | 3 + > drivers/vfio/iommufd.c | 99 +++++++++++++++++++ > .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 6 ++ > drivers/vfio/pci/mlx5/main.c | 3 + > drivers/vfio/pci/vfio_pci.c | 3 + > drivers/vfio/platform/vfio_amba.c | 3 + > drivers/vfio/platform/vfio_platform.c | 3 + > drivers/vfio/vfio.h | 15 +++ > drivers/vfio/vfio_main.c | 13 ++- > include/linux/vfio.h | 25 +++++ > 11 files changed, 172 insertions(+), 2 deletions(-) > create mode 100644 drivers/vfio/iommufd.c > > diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile > index b693a1169286f8..3863922529ef20 100644 > --- a/drivers/vfio/Makefile > +++ b/drivers/vfio/Makefile > @@ -6,6 +6,7 @@ obj-$(CONFIG_VFIO) += vfio.o > vfio-y += vfio_main.o \ > iova_bitmap.o \ > container.o > +vfio-$(CONFIG_IOMMUFD) += iommufd.o > > obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o > obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o > diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c > index b16874e913e4f5..5cd4bb47644039 100644 > --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c > +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c > @@ -592,6 +592,9 @@ static const struct vfio_device_ops vfio_fsl_mc_ops = { > .read = vfio_fsl_mc_read, > .write = 
vfio_fsl_mc_write, > .mmap = vfio_fsl_mc_mmap, > + .bind_iommufd = vfio_iommufd_physical_bind, > + .unbind_iommufd = vfio_iommufd_physical_unbind, > + .attach_ioas = vfio_iommufd_physical_attach_ioas, > }; > > static struct fsl_mc_driver vfio_fsl_mc_driver = { > diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c > new file mode 100644 > index 00000000000000..bf755d0f375c5d > --- /dev/null > +++ b/drivers/vfio/iommufd.c > @@ -0,0 +1,99 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES > + */ > +#include <linux/vfio.h> > +#include <linux/iommufd.h> > + > +#include "vfio.h" > + > +MODULE_IMPORT_NS(IOMMUFD); > +MODULE_IMPORT_NS(IOMMUFD_VFIO); > + > +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) > +{ > + u32 ioas_id; > + u32 device_id; > + int ret; > + > + lockdep_assert_held(&vdev->dev_set->lock); > + > + /* > + * If the driver doesn't provide this op then it means the device does > + * not do DMA at all. So nothing to do. > + */ > + if (!vdev->ops->bind_iommufd) > + return 0; > + > + ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id); > + if (ret) > + return ret; > + > + ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id); > + if (ret) > + goto err_unbind; > + ret = vdev->ops->attach_ioas(vdev, &ioas_id); > + if (ret) > + goto err_unbind; > + vdev->iommufd_attached = true; it's better to set this bool in vfio_iommufd_physical_attach_ioas() as the emulated devices uses iommufd_access instead. is it? or you mean this flag to cover both cases? -- Regards, Yi Liu
On Fri, Nov 11, 2022 at 12:12:36PM +0800, Yi Liu wrote: > > +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) > > +{ > > + u32 ioas_id; > > + u32 device_id; > > + int ret; > > + > > + lockdep_assert_held(&vdev->dev_set->lock); > > + > > + /* > > + * If the driver doesn't provide this op then it means the device does > > + * not do DMA at all. So nothing to do. > > + */ > > + if (!vdev->ops->bind_iommufd) > > + return 0; > > + > > + ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id); > > + if (ret) > > + return ret; > > + > > + ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id); > > + if (ret) > > + goto err_unbind; > > + ret = vdev->ops->attach_ioas(vdev, &ioas_id); > > + if (ret) > > + goto err_unbind; > > + vdev->iommufd_attached = true; > > it's better to set this bool in vfio_iommufd_physical_attach_ioas() as > the emulated devices uses iommufd_access instead. is it? or you mean this > flag to cover both cases? Yes, that is probably clearer: @@ -50,7 +50,6 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) ret = vdev->ops->attach_ioas(vdev, &ioas_id); if (ret) goto err_unbind; - vdev->iommufd_attached = true; /* * The legacy path has no way to return the device id or the selected @@ -110,10 +109,15 @@ EXPORT_SYMBOL_GPL(vfio_iommufd_physical_unbind); int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id) { unsigned int flags = 0; + int rc; if (vfio_allow_unsafe_interrupts) flags |= IOMMUFD_ATTACH_FLAGS_ALLOW_UNSAFE_INTERRUPT; - return iommufd_device_attach(vdev->iommufd_device, pt_id, flags); + rc = iommufd_device_attach(vdev->iommufd_device, pt_id, flags); + if (rc) + return rc; + vdev->iommufd_attached = true; + return 0; } EXPORT_SYMBOL_GPL(vfio_iommufd_physical_attach_ioas); Thanks, Jason
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index b693a1169286f8..3863922529ef20 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_VFIO) += vfio.o vfio-y += vfio_main.o \ iova_bitmap.o \ container.o +vfio-$(CONFIG_IOMMUFD) += iommufd.o obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index b16874e913e4f5..5cd4bb47644039 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -592,6 +592,9 @@ static const struct vfio_device_ops vfio_fsl_mc_ops = { .read = vfio_fsl_mc_read, .write = vfio_fsl_mc_write, .mmap = vfio_fsl_mc_mmap, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static struct fsl_mc_driver vfio_fsl_mc_driver = { diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c new file mode 100644 index 00000000000000..bf755d0f375c5d --- /dev/null +++ b/drivers/vfio/iommufd.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + */ +#include <linux/vfio.h> +#include <linux/iommufd.h> + +#include "vfio.h" + +MODULE_IMPORT_NS(IOMMUFD); +MODULE_IMPORT_NS(IOMMUFD_VFIO); + +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) +{ + u32 ioas_id; + u32 device_id; + int ret; + + lockdep_assert_held(&vdev->dev_set->lock); + + /* + * If the driver doesn't provide this op then it means the device does + * not do DMA at all. So nothing to do. 
+ */ + if (!vdev->ops->bind_iommufd) + return 0; + + ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id); + if (ret) + return ret; + + ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id); + if (ret) + goto err_unbind; + ret = vdev->ops->attach_ioas(vdev, &ioas_id); + if (ret) + goto err_unbind; + vdev->iommufd_attached = true; + + /* + * The legacy path has no way to return the device id or the selected + * pt_id + */ + return 0; + +err_unbind: + if (vdev->ops->unbind_iommufd) + vdev->ops->unbind_iommufd(vdev); + return ret; +} + +void vfio_iommufd_unbind(struct vfio_device *vdev) +{ + lockdep_assert_held(&vdev->dev_set->lock); + + if (vdev->ops->unbind_iommufd) + vdev->ops->unbind_iommufd(vdev); +} + +/* + * The physical standard ops mean that the iommufd_device is bound to the + * physical device vdev->dev that was provided to vfio_init_group_dev(). Drivers + * using this ops set should call vfio_register_group_dev() + */ +int vfio_iommufd_physical_bind(struct vfio_device *vdev, + struct iommufd_ctx *ictx, u32 *out_device_id) +{ + struct iommufd_device *idev; + + idev = iommufd_device_bind(ictx, vdev->dev, out_device_id); + if (IS_ERR(idev)) + return PTR_ERR(idev); + vdev->iommufd_device = idev; + return 0; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_physical_bind); + +void vfio_iommufd_physical_unbind(struct vfio_device *vdev) +{ + lockdep_assert_held(&vdev->dev_set->lock); + + if (vdev->iommufd_attached) { + iommufd_device_detach(vdev->iommufd_device); + vdev->iommufd_attached = false; + } + iommufd_device_unbind(vdev->iommufd_device); + vdev->iommufd_device = NULL; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_physical_unbind); + +int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id) +{ + unsigned int flags = 0; + + if (vfio_allow_unsafe_interrupts) + flags |= IOMMUFD_ATTACH_FLAGS_ALLOW_UNSAFE_INTERRUPT; + return iommufd_device_attach(vdev->iommufd_device, pt_id, flags); +} +EXPORT_SYMBOL_GPL(vfio_iommufd_physical_attach_ioas); diff --git 
a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 39eeca18a0f7c8..40019b11c5a969 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -1246,6 +1246,9 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = { .mmap = hisi_acc_vfio_pci_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static const struct vfio_device_ops hisi_acc_vfio_pci_ops = { @@ -1261,6 +1264,9 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index fd6ccb8454a24a..32d1f38d351e7e 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -623,6 +623,9 @@ static const struct vfio_device_ops mlx5vf_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static int mlx5vf_pci_probe(struct pci_dev *pdev, diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 1d4919edfbde48..29091ee2e9849b 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -138,6 +138,9 @@ static const struct vfio_device_ops vfio_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .bind_iommufd = 
vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c index eaea63e5294c58..5a046098d0bdf4 100644 --- a/drivers/vfio/platform/vfio_amba.c +++ b/drivers/vfio/platform/vfio_amba.c @@ -117,6 +117,9 @@ static const struct vfio_device_ops vfio_amba_ops = { .read = vfio_platform_read, .write = vfio_platform_write, .mmap = vfio_platform_mmap, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static const struct amba_id pl330_ids[] = { diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c index 82cedcebfd9022..b87c3b70878341 100644 --- a/drivers/vfio/platform/vfio_platform.c +++ b/drivers/vfio/platform/vfio_platform.c @@ -106,6 +106,9 @@ static const struct vfio_device_ops vfio_platform_ops = { .read = vfio_platform_read, .write = vfio_platform_write, .mmap = vfio_platform_mmap, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static struct platform_driver vfio_platform_driver = { diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 985e13d52989ca..809f2e8523968e 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -124,6 +124,21 @@ void vfio_device_container_unregister(struct vfio_device *device); int __init vfio_container_init(void); void vfio_container_cleanup(void); +#if IS_ENABLED(CONFIG_IOMMUFD) +int vfio_iommufd_bind(struct vfio_device *device, struct iommufd_ctx *ictx); +void vfio_iommufd_unbind(struct vfio_device *device); +#else +static inline int vfio_iommufd_bind(struct vfio_device *device, + struct iommufd_ctx *ictx) +{ + return -EOPNOTSUPP; +} + +static inline void 
vfio_iommufd_unbind(struct vfio_device *device) +{ +} +#endif + #ifdef CONFIG_VFIO_NOIOMMU extern bool vfio_noiommu __read_mostly; #else diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 8c124290ce9f0d..2f3e35d2f2083d 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -528,6 +528,11 @@ static int __vfio_register_dev(struct vfio_device *device, if (IS_ERR(group)) return PTR_ERR(group); + if (WARN_ON(device->ops->bind_iommufd && + (!device->ops->unbind_iommufd || + !device->ops->attach_ioas))) + return -EINVAL; + /* * If the driver doesn't specify a set then the device is added to a * singleton set just for itself. @@ -795,6 +800,10 @@ static int vfio_device_first_open(struct vfio_device *device) ret = vfio_group_use_container(device->group); if (ret) goto err_module_put; + } else if (device->group->iommufd) { + ret = vfio_iommufd_bind(device, device->group->iommufd); + if (ret) + goto err_module_put; } device->kvm = device->group->kvm; @@ -812,6 +821,7 @@ static int vfio_device_first_open(struct vfio_device *device) device->kvm = NULL; if (device->group->container) vfio_group_unuse_container(device->group); + vfio_iommufd_unbind(device); err_module_put: mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); @@ -830,6 +840,7 @@ static void vfio_device_last_close(struct vfio_device *device) device->kvm = NULL; if (device->group->container) vfio_group_unuse_container(device->group); + vfio_iommufd_unbind(device); mutex_unlock(&device->group->group_lock); module_put(device->dev->driver->owner); } @@ -1937,8 +1948,6 @@ static void __exit vfio_cleanup(void) module_init(vfio_init); module_exit(vfio_cleanup); -MODULE_IMPORT_NS(IOMMUFD); -MODULE_IMPORT_NS(IOMMUFD_VFIO); MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR(DRIVER_AUTHOR); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e7cebeb875dd1a..a7fc4d747dc226 100644 --- a/include/linux/vfio.h +++ 
b/include/linux/vfio.h @@ -17,6 +17,8 @@ #include <linux/iova_bitmap.h> struct kvm; +struct iommufd_ctx; +struct iommufd_device; /* * VFIO devices can be placed in a set, this allows all devices to share this @@ -54,6 +56,10 @@ struct vfio_device { struct completion comp; struct list_head group_next; struct list_head iommu_entry; +#if IS_ENABLED(CONFIG_IOMMUFD) + struct iommufd_device *iommufd_device; + bool iommufd_attached; +#endif }; /** @@ -80,6 +86,10 @@ struct vfio_device_ops { char *name; int (*init)(struct vfio_device *vdev); void (*release)(struct vfio_device *vdev); + int (*bind_iommufd)(struct vfio_device *vdev, + struct iommufd_ctx *ictx, u32 *out_device_id); + void (*unbind_iommufd)(struct vfio_device *vdev); + int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -96,6 +106,21 @@ struct vfio_device_ops { void __user *arg, size_t argsz); }; +#if IS_ENABLED(CONFIG_IOMMUFD) +int vfio_iommufd_physical_bind(struct vfio_device *vdev, + struct iommufd_ctx *ictx, u32 *out_device_id); +void vfio_iommufd_physical_unbind(struct vfio_device *vdev); +int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id); +#else +#define vfio_iommufd_physical_bind \ + ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ + u32 *out_device_id)) NULL) +#define vfio_iommufd_physical_unbind \ + ((void (*)(struct vfio_device *vdev)) NULL) +#define vfio_iommufd_physical_attach_ioas \ + ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) +#endif + /** * @migration_set_state: Optional callback to change the migration state for * devices that support migration. It's mandatory for