Message ID | 12-v4-0de2f6c78ed0+9d1-iommufd_jgg@nvidia.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | IOMMUFD Generic interface | expand |
On 2022/11/8 08:49, Jason Gunthorpe wrote: > Add the four functions external drivers need to connect physical DMA to > the IOMMUFD: > > iommufd_device_bind() / iommufd_device_unbind() > Register the device with iommufd and establish security isolation. > > iommufd_device_attach() / iommufd_device_detach() > Connect a bound device to a page table > > Binding a device creates a device object ID in the uAPI, however the > generic API provides no IOCTLs to manipulate them. > > Tested-by: Nicolin Chen <nicolinc@nvidia.com> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > --- > drivers/iommu/iommufd/Makefile | 1 + > drivers/iommu/iommufd/device.c | 402 ++++++++++++++++++++++++ > drivers/iommu/iommufd/iommufd_private.h | 5 + > drivers/iommu/iommufd/main.c | 3 + > include/linux/iommufd.h | 13 + > 5 files changed, 424 insertions(+) > create mode 100644 drivers/iommu/iommufd/device.c > > diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile > index e13e971aa28c60..ca28a135b9675f 100644 > --- a/drivers/iommu/iommufd/Makefile > +++ b/drivers/iommu/iommufd/Makefile > @@ -1,5 +1,6 @@ > # SPDX-License-Identifier: GPL-2.0-only > iommufd-y := \ > + device.o \ > hw_pagetable.o \ > io_pagetable.o \ > ioas.o \ > diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c > new file mode 100644 > index 00000000000000..a3bf3c07d3f800 > --- /dev/null > +++ b/drivers/iommu/iommufd/device.c > @@ -0,0 +1,402 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES > + */ > +#include <linux/iommufd.h> > +#include <linux/slab.h> > +#include <linux/iommu.h> > +#include <linux/irqdomain.h> > + > +#include "iommufd_private.h" > + > +/* > + * A iommufd_device object represents the binding relationship between a > + * consuming driver and the iommufd. These objects are created/destroyed by > + * external drivers, not by userspace. > + */ > +struct iommufd_device { > + struct iommufd_object obj; > + struct iommufd_ctx *ictx; > + struct iommufd_hw_pagetable *hwpt; > + /* Head at iommufd_hw_pagetable::devices */ > + struct list_head devices_item; > + /* always the physical device */ > + struct device *dev; > + struct iommu_group *group; > + bool enforce_cache_coherency; > +}; > + > +void iommufd_device_destroy(struct iommufd_object *obj) > +{ > + struct iommufd_device *idev = > + container_of(obj, struct iommufd_device, obj); > + > + iommu_device_release_dma_owner(idev->dev); > + iommu_group_put(idev->group); > + iommufd_ctx_put(idev->ictx); > +} > + > +/** > + * iommufd_device_bind - Bind a physical device to an iommu fd > + * @ictx: iommufd file descriptor > + * @dev: Pointer to a physical PCI device struct > + * @id: Output ID number to return to userspace for this device > + * > + * A successful bind establishes an ownership over the device and returns > + * struct iommufd_device pointer, otherwise returns error pointer. > + * > + * A driver using this API must set driver_managed_dma and must not touch > + * the device until this routine succeeds and establishes ownership. > + * > + * Binding a PCI device places the entire RID under iommufd control. > + * > + * The caller must undo this with iommufd_unbind_device() it should be iommufd_device_unbind() now. > + */ > +struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, > + struct device *dev, u32 *id) > +{ > + struct iommufd_device *idev; > + struct iommu_group *group; > + int rc; > + > + /* > + * iommufd always sets IOMMU_CACHE because we offer no way for userspace > + * to restore cache coherency. > + */ > + if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) > + return ERR_PTR(-EINVAL); > + > + group = iommu_group_get(dev); > + if (!group) > + return ERR_PTR(-ENODEV); > + > + rc = iommu_device_claim_dma_owner(dev, ictx); > + if (rc) > + goto out_group_put; > + > + idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE); > + if (IS_ERR(idev)) { > + rc = PTR_ERR(idev); > + goto out_release_owner; > + } > + idev->ictx = ictx; > + iommufd_ctx_get(ictx); > + idev->dev = dev; > + idev->enforce_cache_coherency = > + device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY); > + /* The calling driver is a user until iommufd_device_unbind() */ > + refcount_inc(&idev->obj.users); > + /* group refcount moves into iommufd_device */ > + idev->group = group; > + > + /* > + * If the caller fails after this success it must call > + * iommufd_unbind_device() which is safe since we hold this refcount. > + * This also means the device is a leaf in the graph and no other object > + * can take a reference on it. > + */ > + iommufd_object_finalize(ictx, &idev->obj); > + *id = idev->obj.id; > + return idev; > + > +out_release_owner: > + iommu_device_release_dma_owner(dev); > +out_group_put: > + iommu_group_put(group); > + return ERR_PTR(rc); > +} > +EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD); > + > +void iommufd_device_unbind(struct iommufd_device *idev) > +{ > + bool was_destroyed; > + > + was_destroyed = iommufd_object_destroy_user(idev->ictx, &idev->obj); > + WARN_ON(!was_destroyed); > +} > +EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD); > + > +static int iommufd_device_setup_msi(struct iommufd_device *idev, > + struct iommufd_hw_pagetable *hwpt, > + phys_addr_t sw_msi_start, > + unsigned int flags) > +{ > + int rc; > + > + /* > + * IOMMU_CAP_INTR_REMAP means that the platform is isolating MSI, and it > + * creates the MSI window by default in the iommu domain. Nothing > + * further to do. > + */ > + if (device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP)) > + return 0; > + > + /* > + * On ARM systems that set the global IRQ_DOMAIN_FLAG_MSI_REMAP every > + * allocated iommu_domain will block interrupts by default and this > + * special flow is needed to turn them back on. iommu_dma_prepare_msi() > + * will install pages into our domain after request_irq() to make this > + * work. > + * > + * FIXME: This is conceptually broken for iommufd since we want to allow > + * userspace to change the domains, eg switch from an identity IOAS to a > + * DMA IOAS. There is currently no way to create a MSI window that > + * matches what the IRQ layer actually expects in a newly created > + * domain. > + */ > + if (irq_domain_check_msi_remap()) { > + if (WARN_ON(!sw_msi_start)) > + return -EPERM; > + /* > + * iommu_get_msi_cookie() can only be called once per domain, > + * it returns -EBUSY on later calls. > + */ > + if (hwpt->msi_cookie) > + return 0; > + rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start); > + if (rc) > + return rc; > + hwpt->msi_cookie = true; > + return 0; > + } > + > + /* > + * Otherwise the platform has a MSI window that is not isolated. For > + * historical compat with VFIO allow a module parameter to ignore the > + * insecurity. > + */ > + if (!(flags & IOMMUFD_ATTACH_FLAGS_ALLOW_UNSAFE_INTERRUPT)) > + return -EPERM; > + else > + dev_warn( > + idev->dev, > + "Device interrupts cannot be isolated by the IOMMU, this platform in insecure. Use an \"allow_unsafe_interrupts\" module parameter to override\n"); > + > + return 0; > +} > + > +static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt, > + struct iommu_group *group) > +{ > + struct iommufd_device *cur_dev; > + > + list_for_each_entry(cur_dev, &hwpt->devices, devices_item) > + if (cur_dev->group == group) > + return true; > + return false; > +} > + > +static int iommufd_device_do_attach(struct iommufd_device *idev, > + struct iommufd_hw_pagetable *hwpt, > + unsigned int flags) > +{ > + phys_addr_t sw_msi_start = 0; > + int rc; > + > + mutex_lock(&hwpt->devices_lock); > + > + /* > + * Try to upgrade the domain we have, it is an iommu driver bug to > + * report IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail > + * enforce_cache_coherency when there are no devices attached to the > + * domain. > + */ > + if (idev->enforce_cache_coherency && !hwpt->enforce_cache_coherency) { > + if (hwpt->domain->ops->enforce_cache_coherency) > + hwpt->enforce_cache_coherency = > + hwpt->domain->ops->enforce_cache_coherency( > + hwpt->domain); > + if (!hwpt->enforce_cache_coherency) { > + WARN_ON(list_empty(&hwpt->devices)); > + rc = -EINVAL; > + goto out_unlock; > + } > + } > + > + rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev, > + idev->group, &sw_msi_start); > + if (rc) > + goto out_unlock; > + > + rc = iommufd_device_setup_msi(idev, hwpt, sw_msi_start, flags); > + if (rc) > + goto out_iova; aren't the above two operations only once for a group? I remember you did the two after iommu_attach_group(). > + /* > + * FIXME: Hack around missing a device-centric iommu api, only attach to > + * the group once for the first device that is in the group. > + */ > + if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) { > + rc = iommu_attach_group(hwpt->domain, idev->group); > + if (rc) > + goto out_iova; > + > + if (list_empty(&hwpt->devices)) { > + rc = iopt_table_add_domain(&hwpt->ioas->iopt, > + hwpt->domain); > + if (rc) > + goto out_detach; > + } > + } > + > + idev->hwpt = hwpt; > + refcount_inc(&hwpt->obj.users); > + list_add(&idev->devices_item, &hwpt->devices); > + mutex_unlock(&hwpt->devices_lock); > + return 0; > + > +out_detach: > + iommu_detach_group(hwpt->domain, idev->group); > +out_iova: > + iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); > +out_unlock: > + mutex_unlock(&hwpt->devices_lock); > + return rc; > +} > +
On Tue, Nov 08, 2022 at 10:34:05PM +0800, Yi Liu wrote: > > +/** > > + * iommufd_device_bind - Bind a physical device to an iommu fd > > + * @ictx: iommufd file descriptor > > + * @dev: Pointer to a physical PCI device struct > > + * @id: Output ID number to return to userspace for this device > > + * > > + * A successful bind establishes an ownership over the device and returns > > + * struct iommufd_device pointer, otherwise returns error pointer. > > + * > > + * A driver using this API must set driver_managed_dma and must not touch > > + * the device until this routine succeeds and establishes ownership. > > + * > > + * Binding a PCI device places the entire RID under iommufd control. > > + * > > + * The caller must undo this with iommufd_unbind_device() > > it should be iommufd_device_unbind() now. Done > > +static int iommufd_device_do_attach(struct iommufd_device *idev, > > + struct iommufd_hw_pagetable *hwpt, > > + unsigned int flags) > > +{ > > + phys_addr_t sw_msi_start = 0; > > + int rc; > > + > > + mutex_lock(&hwpt->devices_lock); > > + > > + /* > > + * Try to upgrade the domain we have, it is an iommu driver bug to > > + * report IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail > > + * enforce_cache_coherency when there are no devices attached to the > > + * domain. > > + */ > > + if (idev->enforce_cache_coherency && !hwpt->enforce_cache_coherency) { > > + if (hwpt->domain->ops->enforce_cache_coherency) > > + hwpt->enforce_cache_coherency = > > + hwpt->domain->ops->enforce_cache_coherency( > > + hwpt->domain); > > + if (!hwpt->enforce_cache_coherency) { > > + WARN_ON(list_empty(&hwpt->devices)); > > + rc = -EINVAL; > > + goto out_unlock; > > + } > > + } > > + > > + rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev, > > + idev->group, &sw_msi_start); > > + if (rc) > > + goto out_unlock; > > + > > + rc = iommufd_device_setup_msi(idev, hwpt, sw_msi_start, flags); > > + if (rc) > > + goto out_iova; > > aren't the above two operations only once for a group? I remember you did > the two after iommu_attach_group(). No, with the new attach logic per-device is simpler. iopt_table_enforce_group_resv_regions() tags all the reserved ranges with: rc = iopt_reserve_iova(iopt, resv->start, resv->length - 1 + resv->start, device); So they are all undone as each device detaches And iommufd_device_setup_msi() keeps track of what has happened to the domain via: if (hwpt->msi_cookie) return 0; rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start); if (rc) return rc; hwpt->msi_cookie = true; So it is OK to call it multiple times Thanks, Jason
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Tuesday, November 8, 2022 8:49 AM > > Add the four functions external drivers need to connect physical DMA to > the IOMMUFD: > > iommufd_device_bind() / iommufd_device_unbind() > Register the device with iommufd and establish security isolation. > > iommufd_device_attach() / iommufd_device_detach() > Connect a bound device to a page table > > Binding a device creates a device object ID in the uAPI, however the > generic API provides no IOCTLs to manipulate them. > > Tested-by: Nicolin Chen <nicolinc@nvidia.com> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index e13e971aa28c60..ca28a135b9675f 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only iommufd-y := \ + device.o \ hw_pagetable.o \ io_pagetable.o \ ioas.o \ diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c new file mode 100644 index 00000000000000..a3bf3c07d3f800 --- /dev/null +++ b/drivers/iommu/iommufd/device.c @@ -0,0 +1,402 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + */ +#include <linux/iommufd.h> +#include <linux/slab.h> +#include <linux/iommu.h> +#include <linux/irqdomain.h> + +#include "iommufd_private.h" + +/* + * A iommufd_device object represents the binding relationship between a + * consuming driver and the iommufd. These objects are created/destroyed by + * external drivers, not by userspace. + */ +struct iommufd_device { + struct iommufd_object obj; + struct iommufd_ctx *ictx; + struct iommufd_hw_pagetable *hwpt; + /* Head at iommufd_hw_pagetable::devices */ + struct list_head devices_item; + /* always the physical device */ + struct device *dev; + struct iommu_group *group; + bool enforce_cache_coherency; +}; + +void iommufd_device_destroy(struct iommufd_object *obj) +{ + struct iommufd_device *idev = + container_of(obj, struct iommufd_device, obj); + + iommu_device_release_dma_owner(idev->dev); + iommu_group_put(idev->group); + iommufd_ctx_put(idev->ictx); +} + +/** + * iommufd_device_bind - Bind a physical device to an iommu fd + * @ictx: iommufd file descriptor + * @dev: Pointer to a physical PCI device struct + * @id: Output ID number to return to userspace for this device + * + * A successful bind establishes an ownership over the device and returns + * struct iommufd_device pointer, otherwise returns error pointer. + * + * A driver using this API must set driver_managed_dma and must not touch + * the device until this routine succeeds and establishes ownership. + * + * Binding a PCI device places the entire RID under iommufd control. + * + * The caller must undo this with iommufd_unbind_device() + */ +struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, + struct device *dev, u32 *id) +{ + struct iommufd_device *idev; + struct iommu_group *group; + int rc; + + /* + * iommufd always sets IOMMU_CACHE because we offer no way for userspace + * to restore cache coherency. + */ + if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) + return ERR_PTR(-EINVAL); + + group = iommu_group_get(dev); + if (!group) + return ERR_PTR(-ENODEV); + + rc = iommu_device_claim_dma_owner(dev, ictx); + if (rc) + goto out_group_put; + + idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE); + if (IS_ERR(idev)) { + rc = PTR_ERR(idev); + goto out_release_owner; + } + idev->ictx = ictx; + iommufd_ctx_get(ictx); + idev->dev = dev; + idev->enforce_cache_coherency = + device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY); + /* The calling driver is a user until iommufd_device_unbind() */ + refcount_inc(&idev->obj.users); + /* group refcount moves into iommufd_device */ + idev->group = group; + + /* + * If the caller fails after this success it must call + * iommufd_unbind_device() which is safe since we hold this refcount. + * This also means the device is a leaf in the graph and no other object + * can take a reference on it. + */ + iommufd_object_finalize(ictx, &idev->obj); + *id = idev->obj.id; + return idev; + +out_release_owner: + iommu_device_release_dma_owner(dev); +out_group_put: + iommu_group_put(group); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD); + +void iommufd_device_unbind(struct iommufd_device *idev) +{ + bool was_destroyed; + + was_destroyed = iommufd_object_destroy_user(idev->ictx, &idev->obj); + WARN_ON(!was_destroyed); +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD); + +static int iommufd_device_setup_msi(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt, + phys_addr_t sw_msi_start, + unsigned int flags) +{ + int rc; + + /* + * IOMMU_CAP_INTR_REMAP means that the platform is isolating MSI, and it + * creates the MSI window by default in the iommu domain. Nothing + * further to do. + */ + if (device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP)) + return 0; + + /* + * On ARM systems that set the global IRQ_DOMAIN_FLAG_MSI_REMAP every + * allocated iommu_domain will block interrupts by default and this + * special flow is needed to turn them back on. iommu_dma_prepare_msi() + * will install pages into our domain after request_irq() to make this + * work. + * + * FIXME: This is conceptually broken for iommufd since we want to allow + * userspace to change the domains, eg switch from an identity IOAS to a + * DMA IOAS. There is currently no way to create a MSI window that + * matches what the IRQ layer actually expects in a newly created + * domain. + */ + if (irq_domain_check_msi_remap()) { + if (WARN_ON(!sw_msi_start)) + return -EPERM; + /* + * iommu_get_msi_cookie() can only be called once per domain, + * it returns -EBUSY on later calls. + */ + if (hwpt->msi_cookie) + return 0; + rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start); + if (rc) + return rc; + hwpt->msi_cookie = true; + return 0; + } + + /* + * Otherwise the platform has a MSI window that is not isolated. For + * historical compat with VFIO allow a module parameter to ignore the + * insecurity. + */ + if (!(flags & IOMMUFD_ATTACH_FLAGS_ALLOW_UNSAFE_INTERRUPT)) + return -EPERM; + else + dev_warn( + idev->dev, + "Device interrupts cannot be isolated by the IOMMU, this platform in insecure. Use an \"allow_unsafe_interrupts\" module parameter to override\n"); + + return 0; +} + +static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt, + struct iommu_group *group) +{ + struct iommufd_device *cur_dev; + + list_for_each_entry(cur_dev, &hwpt->devices, devices_item) + if (cur_dev->group == group) + return true; + return false; +} + +static int iommufd_device_do_attach(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt, + unsigned int flags) +{ + phys_addr_t sw_msi_start = 0; + int rc; + + mutex_lock(&hwpt->devices_lock); + + /* + * Try to upgrade the domain we have, it is an iommu driver bug to + * report IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail + * enforce_cache_coherency when there are no devices attached to the + * domain. + */ + if (idev->enforce_cache_coherency && !hwpt->enforce_cache_coherency) { + if (hwpt->domain->ops->enforce_cache_coherency) + hwpt->enforce_cache_coherency = + hwpt->domain->ops->enforce_cache_coherency( + hwpt->domain); + if (!hwpt->enforce_cache_coherency) { + WARN_ON(list_empty(&hwpt->devices)); + rc = -EINVAL; + goto out_unlock; + } + } + + rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev, + idev->group, &sw_msi_start); + if (rc) + goto out_unlock; + + rc = iommufd_device_setup_msi(idev, hwpt, sw_msi_start, flags); + if (rc) + goto out_iova; + + /* + * FIXME: Hack around missing a device-centric iommu api, only attach to + * the group once for the first device that is in the group. + */ + if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) { + rc = iommu_attach_group(hwpt->domain, idev->group); + if (rc) + goto out_iova; + + if (list_empty(&hwpt->devices)) { + rc = iopt_table_add_domain(&hwpt->ioas->iopt, + hwpt->domain); + if (rc) + goto out_detach; + } + } + + idev->hwpt = hwpt; + refcount_inc(&hwpt->obj.users); + list_add(&idev->devices_item, &hwpt->devices); + mutex_unlock(&hwpt->devices_lock); + return 0; + +out_detach: + iommu_detach_group(hwpt->domain, idev->group); +out_iova: + iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); +out_unlock: + mutex_unlock(&hwpt->devices_lock); + return rc; +} + +/* + * When automatically managing the domains we search for a compatible domain in + * the iopt and if one is found use it, otherwise create a new domain. + * Automatic domain selection will never pick a manually created domain. + */ +static int iommufd_device_auto_get_domain(struct iommufd_device *idev, + struct iommufd_ioas *ioas, + unsigned int flags) +{ + struct iommufd_hw_pagetable *hwpt; + int rc; + + /* + * There is no differentiation when domains are allocated, so any domain + * that is willing to attach to the device is interchangeable with any + * other. + */ + mutex_lock(&ioas->mutex); + list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) { + if (!hwpt->auto_domain) + continue; + + rc = iommufd_device_do_attach(idev, hwpt, flags); + + /* + * -EINVAL means the domain is incompatible with the device. + * Other error codes should propagate to userspace as failure. + * Success means the domain is attached. + */ + if (rc == -EINVAL) + continue; + goto out_unlock; + } + + hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev->dev); + if (IS_ERR(hwpt)) { + rc = PTR_ERR(hwpt); + goto out_unlock; + } + hwpt->auto_domain = true; + + rc = iommufd_device_do_attach(idev, hwpt, flags); + if (rc) + goto out_abort; + list_add_tail(&hwpt->hwpt_item, &ioas->hwpt_list); + + mutex_unlock(&ioas->mutex); + iommufd_object_finalize(idev->ictx, &hwpt->obj); + return 0; + +out_abort: + iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj); +out_unlock: + mutex_unlock(&ioas->mutex); + return rc; +} + +/** + * iommufd_device_attach - Connect a device to an iommu_domain + * @idev: device to attach + * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE + * Output the IOMMUFD_OBJ_HW_PAGETABLE ID + * @flags: Optional flags + * + * This connects the device to an iommu_domain, either automatically or manually + * selected. Once this completes the device could do DMA. + * + * The caller should return the resulting pt_id back to userspace. + * This function is undone by calling iommufd_device_detach(). + */ +int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id, + unsigned int flags) +{ + struct iommufd_object *pt_obj; + int rc; + + pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY); + if (IS_ERR(pt_obj)) + return PTR_ERR(pt_obj); + + switch (pt_obj->type) { + case IOMMUFD_OBJ_HW_PAGETABLE: { + struct iommufd_hw_pagetable *hwpt = + container_of(pt_obj, struct iommufd_hw_pagetable, obj); + + rc = iommufd_device_do_attach(idev, hwpt, flags); + if (rc) + goto out_put_pt_obj; + + mutex_lock(&hwpt->ioas->mutex); + list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list); + mutex_unlock(&hwpt->ioas->mutex); + break; + } + case IOMMUFD_OBJ_IOAS: { + struct iommufd_ioas *ioas = + container_of(pt_obj, struct iommufd_ioas, obj); + + rc = iommufd_device_auto_get_domain(idev, ioas, flags); + if (rc) + goto out_put_pt_obj; + break; + } + default: + rc = -EINVAL; + goto out_put_pt_obj; + } + + refcount_inc(&idev->obj.users); + *pt_id = idev->hwpt->obj.id; + rc = 0; + +out_put_pt_obj: + iommufd_put_object(pt_obj); + return rc; +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); + +void iommufd_device_detach(struct iommufd_device *idev) +{ + struct iommufd_hw_pagetable *hwpt = idev->hwpt; + + mutex_lock(&hwpt->ioas->mutex); + mutex_lock(&hwpt->devices_lock); + list_del(&idev->devices_item); + if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) { + if (list_empty(&hwpt->devices)) { + iopt_table_remove_domain(&hwpt->ioas->iopt, + hwpt->domain); + list_del(&hwpt->hwpt_item); + } + iommu_detach_group(hwpt->domain, idev->group); + } + iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); + mutex_unlock(&hwpt->devices_lock); + mutex_unlock(&hwpt->ioas->mutex); + + if (hwpt->auto_domain) + iommufd_object_destroy_user(idev->ictx, &hwpt->obj); + else + refcount_dec(&hwpt->obj.users); + + idev->hwpt = NULL; + + refcount_dec(&idev->obj.users); +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD); diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index d0272efcd8038e..1f8c73aca176e2 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -103,6 +103,7 @@ static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd, enum iommufd_object_type { IOMMUFD_OBJ_NONE, IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE, + IOMMUFD_OBJ_DEVICE, IOMMUFD_OBJ_HW_PAGETABLE, IOMMUFD_OBJ_IOAS, }; @@ -229,6 +230,8 @@ struct iommufd_hw_pagetable { struct iommufd_ioas *ioas; struct iommu_domain *domain; bool auto_domain : 1; + bool enforce_cache_coherency : 1; + bool msi_cookie : 1; /* Head at iommufd_ioas::hwpt_list */ struct list_head hwpt_item; struct mutex devices_lock; @@ -240,6 +243,8 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, struct device *dev); void iommufd_hw_pagetable_destroy(struct iommufd_object *obj); +void iommufd_device_destroy(struct iommufd_object *obj); + struct iommufd_access { unsigned long iova_alignment; u32 iopt_access_list_id; diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 3eab714b8e12a3..8a114ddbdfcde2 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -352,6 +352,9 @@ void iommufd_ctx_put(struct iommufd_ctx *ictx) EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, IOMMUFD); static const struct iommufd_object_ops iommufd_object_ops[] = { + [IOMMUFD_OBJ_DEVICE] = { + .destroy = iommufd_device_destroy, + }, [IOMMUFD_OBJ_IOAS] = { .destroy = iommufd_ioas_destroy, }, diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 26e09d539737bb..31efacd8a46cce 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -9,10 +9,23 @@ #include <linux/types.h> #include <linux/errno.h> #include <linux/err.h> +#include <linux/device.h> +struct iommufd_device; struct iommufd_ctx; struct file; +struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, + struct device *dev, u32 *id); +void iommufd_device_unbind(struct iommufd_device *idev); + +enum { + IOMMUFD_ATTACH_FLAGS_ALLOW_UNSAFE_INTERRUPT = 1 << 0, +}; +int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id, + unsigned int flags); +void iommufd_device_detach(struct iommufd_device *idev); + enum { IOMMUFD_ACCESS_RW_READ = 0, IOMMUFD_ACCESS_RW_WRITE = 1 << 0,