diff mbox series

[RFC,09/12] iommufd: Add a HW pagetable object

Message ID 9-v1-e79cd8d168e8+6-iommufd_jgg@nvidia.com (mailing list archive)
State New, archived
Headers show
Series IOMMUFD Generic interface | expand

Commit Message

Jason Gunthorpe March 18, 2022, 5:27 p.m. UTC
The hw_pagetable object exposes the internal struct iommu_domain's to
userspace. An iommu_domain is required when any DMA device attaches to an
IOAS to control the io page table through the iommu driver.

For compatibility with VFIO the hw_pagetable is automatically created when
a DMA device is attached to the IOAS. If a compatible iommu_domain already
exists then the hw_pagetable associated with it is used for the
attachment.

In the initial series there is no iommufd uAPI for the hw_pagetable
object. The next patch provides driver facing APIs for IO page table
attachment that allows drivers to accept either an IOAS or a hw_pagetable
ID and for the driver to return the hw_pagetable ID that was auto-selected
from an IOAS. The expectation is the driver will provide uAPI through its
own FD for attaching its device to iommufd. This allows userspace to learn
the mapping of devices to iommu_domains and to override the automatic
attachment.

The future HW specific interface will allow userspace to create
hw_pagetable objects using iommu_domains with IOMMU driver specific
parameters. This infrastructure will allow linking those domains to IOAS's
and devices.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/Makefile          |   1 +
 drivers/iommu/iommufd/hw_pagetable.c    | 142 ++++++++++++++++++++++++
 drivers/iommu/iommufd/ioas.c            |   4 +
 drivers/iommu/iommufd/iommufd_private.h |  35 ++++++
 drivers/iommu/iommufd/main.c            |   3 +
 5 files changed, 185 insertions(+)
 create mode 100644 drivers/iommu/iommufd/hw_pagetable.c
diff mbox series

Patch

diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index 2b4f36f1b72f9d..e13e971aa28c60 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -1,5 +1,6 @@ 
 # SPDX-License-Identifier: GPL-2.0-only
 iommufd-y := \
+	hw_pagetable.o \
 	io_pagetable.o \
 	ioas.o \
 	main.o \
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
new file mode 100644
index 00000000000000..bafd7d07918bfd
--- /dev/null
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -0,0 +1,142 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ */
+#include <linux/iommu.h>
+
+#include "iommufd_private.h"
+
+void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
+{
+	struct iommufd_hw_pagetable *hwpt =
+		container_of(obj, struct iommufd_hw_pagetable, obj);
+	struct iommufd_ioas *ioas = hwpt->ioas;
+
+	WARN_ON(!list_empty(&hwpt->devices));
+	mutex_lock(&ioas->mutex);
+	list_del(&hwpt->auto_domains_item);
+	mutex_unlock(&ioas->mutex);
+
+	iommu_domain_free(hwpt->domain);
+	refcount_dec(&hwpt->ioas->obj.users);
+	mutex_destroy(&hwpt->devices_lock);
+}
+
+/*
+ * When automatically managing the domains we search for a compatible domain in
+ * the iopt and if one is found use it, otherwise create a new domain.
+ * Automatic domain selection will never pick a manually created domain.
+ */
+static struct iommufd_hw_pagetable *
+iommufd_hw_pagetable_auto_get(struct iommufd_ctx *ictx,
+			      struct iommufd_ioas *ioas, struct device *dev)
+{
+	struct iommufd_hw_pagetable *hwpt;
+	int rc;
+
+	/*
+	 * There is no differentiation when domains are allocated, so any domain
+	 * from the right ops is interchangeable with any other.
+	 */
+	mutex_lock(&ioas->mutex);
+	list_for_each_entry (hwpt, &ioas->auto_domains, auto_domains_item) {
+		/*
+		 * FIXME: We really need an op from the driver to test if a
+		 * device is compatible with a domain. This thing from VFIO
+		 * works sometimes.
+		 */
+		if (hwpt->domain->ops == dev_iommu_ops(dev)->default_domain_ops) {
+			if (refcount_inc_not_zero(&hwpt->obj.users)) {
+				mutex_unlock(&ioas->mutex);
+				return hwpt;
+			}
+		}
+	}
+
+	hwpt = iommufd_object_alloc(ictx, hwpt, IOMMUFD_OBJ_HW_PAGETABLE);
+	if (IS_ERR(hwpt)) {
+		rc = PTR_ERR(hwpt);
+		goto out_unlock;
+	}
+
+	hwpt->domain = iommu_domain_alloc(dev->bus);
+	if (!hwpt->domain) {
+		rc = -ENOMEM;
+		goto out_abort;
+	}
+
+	INIT_LIST_HEAD(&hwpt->devices);
+	mutex_init(&hwpt->devices_lock);
+	hwpt->ioas = ioas;
+	/* The calling driver is a user until iommufd_hw_pagetable_put() */
+	refcount_inc(&ioas->obj.users);
+
+	list_add_tail(&hwpt->auto_domains_item, &ioas->auto_domains);
+	/*
+	 * iommufd_object_finalize() consumes the refcount, get one for the
+	 * caller. This pairs with the first put in
+	 * iommufd_object_destroy_user()
+	 */
+	refcount_inc(&hwpt->obj.users);
+	iommufd_object_finalize(ictx, &hwpt->obj);
+
+	mutex_unlock(&ioas->mutex);
+	return hwpt;
+
+out_abort:
+	iommufd_object_abort(ictx, &hwpt->obj);
+out_unlock:
+	mutex_unlock(&ioas->mutex);
+	return ERR_PTR(rc);
+}
+
+/**
+ * iommufd_hw_pagetable_from_id() - Get an iommu_domain for a device
+ * @ictx: iommufd context
+ * @pt_id: ID of the IOAS or hw_pagetable object
+ * @dev: Device to get an iommu_domain for
+ *
+ * Turn a general page table ID into an iommu_domain contained in a
+ * iommufd_hw_pagetable object. If a hw_pagetable ID is specified then that
+ * iommu_domain is used, otherwise a suitable iommu_domain in the IOAS is found
+ * for the device, creating one automatically if necessary.
+ */
+struct iommufd_hw_pagetable *
+iommufd_hw_pagetable_from_id(struct iommufd_ctx *ictx, u32 pt_id,
+			     struct device *dev)
+{
+	struct iommufd_object *obj;
+
+	obj = iommufd_get_object(ictx, pt_id, IOMMUFD_OBJ_ANY);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	switch (obj->type) {
+	case IOMMUFD_OBJ_HW_PAGETABLE:
+		iommufd_put_object_keep_user(obj);
+		return container_of(obj, struct iommufd_hw_pagetable, obj);
+	case IOMMUFD_OBJ_IOAS: {
+		struct iommufd_ioas *ioas =
+			container_of(obj, struct iommufd_ioas, obj);
+		struct iommufd_hw_pagetable *hwpt;
+
+		hwpt = iommufd_hw_pagetable_auto_get(ictx, ioas, dev);
+		iommufd_put_object(obj);
+		return hwpt;
+	}
+	default:
+		iommufd_put_object(obj);
+		return ERR_PTR(-EINVAL);
+	}
+}
+
+void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
+			      struct iommufd_hw_pagetable *hwpt)
+{
+	if (list_empty(&hwpt->auto_domains_item)) {
+		/* Manually created hw_pagetables just keep going */
+		refcount_dec(&hwpt->obj.users);
+		return;
+	}
+	iommufd_object_destroy_user(ictx, &hwpt->obj);
+}
diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
index c530b2ba74b06b..48149988c84bbc 100644
--- a/drivers/iommu/iommufd/ioas.c
+++ b/drivers/iommu/iommufd/ioas.c
@@ -17,6 +17,7 @@  void iommufd_ioas_destroy(struct iommufd_object *obj)
 	rc = iopt_unmap_all(&ioas->iopt);
 	WARN_ON(rc);
 	iopt_destroy_table(&ioas->iopt);
+	mutex_destroy(&ioas->mutex);
 }
 
 struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx)
@@ -31,6 +32,9 @@  struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx)
 	rc = iopt_init_table(&ioas->iopt);
 	if (rc)
 		goto out_abort;
+
+	INIT_LIST_HEAD(&ioas->auto_domains);
+	mutex_init(&ioas->mutex);
 	return ioas;
 
 out_abort:
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index d24c9dac5a82a9..c5c9650cc86818 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -96,6 +96,7 @@  static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
 enum iommufd_object_type {
 	IOMMUFD_OBJ_NONE,
 	IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
+	IOMMUFD_OBJ_HW_PAGETABLE,
 	IOMMUFD_OBJ_IOAS,
 	IOMMUFD_OBJ_MAX,
 };
@@ -153,10 +154,20 @@  struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
  * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
  * mapping is copied into all of the associated domains and made available to
  * in-kernel users.
+ *
+ * Every iommu_domain that is created is wrapped in a iommufd_hw_pagetable
+ * object. When we go to attach a device to an IOAS we need to get an
+ * iommu_domain and wrapping iommufd_hw_pagetable for it.
+ *
+ * An iommu_domain & iommfd_hw_pagetable will be automatically selected
+ * for a device based on the auto_domains list. If no suitable iommu_domain
+ * is found a new iommu_domain will be created.
  */
 struct iommufd_ioas {
 	struct iommufd_object obj;
 	struct io_pagetable iopt;
+	struct mutex mutex;
+	struct list_head auto_domains;
 };
 
 static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ucmd *ucmd,
@@ -174,4 +185,28 @@  int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd);
 int iommufd_ioas_map(struct iommufd_ucmd *ucmd);
 int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
 int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
+
+/*
+ * A HW pagetable is called an iommu_domain inside the kernel. This user object
+ * allows directly creating and inspecting the domains. Domains that have kernel
+ * owned page tables will be associated with an iommufd_ioas that provides the
+ * IOVA to PFN map.
+ */
+struct iommufd_hw_pagetable {
+	struct iommufd_object obj;
+	struct iommufd_ioas *ioas;
+	struct iommu_domain *domain;
+	/* Head at iommufd_ioas::auto_domains */
+	struct list_head auto_domains_item;
+	struct mutex devices_lock;
+	struct list_head devices;
+};
+
+struct iommufd_hw_pagetable *
+iommufd_hw_pagetable_from_id(struct iommufd_ctx *ictx, u32 pt_id,
+			     struct device *dev);
+void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
+			      struct iommufd_hw_pagetable *hwpt);
+void iommufd_hw_pagetable_destroy(struct iommufd_object *obj);
+
 #endif
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index e506f493b54cfe..954cde173c86fc 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -287,6 +287,9 @@  static struct iommufd_object_ops iommufd_object_ops[] = {
 	[IOMMUFD_OBJ_IOAS] = {
 		.destroy = iommufd_ioas_destroy,
 	},
+	[IOMMUFD_OBJ_HW_PAGETABLE] = {
+		.destroy = iommufd_hw_pagetable_destroy,
+	},
 };
 
 static struct miscdevice iommu_misc_dev = {