diff mbox series

[v4,02/11] iommufd: Introduce IOMMUFD_OBJ_VIOMMU and its related struct

Message ID 74fec8c38a7d568bd88beba9082b4a5a4bc2046f.1729553811.git.nicolinc@nvidia.com (mailing list archive)
State New
Headers show
Series iommufd: Add vIOMMU infrastructure (Part-1) | expand

Commit Message

Nicolin Chen Oct. 22, 2024, 12:19 a.m. UTC
Add a new IOMMUFD_OBJ_VIOMMU with an iommufd_viommu structure to represent
a slice of physical IOMMU device passed to or shared with a user space VM.
This slice, now a vIOMMU object, is a group of virtualization resources of
a physical IOMMU's, such as:
 - Security namespace for guest owned ID, e.g. guest-controlled cache tags
 - Access to a sharable nesting parent pagetable across physical IOMMUs
 - Virtualization of various platforms IDs, e.g. RIDs and others
 - Delivery of paravirtualized invalidation
 - Direct assigned invalidation queues
 - Direct assigned interrupts
 - Non-affiliated event reporting

Add a new viommu_alloc op in iommu_ops, for drivers to allocate their own
vIOMMU structures. And this allocation also needs a free(), so add struct
iommufd_viommu_ops.

To simplify a vIOMMU allocation, provide a iommufd_viommu_alloc() helper.
It's suggested that a driver should embed a core-level viommu structure in
its driver-level viommu struct and call the iommufd_viommu_alloc() helper,
meanwhile the driver can also implement a viommu ops:
    struct my_driver_viommu {
        struct iommufd_viommu core;
        /* driver-owned properties/features */
        ....
    };

    static const struct iommufd_viommu_ops my_driver_viommu_ops = {
        .free = my_driver_viommu_free,
        /* future ops for virtualization features */
        ....
    };

    static struct iommufd_viommu my_driver_viommu_alloc(...)
    {
        struct my_driver_viommu *my_viommu =
                iommufd_viommu_alloc(ictx, my_driver_viommu, core,
                                     my_driver_viommu_ops);
        /* Init my_viommu and related HW feature */
        ....
        return &my_viommu->core;
    }

    static struct iommu_domain_ops my_driver_domain_ops = {
        ....
        .viommu_alloc = my_driver_viommu_alloc,
    };

To make the Kernel config work between a driver and the iommufd core, move
the _iommufd_object_alloc helper into a new driver.c file that builds with
CONFIG_IOMMUFD_DRIVER.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/iommufd/Makefile          |  2 +-
 drivers/iommu/iommufd/iommufd_private.h |  4 --
 include/linux/iommu.h                   | 14 +++++++
 include/linux/iommufd.h                 | 56 +++++++++++++++++++++++++
 drivers/iommu/iommufd/driver.c          | 38 +++++++++++++++++
 drivers/iommu/iommufd/main.c            | 32 --------------
 6 files changed, 109 insertions(+), 37 deletions(-)
 create mode 100644 drivers/iommu/iommufd/driver.c

Comments

Baolu Lu Oct. 22, 2024, 2:28 a.m. UTC | #1
On 2024/10/22 8:19, Nicolin Chen wrote:
> + * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind
> + *                the @dev, as the set of virtualization resources shared/passed
> + *                to user space IOMMU instance. And associate it with a nesting
> + *                @parent_domain. The @viommu_type must be defined in the header
> + *                include/uapi/linux/iommufd.h
> + *                It is suggested to call iommufd_viommu_alloc() helper for
> + *                a bundled allocation of the core and the driver structures,
> + *                using the given @ictx pointer.
>    * @pgsize_bitmap: bitmap of all possible supported page sizes
>    * @owner: Driver module providing these ops
>    * @identity_domain: An always available, always attachable identity
> @@ -591,6 +601,10 @@ struct iommu_ops {
>   	void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid,
>   				 struct iommu_domain *domain);
>   
> +	struct iommufd_viommu *(*viommu_alloc)(
> +		struct device *dev, struct iommu_domain *parent_domain,
> +		struct iommufd_ctx *ictx, unsigned int viommu_type);

Is the vIOMMU object limited to a parent domain?

Thanks,
baolu
Nicolin Chen Oct. 22, 2024, 4:40 a.m. UTC | #2
On Tue, Oct 22, 2024 at 10:28:30AM +0800, Baolu Lu wrote:
> On 2024/10/22 8:19, Nicolin Chen wrote:
> > + * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind
> > + *                the @dev, as the set of virtualization resources shared/passed
> > + *                to user space IOMMU instance. And associate it with a nesting
> > + *                @parent_domain. The @viommu_type must be defined in the header
> > + *                include/uapi/linux/iommufd.h
> > + *                It is suggested to call iommufd_viommu_alloc() helper for
> > + *                a bundled allocation of the core and the driver structures,
> > + *                using the given @ictx pointer.
> >    * @pgsize_bitmap: bitmap of all possible supported page sizes
> >    * @owner: Driver module providing these ops
> >    * @identity_domain: An always available, always attachable identity
> > @@ -591,6 +601,10 @@ struct iommu_ops {
> >       void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid,
> >                                struct iommu_domain *domain);
> > 
> > +     struct iommufd_viommu *(*viommu_alloc)(
> > +             struct device *dev, struct iommu_domain *parent_domain,
> > +             struct iommufd_ctx *ictx, unsigned int viommu_type);
> 
> Is the vIOMMU object limited to a parent domain?

Yes, for each vIOMMU (a slice of physical IOMMU per VM), one S2
parent domain is enough. Typically, it has the mappings between
gPAs and hPAs. If its format/compatibility allows, this single
parent domain can be shared with other vIOMMUs.

Thanks
Nicolin
Baolu Lu Oct. 22, 2024, 8:59 a.m. UTC | #3
On 2024/10/22 12:40, Nicolin Chen wrote:
> On Tue, Oct 22, 2024 at 10:28:30AM +0800, Baolu Lu wrote:
>> On 2024/10/22 8:19, Nicolin Chen wrote:
>>> + * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind
>>> + *                the @dev, as the set of virtualization resources shared/passed
>>> + *                to user space IOMMU instance. And associate it with a nesting
>>> + *                @parent_domain. The @viommu_type must be defined in the header
>>> + *                include/uapi/linux/iommufd.h
>>> + *                It is suggested to call iommufd_viommu_alloc() helper for
>>> + *                a bundled allocation of the core and the driver structures,
>>> + *                using the given @ictx pointer.
>>>     * @pgsize_bitmap: bitmap of all possible supported page sizes
>>>     * @owner: Driver module providing these ops
>>>     * @identity_domain: An always available, always attachable identity
>>> @@ -591,6 +601,10 @@ struct iommu_ops {
>>>        void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid,
>>>                                 struct iommu_domain *domain);
>>>
>>> +     struct iommufd_viommu *(*viommu_alloc)(
>>> +             struct device *dev, struct iommu_domain *parent_domain,
>>> +             struct iommufd_ctx *ictx, unsigned int viommu_type);
>> Is the vIOMMU object limited to a parent domain?
> Yes, for each vIOMMU (a slice of physical IOMMU per VM), one S2
> parent domain is enough. Typically, it has the mappings between
> gPAs and hPAs. If its format/compatibility allows, this single
> parent domain can be shared with other vIOMMUs.

Is it feasible to make vIOMMU object more generic, rather than strictly
tying it to nested translation? For example, a normal paging domain that
translates gPAs to hPAs could also have a vIOMMU object associated with
it.

While we can only support vIOMMU object allocation uAPI for S2 paging
domains in the context of this series, we could consider leaving the
option open to associate a vIOMMU object with other normal paging
domains that are not a nested parent?

Thanks,
baolu
Jason Gunthorpe Oct. 22, 2024, 1:15 p.m. UTC | #4
On Tue, Oct 22, 2024 at 04:59:07PM +0800, Baolu Lu wrote:

> Is it feasible to make vIOMMU object more generic, rather than strictly
> tying it to nested translation? For example, a normal paging domain that
> translates gPAs to hPAs could also have a vIOMMU object associated with
> it.
> 
> While we can only support vIOMMU object allocation uAPI for S2 paging
> domains in the context of this series, we could consider leaving the
> option open to associate a vIOMMU object with other normal paging
> domains that are not a nested parent?

Why? The nested parent flavour of the domain is basically free to
create, what reason would be to not do that?

If the HW doesn't support it, then does the HW really need/support a
VIOMMU?

I suppose it could be made up to the driver, but for now I think we
should leave it as is in the core code requiring it until we have a
reason to relax it.

Jason
Baolu Lu Oct. 23, 2024, 1:48 a.m. UTC | #5
On 2024/10/22 21:15, Jason Gunthorpe wrote:
> On Tue, Oct 22, 2024 at 04:59:07PM +0800, Baolu Lu wrote:
> 
>> Is it feasible to make vIOMMU object more generic, rather than strictly
>> tying it to nested translation? For example, a normal paging domain that
>> translates gPAs to hPAs could also have a vIOMMU object associated with
>> it.
>>
>> While we can only support vIOMMU object allocation uAPI for S2 paging
>> domains in the context of this series, we could consider leaving the
>> option open to associate a vIOMMU object with other normal paging
>> domains that are not a nested parent?
> Why? The nested parent flavour of the domain is basically free to
> create, what reason would be to not do that?

Above addressed my question. The software using vIOMMU should allocate a
domain of nested parent type.

> 
> If the HW doesn't support it, then does the HW really need/support a
> VIOMMU?
> 
> I suppose it could be made up to the driver, but for now I think we
> should leave it as is in the core code requiring it until we have a
> reason to relax it.

Yes. In such cases, the iommu driver could always allow nested parent
domain allocation, but fails to allocate a nested domain if the hardware
is not capable of nesting translation.

Thanks,
baolu
Tian, Kevin Oct. 25, 2024, 8:47 a.m. UTC | #6
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, October 22, 2024 9:16 PM
> 
> On Tue, Oct 22, 2024 at 04:59:07PM +0800, Baolu Lu wrote:
> 
> > Is it feasible to make vIOMMU object more generic, rather than strictly
> > tying it to nested translation? For example, a normal paging domain that
> > translates gPAs to hPAs could also have a vIOMMU object associated with
> > it.
> >
> > While we can only support vIOMMU object allocation uAPI for S2 paging
> > domains in the context of this series, we could consider leaving the
> > option open to associate a vIOMMU object with other normal paging
> > domains that are not a nested parent?
> 
> Why? The nested parent flavour of the domain is basically free to
> create, what reason would be to not do that?
> 
> If the HW doesn't support it, then does the HW really need/support a
> VIOMMU?
> 

Now it's agreed to build trusted I/O on top of this new vIOMMU object.
format-wise probably it's free to assume that nested parent is supported
on any new platform which will support trusted I/O. But I'm not sure
all the conditions around allowing nested are same as for trusted I/O,
e.g. for ARM nesting is allowed only for CANWBS/S2FWB. Are they
always guaranteed in trusted I/O configuration?

Baolu did raise a good open to confirm given it will be used beyond
nesting. 
Jason Gunthorpe Oct. 25, 2024, 3:24 p.m. UTC | #7
On Fri, Oct 25, 2024 at 08:47:40AM +0000, Tian, Kevin wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Tuesday, October 22, 2024 9:16 PM
> > 
> > On Tue, Oct 22, 2024 at 04:59:07PM +0800, Baolu Lu wrote:
> > 
> > > Is it feasible to make vIOMMU object more generic, rather than strictly
> > > tying it to nested translation? For example, a normal paging domain that
> > > translates gPAs to hPAs could also have a vIOMMU object associated with
> > > it.
> > >
> > > While we can only support vIOMMU object allocation uAPI for S2 paging
> > > domains in the context of this series, we could consider leaving the
> > > option open to associate a vIOMMU object with other normal paging
> > > domains that are not a nested parent?
> > 
> > Why? The nested parent flavour of the domain is basically free to
> > create, what reason would be to not do that?
> > 
> > If the HW doesn't support it, then does the HW really need/support a
> > VIOMMU?
> 
> Now it's agreed to build trusted I/O on top of this new vIOMMU object.
> format-wise probably it's free to assume that nested parent is supported
> on any new platform which will support trusted I/O. But I'm not sure
> all the conditions around allowing nested are same as for trusted I/O,
> e.g. for ARM nesting is allowed only for CANWBS/S2FWB. Are they
> always guaranteed in trusted I/O configuration?

ARM is a big ? what exactly will come, but I'm expecting that to be
resolved either with continued HW support or Linux will add the cache
flushing and relax the test.

> Baolu did raise a good open to confirm given it will be used beyond
> nesting. 
diff mbox series

Patch

diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index cf4605962bea..435124a8e1f1 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -12,4 +12,4 @@  iommufd-y := \
 iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o
 
 obj-$(CONFIG_IOMMUFD) += iommufd.o
-obj-$(CONFIG_IOMMUFD_DRIVER) += iova_bitmap.o
+obj-$(CONFIG_IOMMUFD_DRIVER) += iova_bitmap.o driver.o
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 1bb8c0aaecd1..5bd41257f2ef 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -202,10 +202,6 @@  iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
 	iommufd_object_remove(ictx, obj, obj->id, 0);
 }
 
-struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
-					     size_t size,
-					     enum iommufd_object_type type);
-
 #define __iommufd_object_alloc(ictx, ptr, type, obj)                           \
 	container_of(_iommufd_object_alloc(                                    \
 			     ictx,                                             \
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 4ad9b9ec6c9b..14f24b5cd16f 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -42,6 +42,8 @@  struct notifier_block;
 struct iommu_sva;
 struct iommu_dma_cookie;
 struct iommu_fault_param;
+struct iommufd_ctx;
+struct iommufd_viommu;
 
 #define IOMMU_FAULT_PERM_READ	(1 << 0) /* read */
 #define IOMMU_FAULT_PERM_WRITE	(1 << 1) /* write */
@@ -542,6 +544,14 @@  static inline int __iommu_copy_struct_from_user_array(
  * @remove_dev_pasid: Remove any translation configurations of a specific
  *                    pasid, so that any DMA transactions with this pasid
  *                    will be blocked by the hardware.
+ * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind
+ *                the @dev, as the set of virtualization resources shared/passed
+ *                to user space IOMMU instance. And associate it with a nesting
+ *                @parent_domain. The @viommu_type must be defined in the header
+ *                include/uapi/linux/iommufd.h
+ *                It is suggested to call iommufd_viommu_alloc() helper for
+ *                a bundled allocation of the core and the driver structures,
+ *                using the given @ictx pointer.
  * @pgsize_bitmap: bitmap of all possible supported page sizes
  * @owner: Driver module providing these ops
  * @identity_domain: An always available, always attachable identity
@@ -591,6 +601,10 @@  struct iommu_ops {
 	void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid,
 				 struct iommu_domain *domain);
 
+	struct iommufd_viommu *(*viommu_alloc)(
+		struct device *dev, struct iommu_domain *parent_domain,
+		struct iommufd_ctx *ictx, unsigned int viommu_type);
+
 	const struct iommu_domain_ops *default_domain_ops;
 	unsigned long pgsize_bitmap;
 	struct module *owner;
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 22948dd03d67..55054fbc793c 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -17,6 +17,7 @@  struct iommu_group;
 struct iommufd_access;
 struct iommufd_ctx;
 struct iommufd_device;
+struct iommufd_viommu_ops;
 struct page;
 
 enum iommufd_object_type {
@@ -28,6 +29,7 @@  enum iommufd_object_type {
 	IOMMUFD_OBJ_IOAS,
 	IOMMUFD_OBJ_ACCESS,
 	IOMMUFD_OBJ_FAULT,
+	IOMMUFD_OBJ_VIOMMU,
 #ifdef CONFIG_IOMMUFD_TEST
 	IOMMUFD_OBJ_SELFTEST,
 #endif
@@ -78,6 +80,26 @@  void iommufd_access_detach(struct iommufd_access *access);
 
 void iommufd_ctx_get(struct iommufd_ctx *ictx);
 
+struct iommufd_viommu {
+	struct iommufd_object obj;
+	struct iommufd_ctx *ictx;
+	struct iommu_device *iommu_dev;
+	struct iommufd_hwpt_paging *hwpt;
+
+	const struct iommufd_viommu_ops *ops;
+
+	unsigned int type;
+};
+
+/**
+ * struct iommufd_viommu_ops - vIOMMU specific operations
+ * @free: Free all driver-specific parts of an iommufd_viommu. The memory of the
+ *        vIOMMU will be free-ed by iommufd core after calling this free op.
+ */
+struct iommufd_viommu_ops {
+	void (*free)(struct iommufd_viommu *viommu);
+};
+
 #if IS_ENABLED(CONFIG_IOMMUFD)
 struct iommufd_ctx *iommufd_ctx_from_file(struct file *file);
 struct iommufd_ctx *iommufd_ctx_from_fd(int fd);
@@ -135,4 +157,38 @@  static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx)
 	return -EOPNOTSUPP;
 }
 #endif /* CONFIG_IOMMUFD */
+
+#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER)
+struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
+					     size_t size,
+					     enum iommufd_object_type type);
+#else /* !CONFIG_IOMMUFD_DRIVER */
+static inline struct iommufd_object *
+_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size,
+		      enum iommufd_object_type type)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+#endif /* CONFIG_IOMMUFD_DRIVER */
+
+/*
+ * Helpers for IOMMU driver to allocate driver structures that will be freed by
+ * the iommufd core. The free op will be called prior to freeing the memory.
+ */
+#define iommufd_viommu_alloc(ictx, drv_struct, member, viommu_ops)             \
+	({                                                                     \
+		struct drv_struct *ret;                                        \
+									       \
+		static_assert(                                                 \
+			__same_type(struct iommufd_viommu,                     \
+				    ((struct drv_struct *)NULL)->member));     \
+		static_assert(offsetof(struct drv_struct, member.obj) == 0);   \
+		ret = container_of(                                            \
+			_iommufd_object_alloc(ictx, sizeof(struct drv_struct), \
+					      IOMMUFD_OBJ_VIOMMU),             \
+			struct drv_struct, member.obj);                        \
+		if (!IS_ERR(ret))                                              \
+			ret->member.ops = viommu_ops;                          \
+		ret;                                                           \
+	})
 #endif
diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c
new file mode 100644
index 000000000000..c0876d3f91c7
--- /dev/null
+++ b/drivers/iommu/iommufd/driver.c
@@ -0,0 +1,38 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ */
+
+#include "iommufd_private.h"
+
+struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
+					     size_t size,
+					     enum iommufd_object_type type)
+{
+	struct iommufd_object *obj;
+	int rc;
+
+	obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+	obj->type = type;
+	/* Starts out bias'd by 1 until it is removed from the xarray */
+	refcount_set(&obj->shortterm_users, 1);
+	refcount_set(&obj->users, 1);
+
+	/*
+	 * Reserve an ID in the xarray but do not publish the pointer yet since
+	 * the caller hasn't initialized it yet. Once the pointer is published
+	 * in the xarray and visible to other threads we can't reliably destroy
+	 * it anymore, so the caller must complete all errorable operations
+	 * before calling iommufd_object_finalize().
+	 */
+	rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, xa_limit_31b,
+		      GFP_KERNEL_ACCOUNT);
+	if (rc)
+		goto out_free;
+	return obj;
+out_free:
+	kfree(obj);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_NS_GPL(_iommufd_object_alloc, IOMMUFD);
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index b5f5d27ee963..92bd075108e5 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -29,38 +29,6 @@  struct iommufd_object_ops {
 static const struct iommufd_object_ops iommufd_object_ops[];
 static struct miscdevice vfio_misc_dev;
 
-struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
-					     size_t size,
-					     enum iommufd_object_type type)
-{
-	struct iommufd_object *obj;
-	int rc;
-
-	obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
-	if (!obj)
-		return ERR_PTR(-ENOMEM);
-	obj->type = type;
-	/* Starts out bias'd by 1 until it is removed from the xarray */
-	refcount_set(&obj->shortterm_users, 1);
-	refcount_set(&obj->users, 1);
-
-	/*
-	 * Reserve an ID in the xarray but do not publish the pointer yet since
-	 * the caller hasn't initialized it yet. Once the pointer is published
-	 * in the xarray and visible to other threads we can't reliably destroy
-	 * it anymore, so the caller must complete all errorable operations
-	 * before calling iommufd_object_finalize().
-	 */
-	rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY,
-		      xa_limit_31b, GFP_KERNEL_ACCOUNT);
-	if (rc)
-		goto out_free;
-	return obj;
-out_free:
-	kfree(obj);
-	return ERR_PTR(rc);
-}
-
 /*
  * Allow concurrent access to the object.
  *