diff mbox series

[v1,01/16] iommufd/viommu: Add IOMMUFD_OBJ_VIOMMU and IOMMU_VIOMMU_ALLOC ioctl

Message ID 536c5e908af3847649d1f4b7050af17d77d8b524.1723061378.git.nicolinc@nvidia.com (mailing list archive)
State New
Headers show
Series iommufd: Add VIOMMU infrastructure (Part-1) | expand

Commit Message

Nicolin Chen Aug. 7, 2024, 8:10 p.m. UTC
Add a new IOMMUFD_OBJ_VIOMMU with an iommufd_viommu structure to represent
a vIOMMU instance in the user space, backed by a physical IOMMU for its HW
accelerated virtualization feature, such as nested translation support for
a multi-viommu-instance VM, NVIDIA CMDQ-Virtualization extension for ARM
SMMUv3, and AMD Hardware Accelerated Virtualized IOMMU (vIOMMU).

Also, add a new ioctl for user space to do a viommu allocation. It must be
based on a nested parent HWPT, so take its refcount.

As an initial version, support a viommu of IOMMU_VIOMMU_TYPE_DEFAULT type.
IOMMUFD core can use this viommu to store a virtual device ID lookup table
in a following patch.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/iommufd/Makefile          |  3 +-
 drivers/iommu/iommufd/iommufd_private.h | 13 +++++
 drivers/iommu/iommufd/main.c            |  6 ++
 drivers/iommu/iommufd/viommu.c          | 75 +++++++++++++++++++++++++
 include/uapi/linux/iommufd.h            | 30 ++++++++++
 5 files changed, 126 insertions(+), 1 deletion(-)
 create mode 100644 drivers/iommu/iommufd/viommu.c

Comments

Nicolin Chen Aug. 14, 2024, 4:50 p.m. UTC | #1
On Wed, Aug 07, 2024 at 01:10:42PM -0700, Nicolin Chen wrote:

>  /**
> @@ -876,4 +877,33 @@ struct iommu_fault_alloc {
>         __u32 out_fault_fd;
>  };
>  #define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
> +
> +/**
> + * enum iommu_viommu_type - Virtual IOMMU Type
> + * @IOMMU_VIOMMU_TYPE_DEFAULT: Core-managed VIOMMU type
> + */
> +enum iommu_viommu_type {
> +       IOMMU_VIOMMU_TYPE_DEFAULT,

This needs a "= 0x0". I fixed locally along with others in this series.

Nicolin
Jason Gunthorpe Aug. 15, 2024, 6:11 p.m. UTC | #2
On Wed, Aug 07, 2024 at 01:10:42PM -0700, Nicolin Chen wrote:

> +int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
> +{
> +	struct iommu_viommu_alloc *cmd = ucmd->cmd;
> +	struct iommufd_hwpt_paging *hwpt_paging;
> +	struct iommufd_viommu *viommu;
> +	struct iommufd_device *idev;
> +	int rc;
> +
> +	if (cmd->flags)
> +		return -EOPNOTSUPP;
> +
> +	idev = iommufd_get_device(ucmd, cmd->dev_id);
> +	if (IS_ERR(idev))
> +		return PTR_ERR(idev);
> +
> +	hwpt_paging = iommufd_get_hwpt_paging(ucmd, cmd->hwpt_id);
> +	if (IS_ERR(hwpt_paging)) {
> +		rc = PTR_ERR(hwpt_paging);
> +		goto out_put_idev;
> +	}
> +
> +	if (!hwpt_paging->nest_parent) {
> +		rc = -EINVAL;
> +		goto out_put_hwpt;
> +	}
> +
> +	if (cmd->type != IOMMU_VIOMMU_TYPE_DEFAULT) {
> +		rc = -EOPNOTSUPP;
> +		goto out_put_hwpt;
> +	}
> +
> +	viommu = iommufd_object_alloc(ucmd->ictx, viommu, IOMMUFD_OBJ_VIOMMU);
> +	if (IS_ERR(viommu)) {
> +		rc = PTR_ERR(viommu);
> +		goto out_put_hwpt;
> +	}
> +
> +	viommu->type = cmd->type;
> +	viommu->ictx = ucmd->ictx;
> +	viommu->hwpt = hwpt_paging;
> +	viommu->iommu_dev = idev->dev->iommu->iommu_dev;

Pedantically this is troublesome because we don't have any lifetime
control on this pointer. 

iommu unplug is fairly troubled on real HW, but the selftest does do
it.

At least for this series the value isn't used so lets remove it.

I don't have an easy solution in mind though later as surely we will
need this when we start to create more iommu bound objects. I'm pretty
sure syzkaller would eventually find such a UAF using the iommufd
selftest framework.

Jason
Nicolin Chen Aug. 15, 2024, 6:20 p.m. UTC | #3
On Thu, Aug 15, 2024 at 03:11:17PM -0300, Jason Gunthorpe wrote:
> On Wed, Aug 07, 2024 at 01:10:42PM -0700, Nicolin Chen wrote:
> 
> > +int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
> > +{
> > +	struct iommu_viommu_alloc *cmd = ucmd->cmd;
> > +	struct iommufd_hwpt_paging *hwpt_paging;
> > +	struct iommufd_viommu *viommu;
> > +	struct iommufd_device *idev;
> > +	int rc;
> > +
> > +	if (cmd->flags)
> > +		return -EOPNOTSUPP;
> > +
> > +	idev = iommufd_get_device(ucmd, cmd->dev_id);
> > +	if (IS_ERR(idev))
> > +		return PTR_ERR(idev);
> > +
> > +	hwpt_paging = iommufd_get_hwpt_paging(ucmd, cmd->hwpt_id);
> > +	if (IS_ERR(hwpt_paging)) {
> > +		rc = PTR_ERR(hwpt_paging);
> > +		goto out_put_idev;
> > +	}
> > +
> > +	if (!hwpt_paging->nest_parent) {
> > +		rc = -EINVAL;
> > +		goto out_put_hwpt;
> > +	}
> > +
> > +	if (cmd->type != IOMMU_VIOMMU_TYPE_DEFAULT) {
> > +		rc = -EOPNOTSUPP;
> > +		goto out_put_hwpt;
> > +	}
> > +
> > +	viommu = iommufd_object_alloc(ucmd->ictx, viommu, IOMMUFD_OBJ_VIOMMU);
> > +	if (IS_ERR(viommu)) {
> > +		rc = PTR_ERR(viommu);
> > +		goto out_put_hwpt;
> > +	}
> > +
> > +	viommu->type = cmd->type;
> > +	viommu->ictx = ucmd->ictx;
> > +	viommu->hwpt = hwpt_paging;
> > +	viommu->iommu_dev = idev->dev->iommu->iommu_dev;
> 
> Pedantically this is troublesome because we don't have any lifetime
> control on this pointer. 
> 
> iommu unplug is fairly troubled on real HW, but the selftest does do
> it.
> 
> At least for this series the value isn't used so lets remove it.

I recall one of my local versions had a validation using that, but
not that crucial either. Will drop it.

> I don't have an easy solution in mind though later as surely we will
> need this when we start to create more iommu bound objects. I'm pretty
> sure syzkaller would eventually find such a UAF using the iommufd
> selftest framework.

Would adding a user count in struct iommu_device help?

Thanks
Nicolin
Jason Gunthorpe Aug. 15, 2024, 6:31 p.m. UTC | #4
On Wed, Aug 07, 2024 at 01:10:42PM -0700, Nicolin Chen wrote:
> @@ -876,4 +877,33 @@ struct iommu_fault_alloc {
>  	__u32 out_fault_fd;
>  };
>  #define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
> +
> +/**
> + * enum iommu_viommu_type - Virtual IOMMU Type
> + * @IOMMU_VIOMMU_TYPE_DEFAULT: Core-managed VIOMMU type
> + */
> +enum iommu_viommu_type {
> +	IOMMU_VIOMMU_TYPE_DEFAULT,

= 0 here now

Jason
Jason Gunthorpe Aug. 15, 2024, 11:37 p.m. UTC | #5
On Thu, Aug 15, 2024 at 11:20:35AM -0700, Nicolin Chen wrote:

> > I don't have an easy solution in mind though later as surely we will
> > need this when we start to create more iommu bound objects. I'm pretty
> > sure syzkaller would eventually find such a UAF using the iommufd
> > selftest framework.
> 
> Would adding a user count in struct iommu_device help?

Yeah, maybe something like that. It is a real corner case though.

Jason
diff mbox series

Patch

diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index cf4605962bea..df490e836b30 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -7,7 +7,8 @@  iommufd-y := \
 	ioas.o \
 	main.o \
 	pages.o \
-	vfio_compat.o
+	vfio_compat.o \
+	viommu.o
 
 iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o
 
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 5d3768d77099..0e3755408911 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -131,6 +131,7 @@  enum iommufd_object_type {
 	IOMMUFD_OBJ_IOAS,
 	IOMMUFD_OBJ_ACCESS,
 	IOMMUFD_OBJ_FAULT,
+	IOMMUFD_OBJ_VIOMMU,
 #ifdef CONFIG_IOMMUFD_TEST
 	IOMMUFD_OBJ_SELFTEST,
 #endif
@@ -526,6 +527,18 @@  static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev,
 	return iommu_group_replace_domain(idev->igroup->group, hwpt->domain);
 }
 
+struct iommufd_viommu {
+	struct iommufd_object obj;
+	struct iommufd_ctx *ictx;
+	struct iommu_device *iommu_dev;
+	struct iommufd_hwpt_paging *hwpt;
+
+	unsigned int type;
+};
+
+int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
+void iommufd_viommu_destroy(struct iommufd_object *obj);
+
 #ifdef CONFIG_IOMMUFD_TEST
 int iommufd_test(struct iommufd_ucmd *ucmd);
 void iommufd_selftest_destroy(struct iommufd_object *obj);
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index b5f5d27ee963..288ee51b6829 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -333,6 +333,7 @@  union ucmd_buffer {
 	struct iommu_ioas_unmap unmap;
 	struct iommu_option option;
 	struct iommu_vfio_ioas vfio_ioas;
+	struct iommu_viommu_alloc viommu;
 #ifdef CONFIG_IOMMUFD_TEST
 	struct iommu_test_cmd test;
 #endif
@@ -384,6 +385,8 @@  static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
 		 val64),
 	IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas,
 		 __reserved),
+	IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl,
+		 struct iommu_viommu_alloc, out_viommu_id),
 #ifdef CONFIG_IOMMUFD_TEST
 	IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last),
 #endif
@@ -519,6 +522,9 @@  static const struct iommufd_object_ops iommufd_object_ops[] = {
 	[IOMMUFD_OBJ_FAULT] = {
 		.destroy = iommufd_fault_destroy,
 	},
+	[IOMMUFD_OBJ_VIOMMU] = {
+		.destroy = iommufd_viommu_destroy,
+	},
 #ifdef CONFIG_IOMMUFD_TEST
 	[IOMMUFD_OBJ_SELFTEST] = {
 		.destroy = iommufd_selftest_destroy,
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
new file mode 100644
index 000000000000..35ad6a77c9c1
--- /dev/null
+++ b/drivers/iommu/iommufd/viommu.c
@@ -0,0 +1,75 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ */
+
+#include <linux/iommufd.h>
+
+#include "iommufd_private.h"
+
+void iommufd_viommu_destroy(struct iommufd_object *obj)
+{
+	struct iommufd_viommu *viommu =
+		container_of(obj, struct iommufd_viommu, obj);
+
+	refcount_dec(&viommu->hwpt->common.obj.users);
+}
+
+int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_viommu_alloc *cmd = ucmd->cmd;
+	struct iommufd_hwpt_paging *hwpt_paging;
+	struct iommufd_viommu *viommu;
+	struct iommufd_device *idev;
+	int rc;
+
+	if (cmd->flags)
+		return -EOPNOTSUPP;
+
+	idev = iommufd_get_device(ucmd, cmd->dev_id);
+	if (IS_ERR(idev))
+		return PTR_ERR(idev);
+
+	hwpt_paging = iommufd_get_hwpt_paging(ucmd, cmd->hwpt_id);
+	if (IS_ERR(hwpt_paging)) {
+		rc = PTR_ERR(hwpt_paging);
+		goto out_put_idev;
+	}
+
+	if (!hwpt_paging->nest_parent) {
+		rc = -EINVAL;
+		goto out_put_hwpt;
+	}
+
+	if (cmd->type != IOMMU_VIOMMU_TYPE_DEFAULT) {
+		rc = -EOPNOTSUPP;
+		goto out_put_hwpt;
+	}
+
+	viommu = iommufd_object_alloc(ucmd->ictx, viommu, IOMMUFD_OBJ_VIOMMU);
+	if (IS_ERR(viommu)) {
+		rc = PTR_ERR(viommu);
+		goto out_put_hwpt;
+	}
+
+	viommu->type = cmd->type;
+	viommu->ictx = ucmd->ictx;
+	viommu->hwpt = hwpt_paging;
+	viommu->iommu_dev = idev->dev->iommu->iommu_dev;
+
+	refcount_inc(&viommu->hwpt->common.obj.users);
+
+	cmd->out_viommu_id = viommu->obj.id;
+	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+	if (rc)
+		goto out_abort;
+	iommufd_object_finalize(ucmd->ictx, &viommu->obj);
+	goto out_put_hwpt;
+
+out_abort:
+	iommufd_object_abort_and_destroy(ucmd->ictx, &viommu->obj);
+out_put_hwpt:
+	iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj);
+out_put_idev:
+	iommufd_put_object(ucmd->ictx, &idev->obj);
+	return rc;
+}
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index d42e0471156f..ed6cdc2e0be1 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -51,6 +51,7 @@  enum {
 	IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
 	IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
 	IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
+	IOMMUFD_CMD_VIOMMU_ALLOC = 0x8f,
 };
 
 /**
@@ -876,4 +877,33 @@  struct iommu_fault_alloc {
 	__u32 out_fault_fd;
 };
 #define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
+
+/**
+ * enum iommu_viommu_type - Virtual IOMMU Type
+ * @IOMMU_VIOMMU_TYPE_DEFAULT: Core-managed VIOMMU type
+ */
+enum iommu_viommu_type {
+	IOMMU_VIOMMU_TYPE_DEFAULT,
+};
+
+/**
+ * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC)
+ * @size: sizeof(struct iommu_viommu_alloc)
+ * @flags: Must be 0
+ * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type
+ * @dev_id: The device to allocate this virtual IOMMU for
+ * @hwpt_id: ID of a nesting parent HWPT to associate to
+ * @out_viommu_id: Output virtual IOMMU ID for the allocated object
+ *
+ * Allocate a virtual IOMMU object that holds a (shared) nesting parent HWPT
+ */
+struct iommu_viommu_alloc {
+	__u32 size;
+	__u32 flags;
+	__u32 type;
+	__u32 dev_id;
+	__u32 hwpt_id;
+	__u32 out_viommu_id;
+};
+#define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
 #endif