diff mbox series

[v4,06/18] iommufd: Add IOMMU_HWPT_SET_DIRTY

Message ID 20231018202715.69734-7-joao.m.martins@oracle.com (mailing list archive)
State New, archived
Headers show
Series IOMMUFD Dirty Tracking | expand

Commit Message

Joao Martins Oct. 18, 2023, 8:27 p.m. UTC
Every IOMMU driver should be able to implement the needed iommu domain ops
to control dirty tracking.

Connect a hw_pagetable to the IOMMU core dirty tracking ops, specifically
the ability to enable/disable dirty tracking on an IOMMU domain
(hw_pagetable id). To that end add an io_pagetable kernel API to toggle
dirty tracking:

* iopt_set_dirty_tracking(iopt, [domain], state)

The intended caller of this is via the hw_pagetable object that is created.

Internally it will ensure the leftover dirty state is cleared /right
before/ dirty tracking starts. This is also useful for iommu drivers which
may decide that dirty tracking is always-enabled at boot without wanting to
toggle dynamically via corresponding iommu domain op.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 drivers/iommu/iommufd/hw_pagetable.c    | 24 +++++++++++
 drivers/iommu/iommufd/io_pagetable.c    | 55 +++++++++++++++++++++++++
 drivers/iommu/iommufd/iommufd_private.h | 12 ++++++
 drivers/iommu/iommufd/main.c            |  3 ++
 include/uapi/linux/iommufd.h            | 25 +++++++++++
 5 files changed, 119 insertions(+)

Comments

Jason Gunthorpe Oct. 18, 2023, 10:28 p.m. UTC | #1
On Wed, Oct 18, 2023 at 09:27:03PM +0100, Joao Martins wrote:
> Every IOMMU driver should be able to implement the needed iommu domain ops
> to control dirty tracking.
> 
> Connect a hw_pagetable to the IOMMU core dirty tracking ops, specifically
> the ability to enable/disable dirty tracking on an IOMMU domain
> (hw_pagetable id). To that end add an io_pagetable kernel API to toggle
> dirty tracking:
> 
> * iopt_set_dirty_tracking(iopt, [domain], state)
> 
> The intended caller of this is via the hw_pagetable object that is created.
> 
> Internally it will ensure the leftover dirty state is cleared /right
> before/ dirty tracking starts. This is also useful for iommu drivers which
> may decide that dirty tracking is always-enabled at boot without wanting to
> toggle dynamically via corresponding iommu domain op.
> 
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
>  drivers/iommu/iommufd/hw_pagetable.c    | 24 +++++++++++
>  drivers/iommu/iommufd/io_pagetable.c    | 55 +++++++++++++++++++++++++
>  drivers/iommu/iommufd/iommufd_private.h | 12 ++++++
>  drivers/iommu/iommufd/main.c            |  3 ++
>  include/uapi/linux/iommufd.h            | 25 +++++++++++
>  5 files changed, 119 insertions(+)

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

Jason
Tian, Kevin Oct. 20, 2023, 6:09 a.m. UTC | #2
> From: Joao Martins <joao.m.martins@oracle.com>
> Sent: Thursday, October 19, 2023 4:27 AM
> 
> +
> +int iopt_set_dirty_tracking(struct io_pagetable *iopt,
> +			    struct iommu_domain *domain, bool enable)
> +{
> +	const struct iommu_dirty_ops *ops = domain->dirty_ops;
> +	int ret = 0;
> +
> +	if (!ops)
> +		return -EOPNOTSUPP;
> +
> +	down_read(&iopt->iova_rwsem);
> +
> +	/* Clear dirty bits from PTEs to ensure a clean snapshot */
> +	if (enable) {
> +		ret = iopt_clear_dirty_data(iopt, domain);
> +		if (ret)
> +			goto out_unlock;
> +	}

why not leaving this to the actual driver which wants to always
enable dirty instead of making it a general burden for all?

> +
> +/*
> + * enum iommufd_set_dirty_flags - Flags for steering dirty tracking
> + * @IOMMU_DIRTY_TRACKING_ENABLE: Enables dirty tracking

s/Enables/Enable/

> + */
> +enum iommufd_hwpt_set_dirty_flags {
> +	IOMMU_DIRTY_TRACKING_ENABLE = 1,

IOMMU_HWPT_DIRTY_TRACKING_ENABLE

> +
> +/**
> + * struct iommu_hwpt_set_dirty - ioctl(IOMMU_HWPT_SET_DIRTY)

IOMMU_HWPT_SET_DIRTY_TRACKING
Tian, Kevin Oct. 20, 2023, 7:56 a.m. UTC | #3
> From: Tian, Kevin
> Sent: Friday, October 20, 2023 2:09 PM
> 
> > From: Joao Martins <joao.m.martins@oracle.com>
> > Sent: Thursday, October 19, 2023 4:27 AM
> >
> > +
> > +int iopt_set_dirty_tracking(struct io_pagetable *iopt,
> > +			    struct iommu_domain *domain, bool enable)
> > +{
> > +	const struct iommu_dirty_ops *ops = domain->dirty_ops;
> > +	int ret = 0;
> > +
> > +	if (!ops)
> > +		return -EOPNOTSUPP;
> > +
> > +	down_read(&iopt->iova_rwsem);
> > +
> > +	/* Clear dirty bits from PTEs to ensure a clean snapshot */
> > +	if (enable) {
> > +		ret = iopt_clear_dirty_data(iopt, domain);
> > +		if (ret)
> > +			goto out_unlock;
> > +	}
> 
> why not leaving this to the actual driver which wants to always
> enable dirty instead of making it a general burden for all?
> 

looks I was misled by the commit msg. This is really for the reality
that there is no guarantee from previous cycle where the user will
quiesce device, read/clean all the remaining dirty bits and then
disable dirty tracking. It's true for live migration but not if just
looking at this dirty tracking alone.

So,

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Joao Martins Oct. 20, 2023, 3:30 p.m. UTC | #4
On 20/10/2023 07:09, Tian, Kevin wrote:
>> From: Joao Martins <joao.m.martins@oracle.com>
>> Sent: Thursday, October 19, 2023 4:27 AM
>> +
>> +/**
>> + * struct iommu_hwpt_set_dirty - ioctl(IOMMU_HWPT_SET_DIRTY)
> 
> IOMMU_HWPT_SET_DIRTY_TRACKING

OK; I am riding on the assumption that the naming change ask
is to broadly replace structs/iommufd-cmds/ioctls/commit-msgs from
set_dirty to set_dirty_tracking
Joao Martins Oct. 20, 2023, 8:41 p.m. UTC | #5
On 18/10/2023 21:27, Joao Martins wrote:
> +
> +/*
> + * enum iommufd_set_dirty_flags - Flags for steering dirty tracking
> + * @IOMMU_DIRTY_TRACKING_ENABLE: Enables dirty tracking
> + */
> +enum iommufd_hwpt_set_dirty_flags {
> +	IOMMU_DIRTY_TRACKING_ENABLE = 1,
> +};
> +
> +/**
> + * struct iommu_hwpt_set_dirty - ioctl(IOMMU_HWPT_SET_DIRTY)
> + * @size: sizeof(struct iommu_hwpt_set_dirty)
> + * @flags: Flags to control dirty tracking status.
> + * @hwpt_id: HW pagetable ID that represents the IOMMU domain.
> + *
> + * Toggle dirty tracking on an HW pagetable.
> + */
> +struct iommu_hwpt_set_dirty {
> +	__u32 size;
> +	__u32 flags;
> +	__u32 hwpt_id;
> +	__u32 __reserved;
> +};
> +#define IOMMU_HWPT_SET_DIRTY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_SET_DIRTY)
>  #endif

Notice a docs inconsistency with the docs compared to other ioctls which pass
flags. So applying the snippet below to this patch. Will be doing similar thing
to GET_DIRTY_BITMAP patch, except that the GET_DIRTY_BITMAP patch says "Must be
zero", and then change into a similar comment as below when I introduce the
NO_CLEAR flag.

 /**
  * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING)
  * @size: sizeof(struct iommu_hwpt_set_dirty_tracking)
- * @flags: Flags to control dirty tracking status.
+ * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags
  * @hwpt_id: HW pagetable ID that represents the IOMMU domain.
  *
  * Toggle dirty tracking on an HW pagetable.
diff mbox series

Patch

diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 2b9ff3850bb8..85a0f696c744 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -196,3 +196,27 @@  int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
 	iommufd_put_object(&idev->obj);
 	return rc;
 }
+
+int iommufd_hwpt_set_dirty(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_hwpt_set_dirty *cmd = ucmd->cmd;
+	struct iommufd_hw_pagetable *hwpt;
+	struct iommufd_ioas *ioas;
+	int rc = -EOPNOTSUPP;
+	bool enable;
+
+	if (cmd->flags & ~IOMMU_DIRTY_TRACKING_ENABLE)
+		return rc;
+
+	hwpt = iommufd_get_hwpt(ucmd, cmd->hwpt_id);
+	if (IS_ERR(hwpt))
+		return PTR_ERR(hwpt);
+
+	ioas = hwpt->ioas;
+	enable = cmd->flags & IOMMU_DIRTY_TRACKING_ENABLE;
+
+	rc = iopt_set_dirty_tracking(&ioas->iopt, hwpt->domain, enable);
+
+	iommufd_put_object(&hwpt->obj);
+	return rc;
+}
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 3a598182b761..535d73466e15 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -412,6 +412,61 @@  int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
 	return 0;
 }
 
+static int iopt_clear_dirty_data(struct io_pagetable *iopt,
+				 struct iommu_domain *domain)
+{
+	const struct iommu_dirty_ops *ops = domain->dirty_ops;
+	struct iommu_iotlb_gather gather;
+	struct iommu_dirty_bitmap dirty;
+	struct iopt_area *area;
+	int ret = 0;
+
+	lockdep_assert_held_read(&iopt->iova_rwsem);
+
+	iommu_dirty_bitmap_init(&dirty, NULL, &gather);
+
+	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
+	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
+		if (!area->pages)
+			continue;
+
+		ret = ops->read_and_clear_dirty(domain,
+						iopt_area_iova(area),
+						iopt_area_length(area), 0,
+						&dirty);
+		if (ret)
+			break;
+	}
+
+	iommu_iotlb_sync(domain, &gather);
+	return ret;
+}
+
+int iopt_set_dirty_tracking(struct io_pagetable *iopt,
+			    struct iommu_domain *domain, bool enable)
+{
+	const struct iommu_dirty_ops *ops = domain->dirty_ops;
+	int ret = 0;
+
+	if (!ops)
+		return -EOPNOTSUPP;
+
+	down_read(&iopt->iova_rwsem);
+
+	/* Clear dirty bits from PTEs to ensure a clean snapshot */
+	if (enable) {
+		ret = iopt_clear_dirty_data(iopt, domain);
+		if (ret)
+			goto out_unlock;
+	}
+
+	ret = ops->set_dirty_tracking(domain, enable);
+
+out_unlock:
+	up_read(&iopt->iova_rwsem);
+	return ret;
+}
+
 int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
 		   unsigned long length, struct list_head *pages_list)
 {
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 3064997a0181..d42e01cc1105 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -8,6 +8,7 @@ 
 #include <linux/xarray.h>
 #include <linux/refcount.h>
 #include <linux/uaccess.h>
+#include <uapi/linux/iommufd.h>
 
 struct iommu_domain;
 struct iommu_group;
@@ -70,6 +71,9 @@  int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
 		    unsigned long length, unsigned long *unmapped);
 int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
 
+int iopt_set_dirty_tracking(struct io_pagetable *iopt,
+			    struct iommu_domain *domain, bool enable);
+
 void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
 				 unsigned long length);
 int iopt_table_add_domain(struct io_pagetable *iopt,
@@ -240,6 +244,14 @@  struct iommufd_hw_pagetable {
 	struct list_head hwpt_item;
 };
 
+static inline struct iommufd_hw_pagetable *iommufd_get_hwpt(
+					struct iommufd_ucmd *ucmd, u32 id)
+{
+	return container_of(iommufd_get_object(ucmd->ictx, id,
+					       IOMMUFD_OBJ_HW_PAGETABLE),
+			    struct iommufd_hw_pagetable, obj);
+}
+int iommufd_hwpt_set_dirty(struct iommufd_ucmd *ucmd);
 struct iommufd_hw_pagetable *
 iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
 			   struct iommufd_device *idev, u32 flags,
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index e71523cbd0de..2e625b280d61 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -307,6 +307,7 @@  union ucmd_buffer {
 	struct iommu_destroy destroy;
 	struct iommu_hw_info info;
 	struct iommu_hwpt_alloc hwpt;
+	struct iommu_hwpt_set_dirty set_dirty;
 	struct iommu_ioas_alloc alloc;
 	struct iommu_ioas_allow_iovas allow_iovas;
 	struct iommu_ioas_copy ioas_copy;
@@ -342,6 +343,8 @@  static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
 		 __reserved),
 	IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc,
 		 __reserved),
+	IOCTL_OP(IOMMU_HWPT_SET_DIRTY, iommufd_hwpt_set_dirty,
+		 struct iommu_hwpt_set_dirty, __reserved),
 	IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl,
 		 struct iommu_ioas_alloc, out_ioas_id),
 	IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas,
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index cd94a9d8ce66..9e1721e38819 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -47,6 +47,7 @@  enum {
 	IOMMUFD_CMD_VFIO_IOAS,
 	IOMMUFD_CMD_HWPT_ALLOC,
 	IOMMUFD_CMD_GET_HW_INFO,
+	IOMMUFD_CMD_HWPT_SET_DIRTY,
 };
 
 /**
@@ -454,4 +455,28 @@  struct iommu_hw_info {
 	__u32 __reserved;
 };
 #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO)
+
+/*
+ * enum iommufd_set_dirty_flags - Flags for steering dirty tracking
+ * @IOMMU_DIRTY_TRACKING_ENABLE: Enables dirty tracking
+ */
+enum iommufd_hwpt_set_dirty_flags {
+	IOMMU_DIRTY_TRACKING_ENABLE = 1,
+};
+
+/**
+ * struct iommu_hwpt_set_dirty - ioctl(IOMMU_HWPT_SET_DIRTY)
+ * @size: sizeof(struct iommu_hwpt_set_dirty)
+ * @flags: Flags to control dirty tracking status.
+ * @hwpt_id: HW pagetable ID that represents the IOMMU domain.
+ *
+ * Toggle dirty tracking on an HW pagetable.
+ */
+struct iommu_hwpt_set_dirty {
+	__u32 size;
+	__u32 flags;
+	__u32 hwpt_id;
+	__u32 __reserved;
+};
+#define IOMMU_HWPT_SET_DIRTY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_SET_DIRTY)
 #endif