@@ -220,3 +220,25 @@ int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd)
iommufd_put_object(&hwpt->obj);
return rc;
}
+
+/*
+ * IOMMU_HWPT_GET_DIRTY_BITMAP handler: look up the HW pagetable named by
+ * cmd->hwpt_id and have its IOAS read and clear the dirty bits for the
+ * requested IOVA range.
+ *
+ * Returns -EOPNOTSUPP when flags or __reserved are non-zero, the lookup
+ * error when the hwpt id cannot be resolved, otherwise the result of
+ * iopt_read_and_clear_dirty_data().
+ */
+int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_hwpt_get_dirty_bitmap *cmd = ucmd->cmd;
+	struct iommufd_hw_pagetable *hwpt;
+	int rc;
+
+	if (cmd->flags != 0 || cmd->__reserved != 0)
+		return -EOPNOTSUPP;
+
+	hwpt = iommufd_get_hwpt(ucmd, cmd->hwpt_id);
+	if (IS_ERR(hwpt))
+		return PTR_ERR(hwpt);
+
+	rc = iopt_read_and_clear_dirty_data(&hwpt->ioas->iopt, hwpt->domain,
+					    cmd->flags, cmd);
+
+	/* Release the object obtained from iommufd_get_hwpt() */
+	iommufd_put_object(&hwpt->obj);
+	return rc;
+}
@@ -15,6 +15,7 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/errno.h>
+#include <uapi/linux/iommufd.h>
#include "io_pagetable.h"
#include "double_span.h"
@@ -412,6 +413,118 @@ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
return 0;
}
+/* Context handed to the iova_bitmap walk callback through its opaque pointer */
+struct iova_bitmap_fn_arg {
+	/* pagetable whose areas are iterated for each reported range */
+	struct io_pagetable *iopt;
+	/* domain whose dirty_ops->read_and_clear_dirty() is invoked */
+	struct iommu_domain *domain;
+	/* dirty bitmap state passed through to read_and_clear_dirty() */
+	struct iommu_dirty_bitmap *dirty;
+};
+
+/*
+ * iova_bitmap walk callback covering [iova, iova + length - 1].
+ *
+ * @opaque is a struct iova_bitmap_fn_arg. Each area yielded by the contiguous
+ * area iterator is clamped to the requested range and handed to the domain's
+ * read_and_clear_dirty op.
+ *
+ * Returns the first non-zero op result, -EINVAL when
+ * iopt_area_contig_done() reports that the walk did not complete, and 0
+ * otherwise.
+ */
+static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap,
+					unsigned long iova, size_t length,
+					void *opaque)
+{
+	struct iova_bitmap_fn_arg *arg = opaque;
+	struct iommu_domain *domain = arg->domain;
+	const struct iommu_dirty_ops *ops = domain->dirty_ops;
+	unsigned long last_iova = iova + length - 1;
+	struct iopt_area_contig_iter iter;
+	struct iopt_area *area;
+
+	iopt_for_each_contig_area(&iter, area, arg->iopt, iova, last_iova) {
+		/* The area may extend past the end of the requested range */
+		unsigned long end = min(last_iova, iopt_area_last_iova(area));
+		unsigned long span = end - iter.cur_iova + 1;
+		int rc;
+
+		rc = ops->read_and_clear_dirty(domain, iter.cur_iova, span, 0,
+					       arg->dirty);
+		if (rc)
+			return rc;
+	}
+
+	return iopt_area_contig_done(&iter) ? 0 : -EINVAL;
+}
+
+/*
+ * Read and clear the dirty bits of @domain over the IOVA range described by
+ * @bitmap, reporting them through an iova_bitmap built on bitmap->data.
+ *
+ * @flags is accepted for the caller's benefit but is not consulted here.
+ *
+ * Returns -EOPNOTSUPP when the domain has no read_and_clear_dirty op, the
+ * iova_bitmap_alloc() error when the bitmap cannot be set up, otherwise the
+ * result of walking the bitmap with __iommu_read_and_clear_dirty().
+ */
+static int
+iommu_read_and_clear_dirty(struct iommu_domain *domain,
+			   struct io_pagetable *iopt, unsigned long flags,
+			   struct iommu_hwpt_get_dirty_bitmap *bitmap)
+{
+	const struct iommu_dirty_ops *ops = domain->dirty_ops;
+	struct iommu_iotlb_gather gather;
+	struct iommu_dirty_bitmap dirty;
+	struct iova_bitmap_fn_arg arg;
+	struct iova_bitmap *iter;
+	int ret;
+
+	if (!ops || !ops->read_and_clear_dirty)
+		return -EOPNOTSUPP;
+
+	iter = iova_bitmap_alloc(bitmap->iova, bitmap->length,
+				 bitmap->page_size,
+				 u64_to_user_ptr(bitmap->data));
+	/* Hand back the actual failure instead of flattening it to -ENOMEM */
+	if (IS_ERR(iter))
+		return PTR_ERR(iter);
+
+	iommu_dirty_bitmap_init(&dirty, iter, &gather);
+
+	arg.iopt = iopt;
+	arg.domain = domain;
+	arg.dirty = &dirty;
+	/* Walk failures (e.g. -EINVAL from the callback) must reach the caller */
+	ret = iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty);
+
+	/*
+	 * Sync and free unconditionally: a failed walk may already have
+	 * processed part of the range.
+	 */
+	iommu_iotlb_sync(domain, &gather);
+	iova_bitmap_free(iter);
+
+	return ret;
+}
+
+/*
+ * Validate the IOVA range and bitmap granule of a dirty bitmap request.
+ *
+ * Returns -EINVAL for an empty range, a zero page_size, or when the start or
+ * the end of the range is not aligned to both iopt->iova_alignment and
+ * bitmap->page_size; -EOVERFLOW when iova + length - 1 wraps or the range
+ * does not fit in an unsigned long; 0 when the range is acceptable.
+ */
+int iommufd_check_iova_range(struct io_pagetable *iopt,
+			     struct iommu_hwpt_get_dirty_bitmap *bitmap)
+{
+	size_t iommu_pgsize = iopt->iova_alignment;
+	u64 last_iova;
+
+	/*
+	 * length - 1 below wraps for an empty range; with iova == 0 the
+	 * wrapped value would pass every later check on 64-bit.
+	 */
+	if (!bitmap->length)
+		return -EINVAL;
+
+	if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova))
+		return -EOVERFLOW;
+
+	if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX)
+		return -EOVERFLOW;
+
+	if ((bitmap->iova & (iommu_pgsize - 1)) ||
+	    ((last_iova + 1) & (iommu_pgsize - 1)))
+		return -EINVAL;
+
+	/* Checked before page_size - 1 is used as a mask */
+	if (!bitmap->page_size)
+		return -EINVAL;
+
+	if ((bitmap->iova & (bitmap->page_size - 1)) ||
+	    ((last_iova + 1) & (bitmap->page_size - 1)))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Validate the request in @bitmap with iommufd_check_iova_range() and, when
+ * it is acceptable, run iommu_read_and_clear_dirty() on @domain while holding
+ * iopt->iova_rwsem for read.
+ *
+ * Returns the validation error, or the result of the read-and-clear pass.
+ */
+int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
+				   struct iommu_domain *domain,
+				   unsigned long flags,
+				   struct iommu_hwpt_get_dirty_bitmap *bitmap)
+{
+	int rc = iommufd_check_iova_range(iopt, bitmap);
+
+	if (!rc) {
+		down_read(&iopt->iova_rwsem);
+		rc = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap);
+		up_read(&iopt->iova_rwsem);
+	}
+	return rc;
+}
+
static int iopt_clear_dirty_data(struct io_pagetable *iopt,
struct iommu_domain *domain)
{
@@ -8,6 +8,8 @@
#include <linux/xarray.h>
#include <linux/refcount.h>
#include <linux/uaccess.h>
+#include <linux/iommu.h>
+#include <linux/iova_bitmap.h>
#include <uapi/linux/iommufd.h>
struct iommu_domain;
@@ -71,6 +73,10 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
unsigned long length, unsigned long *unmapped);
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
+int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
+ struct iommu_domain *domain,
+ unsigned long flags,
+ struct iommu_hwpt_get_dirty_bitmap *bitmap);
int iopt_set_dirty_tracking(struct io_pagetable *iopt,
struct iommu_domain *domain, bool enable);
@@ -226,6 +232,8 @@ int iommufd_option_rlimit_mode(struct iommu_option *cmd,
struct iommufd_ctx *ictx);
int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
+int iommufd_check_iova_range(struct io_pagetable *iopt,
+ struct iommu_hwpt_get_dirty_bitmap *bitmap);
/*
* A HW pagetable is called an iommu_domain inside the kernel. This user object
@@ -252,6 +260,8 @@ iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id)
struct iommufd_hw_pagetable, obj);
}
int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd);
+int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);
+
struct iommufd_hw_pagetable *
iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
struct iommufd_device *idev, u32 flags,
@@ -307,6 +307,7 @@ union ucmd_buffer {
struct iommu_destroy destroy;
struct iommu_hw_info info;
struct iommu_hwpt_alloc hwpt;
+ struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap;
struct iommu_hwpt_set_dirty_tracking set_dirty_tracking;
struct iommu_ioas_alloc alloc;
struct iommu_ioas_allow_iovas allow_iovas;
@@ -343,6 +344,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
__reserved),
IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc,
__reserved),
+ IOCTL_OP(IOMMU_HWPT_GET_DIRTY_BITMAP, iommufd_hwpt_get_dirty_bitmap,
+ struct iommu_hwpt_get_dirty_bitmap, data),
IOCTL_OP(IOMMU_HWPT_SET_DIRTY_TRACKING, iommufd_hwpt_set_dirty_tracking,
struct iommu_hwpt_set_dirty_tracking, __reserved),
IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl,
@@ -555,5 +558,6 @@ MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
#endif
MODULE_IMPORT_NS(IOMMUFD_INTERNAL);
+MODULE_IMPORT_NS(IOMMUFD);
MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices");
MODULE_LICENSE("GPL");
@@ -48,6 +48,7 @@ enum {
IOMMUFD_CMD_HWPT_ALLOC,
IOMMUFD_CMD_GET_HW_INFO,
IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING,
+ IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP,
};
/**
@@ -481,4 +482,38 @@ struct iommu_hwpt_set_dirty_tracking {
};
#define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \
IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING)
+
+/**
+ * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP)
+ * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap)
+ * @hwpt_id: HW pagetable ID that represents the IOMMU domain
+ * @flags: Must be zero
+ * @__reserved: Must be 0
+ * @iova: Base IOVA of the range; it corresponds to the first bit of the bitmap
+ * @length: Size of the IOVA range
+ * @page_size: Page size granularity represented by each bit in the bitmap
+ * @data: Bitmap in which the dirty bits are set. Bit N stands for the
+ *        page_size-sized page that starts at @iova + N * @page_size.
+ *
+ * Checking whether an address addr inside the range is dirty:
+ *
+ *  data[((addr - iova) / page_size) / 64] &
+ *                        (1ULL << (((addr - iova) / page_size) % 64))
+ *
+ * Walk the IOMMU pagetables for the given IOVA range and return a bitmap of
+ * the dirty IOVAs. Doing so also clears, by default, any dirty bit metadata
+ * set in the IOPTE.
+ */
+struct iommu_hwpt_get_dirty_bitmap {
+	__u32 size;
+	__u32 hwpt_id;
+	__u32 flags;
+	__u32 __reserved;
+	__aligned_u64 iova;
+	__aligned_u64 length;
+	__aligned_u64 page_size;
+	__aligned_u64 data;
+};
+#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \
+					IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)
+
#endif