@@ -56,6 +56,16 @@ create_compat_ioas(struct iommufd_ctx *ictx)
return ioas;
}
+static u64 iommufd_get_pagesizes(struct iommufd_ioas *ioas)
+{
+ /* FIXME: See vfio_update_pgsize_bitmap(), for compat this should return
+ * the high bits too, and we need to decide if we should report that
+ * iommufd supports less than PAGE_SIZE alignment or stick to strict
+ * compatibility. qemu only cares about the first set bit.
+ */
+ return ioas->iopt.iova_alignment;
+}
+
int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
{
struct iommu_vfio_ioas *cmd = ucmd->cmd;
@@ -130,9 +140,14 @@ static int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd,
void __user *arg)
{
size_t minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);
- u32 supported_flags = VFIO_DMA_UNMAP_FLAG_ALL;
+ u32 supported_flags = VFIO_DMA_UNMAP_FLAG_ALL |
+ VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
+ struct iommufd_dirty_data dirty, *dirtyp = NULL;
struct vfio_iommu_type1_dma_unmap unmap;
+ struct vfio_bitmap bitmap;
struct iommufd_ioas *ioas;
+ unsigned long pgshift;
+ size_t pgsize;
int rc;
if (copy_from_user(&unmap, arg, minsz))
@@ -141,14 +156,53 @@ static int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd,
if (unmap.argsz < minsz || unmap.flags & ~supported_flags)
return -EINVAL;
+ if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
+ unsigned long npages;
+
+ if (copy_from_user(&bitmap,
+ (void __user *)(arg + minsz),
+ sizeof(bitmap)))
+ return -EFAULT;
+
+ if (!access_ok((void __user *)bitmap.data, bitmap.size))
+ return -EINVAL;
+
+ pgshift = __ffs(bitmap.pgsize);
+ npages = unmap.size >> pgshift;
+
+ if (!npages || !bitmap.size ||
+ (bitmap.size > DIRTY_BITMAP_SIZE_MAX) ||
+ (bitmap.size < dirty_bitmap_bytes(npages)))
+ return -EINVAL;
+
+ dirty.iova = unmap.iova;
+ dirty.length = unmap.size;
+ dirty.data = bitmap.data;
+ dirty.page_size = 1 << pgshift;
+ dirtyp = &dirty;
+ }
+
ioas = get_compat_ioas(ictx);
if (IS_ERR(ioas))
return PTR_ERR(ioas);
+ pgshift = __ffs(iommufd_get_pagesizes(ioas));
+ pgsize = (size_t)1 << pgshift;
+
+ /* When dirty tracking is enabled, allow only min supported pgsize */
+ if ((unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
+ (bitmap.pgsize != pgsize)) {
+ rc = -EINVAL;
+ goto out_put;
+ }
+
if (unmap.flags & VFIO_DMA_UNMAP_FLAG_ALL)
rc = iopt_unmap_all(&ioas->iopt);
else
- rc = iopt_unmap_iova(&ioas->iopt, unmap.iova, unmap.size, NULL);
+ rc = iopt_unmap_iova(&ioas->iopt, unmap.iova, unmap.size,
+ dirtyp);
+
+out_put:
iommufd_put_object(&ioas->obj);
return rc;
}
@@ -222,16 +276,6 @@ static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type)
return 0;
}
-static u64 iommufd_get_pagesizes(struct iommufd_ioas *ioas)
-{
- /* FIXME: See vfio_update_pgsize_bitmap(), for compat this should return
- * the high bits too, and we need to decide if we should report that
- * iommufd supports less than PAGE_SIZE alignment or stick to strict
- * compatibility. qemu only cares about the first set bit.
- */
- return ioas->iopt.iova_alignment;
-}
-
static int iommufd_fill_cap_iova(struct iommufd_ioas *ioas,
struct vfio_info_cap_header __user *cur,
size_t avail)
@@ -289,6 +333,26 @@ static int iommufd_fill_cap_dma_avail(struct iommufd_ioas *ioas,
return sizeof(cap_dma);
}
+static int iommufd_fill_cap_migration(struct iommufd_ioas *ioas,
+ struct vfio_info_cap_header __user *cur,
+ size_t avail)
+{
+ struct vfio_iommu_type1_info_cap_migration cap_mig = {
+ .header = {
+ .id = VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION,
+ .version = 1,
+ },
+ .flags = 0,
+ .pgsize_bitmap = (size_t) 1 << __ffs(iommufd_get_pagesizes(ioas)),
+ .max_dirty_bitmap_size = DIRTY_BITMAP_SIZE_MAX,
+ };
+
+ if (avail >= sizeof(cap_mig) &&
+ copy_to_user(cur, &cap_mig, sizeof(cap_mig)))
+ return -EFAULT;
+ return sizeof(cap_mig);
+}
+
static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx,
void __user *arg)
{
@@ -298,6 +362,7 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx,
static const fill_cap_fn fill_fns[] = {
iommufd_fill_cap_iova,
iommufd_fill_cap_dma_avail,
+ iommufd_fill_cap_migration,
};
size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
struct vfio_info_cap_header __user *last_cap = NULL;
@@ -364,6 +429,137 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx,
return rc;
}
+static int iommufd_vfio_dirty_pages_start(struct iommufd_ctx *ictx,
+ struct vfio_iommu_type1_dirty_bitmap *dirty)
+{
+ struct iommufd_ioas *ioas;
+ int ret = -EINVAL;
+
+ ioas = get_compat_ioas(ictx);
+ if (IS_ERR(ioas))
+ return PTR_ERR(ioas);
+
+ ret = iopt_set_dirty_tracking(&ioas->iopt, NULL, true);
+
+ iommufd_put_object(&ioas->obj);
+
+ return ret;
+}
+
+static int iommufd_vfio_dirty_pages_stop(struct iommufd_ctx *ictx,
+ struct vfio_iommu_type1_dirty_bitmap *dirty)
+{
+ struct iommufd_ioas *ioas;
+ int ret;
+
+ ioas = get_compat_ioas(ictx);
+ if (IS_ERR(ioas))
+ return PTR_ERR(ioas);
+
+ ret = iopt_set_dirty_tracking(&ioas->iopt, NULL, false);
+
+ iommufd_put_object(&ioas->obj);
+
+ return ret;
+}
+
+static int iommufd_vfio_dirty_pages_get_bitmap(struct iommufd_ctx *ictx,
+ struct vfio_iommu_type1_dirty_bitmap_get *range)
+{
+ struct iommufd_dirty_data bitmap;
+ uint64_t npages, bitmap_size;
+ struct iommufd_ioas *ioas;
+ unsigned long pgshift;
+ size_t iommu_pgsize;
+ int ret = -EINVAL;
+
+ ioas = get_compat_ioas(ictx);
+ if (IS_ERR(ioas))
+ return PTR_ERR(ioas);
+
+ down_read(&ioas->iopt.iova_rwsem);
+ pgshift = __ffs(range->bitmap.pgsize);
+ npages = range->size >> pgshift;
+ bitmap_size = range->bitmap.size;
+
+ if (!npages || !bitmap_size || (bitmap_size > DIRTY_BITMAP_SIZE_MAX) ||
+ (bitmap_size < dirty_bitmap_bytes(npages)))
+ goto out_put;
+
+ iommu_pgsize = 1 << __ffs(iommufd_get_pagesizes(ioas));
+
+ /* allow only smallest supported pgsize */
+ if (range->bitmap.pgsize != iommu_pgsize)
+ goto out_put;
+
+ if (range->iova & (iommu_pgsize - 1))
+ goto out_put;
+
+ if (!range->size || range->size & (iommu_pgsize - 1))
+ goto out_put;
+
+ bitmap.iova = range->iova;
+ bitmap.length = range->size;
+ bitmap.data = range->bitmap.data;
+ bitmap.page_size = 1 << pgshift;
+
+ ret = iopt_read_and_clear_dirty_data(&ioas->iopt, NULL, &bitmap);
+
+out_put:
+ up_read(&ioas->iopt.iova_rwsem);
+ iommufd_put_object(&ioas->obj);
+ return ret;
+}
+
+static int iommufd_vfio_dirty_pages(struct iommufd_ctx *ictx, unsigned int cmd,
+ void __user *arg)
+{
+ size_t minsz = offsetofend(struct vfio_iommu_type1_dirty_bitmap, flags);
+ struct vfio_iommu_type1_dirty_bitmap dirty;
+ u32 supported_flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START |
+ VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP |
+ VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
+ int ret = 0;
+
+ if (copy_from_user(&dirty, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (dirty.argsz < minsz || dirty.flags & ~supported_flags)
+ return -EINVAL;
+
+ /* only one flag should be set at a time */
+ if (__ffs(dirty.flags) != __fls(dirty.flags))
+ return -EINVAL;
+
+ if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) {
+ ret = iommufd_vfio_dirty_pages_start(ictx, &dirty);
+ } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP) {
+ ret = iommufd_vfio_dirty_pages_stop(ictx, &dirty);
+ } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) {
+ struct vfio_iommu_type1_dirty_bitmap_get range;
+ size_t data_size = dirty.argsz - minsz;
+
+ if (!data_size || data_size < sizeof(range))
+ return -EINVAL;
+
+ if (copy_from_user(&range, (void __user *)(arg + minsz),
+ sizeof(range)))
+ return -EFAULT;
+
+ if (range.iova + range.size < range.iova)
+ return -EINVAL;
+
+ if (!access_ok((void __user *)range.bitmap.data,
+ range.bitmap.size))
+ return -EINVAL;
+
+ ret = iommufd_vfio_dirty_pages_get_bitmap(ictx, &range);
+ }
+
+ return ret;
+}
+
+
/* FIXME TODO:
PowerPC SPAPR only:
#define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15)
@@ -394,6 +590,7 @@ int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
case VFIO_IOMMU_UNMAP_DMA:
return iommufd_vfio_unmap_dma(ictx, cmd, uarg);
case VFIO_IOMMU_DIRTY_PAGES:
+ return iommufd_vfio_dirty_pages(ictx, cmd, uarg);
default:
return -ENOIOCTLCMD;
}
Add the corresponding APIs for performing VFIO dirty tracking,
particularly the VFIO_IOMMU_DIRTY_PAGES ioctl subcmds:

* VFIO_IOMMU_DIRTY_PAGES_FLAG_START: Start dirty tracking and allocate
  the area @dirty_bitmap
* VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP: Stop dirty tracking and free the
  area @dirty_bitmap
* VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP: Fetch the dirty bitmap while
  dirty tracking is active

Advertise the VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION capability, reporting
the domain-configured page size as iopt::iova_alignment and the maximum
dirty bitmap size as the same value VFIO uses.

Compared to the VFIO type1 iommu, perpetual dirtying is not implemented
and userspace gets -EOPNOTSUPP, which today's userspace already handles.

Move the iommufd_get_pagesizes() definition above the unmap path so that
iommufd_vfio_unmap_dma() dirty support can validate the user bitmap page
size against the IOPT page size.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 drivers/iommu/iommufd/vfio_compat.c | 221 ++++++++++++++++++++++++++--
 1 file changed, 209 insertions(+), 12 deletions(-)
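
Not part of the patch: a minimal userspace sketch of how these subcmds are
exercised through the compat container. It assumes container_fd is a VFIO
container backed by iommufd compat and that [iova, iova + size) was already
mapped with VFIO_IOMMU_MAP_DMA; the dirty_pages_* helper names are made up
for illustration.

#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* START/STOP take no payload beyond the header; exactly one flag bit set. */
static int dirty_pages_ctl(int container_fd, uint32_t flag)
{
	struct vfio_iommu_type1_dirty_bitmap dirty = {
		.argsz = sizeof(dirty),
		.flags = flag,
	};

	return ioctl(container_fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
}

/* GET_BITMAP appends a vfio_iommu_type1_dirty_bitmap_get after the header. */
static int dirty_pages_get_bitmap(int container_fd, uint64_t iova,
				  uint64_t size, uint64_t *data)
{
	size_t argsz = sizeof(struct vfio_iommu_type1_dirty_bitmap) +
		       sizeof(struct vfio_iommu_type1_dirty_bitmap_get);
	struct vfio_iommu_type1_dirty_bitmap *dirty = calloc(1, argsz);
	struct vfio_iommu_type1_dirty_bitmap_get *range;
	uint64_t pgsize = sysconf(_SC_PAGESIZE);
	uint64_t npages = size / pgsize;
	int ret;

	dirty->argsz = argsz;
	dirty->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;

	range = (struct vfio_iommu_type1_dirty_bitmap_get *)dirty->data;
	range->iova = iova;
	range->size = size;
	/*
	 * bitmap.pgsize must match the minimum page size advertised via
	 * VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION (iopt::iova_alignment); the
	 * host page size is assumed here.
	 */
	range->bitmap.pgsize = pgsize;
	/* One bit per page, rounded up to a whole number of u64 words. */
	range->bitmap.size = ((npages + 63) / 64) * sizeof(uint64_t);
	range->bitmap.data = (__u64 *)data;

	ret = ioctl(container_fd, VFIO_IOMMU_DIRTY_PAGES, dirty);
	free(dirty);
	return ret;
}

A typical migration flow would call dirty_pages_ctl() with
VFIO_IOMMU_DIRTY_PAGES_FLAG_START, collect dirty_pages_get_bitmap()
repeatedly while copying memory, and finish with
VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP. The unmap path works analogously: when
VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP is set, a struct vfio_bitmap with the
same pgsize constraint is appended after struct vfio_iommu_type1_dma_unmap,
so the final dirty state of a range can be fetched atomically with its
unmapping.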