diff mbox series

[v9,Kernel,4/5] vfio iommu: Implementation of ioctl to get dirty pages bitmap.

Message ID 1573578220-7530-5-git-send-email-kwankhede@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Add KABIs to support migration for VFIO devices | expand

Commit Message

Kirti Wankhede Nov. 12, 2019, 5:03 p.m. UTC
IOMMU container maintains list of external pinned pages. Bitmap of pinned
pages for input IO virtual address range is created and returned.
IO virtual address range should be from a single mapping created by
map request. Input bitmap_size is validated by calculating the size of
requested range.
This ioctl returns bitmap of dirty pages, its user space application
responsibility to copy content of dirty pages from source to destination
during migration.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Neo Jia <cjia@nvidia.com>
---
 drivers/vfio/vfio_iommu_type1.c | 92 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

Comments

Alex Williamson Nov. 12, 2019, 10:30 p.m. UTC | #1
On Tue, 12 Nov 2019 22:33:39 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> IOMMU container maintains list of external pinned pages. Bitmap of pinned
> pages for input IO virtual address range is created and returned.
> IO virtual address range should be from a single mapping created by
> map request. Input bitmap_size is validated by calculating the size of
> requested range.
> This ioctl returns bitmap of dirty pages, its user space application
> responsibility to copy content of dirty pages from source to destination
> during migration.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 92 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 92 insertions(+)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 2ada8e6cdb88..ac176e672857 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -850,6 +850,81 @@ static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
>  	return bitmap;
>  }
>  
> +/*
> + * start_iova is the reference from where bitmaping started. This is called
> + * from DMA_UNMAP where start_iova can be different than iova

Why not simply call this with a pointer to the bitmap relative to the
start of the iova?

> + */
> +
> +static int vfio_iova_dirty_bitmap(struct vfio_iommu *iommu, dma_addr_t iova,
> +				  size_t size, dma_addr_t start_iova,
> +				  unsigned long *bitmap)
> +{
> +	struct vfio_dma *dma;
> +	dma_addr_t temp_iova = iova;
> +
> +	dma = vfio_find_dma(iommu, iova, size);
> +	if (!dma)

The UAPI did not define that the user can only ask for the dirty bitmap
across a mapped range.

> +		return -EINVAL;
> +
> +	/*
> +	 * Range should be from a single mapping created by map request.
> +	 */

The UAPI also did not specify this as a requirement.

> +
> +	if ((iova < dma->iova) ||
> +	    ((dma->iova + dma->size) < (iova + size)))
> +		return -EINVAL;

Nor this.

So the actual implemented UAPI is that the user must call this over
some portion of, but not exceeding a single previously mapped DMA
range.  Why so restrictive?

> +
> +	while (temp_iova < iova + size) {
> +		struct vfio_pfn *vpfn = NULL;
> +
> +		vpfn = vfio_find_vpfn(dma, temp_iova);
> +		if (vpfn)
> +			__bitmap_set(bitmap, vpfn->iova - start_iova, 1);
> +
> +		temp_iova += PAGE_SIZE;

Seems like waking the rb tree would be far more efficient.  Also, if
dma->iommu_mapped, mark all pages dirty until we figure out how to
avoid it.

> +	}
> +
> +	return 0;
> +}
> +
> +static int verify_bitmap_size(unsigned long npages, unsigned long bitmap_size)
> +{
> +	unsigned long bsize = ALIGN(npages, BITS_PER_LONG) / 8;
> +
> +	if ((bitmap_size == 0) || (bitmap_size < bsize))
> +		return -EINVAL;
> +	return 0;
> +}
> +
> +static int vfio_iova_get_dirty_bitmap(struct vfio_iommu *iommu,
> +				struct vfio_iommu_type1_dirty_bitmap *range)
> +{
> +	unsigned long *bitmap;
> +	int ret;
> +
> +	ret = verify_bitmap_size(range->size >> PAGE_SHIFT, range->bitmap_size);
> +	if (ret)
> +		return ret;
> +
> +	/* one bit per page */
> +	bitmap = bitmap_zalloc(range->size >> PAGE_SHIFT, GFP_KERNEL);

This creates a DoS vector, we need to be able to directly use the user
bitmap or chunk words into it using a confined size (ex. a user can
with args 0 to UIN64_MAX). Thanks,

Alex

> +	if (!bitmap)
> +		return -ENOMEM;
> +
> +	mutex_lock(&iommu->lock);
> +	ret = vfio_iova_dirty_bitmap(iommu, range->iova, range->size,
> +				     range->iova, bitmap);
> +	mutex_unlock(&iommu->lock);
> +
> +	if (!ret) {
> +		if (copy_to_user(range->bitmap, bitmap, range->bitmap_size))
> +			ret = -EFAULT;
> +	}
> +
> +	bitmap_free(bitmap);
> +	return ret;
> +}
> +
>  static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>  			     struct vfio_iommu_type1_dma_unmap *unmap)
>  {
> @@ -2297,6 +2372,23 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
>  
>  		return copy_to_user((void __user *)arg, &unmap, minsz) ?
>  			-EFAULT : 0;
> +	} else if (cmd == VFIO_IOMMU_GET_DIRTY_BITMAP) {
> +		struct vfio_iommu_type1_dirty_bitmap range;
> +
> +		/* Supported for v2 version only */
> +		if (!iommu->v2)
> +			return -EACCES;
> +
> +		minsz = offsetofend(struct vfio_iommu_type1_dirty_bitmap,
> +					bitmap);
> +
> +		if (copy_from_user(&range, (void __user *)arg, minsz))
> +			return -EFAULT;
> +
> +		if (range.argsz < minsz)
> +			return -EINVAL;
> +
> +		return vfio_iova_get_dirty_bitmap(iommu, &range);
>  	}
>  
>  	return -ENOTTY;
diff mbox series

Patch

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 2ada8e6cdb88..ac176e672857 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -850,6 +850,81 @@  static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
 	return bitmap;
 }
 
+/*
+ * start_iova is the reference from where bitmaping started. This is called
+ * from DMA_UNMAP where start_iova can be different than iova
+ */
+
+static int vfio_iova_dirty_bitmap(struct vfio_iommu *iommu, dma_addr_t iova,
+				  size_t size, dma_addr_t start_iova,
+				  unsigned long *bitmap)
+{
+	struct vfio_dma *dma;
+	dma_addr_t temp_iova = iova;
+
+	dma = vfio_find_dma(iommu, iova, size);
+	if (!dma)
+		return -EINVAL;
+
+	/*
+	 * Range should be from a single mapping created by map request.
+	 */
+
+	if ((iova < dma->iova) ||
+	    ((dma->iova + dma->size) < (iova + size)))
+		return -EINVAL;
+
+	while (temp_iova < iova + size) {
+		struct vfio_pfn *vpfn = NULL;
+
+		vpfn = vfio_find_vpfn(dma, temp_iova);
+		if (vpfn)
+			__bitmap_set(bitmap, vpfn->iova - start_iova, 1);
+
+		temp_iova += PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+static int verify_bitmap_size(unsigned long npages, unsigned long bitmap_size)
+{
+	unsigned long bsize = ALIGN(npages, BITS_PER_LONG) / 8;
+
+	if ((bitmap_size == 0) || (bitmap_size < bsize))
+		return -EINVAL;
+	return 0;
+}
+
+static int vfio_iova_get_dirty_bitmap(struct vfio_iommu *iommu,
+				struct vfio_iommu_type1_dirty_bitmap *range)
+{
+	unsigned long *bitmap;
+	int ret;
+
+	ret = verify_bitmap_size(range->size >> PAGE_SHIFT, range->bitmap_size);
+	if (ret)
+		return ret;
+
+	/* one bit per page */
+	bitmap = bitmap_zalloc(range->size >> PAGE_SHIFT, GFP_KERNEL);
+	if (!bitmap)
+		return -ENOMEM;
+
+	mutex_lock(&iommu->lock);
+	ret = vfio_iova_dirty_bitmap(iommu, range->iova, range->size,
+				     range->iova, bitmap);
+	mutex_unlock(&iommu->lock);
+
+	if (!ret) {
+		if (copy_to_user(range->bitmap, bitmap, range->bitmap_size))
+			ret = -EFAULT;
+	}
+
+	bitmap_free(bitmap);
+	return ret;
+}
+
 static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 			     struct vfio_iommu_type1_dma_unmap *unmap)
 {
@@ -2297,6 +2372,23 @@  static long vfio_iommu_type1_ioctl(void *iommu_data,
 
 		return copy_to_user((void __user *)arg, &unmap, minsz) ?
 			-EFAULT : 0;
+	} else if (cmd == VFIO_IOMMU_GET_DIRTY_BITMAP) {
+		struct vfio_iommu_type1_dirty_bitmap range;
+
+		/* Supported for v2 version only */
+		if (!iommu->v2)
+			return -EACCES;
+
+		minsz = offsetofend(struct vfio_iommu_type1_dirty_bitmap,
+					bitmap);
+
+		if (copy_from_user(&range, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (range.argsz < minsz)
+			return -EINVAL;
+
+		return vfio_iova_get_dirty_bitmap(iommu, &range);
 	}
 
 	return -ENOTTY;