[V5,4/7] vfio/type1: restore locked_vm

Message ID 1671141424-81853-5-git-send-email-steven.sistare@oracle.com
State New, archived
Series fixes for virtual address update

Commit Message

Steven Sistare Dec. 15, 2022, 9:57 p.m. UTC
When a vfio container is preserved across exec or fork-exec, the new
task's mm has a locked_vm count of 0.  After a dma vaddr is updated using
VFIO_DMA_MAP_FLAG_VADDR, locked_vm remains 0, and the pinned memory does
not count against the task's RLIMIT_MEMLOCK.

To restore the correct locked_vm count, when VFIO_DMA_MAP_FLAG_VADDR is
used and the dma's mm has changed, add the mapping's pinned page count to
the new mm->locked_vm, subject to the rlimit.  Now that mediated devices
are excluded when using VFIO_UPDATE_VADDR, the amount of pinned memory
equals the size of the mapping less the reserved page count.

Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr")

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 drivers/vfio/vfio_iommu_type1.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
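For context, the scenario the commit message describes can be sketched from userspace. The snippet below is an illustration only, not part of this series: the helper names and the omitted error handling are assumptions, and it presumes the container fd and the iova/size of the original mapping are preserved across exec. Before exec, the old task invalidates the vaddr with VFIO_DMA_UNMAP_FLAG_VADDR; after exec, the new task, whose mm starts with locked_vm == 0, supplies the new vaddr with VFIO_DMA_MAP_FLAG_VADDR, which is the point at which this patch charges the pinned pages to the new mm.

/*
 * Hypothetical helpers, for illustration only.  The ioctls and flags are
 * from <linux/vfio.h>; the helper names and missing error handling are
 * assumptions, not part of the posted series.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Old task, before exec: invalidate the vaddr but keep the mapping. */
static int invalidate_vaddr(int container, __u64 iova, __u64 size)
{
	struct vfio_iommu_type1_dma_unmap unmap = {
		.argsz = sizeof(unmap),
		.flags = VFIO_DMA_UNMAP_FLAG_VADDR,
		.iova  = iova,
		.size  = size,
	};

	return ioctl(container, VFIO_IOMMU_UNMAP_DMA, &unmap);
}

/* New task, after exec: register the new vaddr for the same iova range. */
static int update_vaddr(int container, __u64 iova, __u64 size, void *vaddr)
{
	struct vfio_iommu_type1_dma_map map = {
		.argsz = sizeof(map),
		.flags = VFIO_DMA_MAP_FLAG_VADDR,
		.vaddr = (__u64)(uintptr_t)vaddr,
		.iova  = iova,
		.size  = size,
	};

	return ioctl(container, VFIO_IOMMU_MAP_DMA, &map);
}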

Comments

Jason Gunthorpe Dec. 16, 2022, 2:12 p.m. UTC | #1
On Thu, Dec 15, 2022 at 01:57:01PM -0800, Steve Sistare wrote:
> When a vfio container is preserved across exec or fork-exec, the new
> task's mm has a locked_vm count of 0.  After a dma vaddr is updated using
> VFIO_DMA_MAP_FLAG_VADDR, locked_vm remains 0, and the pinned memory does
> not count against the task's RLIMIT_MEMLOCK.
> 
> To restore the correct locked_vm count, when VFIO_DMA_MAP_FLAG_VADDR is
> used and the dma's mm has changed, add the mapping's pinned page count to
> the new mm->locked_vm, subject to the rlimit.  Now that mediated devices
> are excluded when using VFIO_UPDATE_VADDR, the amount of pinned memory
> equals the size of the mapping less the reserved page count.
> 
> Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr")
> 
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 35 +++++++++++++++++++++++++++++++++++
>  1 file changed, 35 insertions(+)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index add87cd..70b52e9 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -1588,6 +1588,38 @@ static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
>  	return list_empty(iova);
>  }
>  
> +static int vfio_change_dma_owner(struct vfio_dma *dma)
> +{
> +	struct task_struct *new_task = current->group_leader;
> +
> +	if (new_task->mm != dma->mm) {
> +		long npage = (dma->size >> PAGE_SHIFT) - dma->reserved_pages;
> +		bool new_lock_cap = capable(CAP_IPC_LOCK);
> +		int ret = mmap_write_lock_killable(new_task->mm);
> +
> +		if (ret)
> +			return ret;
> +
> +		ret = __account_locked_vm(new_task->mm, npage, true,
> +					  new_task, new_lock_cap);
> +		mmap_write_unlock(new_task->mm);
> +		if (ret)
> +			return ret;
> +
> +		vfio_lock_acct(dma, -npage, true);
> +		if (dma->task != new_task) {
> +			put_task_struct(dma->task);
> +			dma->task = get_task_struct(new_task);
> +		}
> +		mmdrop(dma->mm);
> +		dma->mm = new_task->mm;

This also should be current->mm not current->group_leader->mm

Jason
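
For reference, a minimal sketch of the adjustment the comment above suggests, accounting against current->mm rather than the group leader's mm, with the rest of the function kept as posted. This is an illustration of the review feedback, not the next revision of the patch.

static int vfio_change_dma_owner(struct vfio_dma *dma)
{
	struct task_struct *task = current->group_leader;
	struct mm_struct *mm = current->mm;	/* accounting target per the review comment */

	if (mm != dma->mm) {
		long npage = (dma->size >> PAGE_SHIFT) - dma->reserved_pages;
		bool lock_cap = capable(CAP_IPC_LOCK);
		int ret = mmap_write_lock_killable(mm);

		if (ret)
			return ret;

		/* charge the pinned pages to the new mm, subject to RLIMIT_MEMLOCK */
		ret = __account_locked_vm(mm, npage, true, task, lock_cap);
		mmap_write_unlock(mm);
		if (ret)
			return ret;

		/* undo the charge against the old mm, then switch owners */
		vfio_lock_acct(dma, -npage, true);
		if (dma->task != task) {
			put_task_struct(dma->task);
			dma->task = get_task_struct(task);
		}
		mmdrop(dma->mm);
		dma->mm = mm;
		mmgrab(dma->mm);
		dma->lock_cap = lock_cap;
	}

	return 0;
}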

Patch

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index add87cd..70b52e9 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1588,6 +1588,38 @@ static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
 	return list_empty(iova);
 }
 
+static int vfio_change_dma_owner(struct vfio_dma *dma)
+{
+	struct task_struct *new_task = current->group_leader;
+
+	if (new_task->mm != dma->mm) {
+		long npage = (dma->size >> PAGE_SHIFT) - dma->reserved_pages;
+		bool new_lock_cap = capable(CAP_IPC_LOCK);
+		int ret = mmap_write_lock_killable(new_task->mm);
+
+		if (ret)
+			return ret;
+
+		ret = __account_locked_vm(new_task->mm, npage, true,
+					  new_task, new_lock_cap);
+		mmap_write_unlock(new_task->mm);
+		if (ret)
+			return ret;
+
+		vfio_lock_acct(dma, -npage, true);
+		if (dma->task != new_task) {
+			put_task_struct(dma->task);
+			dma->task = get_task_struct(new_task);
+		}
+		mmdrop(dma->mm);
+		dma->mm = new_task->mm;
+		mmgrab(dma->mm);
+		dma->lock_cap = new_lock_cap;
+	}
+
+	return 0;
+}
+
 static int vfio_dma_do_map(struct vfio_iommu *iommu,
 			   struct vfio_iommu_type1_dma_map *map)
 {
@@ -1637,6 +1669,9 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 			   dma->size != size) {
 			ret = -EINVAL;
 		} else {
+			ret = vfio_change_dma_owner(dma);
+			if (ret)
+				goto out_unlock;
 			dma->vaddr = vaddr;
 			dma->vaddr_invalid = false;
 			iommu->vaddr_invalid_count--;