diff mbox series

[V7,4/7] vfio/type1: restore locked_vm

Message ID 1671568765-297322-5-git-send-email-steven.sistare@oracle.com (mailing list archive)
State New, archived
Headers show
Series fixes for virtual address update | expand

Commit Message

Steve Sistare Dec. 20, 2022, 8:39 p.m. UTC
When a vfio container is preserved across exec or fork-exec, the new
task's mm has a locked_vm count of 0.  After a dma vaddr is updated using
VFIO_DMA_MAP_FLAG_VADDR, locked_vm remains 0, and the pinned memory does
not count against the task's RLIMIT_MEMLOCK.

To restore the correct locked_vm count, when VFIO_DMA_MAP_FLAG_VADDR is
used and the dma's mm has changed, add the dma's locked_vm count to
the new mm->locked_vm, subject to the rlimit.

Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr")
Cc: stable@vger.kernel.org
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
---
 drivers/vfio/vfio_iommu_type1.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

Comments

Jason Gunthorpe Jan. 3, 2023, 3:22 p.m. UTC | #1
On Tue, Dec 20, 2022 at 12:39:22PM -0800, Steve Sistare wrote:
> When a vfio container is preserved across exec or fork-exec, the new
> task's mm has a locked_vm count of 0.  After a dma vaddr is updated using
> VFIO_DMA_MAP_FLAG_VADDR, locked_vm remains 0, and the pinned memory does
> not count against the task's RLIMIT_MEMLOCK.
> 
> To restore the correct locked_vm count, when VFIO_DMA_MAP_FLAG_VADDR is
> used and the dma's mm has changed, add the dma's locked_vm count to
> the new mm->locked_vm, subject to the rlimit.
> 
> Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr")
> Cc: stable@vger.kernel.org
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 32 ++++++++++++++++++++++++++++++++
>  1 file changed, 32 insertions(+)

But you should subtract it from the old one as well?

Jason
Steve Sistare Jan. 3, 2023, 6:12 p.m. UTC | #2
On 1/3/2023 10:22 AM, Jason Gunthorpe wrote:
> On Tue, Dec 20, 2022 at 12:39:22PM -0800, Steve Sistare wrote:
>> When a vfio container is preserved across exec or fork-exec, the new
>> task's mm has a locked_vm count of 0.  After a dma vaddr is updated using
>> VFIO_DMA_MAP_FLAG_VADDR, locked_vm remains 0, and the pinned memory does
>> not count against the task's RLIMIT_MEMLOCK.
>>
>> To restore the correct locked_vm count, when VFIO_DMA_MAP_FLAG_VADDR is
>> used and the dma's mm has changed, add the dma's locked_vm count to
>> the new mm->locked_vm, subject to the rlimit.
>>
>> Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr")
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
>> ---
>>  drivers/vfio/vfio_iommu_type1.c | 32 ++++++++++++++++++++++++++++++++
>>  1 file changed, 32 insertions(+)
> 
> But you should subtract it from the old one as well?

Yes, as implemented.  I'll add that to the commit message.

- Steve
diff mbox series

Patch

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 588d690..1036736 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1590,6 +1590,35 @@  static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
 	return list_empty(iova);
 }
 
+static int vfio_change_dma_owner(struct vfio_dma *dma)
+{	/* Re-home dma's locked_vm charge onto the caller's mm; 0 or mm_lock_acct() error */
+	int ret = 0;	/* stays 0 when the caller's mm already owns the dma */
+	struct task_struct *task = current->group_leader;
+	struct mm_struct *mm = current->mm;
+
+	if (mm != dma->mm) {	/* caller's mm differs from the one recorded in dma */
+		long npage = dma->locked_vm;	/* amount to move between the two mms */
+		bool lock_cap = capable(CAP_IPC_LOCK);
+
+		ret = mm_lock_acct(task, mm, lock_cap, npage, false);	/* charge the new mm first */
+		if (ret)
+			return ret;	/* no dma field has been modified at this point */
+
+		mm_lock_acct(dma->task, dma->mm, dma->lock_cap, -npage, true);	/* uncharge old mm; result ignored */
+
+		if (dma->task != task) {	/* swap the task reference only if the task changed */
+			put_task_struct(dma->task);
+			dma->task = get_task_struct(task);
+		}
+		mmdrop(dma->mm);	/* drop the reference on the old mm */
+		dma->mm = mm;
+		mmgrab(dma->mm);	/* take a reference on the new mm */
+		dma->lock_cap = lock_cap;	/* record the new owner's CAP_IPC_LOCK state */
+	}
+
+	return ret;
+}
+
 static int vfio_dma_do_map(struct vfio_iommu *iommu,
 			   struct vfio_iommu_type1_dma_map *map)
 {
@@ -1639,6 +1668,9 @@  static int vfio_dma_do_map(struct vfio_iommu *iommu,
 			   dma->size != size) {
 			ret = -EINVAL;
 		} else {
+			ret = vfio_change_dma_owner(dma);
+			if (ret)
+				goto out_unlock;
 			dma->vaddr = vaddr;
 			dma->vaddr_invalid = false;
 			iommu->vaddr_invalid_count--;