diff mbox series

[v4,1/2] mm/hmm: make full use of walk_page_range()

Message ID 20191104222141.5173-2-rcampbell@nvidia.com (mailing list archive)
State New
Headers show
Series HMM tests and minor fixes | expand

Commit Message

Ralph Campbell Nov. 4, 2019, 10:21 p.m. UTC
hmm_range_fault() calls find_vma() and walk_page_range() in a loop.
This is unnecessary duplication since walk_page_range() calls find_vma()
in a loop already.
Simplify hmm_range_fault() by defining a walk_test() callback function
to filter unhandled vmas.
This also fixes a bug where hmm_range_fault() was not checking
start >= vma->vm_start before checking vma->vm_flags so hmm_range_fault()
could return an error based on the wrong vma for the requested range.
It also fixes a bug where an error was returned when the vma has no read
access and the caller did not request a fault; in that case there should
not be any error return code.

Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Christoph Hellwig <hch@lst.de>
---
 mm/hmm.c | 121 ++++++++++++++++++++++++++-----------------------------
 1 file changed, 58 insertions(+), 63 deletions(-)

Comments

Christoph Hellwig Nov. 12, 2019, 3:18 p.m. UTC | #1
Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

Although we could clean this up a bit more by removing the start
variable:

diff --git a/mm/hmm.c b/mm/hmm.c
index d4984a08ed9b..b5b1ed646c2f 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -667,10 +667,9 @@ static const struct mm_walk_ops hmm_walk_ops = {
  */
 long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 {
-	unsigned long start = range->start;
 	struct hmm_vma_walk hmm_vma_walk = {
 		.range = range,
-		.last = start,
+		.last = range->start,
 		.flags = flags,
 	};
 	struct mm_struct *mm = range->notifier->mm;
@@ -682,9 +681,8 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 		/* If range is no longer valid force retry. */
 		if (mmu_range_check_retry(range->notifier, range->notifier_seq))
 			return -EBUSY;
-		ret = walk_page_range(mm, start, range->end, &hmm_walk_ops,
-				      &hmm_vma_walk);
-		start = hmm_vma_walk.last;
+		ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
+				      &hmm_walk_ops, &hmm_vma_walk);
 	} while (ret == -EBUSY);
 
 	if (ret)
Ralph Campbell Nov. 12, 2019, 10:21 p.m. UTC | #2
On 11/12/19 7:18 AM, Christoph Hellwig wrote:
> Looks good,
> 
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> 
> Although we could clean this up a tidbit more by removing the start
> variable:
> 
> diff --git a/mm/hmm.c b/mm/hmm.c
> index d4984a08ed9b..b5b1ed646c2f 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -667,10 +667,9 @@ static const struct mm_walk_ops hmm_walk_ops = {
>    */
>   long hmm_range_fault(struct hmm_range *range, unsigned int flags)
>   {
> -	unsigned long start = range->start;
>   	struct hmm_vma_walk hmm_vma_walk = {
>   		.range = range,
> -		.last = start,
> +		.last = range->start,
>   		.flags = flags,
>   	};
>   	struct mm_struct *mm = range->notifier->mm;
> @@ -682,9 +681,8 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
>   		/* If range is no longer valid force retry. */
>   		if (mmu_range_check_retry(range->notifier, range->notifier_seq))
>   			return -EBUSY;
> -		ret = walk_page_range(mm, start, range->end, &hmm_walk_ops,
> -				      &hmm_vma_walk);
> -		start = hmm_vma_walk.last;
> +		ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
> +				      &hmm_walk_ops, &hmm_vma_walk);
>   	} while (ret == -EBUSY);
>   
>   	if (ret)
> 

Thanks for the review.
I'll add this to patch 1 since I need to send a v5 for patch 2.
Jason Gunthorpe Nov. 14, 2019, 2:24 p.m. UTC | #3
On Mon, Nov 04, 2019 at 02:21:40PM -0800, Ralph Campbell wrote:
> hmm_range_fault() calls find_vma() and walk_page_range() in a loop.
> This is unnecessary duplication since walk_page_range() calls find_vma()
> in a loop already.
> Simplify hmm_range_fault() by defining a walk_test() callback function
> to filter unhandled vmas.
> This also fixes a bug where hmm_range_fault() was not checking
> start >= vma->vm_start before checking vma->vm_flags so hmm_range_fault()
> could return an error based on the wrong vma for the requested range.
> It also fixes a bug when the vma has no read access and the caller did
> not request a fault, there shouldn't be any error return code.
> 
> Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
> Cc: "Jérôme Glisse" <jglisse@redhat.com>
> Cc: Jason Gunthorpe <jgg@mellanox.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> ---
>  mm/hmm.c | 121 ++++++++++++++++++++++++++-----------------------------
>  1 file changed, 58 insertions(+), 63 deletions(-)

Applied to hmm.git with Christoph's hunk merged in

Thanks,
Jason
diff mbox series

Patch

diff --git a/mm/hmm.c b/mm/hmm.c
index 9e9d3f4ea17c..d4984a08ed9b 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -65,18 +65,15 @@  static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
 	return -EFAULT;
 }
 
-static int hmm_pfns_bad(unsigned long addr,
-			unsigned long end,
-			struct mm_walk *walk)
+static int hmm_pfns_fill(unsigned long addr, unsigned long end,
+		struct hmm_range *range, enum hmm_pfn_value_e value)
 {
-	struct hmm_vma_walk *hmm_vma_walk = walk->private;
-	struct hmm_range *range = hmm_vma_walk->range;
 	uint64_t *pfns = range->pfns;
 	unsigned long i;
 
 	i = (addr - range->start) >> PAGE_SHIFT;
 	for (; addr < end; addr += PAGE_SIZE, i++)
-		pfns[i] = range->values[HMM_PFN_ERROR];
+		pfns[i] = range->values[value];
 
 	return 0;
 }
@@ -403,7 +400,7 @@  static int hmm_vma_walk_pmd(pmd_t *pmdp,
 		}
 		return 0;
 	} else if (!pmd_present(pmd))
-		return hmm_pfns_bad(start, end, walk);
+		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
 
 	if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
 		/*
@@ -431,7 +428,7 @@  static int hmm_vma_walk_pmd(pmd_t *pmdp,
 	 * recover.
 	 */
 	if (pmd_bad(pmd))
-		return hmm_pfns_bad(start, end, walk);
+		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
 
 	ptep = pte_offset_map(pmdp, addr);
 	i = (addr - range->start) >> PAGE_SHIFT;
@@ -589,13 +586,47 @@  static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
 #define hmm_vma_walk_hugetlb_entry NULL
 #endif /* CONFIG_HUGETLB_PAGE */
 
-static void hmm_pfns_clear(struct hmm_range *range,
-			   uint64_t *pfns,
-			   unsigned long addr,
-			   unsigned long end)
+static int hmm_vma_walk_test(unsigned long start, unsigned long end,
+			     struct mm_walk *walk)
 {
-	for (; addr < end; addr += PAGE_SIZE, pfns++)
-		*pfns = range->values[HMM_PFN_NONE];
+	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
+	struct vm_area_struct *vma = walk->vma;
+
+	/*
+	 * Skip vma ranges that don't have struct page backing them or
+	 * map I/O devices directly.
+	 */
+	if (vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP))
+		return -EFAULT;
+
+	/*
+	 * If the vma does not allow read access, then assume that it does not
+	 * allow write access either. HMM does not support architectures
+	 * that allow write without read.
+	 */
+	if (!(vma->vm_flags & VM_READ)) {
+		bool fault, write_fault;
+
+		/*
+		 * Check to see if a fault is requested for any page in the
+		 * range.
+		 */
+		hmm_range_need_fault(hmm_vma_walk, range->pfns +
+					((start - range->start) >> PAGE_SHIFT),
+					(end - start) >> PAGE_SHIFT,
+					0, &fault, &write_fault);
+		if (fault || write_fault)
+			return -EFAULT;
+
+		hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
+		hmm_vma_walk->last = end;
+
+		/* Skip this vma and continue processing the next vma. */
+		return 1;
+	}
+
+	return 0;
 }
 
 static const struct mm_walk_ops hmm_walk_ops = {
@@ -603,6 +634,7 @@  static const struct mm_walk_ops hmm_walk_ops = {
 	.pmd_entry	= hmm_vma_walk_pmd,
 	.pte_hole	= hmm_vma_walk_hole,
 	.hugetlb_entry	= hmm_vma_walk_hugetlb_entry,
+	.test_walk	= hmm_vma_walk_test,
 };
 
 /**
@@ -635,11 +667,13 @@  static const struct mm_walk_ops hmm_walk_ops = {
  */
 long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 {
-	const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
-	unsigned long start = range->start, end;
-	struct hmm_vma_walk hmm_vma_walk;
+	unsigned long start = range->start;
+	struct hmm_vma_walk hmm_vma_walk = {
+		.range = range,
+		.last = start,
+		.flags = flags,
+	};
 	struct mm_struct *mm = range->notifier->mm;
-	struct vm_area_struct *vma;
 	int ret;
 
 	lockdep_assert_held(&mm->mmap_sem);
@@ -648,52 +682,13 @@  long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 		/* If range is no longer valid force retry. */
 		if (mmu_range_check_retry(range->notifier, range->notifier_seq))
 			return -EBUSY;
+		ret = walk_page_range(mm, start, range->end, &hmm_walk_ops,
+				      &hmm_vma_walk);
+		start = hmm_vma_walk.last;
+	} while (ret == -EBUSY);
 
-		vma = find_vma(mm, start);
-		if (vma == NULL || (vma->vm_flags & device_vma))
-			return -EFAULT;
-
-		if (!(vma->vm_flags & VM_READ)) {
-			/*
-			 * If vma do not allow read access, then assume that it
-			 * does not allow write access, either. HMM does not
-			 * support architecture that allow write without read.
-			 */
-			hmm_pfns_clear(range, range->pfns,
-				range->start, range->end);
-			return -EPERM;
-		}
-
-		hmm_vma_walk.pgmap = NULL;
-		hmm_vma_walk.last = start;
-		hmm_vma_walk.flags = flags;
-		hmm_vma_walk.range = range;
-		end = min(range->end, vma->vm_end);
-
-		walk_page_range(vma->vm_mm, start, end, &hmm_walk_ops,
-				&hmm_vma_walk);
-
-		do {
-			ret = walk_page_range(vma->vm_mm, start, end,
-					&hmm_walk_ops, &hmm_vma_walk);
-			start = hmm_vma_walk.last;
-
-			/* Keep trying while the range is valid. */
-		} while (ret == -EBUSY &&
-			 !mmu_range_check_retry(range->notifier,
-						range->notifier_seq));
-
-		if (ret) {
-			unsigned long i;
-
-			i = (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
-			hmm_pfns_clear(range, &range->pfns[i],
-				hmm_vma_walk.last, range->end);
-			return ret;
-		}
-		start = end;
-
-	} while (start < range->end);
+	if (ret)
+		return ret;
 
 	return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
 }