diff mbox series

[v2,06/10] mm: avoid using vma_merge() for new VMAs

Message ID 57f55a1473586a88211e04b44c6b128332d4272c.1724441678.git.lorenzo.stoakes@oracle.com (mailing list archive)
State New
Headers show
Series mm: remove vma_merge() | expand

Commit Message

Lorenzo Stoakes Aug. 23, 2024, 8:07 p.m. UTC
Abstract vma_merge_new_vma() to use vma_merge_struct and rename the
resultant function vma_merge_new_range() to be clear what the purpose of
this function is - a new VMA is desired in the specified range, and we wish
to see if it is possible to 'merge' surrounding VMAs into this range rather
than having to allocate a new VMA.

Note that this function uses vma_expand() exclusively, so it adopts its
requirement that the iterator point at or before the gap. We add an assert
to this effect.

This is as opposed to vma_merge_existing_range(), which will be introduced
in a subsequent commit, and provide the same functionality for cases in
which we are modifying an existing VMA.

In mmap_region() and do_brk_flags() we open code scenarios where we prefer
to use vma_expand() rather than invoke a full vma_merge() operation.

Abstract this logic, eliminate all of the open-coding, and use the same
logic for all cases where we add new VMAs; rather than ultimately invoking
vma_merge(), use vma_expand().

Doing so removes duplication and simplifies VMA merging in all such cases,
laying the ground for us to eliminate the merging of new VMAs in
vma_merge() altogether.

Also add the ability for the vmg to track state and to report errors,
allowing us to differentiate a failed merge from an inability to
allocate memory in callers.

This makes it far easier to understand what is happening in these cases,
avoiding confusion and bugs and allowing for future optimisation.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 mm/mmap.c                        |  93 +++++-------------
 mm/vma.c                         | 162 ++++++++++++++++++++++++++-----
 mm/vma.h                         |  20 +++-
 tools/testing/vma/vma.c          |  33 ++++++-
 tools/testing/vma/vma_internal.h |   6 ++
 5 files changed, 216 insertions(+), 98 deletions(-)

Comments

Lorenzo Stoakes Aug. 27, 2024, 11:41 a.m. UTC | #1
On Fri, Aug 23, 2024 at 09:07:01PM GMT, Lorenzo Stoakes wrote:

[snip]

>  void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb)
> @@ -1426,9 +1536,10 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
>  	struct vm_area_struct *vma = *vmap;
>  	unsigned long vma_start = vma->vm_start;
>  	struct mm_struct *mm = vma->vm_mm;
> -	struct vm_area_struct *new_vma, *prev;
> +	struct vm_area_struct *new_vma;
>  	bool faulted_in_anon_vma = true;
>  	VMA_ITERATOR(vmi, mm, addr);
> +	VMG_VMA_STATE(vmg, &vmi, NULL, vma, addr, addr + len);
>
>  	/*
>  	 * If anonymous vma has not yet been faulted, update new pgoff
> @@ -1439,11 +1550,18 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
>  		faulted_in_anon_vma = false;
>  	}
>
> -	new_vma = find_vma_prev(mm, addr, &prev);
> +	new_vma = find_vma_prev(mm, addr, &vmg.prev);
>  	if (new_vma && new_vma->vm_start < addr + len)
>  		return NULL;	/* should never get here */
>
> -	new_vma = vma_merge_new_vma(&vmi, prev, vma, addr, addr + len, pgoff);
> +	vmg.vma = NULL; /* New VMA range. */
> +	vmg.pgoff = pgoff;
> +	vmg.next = vma_next(&vmi);
> +	vma_prev(&vmi);
> +	vma_iter_next_range(&vmi);
> +
> +	new_vma = vma_merge_new_range(&vmg);
> +
>  	if (new_vma) {
>  		/*
>  		 * Source vma may have been merged into new_vma

[snip]

Hi Andrew - could you squash the attached fix-patch into this please? As
there is an issue with a CONFIG_DEBUG_VM check firing when copy_vma()
unnecessarily moves the VMA iterator as reported at [0].

Thanks!

[0]: https://lore.kernel.org/linux-mm/202408271452.c842a71d-lkp@intel.com/

----8<----
From 53b41cc9ddfaf30f8a037f466686d942e0e64943 Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Date: Tue, 27 Aug 2024 11:59:27 +0100
Subject: [PATCH] mm: only advance iterator if prev exists

If we have no VMAs prior to us, such as in a case where we are mremap()'ing
a VMA backwards, then we will advance the iterator backwards to 0, before
moving to the original range again.

The intent is to position the iterator at or before the gap, therefore we
must avoid this - this is simply addressed by only advancing the iterator
should vma_prev() yield a result.

Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202408271452.c842a71d-lkp@intel.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

---
 mm/vma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/vma.c b/mm/vma.c
index 8a5fa15f46a2..7d948edbbb9e 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -1557,8 +1557,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	vmg.vma = NULL; /* New VMA range. */
 	vmg.pgoff = pgoff;
 	vmg.next = vma_next(&vmi);
-	vma_prev(&vmi);
-	vma_iter_next_range(&vmi);
+	if (vma_prev(&vmi))
+		vma_iter_next_range(&vmi);

 	new_vma = vma_merge_new_range(&vmg);

--
2.46.0
Liam R. Howlett Aug. 28, 2024, 8:52 p.m. UTC | #2
* Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [240823 16:07]:
> Abstract vma_merge_new_vma() to use vma_merge_struct and rename the
> resultant function vma_merge_new_range() to be clear what the purpose of
> this function is - a new VMA is desired in the specified range, and we wish
> to see if it is possible to 'merge' surrounding VMAs into this range rather
> than having to allocate a new VMA.
> 
> Note that this function uses vma_extend() exclusively, so adopts its
> requirement that the iterator point at or before the gap. We add an assert
> to this effect.
> 
> This is as opposed to vma_merge_existing_range(), which will be introduced
> in a subsequent commit, and provide the same functionality for cases in
> which we are modifying an existing VMA.
> 
> In mmap_region() and do_brk_flags() we open code scenarios where we prefer
> to use vma_expand() rather than invoke a full vma_merge() operation.
> 
> Abstract this logic and eliminate all of the open-coding, and also use the
> same logic for all cases where we add new VMAs to, rather than ultimately
> use vma_merge(), rather use vma_expand().
> 
> Doing so removes duplication and simplifies VMA merging in all such cases,
> laying the ground for us to eliminate the merging of new VMAs in
> vma_merge() altogether.
> 
> Also add the ability for the vmg to track state, and able to report errors,
> allowing for us to differentiate a failed merge from an inability to
> allocate memory in callers.
> 
> This makes it far easier to understand what is happening in these cases
> avoiding confusion, bugs and allowing for future optimisation.
> 
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> ---
>  mm/mmap.c                        |  93 +++++-------------
>  mm/vma.c                         | 162 ++++++++++++++++++++++++++-----
>  mm/vma.h                         |  20 +++-
>  tools/testing/vma/vma.c          |  33 ++++++-
>  tools/testing/vma/vma_internal.h |   6 ++
>  5 files changed, 216 insertions(+), 98 deletions(-)
> 
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 0d242c9b1f4c..80d70ed099cf 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1364,8 +1364,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
>  {
>  	struct mm_struct *mm = current->mm;
>  	struct vm_area_struct *vma = NULL;
> -	struct vm_area_struct *next, *prev, *merge;
>  	pgoff_t pglen = PHYS_PFN(len);
> +	struct vm_area_struct *merge;
>  	unsigned long charged = 0;
>  	struct vma_munmap_struct vms;
>  	struct ma_state mas_detach;
> @@ -1389,13 +1389,13 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
>  		if (vms_gather_munmap_vmas(&vms, &mas_detach))
>  			return -ENOMEM;
>  
> -		next = vmg.next = vms.next;
> -		prev = vmg.prev = vms.prev;
> +		vmg.next = vms.next;
> +		vmg.prev = vms.prev;
>  		vma = NULL;
>  	} else {
> -		next = vmg.next = vma_next(&vmi);
> -		prev = vmg.prev = vma_prev(&vmi);
> -		if (prev)
> +		vmg.next = vma_next(&vmi);
> +		vmg.prev = vma_prev(&vmi);
> +		if (vmg.prev)
>  			vma_iter_next_range(&vmi);
>  	}
>  
> @@ -1417,45 +1417,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
>  		vmg.flags = vm_flags;
>  	}
>  
> -	if (vm_flags & VM_SPECIAL)
> -		goto cannot_expand;
> -
> -	/* Attempt to expand an old mapping */
> -	/* Check next */
> -	if (next && next->vm_start == end && can_vma_merge_before(&vmg)) {
> -		vmg.end = next->vm_end;
> -		vma = vmg.vma = next;
> -		vmg.pgoff = next->vm_pgoff - pglen;
> -		/*
> -		 * We set this here so if we will merge with the previous VMA in
> -		 * the code below, can_vma_merge_after() ensures anon_vma
> -		 * compatibility between prev and next.
> -		 */
> -		vmg.anon_vma = vma->anon_vma;
> -		vmg.uffd_ctx = vma->vm_userfaultfd_ctx;
> -	}
> -
> -	/* Check prev */
> -	if (prev && prev->vm_end == addr && can_vma_merge_after(&vmg)) {
> -		vmg.start = prev->vm_start;
> -		vma = vmg.vma = prev;
> -		vmg.pgoff = prev->vm_pgoff;
> -		vma_prev(&vmi); /* Equivalent to going to the previous range */
> -	}
> -
> -	if (vma) {
> -		/* Actually expand, if possible */
> -		if (!vma_expand(&vmg)) {
> -			khugepaged_enter_vma(vma, vm_flags);
> -			goto expanded;
> -		}
> -
> -		/* If the expand fails, then reposition the vma iterator */
> -		if (unlikely(vma == prev))
> -			vma_iter_set(&vmi, addr);
> -	}
> -
> -cannot_expand:
> +	vma = vma_merge_new_range(&vmg);
> +	if (vma)
> +		goto expanded;
>  
>  	/*
>  	 * Determine the object being mapped and call the appropriate
> @@ -1503,10 +1467,11 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
>  		 * If vm_flags changed after call_mmap(), we should try merge
>  		 * vma again as we may succeed this time.
>  		 */
> -		if (unlikely(vm_flags != vma->vm_flags && prev)) {
> -			merge = vma_merge_new_vma(&vmi, prev, vma,
> -						  vma->vm_start, vma->vm_end,
> -						  vma->vm_pgoff);
> +		if (unlikely(vm_flags != vma->vm_flags && vmg.prev)) {
> +			vmg.flags = vma->vm_flags;
> +			/* If this fails, state is reset ready for a reattempt. */
> +			merge = vma_merge_new_range(&vmg);
> +

Extra white space.

>  			if (merge) {
>  				/*
>  				 * ->mmap() can change vma->vm_file and fput
> @@ -1521,6 +1486,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
>  				/* Update vm_flags to pick up the change. */
>  				vm_flags = vma->vm_flags;
>  				goto unmap_writable;
> +			} else {
> +				vma_iter_config(&vmi, addr, end);

This else can be dropped since the if ends in a goto.  I guess, what you
are trying to fix is the merge of the prev (which moved the iterator)
that failed.  Might be easier to read, either way it is correct.

>  			}
>  		}
>  
> @@ -1554,7 +1521,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
>  	vma_link_file(vma);
>  
>  	/*
> -	 * vma_merge() calls khugepaged_enter_vma() either, the below
> +	 * vma_merge_new_range() calls khugepaged_enter_vma() too, the below
>  	 * call covers the non-merge case.
>  	 */
>  	khugepaged_enter_vma(vma, vma->vm_flags);
> @@ -1609,7 +1576,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
>  
>  		vma_iter_set(&vmi, vma->vm_end);
>  		/* Undo any partial mapping done by a device driver. */
> -		unmap_region(&vmi.mas, vma, prev, next);
> +		unmap_region(&vmi.mas, vma, vmg.prev, vmg.next);
>  	}
>  	if (writable_file_mapping)
>  		mapping_unmap_writable(file->f_mapping);
> @@ -1755,7 +1722,6 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
>  		unsigned long addr, unsigned long len, unsigned long flags)
>  {
>  	struct mm_struct *mm = current->mm;
> -	struct vma_prepare vp;
>  
>  	/*
>  	 * Check against address space limits by the changed size
> @@ -1779,25 +1745,12 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
>  		VMG_STATE(vmg, mm, vmi, addr, addr + len, flags, PHYS_PFN(addr));
>  
>  		vmg.prev = vma;
> -		if (can_vma_merge_after(&vmg)) {
> -			vma_iter_config(vmi, vma->vm_start, addr + len);
> -			if (vma_iter_prealloc(vmi, vma))
> -				goto unacct_fail;
> -
> -			vma_start_write(vma);
> -
> -			init_vma_prep(&vp, vma);
> -			vma_prepare(&vp);
> -			vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0);
> -			vma->vm_end = addr + len;
> -			vm_flags_set(vma, VM_SOFTDIRTY);
> -			vma_iter_store(vmi, vma);
> -
> -			vma_complete(&vp, vmi, mm);
> -			validate_mm(mm);
> -			khugepaged_enter_vma(vma, flags);
> +		vma_iter_next_range(vmi);
> +
> +		if (vma_merge_new_range(&vmg))
>  			goto out;
> -		}
> +		else if (vmg_nomem(&vmg))
> +			goto unacct_fail;
>  	}
>  
>  	if (vma)
> diff --git a/mm/vma.c b/mm/vma.c
> index 4867ae722a9a..8a5fa15f46a2 100644
> --- a/mm/vma.c
> +++ b/mm/vma.c
> @@ -464,6 +464,116 @@ void validate_mm(struct mm_struct *mm)
>  }
>  #endif /* CONFIG_DEBUG_VM_MAPLE_TREE */
>  
> +/*
> + * vma_merge_new_range - Attempt to merge a new VMA into address space
> + *
> + * @vmg: Describes the VMA we are adding, in the range @vmg->start to @vmg->end
> + *       (exclusive), which we try to merge with any adjacent VMAs if possible.
> + *
> + * We are about to add a VMA to the address space starting at @vmg->start and
> + * ending at @vmg->end. There are three different possible scenarios:
> + *
> + * 1. There is a VMA with identical properties immediately adjacent to the
> + *    proposed new VMA [@vmg->start, @vmg->end) either before or after it -
> + *    EXPAND that VMA:
> + *
> + * Proposed:       |-----|  or  |-----|
> + * Existing:  |----|                  |----|
> + *
> + * 2. There are VMAs with identical properties immediately adjacent to the
> + *    proposed new VMA [@vmg->start, @vmg->end) both before AND after it -
> + *    EXPAND the former and REMOVE the latter:
> + *
> + * Proposed:       |-----|
> + * Existing:  |----|     |----|
> + *
> + * 3. There are no VMAs immediately adjacent to the proposed new VMA or those
> + *    VMAs do not have identical attributes - NO MERGE POSSIBLE.
> + *
> + * In instances where we can merge, this function returns the expanded VMA which
> + * will have its range adjusted accordingly and the underlying maple tree also
> + * adjusted.
> + *
> + * Returns: In instances where no merge was possible, NULL. Otherwise, a pointer
> + *          to the VMA we expanded.
> + *
> + * This function adjusts @vmg to provide @vmg->next if not already specified,
> + * and adjusts [@vmg->start, @vmg->end) to span the expanded range.
> + *
> + * ASSUMPTIONS:
> + * - The caller must hold a WRITE lock on the mm_struct->mmap_lock.
> + * - The caller must have determined that [@vmg->start, @vmg->end) is empty,
> +     other than VMAs that will be unmapped should the operation succeed.
> + * - The caller must have specified the previous vma in @vmg->prev.
> + * - The caller must have specified the next vma in @vmg->next.
> + * - The caller must have positioned the vmi at or before the gap.
> + */
> +struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg)
> +{
> +	struct vm_area_struct *prev = vmg->prev;
> +	struct vm_area_struct *next = vmg->next;
> +	unsigned long start = vmg->start;
> +	unsigned long end = vmg->end;
> +	pgoff_t pgoff = vmg->pgoff;
> +	pgoff_t pglen = PHYS_PFN(end - start);
> +	bool can_merge_before, can_merge_after;
> +
> +	mmap_assert_write_locked(vmg->mm);
> +	VM_WARN_ON(vmg->vma);
> +	/* vmi must point at or before the gap. */
> +	VM_WARN_ON(vma_iter_addr(vmg->vmi) > end);
> +
> +	vmg->state = VMA_MERGE_NOMERGE;
> +
> +	/* Special VMAs are unmergeable, also if no prev/next. */
> +	if ((vmg->flags & VM_SPECIAL) || (!prev && !next))
> +		return NULL;
> +
> +	can_merge_before = next && next->vm_start == end &&
> +		can_vma_merge_before(vmg);
> +	can_merge_after = prev && prev->vm_end == start &&
> +		can_vma_merge_after(vmg);

Can we please rewrite this as if statements for clarity?

> +
> +	/* If we can merge with the next VMA, adjust vmg accordingly. */
> +	if (can_merge_before &&
> +	    (!can_merge_after || is_mergeable_anon_vma(prev->anon_vma,
> +						       next->anon_vma, NULL))) {
> +		vmg->end = next->vm_end;
> +		vmg->vma = next;
> +		vmg->pgoff = next->vm_pgoff - pglen;
> +	}
> +
> +	/* If we can merge with the previous VMA, adjust vmg accordingly. */
> +	if (can_merge_after) {
> +		vmg->start = prev->vm_start;
> +		vmg->vma = prev;
> +		vmg->pgoff = prev->vm_pgoff;
> +
> +		vma_prev(vmg->vmi); /* Equivalent to going to the previous range */
> +	}
> +
> +	/*
> +	 * Now try to expand adjacent VMA(s). This takes care of removing the
> +	 * following VMA if we have VMAs on both sides.
> +	 */
> +	if (vmg->vma && !vma_expand(vmg)) {
> +		khugepaged_enter_vma(vmg->vma, vmg->flags);

This might be able to be moved into vma_expand().

> +

Extra whitespace

> +		vmg->state = VMA_MERGE_SUCCESS;
> +		return vmg->vma;
> +	}
> +
> +	/* If expansion failed, reset state. Allows us to retry merge later. */
> +	vmg->vma = NULL;
> +	vmg->start = start;
> +	vmg->end = end;
> +	vmg->pgoff = pgoff;
> +	if (vmg->vma == prev)
> +		vma_iter_set(vmg->vmi, start);
> +
> +	return NULL;
> +}
> +
>  /*
>   * vma_expand - Expand an existing VMA
>   *
> @@ -474,7 +584,11 @@ void validate_mm(struct mm_struct *mm)
>   * vmg->next->vm_end.  Checking if the vmg->vma can expand and merge with
>   * vmg->next needs to be handled by the caller.
>   *
> - * Returns: 0 on success
> + * Returns: 0 on success.
> + *
> + * ASSUMPTIONS:
> + * - The caller must hold a WRITE lock on vmg->vma->mm->mmap_lock.
> + * - The caller must have set @vmg->vma and @vmg->next.
>   */
>  int vma_expand(struct vma_merge_struct *vmg)
>  {
> @@ -484,6 +598,8 @@ int vma_expand(struct vma_merge_struct *vmg)
>  	struct vm_area_struct *next = vmg->next;
>  	struct vma_prepare vp;
>  
> +	mmap_assert_write_locked(vmg->mm);
> +

There are a few unnecessary whitespaces here..

>  	vma_start_write(vma);
>  	if (next && (vma != next) && (vmg->end == next->vm_end)) {
>  		int ret;
> @@ -516,6 +632,7 @@ int vma_expand(struct vma_merge_struct *vmg)
>  	return 0;
>  
>  nomem:
> +	vmg->state = VMA_MERGE_ERROR_NOMEM;
>  	if (anon_dup)
>  		unlink_anon_vmas(anon_dup);
>  	return -ENOMEM;
> @@ -1034,6 +1151,8 @@ static struct vm_area_struct *vma_merge(struct vma_merge_struct *vmg)
>  	pgoff_t pglen = PHYS_PFN(end - addr);
>  	long adj_start = 0;
>  
> +	vmg->state = VMA_MERGE_NOMERGE;
> +
>  	/*
>  	 * We later require that vma->vm_flags == vm_flags,
>  	 * so this tests vma->vm_flags & VM_SPECIAL, too.
> @@ -1185,13 +1304,19 @@ static struct vm_area_struct *vma_merge(struct vma_merge_struct *vmg)
>  	vma_complete(&vp, vmg->vmi, mm);
>  	validate_mm(mm);
>  	khugepaged_enter_vma(res, vmg->flags);
> +
> +	vmg->state = VMA_MERGE_SUCCESS;
>  	return res;
>  
>  prealloc_fail:
> +	vmg->state = VMA_MERGE_ERROR_NOMEM;
>  	if (anon_dup)
>  		unlink_anon_vmas(anon_dup);
>  
>  anon_vma_fail:
> +	if (err == -ENOMEM)
> +		vmg->state = VMA_MERGE_ERROR_NOMEM;
> +
>  	vma_iter_set(vmg->vmi, addr);
>  	vma_iter_load(vmg->vmi);
>  	return NULL;
> @@ -1298,22 +1423,6 @@ struct vm_area_struct
>  	return vma_modify(&vmg);
>  }
>  
> -/*
> - * Attempt to merge a newly mapped VMA with those adjacent to it. The caller
> - * must ensure that [start, end) does not overlap any existing VMA.
> - */
> -struct vm_area_struct
> -*vma_merge_new_vma(struct vma_iterator *vmi, struct vm_area_struct *prev,
> -		   struct vm_area_struct *vma, unsigned long start,
> -		   unsigned long end, pgoff_t pgoff)
> -{
> -	VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
> -
> -	vmg.pgoff = pgoff;
> -
> -	return vma_merge(&vmg);
> -}
> -
>  /*
>   * Expand vma by delta bytes, potentially merging with an immediately adjacent
>   * VMA with identical properties.
> @@ -1324,8 +1433,9 @@ struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi,
>  {
>  	VMG_VMA_STATE(vmg, vmi, vma, vma, vma->vm_end, vma->vm_end + delta);
>  
> -	/* vma is specified as prev, so case 1 or 2 will apply. */
> -	return vma_merge(&vmg);
> +	/* We use the VMA to populate VMG fields only. */
> +	vmg.vma = NULL;
> +	return vma_merge_new_range(&vmg);
>  }
>  
>  void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb)
> @@ -1426,9 +1536,10 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
>  	struct vm_area_struct *vma = *vmap;
>  	unsigned long vma_start = vma->vm_start;
>  	struct mm_struct *mm = vma->vm_mm;
> -	struct vm_area_struct *new_vma, *prev;
> +	struct vm_area_struct *new_vma;
>  	bool faulted_in_anon_vma = true;
>  	VMA_ITERATOR(vmi, mm, addr);
> +	VMG_VMA_STATE(vmg, &vmi, NULL, vma, addr, addr + len);
>  
>  	/*
>  	 * If anonymous vma has not yet been faulted, update new pgoff
> @@ -1439,11 +1550,18 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
>  		faulted_in_anon_vma = false;
>  	}
>  
> -	new_vma = find_vma_prev(mm, addr, &prev);
> +	new_vma = find_vma_prev(mm, addr, &vmg.prev);
>  	if (new_vma && new_vma->vm_start < addr + len)
>  		return NULL;	/* should never get here */
>  
> -	new_vma = vma_merge_new_vma(&vmi, prev, vma, addr, addr + len, pgoff);
> +	vmg.vma = NULL; /* New VMA range. */
> +	vmg.pgoff = pgoff;
> +	vmg.next = vma_next(&vmi);
> +	vma_prev(&vmi);
> +	vma_iter_next_range(&vmi);

You have already fixed this.

> +
> +	new_vma = vma_merge_new_range(&vmg);
> +
>  	if (new_vma) {
>  		/*
>  		 * Source vma may have been merged into new_vma
> diff --git a/mm/vma.h b/mm/vma.h
> index 8f01fbc20fe7..dbcdf1431014 100644
> --- a/mm/vma.h
> +++ b/mm/vma.h
> @@ -52,6 +52,13 @@ struct vma_munmap_struct {
>  	unsigned long data_vm;
>  };
>  
> +enum vma_merge_state {
> +	VMA_MERGE_START,
> +	VMA_MERGE_ERROR_NOMEM,
> +	VMA_MERGE_NOMERGE,
> +	VMA_MERGE_SUCCESS,
> +};
> +
>  /* Represents a VMA merge operation. */
>  struct vma_merge_struct {
>  	struct mm_struct *mm;
> @@ -68,8 +75,14 @@ struct vma_merge_struct {
>  	struct mempolicy *policy;
>  	struct vm_userfaultfd_ctx uffd_ctx;
>  	struct anon_vma_name *anon_name;
> +	enum vma_merge_state state;
>  };
>  
> +static inline bool vmg_nomem(struct vma_merge_struct *vmg)
> +{
> +	return vmg->state == VMA_MERGE_ERROR_NOMEM;
> +}
> +
>  /* Assumes addr >= vma->vm_start. */
>  static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
>  				       unsigned long addr)
> @@ -85,6 +98,7 @@ static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
>  		.end = end_,						\
>  		.flags = flags_,					\
>  		.pgoff = pgoff_,					\
> +		.state = VMA_MERGE_START,				\
>  	}
>  
>  #define VMG_VMA_STATE(name, vmi_, prev_, vma_, start_, end_)	\
> @@ -103,6 +117,7 @@ static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
>  		.policy = vma_policy(vma_),			\
>  		.uffd_ctx = vma_->vm_userfaultfd_ctx,		\
>  		.anon_name = anon_vma_name(vma_),		\
> +		.state = VMA_MERGE_START,			\
>  	}
>  
>  #ifdef CONFIG_DEBUG_VM_MAPLE_TREE
> @@ -306,10 +321,7 @@ struct vm_area_struct
>  		       unsigned long new_flags,
>  		       struct vm_userfaultfd_ctx new_ctx);
>  
> -struct vm_area_struct
> -*vma_merge_new_vma(struct vma_iterator *vmi, struct vm_area_struct *prev,
> -		   struct vm_area_struct *vma, unsigned long start,
> -		   unsigned long end, pgoff_t pgoff);
> +struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg);
>  
>  struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi,
>  					struct vm_area_struct *vma,
> diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
> index cce1ba78c57f..3a3a850d951c 100644
> --- a/tools/testing/vma/vma.c
> +++ b/tools/testing/vma/vma.c
> @@ -101,9 +101,9 @@ static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg)
>  	 */
>  	vmg->next = vma_next(vmg->vmi);
>  	vmg->prev = vma_prev(vmg->vmi);
> +	vma_iter_next_range(vmg->vmi);
>  
> -	vma_iter_set(vmg->vmi, vmg->start);
> -	return vma_merge(vmg);
> +	return vma_merge_new_range(vmg);
>  }
>  
>  /*
> @@ -162,10 +162,14 @@ static struct vm_area_struct *try_merge_new_vma(struct mm_struct *mm,
>  	merged = merge_new(vmg);
>  	if (merged) {
>  		*was_merged = true;
> +		ASSERT_EQ(vmg->state, VMA_MERGE_SUCCESS);
>  		return merged;
>  	}
>  
>  	*was_merged = false;
> +
> +	ASSERT_EQ(vmg->state, VMA_MERGE_NOMERGE);
> +
>  	return alloc_and_link_vma(mm, start, end, pgoff, flags);
>  }
>  
> @@ -595,6 +599,7 @@ static bool test_vma_merge_special_flags(void)
>  		vmg.flags = flags | special_flag;
>  		vma = merge_new(&vmg);
>  		ASSERT_EQ(vma, NULL);
> +		ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
>  	}
>  
>  	/* 2. Modify VMA with special flag that would otherwise merge. */
> @@ -616,6 +621,7 @@ static bool test_vma_merge_special_flags(void)
>  		vmg.flags = flags | special_flag;
>  		vma = merge_existing(&vmg);
>  		ASSERT_EQ(vma, NULL);
> +		ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
>  	}
>  
>  	cleanup_mm(&mm, &vmi);
> @@ -708,6 +714,7 @@ static bool test_vma_merge_with_close(void)
>  
>  	/* The next VMA having a close() operator should cause the merge to fail.*/
>  	ASSERT_EQ(merge_new(&vmg), NULL);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
>  
>  	/* Now create the VMA so we can merge via modified flags */
>  	vmg_set_range(&vmg, 0x1000, 0x2000, 1, flags);
> @@ -719,6 +726,7 @@ static bool test_vma_merge_with_close(void)
>  	 * also fail.
>  	 */
>  	ASSERT_EQ(merge_existing(&vmg), NULL);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
>  
>  	/* SCENARIO B
>  	 *
> @@ -744,6 +752,7 @@ static bool test_vma_merge_with_close(void)
>  	vmg.vma = vma;
>  	/* Make sure merge does not occur. */
>  	ASSERT_EQ(merge_existing(&vmg), NULL);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
>  
>  	cleanup_mm(&mm, &vmi);
>  	return true;
> @@ -792,6 +801,7 @@ static bool test_vma_merge_new_with_close(void)
>  	vmg_set_range(&vmg, 0x2000, 0x5000, 2, flags);
>  	vma = merge_new(&vmg);
>  	ASSERT_NE(vma, NULL);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
>  	ASSERT_EQ(vma->vm_start, 0);
>  	ASSERT_EQ(vma->vm_end, 0x5000);
>  	ASSERT_EQ(vma->vm_pgoff, 0);
> @@ -831,6 +841,7 @@ static bool test_merge_existing(void)
>  	vmg.prev = vma;
>  	vma->anon_vma = &dummy_anon_vma;
>  	ASSERT_EQ(merge_existing(&vmg), vma_next);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
>  	ASSERT_EQ(vma_next->vm_start, 0x3000);
>  	ASSERT_EQ(vma_next->vm_end, 0x9000);
>  	ASSERT_EQ(vma_next->vm_pgoff, 3);
> @@ -861,6 +872,7 @@ static bool test_merge_existing(void)
>  	vmg.vma = vma;
>  	vma->anon_vma = &dummy_anon_vma;
>  	ASSERT_EQ(merge_existing(&vmg), vma_next);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
>  	ASSERT_EQ(vma_next->vm_start, 0x2000);
>  	ASSERT_EQ(vma_next->vm_end, 0x9000);
>  	ASSERT_EQ(vma_next->vm_pgoff, 2);
> @@ -889,6 +901,7 @@ static bool test_merge_existing(void)
>  	vma->anon_vma = &dummy_anon_vma;
>  
>  	ASSERT_EQ(merge_existing(&vmg), vma_prev);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
>  	ASSERT_EQ(vma_prev->vm_start, 0);
>  	ASSERT_EQ(vma_prev->vm_end, 0x6000);
>  	ASSERT_EQ(vma_prev->vm_pgoff, 0);
> @@ -920,6 +933,7 @@ static bool test_merge_existing(void)
>  	vmg.vma = vma;
>  	vma->anon_vma = &dummy_anon_vma;
>  	ASSERT_EQ(merge_existing(&vmg), vma_prev);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
>  	ASSERT_EQ(vma_prev->vm_start, 0);
>  	ASSERT_EQ(vma_prev->vm_end, 0x7000);
>  	ASSERT_EQ(vma_prev->vm_pgoff, 0);
> @@ -948,6 +962,7 @@ static bool test_merge_existing(void)
>  	vmg.vma = vma;
>  	vma->anon_vma = &dummy_anon_vma;
>  	ASSERT_EQ(merge_existing(&vmg), vma_prev);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
>  	ASSERT_EQ(vma_prev->vm_start, 0);
>  	ASSERT_EQ(vma_prev->vm_end, 0x9000);
>  	ASSERT_EQ(vma_prev->vm_pgoff, 0);
> @@ -981,31 +996,37 @@ static bool test_merge_existing(void)
>  	vmg.prev = vma;
>  	vmg.vma = vma;
>  	ASSERT_EQ(merge_existing(&vmg), NULL);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
>  
>  	vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags);
>  	vmg.prev = vma;
>  	vmg.vma = vma;
>  	ASSERT_EQ(merge_existing(&vmg), NULL);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
>  
>  	vmg_set_range(&vmg, 0x6000, 0x7000, 6, flags);
>  	vmg.prev = vma;
>  	vmg.vma = vma;
>  	ASSERT_EQ(merge_existing(&vmg), NULL);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
>  
>  	vmg_set_range(&vmg, 0x4000, 0x7000, 4, flags);
>  	vmg.prev = vma;
>  	vmg.vma = vma;
>  	ASSERT_EQ(merge_existing(&vmg), NULL);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
>  
>  	vmg_set_range(&vmg, 0x4000, 0x6000, 4, flags);
>  	vmg.prev = vma;
>  	vmg.vma = vma;
>  	ASSERT_EQ(merge_existing(&vmg), NULL);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
>  
>  	vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags);
>  	vmg.prev = vma;
>  	vmg.vma = vma;
>  	ASSERT_EQ(merge_existing(&vmg), NULL);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
>  
>  	ASSERT_EQ(cleanup_mm(&mm, &vmi), 3);
>  
> @@ -1071,6 +1092,7 @@ static bool test_anon_vma_non_mergeable(void)
>  	vmg.vma = vma;
>  
>  	ASSERT_EQ(merge_existing(&vmg), vma_prev);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
>  	ASSERT_EQ(vma_prev->vm_start, 0);
>  	ASSERT_EQ(vma_prev->vm_end, 0x7000);
>  	ASSERT_EQ(vma_prev->vm_pgoff, 0);
> @@ -1106,6 +1128,7 @@ static bool test_anon_vma_non_mergeable(void)
>  	vmg.prev = vma_prev;
>  
>  	ASSERT_EQ(merge_new(&vmg), vma_prev);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
>  	ASSERT_EQ(vma_prev->vm_start, 0);
>  	ASSERT_EQ(vma_prev->vm_end, 0x7000);
>  	ASSERT_EQ(vma_prev->vm_pgoff, 0);
> @@ -1181,6 +1204,7 @@ static bool test_dup_anon_vma(void)
>  	vmg.vma = vma;
>  
>  	ASSERT_EQ(merge_existing(&vmg), vma_prev);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
>  
>  	ASSERT_EQ(vma_prev->vm_start, 0);
>  	ASSERT_EQ(vma_prev->vm_end, 0x8000);
> @@ -1209,6 +1233,7 @@ static bool test_dup_anon_vma(void)
>  	vmg.vma = vma;
>  
>  	ASSERT_EQ(merge_existing(&vmg), vma_prev);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
>  
>  	ASSERT_EQ(vma_prev->vm_start, 0);
>  	ASSERT_EQ(vma_prev->vm_end, 0x8000);
> @@ -1236,6 +1261,7 @@ static bool test_dup_anon_vma(void)
>  	vmg.vma = vma;
>  
>  	ASSERT_EQ(merge_existing(&vmg), vma_prev);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
>  
>  	ASSERT_EQ(vma_prev->vm_start, 0);
>  	ASSERT_EQ(vma_prev->vm_end, 0x5000);
> @@ -1263,6 +1289,7 @@ static bool test_dup_anon_vma(void)
>  	vmg.vma = vma;
>  
>  	ASSERT_EQ(merge_existing(&vmg), vma_next);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
>  
>  	ASSERT_EQ(vma_next->vm_start, 0x3000);
>  	ASSERT_EQ(vma_next->vm_end, 0x8000);
> @@ -1303,6 +1330,7 @@ static bool test_vmi_prealloc_fail(void)
>  
>  	/* This will cause the merge to fail. */
>  	ASSERT_EQ(merge_existing(&vmg), NULL);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_ERROR_NOMEM);
>  	/* We will already have assigned the anon_vma. */
>  	ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma);
>  	/* And it was both cloned and unlinked. */
> @@ -1327,6 +1355,7 @@ static bool test_vmi_prealloc_fail(void)
>  
>  	fail_prealloc = true;
>  	ASSERT_EQ(expand_existing(&vmg), -ENOMEM);
> +	ASSERT_EQ(vmg.state, VMA_MERGE_ERROR_NOMEM);
>  
>  	ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma);
>  	ASSERT_TRUE(dummy_anon_vma.was_cloned);
> diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
> index a3c262c6eb73..c5b9da034511 100644
> --- a/tools/testing/vma/vma_internal.h
> +++ b/tools/testing/vma/vma_internal.h
> @@ -740,6 +740,12 @@ static inline void vma_iter_free(struct vma_iterator *vmi)
>  	mas_destroy(&vmi->mas);
>  }
>  
> +static inline
> +struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi)
> +{
> +	return mas_next_range(&vmi->mas, ULONG_MAX);
> +}
> +
>  static inline void vm_acct_memory(long pages)
>  {
>  }
> -- 
> 2.46.0
>
Mark Brown Aug. 29, 2024, 7:46 p.m. UTC | #3
On Fri, Aug 23, 2024 at 09:07:01PM +0100, Lorenzo Stoakes wrote:
> Abstract vma_merge_new_vma() to use vma_merge_struct and rename the
> resultant function vma_merge_new_range() to be clear what the purpose of
> this function is - a new VMA is desired in the specified range, and we wish
> to see if it is possible to 'merge' surrounding VMAs into this range rather
> than having to allocate a new VMA.

This patch, which is in -next today with the fixup Lorenzo posted as
commit 8c9d0f8b1e9a42586, seems to be causing problems with the mremap
expand merge selftest.  The test has been failing for a few days.  It
unfortunately doesn't log anything about why it's upset:

# # ok 15 5MB mremap - Source 1MB-aligned, Dest 1MB-aligned with 40MB Preamble
# # not ok 16 mremap expand merge
# # ok 18 mremap mremap move within range

I identified this commit using a bisect which appears to converge fairly
smoothly, I didn't do any other analysis:

git bisect start
# status: waiting for both good and bad commits
# bad: [b18bbfc14a38b5234e09c2adcf713e38063a7e6e] Add linux-next specific files for 20240829
git bisect bad b18bbfc14a38b5234e09c2adcf713e38063a7e6e
# status: waiting for good commit(s), bad commit known
# good: [559a93afece952cb129a236febe5d1b8f7c79367] Merge branch 'for-linux-next-fixes' of https://gitlab.freedesktop.org/drm/misc/kernel.git
git bisect good 559a93afece952cb129a236febe5d1b8f7c79367
# bad: [47b9fb7c1f3efb63f2a5ed614385476856196527] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath.git
git bisect bad 47b9fb7c1f3efb63f2a5ed614385476856196527
# bad: [3582529887df7b4d397a05584ceb9d9944b54328] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git
git bisect bad 3582529887df7b4d397a05584ceb9d9944b54328
# bad: [a97442baf3538d884696539023a068269c983182] Merge branch 'at91-next' of git://git.kernel.org/pub/scm/linux/kernel/git/at91/linux.git
git bisect bad a97442baf3538d884696539023a068269c983182
# bad: [9287e4adbc6ab8fa04d25eb82e097fed877a4642] mm: optimization on page allocation when CMA enabled
git bisect bad 9287e4adbc6ab8fa04d25eb82e097fed877a4642
# good: [becee36d72a57fd245033efdfd94cb54ac24472e] mm: swap: extend swap_shmem_alloc() to support batch SWAP_MAP_SHMEM flag setting
git bisect good becee36d72a57fd245033efdfd94cb54ac24472e
# good: [8939071cd2fdfd69f48631845613f5d841e0e97e] selftests-test_zswap-add-test-for-hierarchical-zswapwriteback-fix
git bisect good 8939071cd2fdfd69f48631845613f5d841e0e97e
# bad: [42c5d3ae23db3086624db99c52498d5843d3b88a] mm/damon/core-test: fix damon_test_ops_registration() for DAMON_VADDR unset case
git bisect bad 42c5d3ae23db3086624db99c52498d5843d3b88a
# good: [6abe8def9543d42a31fa6560410a1ab45b3e1917] mm/vma: drop incorrect comment from vms_gather_munmap_vmas()
git bisect good 6abe8def9543d42a31fa6560410a1ab45b3e1917
# bad: [bdf4d125288d2ac24a49f7f52d882261bed6e1e1] mm: vmalloc: refactor vm_area_alloc_pages() function
git bisect bad bdf4d125288d2ac24a49f7f52d882261bed6e1e1
# good: [5cc3d95b7d3a1cd7c09772d6b7aaa0371cc37236] mm: abstract vma_expand() to use vma_merge_struct
git bisect good 5cc3d95b7d3a1cd7c09772d6b7aaa0371cc37236
# bad: [042a3d61fddaff155b6595d8e47323f1e85aefb3] mm: make vma_prepare() and friends static and internal to vma.c
git bisect bad 042a3d61fddaff155b6595d8e47323f1e85aefb3
# bad: [b46446ab789bc2cfc3d18ddcc0e93fa533f6b479] mm: only advance iterator if prev exists
git bisect bad b46446ab789bc2cfc3d18ddcc0e93fa533f6b479
# bad: [8c9d0f8b1e9a4258676714557c8d69fbb85578ab] mm: avoid using vma_merge() for new VMAs
git bisect bad 8c9d0f8b1e9a4258676714557c8d69fbb85578ab
# first bad commit: [8c9d0f8b1e9a4258676714557c8d69fbb85578ab] mm: avoid using vma_merge() for new VMAs
Lorenzo Stoakes Aug. 29, 2024, 9:22 p.m. UTC | #4
On Thu, Aug 29, 2024 at 08:46:28PM GMT, Mark Brown wrote:
> On Fri, Aug 23, 2024 at 09:07:01PM +0100, Lorenzo Stoakes wrote:
> > Abstract vma_merge_new_vma() to use vma_merge_struct and rename the
> > resultant function vma_merge_new_range() to be clear what the purpose of
> > this function is - a new VMA is desired in the specified range, and we wish
> > to see if it is possible to 'merge' surrounding VMAs into this range rather
> > than having to allocate a new VMA.
>
> This patch, which is in -next today with the fixup Lorenzo posted as
> commit 8c9d0f8b1e9a42586, seems to be causing problems with the mremap
> expand merge selftest.  The test has been failing for a few days.  It
> unfortunately doesn't log anything about why it's upset:
>
> # # ok 15 5MB mremap - Source 1MB-aligned, Dest 1MB-aligned with 40MB Preamble
> # # not ok 16 mremap expand merge
> # # ok 18 mremap mremap move within range

[snip]

Thanks, I figured out the problem, it's not arm-specific, I was running
self-tests but eyeballing-failure resulted in me missing this.

This is a product of vma_merge_extend() invoking vma_merge_new_range() without
having determined the next VMA correctly, after moving from vma_merge() (which
looked this up for us) to vma_merge_new_range() (which does not).

This is after having adjusted the assumptions between v1 and v2 of the series in
each merge function, and I simply missed this mremap()-specific case.

Andrew - I enclose a fix-patch to get a fix out for this asap, but I am due a
respin relatively soon and will also include that in this.

----8<----
From 3678f8a53f98de52f11946d4d32e6fb239d11c2f Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Date: Thu, 29 Aug 2024 22:18:02 +0100
Subject: [PATCH] mm: correctly determine vmg.next in vma_merge_extend()

vma_merge_new_range() requires that the caller specify prev AND next.

Failure to specify results in missed merges. Fix this by explicitly looking
up next.

This function is explicitly used by mremap() in extend cases.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reported-by: Mark Brown <broonie@kernel.org>
---
 mm/vma.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mm/vma.c b/mm/vma.c
index 7cddeea907f8..bd35abc70ed8 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -1489,6 +1489,10 @@ struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi,
 {
 	VMG_VMA_STATE(vmg, vmi, vma, vma, vma->vm_end, vma->vm_end + delta);

+	vmg.next = vma_next(vmi);
+	if (vma_prev(vmi))
+		vma_iter_next_range(vmi);
+
 	/* We use the VMA to populate VMG fields only. */
 	vmg.vma = NULL;
 	return vma_merge_new_range(&vmg);
--
2.46.0
Mark Brown Aug. 30, 2024, 12:59 p.m. UTC | #5
On Thu, Aug 29, 2024 at 10:22:53PM +0100, Lorenzo Stoakes wrote:

> Thanks, I figured out the problem, it's not arm-specific, I was running
> self-tests but eyeballing-failure resulted in me missing this.
> 
> This is a product of vma_merge_extend() invoking vma_merge_new_range() without
> having determined the next VMA correctly, after moving from vma_merge() (which
> looked this up for us) to vma_merge_new_range() (which does not).
> 
> This is after having adjusted the assumptions between v1 and v2 of the series in
> each merge function, and I simply missed this mremap()-specific case.
> 
> Andrew - I enclose a fix-patch to get a fix out for this asap, but I am due a
> respin relatively soon and will also include that in this.
> 
> ----8<----
> From 3678f8a53f98de52f11946d4d32e6fb239d11c2f Mon Sep 17 00:00:00 2001
> From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> Date: Thu, 29 Aug 2024 22:18:02 +0100
> Subject: [PATCH] mm: correctly determine vmg.next in vma_merge_extend()
> 
> vma_merge_new_range() requires that the caller specify prev AND next.

This fixes the problem for me.

Tested-by: Mark Brown <broonie@kernel.org>
Lorenzo Stoakes Aug. 30, 2024, 1:02 p.m. UTC | #6
On Fri, Aug 30, 2024 at 01:59:37PM GMT, Mark Brown wrote:
> On Thu, Aug 29, 2024 at 10:22:53PM +0100, Lorenzo Stoakes wrote:
>
> > Thanks, I figured out the problem, it's not arm-specific, I was running
> > self-tests but eyeballing-failure resulted in me missing this.
> >
> > This is a product of vma_merge_extend() invoking vma_merge_new_range() without
> > having determined the next VMA correctly, after moving from vma_merge() (which
> > looked this up for us) to vma_merge_new_range() (which does not).
> >
> > This is after having adjusted the assumptions between v1 and v2 of the series in
> > each merge function, and I simply missed this mremap()-specific case.
> >
> > Andrew - I enclose a fix-patch to get a fix out for this asap, but I am due a
> > respin relatively soon and will also include that in this.
> >
> > ----8<----
> > From 3678f8a53f98de52f11946d4d32e6fb239d11c2f Mon Sep 17 00:00:00 2001
> > From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> > Date: Thu, 29 Aug 2024 22:18:02 +0100
> > Subject: [PATCH] mm: correctly determine vmg.next in vma_merge_extend()
> >
> > vma_merge_new_range() requires that the caller specify prev AND next.
>
> This fixes the problem for me.
>
> Tested-by: Mark Brown <broonie@kernel.org>

Thanks! I will be folding this into a respin soon, are you good with me
adding this tag to the patch 6/10 in general? No worries if not, as fix
will be subsumed there.
Mark Brown Aug. 30, 2024, 1:05 p.m. UTC | #7
On Fri, Aug 30, 2024 at 02:02:33PM +0100, Lorenzo Stoakes wrote:
> On Fri, Aug 30, 2024 at 01:59:37PM GMT, Mark Brown wrote:

> > Tested-by: Mark Brown <broonie@kernel.org>

> Thanks! I will be folding this into a respin soon, are you good with me
> adding this tag to the patch 6/10 in general? No worries if not, as fix
> will be subsumed there.

Sure.
Lorenzo Stoakes Aug. 30, 2024, 1:10 p.m. UTC | #8
On Fri, Aug 30, 2024 at 02:05:23PM GMT, Mark Brown wrote:
> On Fri, Aug 30, 2024 at 02:02:33PM +0100, Lorenzo Stoakes wrote:
> > On Fri, Aug 30, 2024 at 01:59:37PM GMT, Mark Brown wrote:
>
> > > Tested-by: Mark Brown <broonie@kernel.org>
>
> > Thanks! I will be folding this into a respin soon, are you good with me
> > adding this tag to the patch 6/10 in general? No worries if not, as fix
> > will be subsumed there.
>
> Sure.

Thanks!
Lorenzo Stoakes Aug. 30, 2024, 3:19 p.m. UTC | #9
On Wed, Aug 28, 2024 at 04:52:07PM GMT, Liam R. Howlett wrote:

[snip]

> > +	/*
> > +	 * Now try to expand adjacent VMA(s). This takes care of removing the
> > +	 * following VMA if we have VMAs on both sides.
> > +	 */
> > +	if (vmg->vma && !vma_expand(vmg)) {
> > +		khugepaged_enter_vma(vmg->vma, vmg->flags);
>
> This might be able to be moved into vma_expand().

Sorry for the multiple emails, but am working my way through.

Can't do this, as relocate_vma_down() (and the original implementation in
fs/exec.c - I checked) does not invoke khugepaged_enter_vma(), sadly.

>
> > +
>
> Extra whitespace

Ack. A lot of these are to subjective taste for clarity, but am happy to
adjust these for the most part...

>
> > +		vmg->state = VMA_MERGE_SUCCESS;
> > +		return vmg->vma;
> > +	}
> > +
> > +	/* If expansion failed, reset state. Allows us to retry merge later. */
> > +	vmg->vma = NULL;
> > +	vmg->start = start;
> > +	vmg->end = end;
> > +	vmg->pgoff = pgoff;
> > +	if (vmg->vma == prev)
> > +		vma_iter_set(vmg->vmi, start);
> > +
> > +	return NULL;
> > +}
> > +
> >  /*
> >   * vma_expand - Expand an existing VMA
> >   *
> > @@ -474,7 +584,11 @@ void validate_mm(struct mm_struct *mm)
> >   * vmg->next->vm_end.  Checking if the vmg->vma can expand and merge with
> >   * vmg->next needs to be handled by the caller.
> >   *
> > - * Returns: 0 on success
> > + * Returns: 0 on success.
> > + *
> > + * ASSUMPTIONS:
> > + * - The caller must hold a WRITE lock on vmg->vma->mm->mmap_lock.
> > + * - The caller must have set @vmg->vma and @vmg->next.
> >   */
> >  int vma_expand(struct vma_merge_struct *vmg)
> >  {
> > @@ -484,6 +598,8 @@ int vma_expand(struct vma_merge_struct *vmg)
> >  	struct vm_area_struct *next = vmg->next;
> >  	struct vma_prepare vp;
> >
> > +	mmap_assert_write_locked(vmg->mm);
> > +
>
> There are a few unnecessary whitespaces here..

...except here :) I like to keep the asserts separate from the rest of the
logic, and local declarations on their own lines.

[snip]
diff mbox series

Patch

diff --git a/mm/mmap.c b/mm/mmap.c
index 0d242c9b1f4c..80d70ed099cf 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1364,8 +1364,8 @@  unsigned long mmap_region(struct file *file, unsigned long addr,
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma = NULL;
-	struct vm_area_struct *next, *prev, *merge;
 	pgoff_t pglen = PHYS_PFN(len);
+	struct vm_area_struct *merge;
 	unsigned long charged = 0;
 	struct vma_munmap_struct vms;
 	struct ma_state mas_detach;
@@ -1389,13 +1389,13 @@  unsigned long mmap_region(struct file *file, unsigned long addr,
 		if (vms_gather_munmap_vmas(&vms, &mas_detach))
 			return -ENOMEM;
 
-		next = vmg.next = vms.next;
-		prev = vmg.prev = vms.prev;
+		vmg.next = vms.next;
+		vmg.prev = vms.prev;
 		vma = NULL;
 	} else {
-		next = vmg.next = vma_next(&vmi);
-		prev = vmg.prev = vma_prev(&vmi);
-		if (prev)
+		vmg.next = vma_next(&vmi);
+		vmg.prev = vma_prev(&vmi);
+		if (vmg.prev)
 			vma_iter_next_range(&vmi);
 	}
 
@@ -1417,45 +1417,9 @@  unsigned long mmap_region(struct file *file, unsigned long addr,
 		vmg.flags = vm_flags;
 	}
 
-	if (vm_flags & VM_SPECIAL)
-		goto cannot_expand;
-
-	/* Attempt to expand an old mapping */
-	/* Check next */
-	if (next && next->vm_start == end && can_vma_merge_before(&vmg)) {
-		vmg.end = next->vm_end;
-		vma = vmg.vma = next;
-		vmg.pgoff = next->vm_pgoff - pglen;
-		/*
-		 * We set this here so if we will merge with the previous VMA in
-		 * the code below, can_vma_merge_after() ensures anon_vma
-		 * compatibility between prev and next.
-		 */
-		vmg.anon_vma = vma->anon_vma;
-		vmg.uffd_ctx = vma->vm_userfaultfd_ctx;
-	}
-
-	/* Check prev */
-	if (prev && prev->vm_end == addr && can_vma_merge_after(&vmg)) {
-		vmg.start = prev->vm_start;
-		vma = vmg.vma = prev;
-		vmg.pgoff = prev->vm_pgoff;
-		vma_prev(&vmi); /* Equivalent to going to the previous range */
-	}
-
-	if (vma) {
-		/* Actually expand, if possible */
-		if (!vma_expand(&vmg)) {
-			khugepaged_enter_vma(vma, vm_flags);
-			goto expanded;
-		}
-
-		/* If the expand fails, then reposition the vma iterator */
-		if (unlikely(vma == prev))
-			vma_iter_set(&vmi, addr);
-	}
-
-cannot_expand:
+	vma = vma_merge_new_range(&vmg);
+	if (vma)
+		goto expanded;
 
 	/*
 	 * Determine the object being mapped and call the appropriate
@@ -1503,10 +1467,11 @@  unsigned long mmap_region(struct file *file, unsigned long addr,
 		 * If vm_flags changed after call_mmap(), we should try merge
 		 * vma again as we may succeed this time.
 		 */
-		if (unlikely(vm_flags != vma->vm_flags && prev)) {
-			merge = vma_merge_new_vma(&vmi, prev, vma,
-						  vma->vm_start, vma->vm_end,
-						  vma->vm_pgoff);
+		if (unlikely(vm_flags != vma->vm_flags && vmg.prev)) {
+			vmg.flags = vma->vm_flags;
+			/* If this fails, state is reset ready for a reattempt. */
+			merge = vma_merge_new_range(&vmg);
+
 			if (merge) {
 				/*
 				 * ->mmap() can change vma->vm_file and fput
@@ -1521,6 +1486,8 @@  unsigned long mmap_region(struct file *file, unsigned long addr,
 				/* Update vm_flags to pick up the change. */
 				vm_flags = vma->vm_flags;
 				goto unmap_writable;
+			} else {
+				vma_iter_config(&vmi, addr, end);
 			}
 		}
 
@@ -1554,7 +1521,7 @@  unsigned long mmap_region(struct file *file, unsigned long addr,
 	vma_link_file(vma);
 
 	/*
-	 * vma_merge() calls khugepaged_enter_vma() either, the below
+	 * vma_merge_new_range() calls khugepaged_enter_vma() too, the below
 	 * call covers the non-merge case.
 	 */
 	khugepaged_enter_vma(vma, vma->vm_flags);
@@ -1609,7 +1576,7 @@  unsigned long mmap_region(struct file *file, unsigned long addr,
 
 		vma_iter_set(&vmi, vma->vm_end);
 		/* Undo any partial mapping done by a device driver. */
-		unmap_region(&vmi.mas, vma, prev, next);
+		unmap_region(&vmi.mas, vma, vmg.prev, vmg.next);
 	}
 	if (writable_file_mapping)
 		mapping_unmap_writable(file->f_mapping);
@@ -1755,7 +1722,6 @@  static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		unsigned long addr, unsigned long len, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	struct vma_prepare vp;
 
 	/*
 	 * Check against address space limits by the changed size
@@ -1779,25 +1745,12 @@  static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		VMG_STATE(vmg, mm, vmi, addr, addr + len, flags, PHYS_PFN(addr));
 
 		vmg.prev = vma;
-		if (can_vma_merge_after(&vmg)) {
-			vma_iter_config(vmi, vma->vm_start, addr + len);
-			if (vma_iter_prealloc(vmi, vma))
-				goto unacct_fail;
-
-			vma_start_write(vma);
-
-			init_vma_prep(&vp, vma);
-			vma_prepare(&vp);
-			vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0);
-			vma->vm_end = addr + len;
-			vm_flags_set(vma, VM_SOFTDIRTY);
-			vma_iter_store(vmi, vma);
-
-			vma_complete(&vp, vmi, mm);
-			validate_mm(mm);
-			khugepaged_enter_vma(vma, flags);
+		vma_iter_next_range(vmi);
+
+		if (vma_merge_new_range(&vmg))
 			goto out;
-		}
+		else if (vmg_nomem(&vmg))
+			goto unacct_fail;
 	}
 
 	if (vma)
diff --git a/mm/vma.c b/mm/vma.c
index 4867ae722a9a..8a5fa15f46a2 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -464,6 +464,116 @@  void validate_mm(struct mm_struct *mm)
 }
 #endif /* CONFIG_DEBUG_VM_MAPLE_TREE */
 
+/*
+ * vma_merge_new_range - Attempt to merge a new VMA into address space
+ *
+ * @vmg: Describes the VMA we are adding, in the range @vmg->start to @vmg->end
+ *       (exclusive), which we try to merge with any adjacent VMAs if possible.
+ *
+ * We are about to add a VMA to the address space starting at @vmg->start and
+ * ending at @vmg->end. There are three different possible scenarios:
+ *
+ * 1. There is a VMA with identical properties immediately adjacent to the
+ *    proposed new VMA [@vmg->start, @vmg->end) either before or after it -
+ *    EXPAND that VMA:
+ *
+ * Proposed:       |-----|  or  |-----|
+ * Existing:  |----|                  |----|
+ *
+ * 2. There are VMAs with identical properties immediately adjacent to the
+ *    proposed new VMA [@vmg->start, @vmg->end) both before AND after it -
+ *    EXPAND the former and REMOVE the latter:
+ *
+ * Proposed:       |-----|
+ * Existing:  |----|     |----|
+ *
+ * 3. There are no VMAs immediately adjacent to the proposed new VMA or those
+ *    VMAs do not have identical attributes - NO MERGE POSSIBLE.
+ *
+ * In instances where we can merge, this function returns the expanded VMA which
+ * will have its range adjusted accordingly and the underlying maple tree also
+ * adjusted.
+ *
+ * Returns: In instances where no merge was possible, NULL. Otherwise, a pointer
+ *          to the VMA we expanded.
+ *
+ * This function adjusts @vmg to provide @vmg->next if not already specified,
+ * and adjusts [@vmg->start, @vmg->end) to span the expanded range.
+ *
+ * ASSUMPTIONS:
+ * - The caller must hold a WRITE lock on the mm_struct->mmap_lock.
+ * - The caller must have determined that [@vmg->start, @vmg->end) is empty,
+     other than VMAs that will be unmapped should the operation succeed.
+ * - The caller must have specified the previous vma in @vmg->prev.
+ * - The caller must have specified the next vma in @vmg->next.
+ * - The caller must have positioned the vmi at or before the gap.
+ */
+struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg)
+{
+	struct vm_area_struct *prev = vmg->prev;
+	struct vm_area_struct *next = vmg->next;
+	unsigned long start = vmg->start;
+	unsigned long end = vmg->end;
+	pgoff_t pgoff = vmg->pgoff;
+	pgoff_t pglen = PHYS_PFN(end - start);
+	bool can_merge_before, can_merge_after;
+
+	mmap_assert_write_locked(vmg->mm);
+	VM_WARN_ON(vmg->vma);
+	/* vmi must point at or before the gap. */
+	VM_WARN_ON(vma_iter_addr(vmg->vmi) > end);
+
+	vmg->state = VMA_MERGE_NOMERGE;
+
+	/* Special VMAs are unmergeable, also if no prev/next. */
+	if ((vmg->flags & VM_SPECIAL) || (!prev && !next))
+		return NULL;
+
+	can_merge_before = next && next->vm_start == end &&
+		can_vma_merge_before(vmg);
+	can_merge_after = prev && prev->vm_end == start &&
+		can_vma_merge_after(vmg);
+
+	/* If we can merge with the next VMA, adjust vmg accordingly. */
+	if (can_merge_before &&
+	    (!can_merge_after || is_mergeable_anon_vma(prev->anon_vma,
+						       next->anon_vma, NULL))) {
+		vmg->end = next->vm_end;
+		vmg->vma = next;
+		vmg->pgoff = next->vm_pgoff - pglen;
+	}
+
+	/* If we can merge with the previous VMA, adjust vmg accordingly. */
+	if (can_merge_after) {
+		vmg->start = prev->vm_start;
+		vmg->vma = prev;
+		vmg->pgoff = prev->vm_pgoff;
+
+		vma_prev(vmg->vmi); /* Equivalent to going to the previous range */
+	}
+
+	/*
+	 * Now try to expand adjacent VMA(s). This takes care of removing the
+	 * following VMA if we have VMAs on both sides.
+	 */
+	if (vmg->vma && !vma_expand(vmg)) {
+		khugepaged_enter_vma(vmg->vma, vmg->flags);
+
+		vmg->state = VMA_MERGE_SUCCESS;
+		return vmg->vma;
+	}
+
+	/* If expansion failed, reset state. Allows us to retry merge later. */
+	vmg->vma = NULL;
+	vmg->start = start;
+	vmg->end = end;
+	vmg->pgoff = pgoff;
+	if (vmg->vma == prev)
+		vma_iter_set(vmg->vmi, start);
+
+	return NULL;
+}
+
 /*
  * vma_expand - Expand an existing VMA
  *
@@ -474,7 +584,11 @@  void validate_mm(struct mm_struct *mm)
  * vmg->next->vm_end.  Checking if the vmg->vma can expand and merge with
  * vmg->next needs to be handled by the caller.
  *
- * Returns: 0 on success
+ * Returns: 0 on success.
+ *
+ * ASSUMPTIONS:
+ * - The caller must hold a WRITE lock on vmg->vma->mm->mmap_lock.
+ * - The caller must have set @vmg->vma and @vmg->next.
  */
 int vma_expand(struct vma_merge_struct *vmg)
 {
@@ -484,6 +598,8 @@  int vma_expand(struct vma_merge_struct *vmg)
 	struct vm_area_struct *next = vmg->next;
 	struct vma_prepare vp;
 
+	mmap_assert_write_locked(vmg->mm);
+
 	vma_start_write(vma);
 	if (next && (vma != next) && (vmg->end == next->vm_end)) {
 		int ret;
@@ -516,6 +632,7 @@  int vma_expand(struct vma_merge_struct *vmg)
 	return 0;
 
 nomem:
+	vmg->state = VMA_MERGE_ERROR_NOMEM;
 	if (anon_dup)
 		unlink_anon_vmas(anon_dup);
 	return -ENOMEM;
@@ -1034,6 +1151,8 @@  static struct vm_area_struct *vma_merge(struct vma_merge_struct *vmg)
 	pgoff_t pglen = PHYS_PFN(end - addr);
 	long adj_start = 0;
 
+	vmg->state = VMA_MERGE_NOMERGE;
+
 	/*
 	 * We later require that vma->vm_flags == vm_flags,
 	 * so this tests vma->vm_flags & VM_SPECIAL, too.
@@ -1185,13 +1304,19 @@  static struct vm_area_struct *vma_merge(struct vma_merge_struct *vmg)
 	vma_complete(&vp, vmg->vmi, mm);
 	validate_mm(mm);
 	khugepaged_enter_vma(res, vmg->flags);
+
+	vmg->state = VMA_MERGE_SUCCESS;
 	return res;
 
 prealloc_fail:
+	vmg->state = VMA_MERGE_ERROR_NOMEM;
 	if (anon_dup)
 		unlink_anon_vmas(anon_dup);
 
 anon_vma_fail:
+	if (err == -ENOMEM)
+		vmg->state = VMA_MERGE_ERROR_NOMEM;
+
 	vma_iter_set(vmg->vmi, addr);
 	vma_iter_load(vmg->vmi);
 	return NULL;
@@ -1298,22 +1423,6 @@  struct vm_area_struct
 	return vma_modify(&vmg);
 }
 
-/*
- * Attempt to merge a newly mapped VMA with those adjacent to it. The caller
- * must ensure that [start, end) does not overlap any existing VMA.
- */
-struct vm_area_struct
-*vma_merge_new_vma(struct vma_iterator *vmi, struct vm_area_struct *prev,
-		   struct vm_area_struct *vma, unsigned long start,
-		   unsigned long end, pgoff_t pgoff)
-{
-	VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
-
-	vmg.pgoff = pgoff;
-
-	return vma_merge(&vmg);
-}
-
 /*
  * Expand vma by delta bytes, potentially merging with an immediately adjacent
  * VMA with identical properties.
@@ -1324,8 +1433,9 @@  struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi,
 {
 	VMG_VMA_STATE(vmg, vmi, vma, vma, vma->vm_end, vma->vm_end + delta);
 
-	/* vma is specified as prev, so case 1 or 2 will apply. */
-	return vma_merge(&vmg);
+	/* We use the VMA to populate VMG fields only. */
+	vmg.vma = NULL;
+	return vma_merge_new_range(&vmg);
 }
 
 void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb)
@@ -1426,9 +1536,10 @@  struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	struct vm_area_struct *vma = *vmap;
 	unsigned long vma_start = vma->vm_start;
 	struct mm_struct *mm = vma->vm_mm;
-	struct vm_area_struct *new_vma, *prev;
+	struct vm_area_struct *new_vma;
 	bool faulted_in_anon_vma = true;
 	VMA_ITERATOR(vmi, mm, addr);
+	VMG_VMA_STATE(vmg, &vmi, NULL, vma, addr, addr + len);
 
 	/*
 	 * If anonymous vma has not yet been faulted, update new pgoff
@@ -1439,11 +1550,18 @@  struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		faulted_in_anon_vma = false;
 	}
 
-	new_vma = find_vma_prev(mm, addr, &prev);
+	new_vma = find_vma_prev(mm, addr, &vmg.prev);
 	if (new_vma && new_vma->vm_start < addr + len)
 		return NULL;	/* should never get here */
 
-	new_vma = vma_merge_new_vma(&vmi, prev, vma, addr, addr + len, pgoff);
+	vmg.vma = NULL; /* New VMA range. */
+	vmg.pgoff = pgoff;
+	vmg.next = vma_next(&vmi);
+	vma_prev(&vmi);
+	vma_iter_next_range(&vmi);
+
+	new_vma = vma_merge_new_range(&vmg);
+
 	if (new_vma) {
 		/*
 		 * Source vma may have been merged into new_vma
diff --git a/mm/vma.h b/mm/vma.h
index 8f01fbc20fe7..dbcdf1431014 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -52,6 +52,13 @@  struct vma_munmap_struct {
 	unsigned long data_vm;
 };
 
+enum vma_merge_state {
+	VMA_MERGE_START,
+	VMA_MERGE_ERROR_NOMEM,
+	VMA_MERGE_NOMERGE,
+	VMA_MERGE_SUCCESS,
+};
+
 /* Represents a VMA merge operation. */
 struct vma_merge_struct {
 	struct mm_struct *mm;
@@ -68,8 +75,14 @@  struct vma_merge_struct {
 	struct mempolicy *policy;
 	struct vm_userfaultfd_ctx uffd_ctx;
 	struct anon_vma_name *anon_name;
+	enum vma_merge_state state;
 };
 
+static inline bool vmg_nomem(struct vma_merge_struct *vmg)
+{
+	return vmg->state == VMA_MERGE_ERROR_NOMEM;
+}
+
 /* Assumes addr >= vma->vm_start. */
 static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
 				       unsigned long addr)
@@ -85,6 +98,7 @@  static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
 		.end = end_,						\
 		.flags = flags_,					\
 		.pgoff = pgoff_,					\
+		.state = VMA_MERGE_START,				\
 	}
 
 #define VMG_VMA_STATE(name, vmi_, prev_, vma_, start_, end_)	\
@@ -103,6 +117,7 @@  static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
 		.policy = vma_policy(vma_),			\
 		.uffd_ctx = vma_->vm_userfaultfd_ctx,		\
 		.anon_name = anon_vma_name(vma_),		\
+		.state = VMA_MERGE_START,			\
 	}
 
 #ifdef CONFIG_DEBUG_VM_MAPLE_TREE
@@ -306,10 +321,7 @@  struct vm_area_struct
 		       unsigned long new_flags,
 		       struct vm_userfaultfd_ctx new_ctx);
 
-struct vm_area_struct
-*vma_merge_new_vma(struct vma_iterator *vmi, struct vm_area_struct *prev,
-		   struct vm_area_struct *vma, unsigned long start,
-		   unsigned long end, pgoff_t pgoff);
+struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg);
 
 struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi,
 					struct vm_area_struct *vma,
diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
index cce1ba78c57f..3a3a850d951c 100644
--- a/tools/testing/vma/vma.c
+++ b/tools/testing/vma/vma.c
@@ -101,9 +101,9 @@  static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg)
 	 */
 	vmg->next = vma_next(vmg->vmi);
 	vmg->prev = vma_prev(vmg->vmi);
+	vma_iter_next_range(vmg->vmi);
 
-	vma_iter_set(vmg->vmi, vmg->start);
-	return vma_merge(vmg);
+	return vma_merge_new_range(vmg);
 }
 
 /*
@@ -162,10 +162,14 @@  static struct vm_area_struct *try_merge_new_vma(struct mm_struct *mm,
 	merged = merge_new(vmg);
 	if (merged) {
 		*was_merged = true;
+		ASSERT_EQ(vmg->state, VMA_MERGE_SUCCESS);
 		return merged;
 	}
 
 	*was_merged = false;
+
+	ASSERT_EQ(vmg->state, VMA_MERGE_NOMERGE);
+
 	return alloc_and_link_vma(mm, start, end, pgoff, flags);
 }
 
@@ -595,6 +599,7 @@  static bool test_vma_merge_special_flags(void)
 		vmg.flags = flags | special_flag;
 		vma = merge_new(&vmg);
 		ASSERT_EQ(vma, NULL);
+		ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
 	}
 
 	/* 2. Modify VMA with special flag that would otherwise merge. */
@@ -616,6 +621,7 @@  static bool test_vma_merge_special_flags(void)
 		vmg.flags = flags | special_flag;
 		vma = merge_existing(&vmg);
 		ASSERT_EQ(vma, NULL);
+		ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
 	}
 
 	cleanup_mm(&mm, &vmi);
@@ -708,6 +714,7 @@  static bool test_vma_merge_with_close(void)
 
 	/* The next VMA having a close() operator should cause the merge to fail.*/
 	ASSERT_EQ(merge_new(&vmg), NULL);
+	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
 
 	/* Now create the VMA so we can merge via modified flags */
 	vmg_set_range(&vmg, 0x1000, 0x2000, 1, flags);
@@ -719,6 +726,7 @@  static bool test_vma_merge_with_close(void)
 	 * also fail.
 	 */
 	ASSERT_EQ(merge_existing(&vmg), NULL);
+	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
 
 	/* SCENARIO B
 	 *
@@ -744,6 +752,7 @@  static bool test_vma_merge_with_close(void)
 	vmg.vma = vma;
 	/* Make sure merge does not occur. */
 	ASSERT_EQ(merge_existing(&vmg), NULL);
+	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
 
 	cleanup_mm(&mm, &vmi);
 	return true;
@@ -792,6 +801,7 @@  static bool test_vma_merge_new_with_close(void)
 	vmg_set_range(&vmg, 0x2000, 0x5000, 2, flags);
 	vma = merge_new(&vmg);
 	ASSERT_NE(vma, NULL);
+	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
 	ASSERT_EQ(vma->vm_start, 0);
 	ASSERT_EQ(vma->vm_end, 0x5000);
 	ASSERT_EQ(vma->vm_pgoff, 0);
@@ -831,6 +841,7 @@  static bool test_merge_existing(void)
 	vmg.prev = vma;
 	vma->anon_vma = &dummy_anon_vma;
 	ASSERT_EQ(merge_existing(&vmg), vma_next);
+	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
 	ASSERT_EQ(vma_next->vm_start, 0x3000);
 	ASSERT_EQ(vma_next->vm_end, 0x9000);
 	ASSERT_EQ(vma_next->vm_pgoff, 3);
@@ -861,6 +872,7 @@  static bool test_merge_existing(void)
 	vmg.vma = vma;
 	vma->anon_vma = &dummy_anon_vma;
 	ASSERT_EQ(merge_existing(&vmg), vma_next);
+	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
 	ASSERT_EQ(vma_next->vm_start, 0x2000);
 	ASSERT_EQ(vma_next->vm_end, 0x9000);
 	ASSERT_EQ(vma_next->vm_pgoff, 2);
@@ -889,6 +901,7 @@  static bool test_merge_existing(void)
 	vma->anon_vma = &dummy_anon_vma;
 
 	ASSERT_EQ(merge_existing(&vmg), vma_prev);
+	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
 	ASSERT_EQ(vma_prev->vm_start, 0);
 	ASSERT_EQ(vma_prev->vm_end, 0x6000);
 	ASSERT_EQ(vma_prev->vm_pgoff, 0);
@@ -920,6 +933,7 @@  static bool test_merge_existing(void)
 	vmg.vma = vma;
 	vma->anon_vma = &dummy_anon_vma;
 	ASSERT_EQ(merge_existing(&vmg), vma_prev);
+	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
 	ASSERT_EQ(vma_prev->vm_start, 0);
 	ASSERT_EQ(vma_prev->vm_end, 0x7000);
 	ASSERT_EQ(vma_prev->vm_pgoff, 0);
@@ -948,6 +962,7 @@  static bool test_merge_existing(void)
 	vmg.vma = vma;
 	vma->anon_vma = &dummy_anon_vma;
 	ASSERT_EQ(merge_existing(&vmg), vma_prev);
+	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
 	ASSERT_EQ(vma_prev->vm_start, 0);
 	ASSERT_EQ(vma_prev->vm_end, 0x9000);
 	ASSERT_EQ(vma_prev->vm_pgoff, 0);
@@ -981,31 +996,37 @@  static bool test_merge_existing(void)
 	vmg.prev = vma;
 	vmg.vma = vma;
 	ASSERT_EQ(merge_existing(&vmg), NULL);
+	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
 
 	vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags);
 	vmg.prev = vma;
 	vmg.vma = vma;
 	ASSERT_EQ(merge_existing(&vmg), NULL);
+	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
 
 	vmg_set_range(&vmg, 0x6000, 0x7000, 6, flags);
 	vmg.prev = vma;
 	vmg.vma = vma;
 	ASSERT_EQ(merge_existing(&vmg), NULL);
+	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
 
 	vmg_set_range(&vmg, 0x4000, 0x7000, 4, flags);
 	vmg.prev = vma;
 	vmg.vma = vma;
 	ASSERT_EQ(merge_existing(&vmg), NULL);
+	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
 
 	vmg_set_range(&vmg, 0x4000, 0x6000, 4, flags);
 	vmg.prev = vma;
 	vmg.vma = vma;
 	ASSERT_EQ(merge_existing(&vmg), NULL);
+	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
 
 	vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags);
 	vmg.prev = vma;
 	vmg.vma = vma;
 	ASSERT_EQ(merge_existing(&vmg), NULL);
+	ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
 
 	ASSERT_EQ(cleanup_mm(&mm, &vmi), 3);
 
@@ -1071,6 +1092,7 @@  static bool test_anon_vma_non_mergeable(void)
 	vmg.vma = vma;
 
 	ASSERT_EQ(merge_existing(&vmg), vma_prev);
+	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
 	ASSERT_EQ(vma_prev->vm_start, 0);
 	ASSERT_EQ(vma_prev->vm_end, 0x7000);
 	ASSERT_EQ(vma_prev->vm_pgoff, 0);
@@ -1106,6 +1128,7 @@  static bool test_anon_vma_non_mergeable(void)
 	vmg.prev = vma_prev;
 
 	ASSERT_EQ(merge_new(&vmg), vma_prev);
+	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
 	ASSERT_EQ(vma_prev->vm_start, 0);
 	ASSERT_EQ(vma_prev->vm_end, 0x7000);
 	ASSERT_EQ(vma_prev->vm_pgoff, 0);
@@ -1181,6 +1204,7 @@  static bool test_dup_anon_vma(void)
 	vmg.vma = vma;
 
 	ASSERT_EQ(merge_existing(&vmg), vma_prev);
+	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
 
 	ASSERT_EQ(vma_prev->vm_start, 0);
 	ASSERT_EQ(vma_prev->vm_end, 0x8000);
@@ -1209,6 +1233,7 @@  static bool test_dup_anon_vma(void)
 	vmg.vma = vma;
 
 	ASSERT_EQ(merge_existing(&vmg), vma_prev);
+	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
 
 	ASSERT_EQ(vma_prev->vm_start, 0);
 	ASSERT_EQ(vma_prev->vm_end, 0x8000);
@@ -1236,6 +1261,7 @@  static bool test_dup_anon_vma(void)
 	vmg.vma = vma;
 
 	ASSERT_EQ(merge_existing(&vmg), vma_prev);
+	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
 
 	ASSERT_EQ(vma_prev->vm_start, 0);
 	ASSERT_EQ(vma_prev->vm_end, 0x5000);
@@ -1263,6 +1289,7 @@  static bool test_dup_anon_vma(void)
 	vmg.vma = vma;
 
 	ASSERT_EQ(merge_existing(&vmg), vma_next);
+	ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
 
 	ASSERT_EQ(vma_next->vm_start, 0x3000);
 	ASSERT_EQ(vma_next->vm_end, 0x8000);
@@ -1303,6 +1330,7 @@  static bool test_vmi_prealloc_fail(void)
 
 	/* This will cause the merge to fail. */
 	ASSERT_EQ(merge_existing(&vmg), NULL);
+	ASSERT_EQ(vmg.state, VMA_MERGE_ERROR_NOMEM);
 	/* We will already have assigned the anon_vma. */
 	ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma);
 	/* And it was both cloned and unlinked. */
@@ -1327,6 +1355,7 @@  static bool test_vmi_prealloc_fail(void)
 
 	fail_prealloc = true;
 	ASSERT_EQ(expand_existing(&vmg), -ENOMEM);
+	ASSERT_EQ(vmg.state, VMA_MERGE_ERROR_NOMEM);
 
 	ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma);
 	ASSERT_TRUE(dummy_anon_vma.was_cloned);
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index a3c262c6eb73..c5b9da034511 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -740,6 +740,12 @@  static inline void vma_iter_free(struct vma_iterator *vmi)
 	mas_destroy(&vmi->mas);
 }
 
+static inline
+struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi)
+{
+	return mas_next_range(&vmi->mas, ULONG_MAX);
+}
+
 static inline void vm_acct_memory(long pages)
 {
 }