diff mbox series

[v2,8/8] mm: defer second attempt at merge on mmap()

Message ID e063d9d31a4dbd83c2d77a3f1e0114e6a0788724.1729715266.git.lorenzo.stoakes@oracle.com (mailing list archive)
State New
Headers show
Series fix error handling in mmap_region() and refactor | expand

Commit Message

Lorenzo Stoakes Oct. 23, 2024, 8:38 p.m. UTC
Rather than trying to merge again when ostensibly allocating a new VMA,
instead defer until the VMA is added and attempt to merge the existing
range.

This way we have no complicated unwinding logic midway through the process
of mapping the VMA.

In addition, this removes the limitation that the VMA could not be the
first in the virtual memory address space, a restriction that was
previously implicit.

It also performs this merge after the final flag adjustments are performed,
something that was not done previously and thus might have prevented
possibly valid merges in the past.

In theory, for this very same reason, we should unconditionally attempt a
merge here; however, this is likely to have a performance impact, so it is
better to avoid this given how unlikely a merge is to succeed.

The vmg state will already have been reset by the first attempt at a merge
so we only need to reset the iterator, set the vma and flags and try again.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 mm/vma.c | 75 ++++++++++++++++++++------------------------------------
 1 file changed, 26 insertions(+), 49 deletions(-)

Comments

Vlastimil Babka Oct. 25, 2024, 9:43 a.m. UTC | #1
On 10/23/24 22:38, Lorenzo Stoakes wrote:
> Rather than trying to merge again when ostensibly allocating a new VMA,
> instead defer until the VMA is added and attempt to merge the existing
> range.
> 
> This way we have no complicated unwinding logic midway through the process
> of mapping the VMA.
> 
> In addition this removes limitations on the VMA not being able to be the
> first in the virtual memory address space which was previously implicitly
> required.
> 
> It also performs this merge after the final flag adjustments are performed,
> something that was not done previously and thus might have prevented
> possibly valid merges in the past.
> 
> In theory, for this very same reason, we should unconditionally attempt
> merge here, however this is likely to have a performance impact so it is
> better to avoid this given the unlikely outcome of a merge.

Maybe just expand the cases where we set map->retry_merge, i.e. in case the
final flag adjustments do anything?

> The vmg state will already have been reset by the first attempt at a merge
> so we only need to reset the iterator, set the vma and flags and try again.
> 
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

Reviewed-by: Vlastimil Babka <vbabka@suse.cz>

A nit:

> ---
>  mm/vma.c | 75 ++++++++++++++++++++------------------------------------
>  1 file changed, 26 insertions(+), 49 deletions(-)
> 
> diff --git a/mm/vma.c b/mm/vma.c
> index 065f5e1f65be..c493ecebf394 100644
> --- a/mm/vma.c
> +++ b/mm/vma.c
> @@ -19,6 +19,7 @@ struct mmap_state {
>  	struct file *file;
>  
>  	unsigned long charged;
> +	bool retry_merge;
>  
>  	struct vm_area_struct *prev;
>  	struct vm_area_struct *next;
> @@ -2280,9 +2281,9 @@ static int __mmap_prepare(struct mmap_state *map, struct vma_merge_struct *vmg,
>  	return 0;
>  }
>  
> +
>  static int __mmap_new_file_vma(struct mmap_state *map,
> -			       struct vma_merge_struct *vmg,
> -			       struct vm_area_struct **vmap, bool *mergedp)
> +			       struct vm_area_struct **vmap)
>  {
>  	struct vma_iterator *vmi = map->vmi;
>  	struct vm_area_struct *vma = *vmap;
> @@ -2311,37 +2312,11 @@ static int __mmap_new_file_vma(struct mmap_state *map,
>  			!(map->flags & VM_MAYWRITE) &&
>  			(vma->vm_flags & VM_MAYWRITE));
>  
> -	vma_iter_config(vmi, map->addr, map->end);
> -	/*
> -	 * If flags changed after mmap_file(), we should try merge
> -	 * vma again as we may succeed this time.
> -	 */
> -	if (unlikely(map->flags != vma->vm_flags && map->prev)) {
> -		struct vm_area_struct *merge;
> -
> -		vmg->flags = vma->vm_flags;
> -		/* If this fails, state is reset ready for a reattempt. */
> -		merge = vma_merge_new_range(vmg);
> -
> -		if (merge) {
> -			/*
> -			 * ->mmap() can change vma->vm_file and fput
> -			 * the original file. So fput the vma->vm_file
> -			 * here or we would add an extra fput for file
> -			 * and cause general protection fault
> -			 * ultimately.
> -			 */
> -			fput(vma->vm_file);
> -			vm_area_free(vma);
> -			vma = merge;
> -			*mergedp = true;
> -		} else {
> -			vma_iter_config(vmi, map->addr, map->end);
> -		}
> -	}
> +	/* If the flags change (and are mergeable), let's retry later. */
> +	map->retry_merge = vma->vm_flags != map->flags && !(vma->vm_flags & VM_SPECIAL);
>  
> +	vma_iter_config(vmi, map->addr, map->end);

Do we need this still? __mmap_new_vma() did that and nothing changed since
in the non-error case, AFAICS?

>  	map->flags = vma->vm_flags;
> -	*vmap = vma;
>  	return 0;
>  }
>
Lorenzo Stoakes Oct. 25, 2024, 10:20 a.m. UTC | #2
On Fri, Oct 25, 2024 at 11:43:20AM +0200, Vlastimil Babka wrote:
> On 10/23/24 22:38, Lorenzo Stoakes wrote:
> > Rather than trying to merge again when ostensibly allocating a new VMA,
> > instead defer until the VMA is added and attempt to merge the existing
> > range.
> >
> > This way we have no complicated unwinding logic midway through the process
> > of mapping the VMA.
> >
> > In addition this removes limitations on the VMA not being able to be the
> > first in the virtual memory address space which was previously implicitly
> > required.
> >
> > It also performs this merge after the final flag adjustments are performed,
> > something that was not done previously and thus might have prevented
> > possibly valid merges in the past.
> >
> > In theory, for this very same reason, we should unconditionally attempt
> > merge here, however this is likely to have a performance impact so it is
> > better to avoid this given the unlikely outcome of a merge.
>
> Maybe just expand the cases where we set map->retry_merge, i.e. in case the
> final flag adjustments do anything?
>
> > The vmg state will already have been reset by the first attempt at a merge
> > so we only need to reset the iterator, set the vma and flags and try again.
> >
> > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>
> Reviewed-by: Vlastimil Babka <vbabka@suse.cz>

Thanks (and for 7/8!)

>
> A nit:
>
> > ---
> >  mm/vma.c | 75 ++++++++++++++++++++------------------------------------
> >  1 file changed, 26 insertions(+), 49 deletions(-)
> >
> > diff --git a/mm/vma.c b/mm/vma.c
> > index 065f5e1f65be..c493ecebf394 100644
> > --- a/mm/vma.c
> > +++ b/mm/vma.c
> > @@ -19,6 +19,7 @@ struct mmap_state {
> >  	struct file *file;
> >
> >  	unsigned long charged;
> > +	bool retry_merge;
> >
> >  	struct vm_area_struct *prev;
> >  	struct vm_area_struct *next;
> > @@ -2280,9 +2281,9 @@ static int __mmap_prepare(struct mmap_state *map, struct vma_merge_struct *vmg,
> >  	return 0;
> >  }
> >
> > +
> >  static int __mmap_new_file_vma(struct mmap_state *map,
> > -			       struct vma_merge_struct *vmg,
> > -			       struct vm_area_struct **vmap, bool *mergedp)
> > +			       struct vm_area_struct **vmap)
> >  {
> >  	struct vma_iterator *vmi = map->vmi;
> >  	struct vm_area_struct *vma = *vmap;
> > @@ -2311,37 +2312,11 @@ static int __mmap_new_file_vma(struct mmap_state *map,
> >  			!(map->flags & VM_MAYWRITE) &&
> >  			(vma->vm_flags & VM_MAYWRITE));
> >
> > -	vma_iter_config(vmi, map->addr, map->end);
> > -	/*
> > -	 * If flags changed after mmap_file(), we should try merge
> > -	 * vma again as we may succeed this time.
> > -	 */
> > -	if (unlikely(map->flags != vma->vm_flags && map->prev)) {
> > -		struct vm_area_struct *merge;
> > -
> > -		vmg->flags = vma->vm_flags;
> > -		/* If this fails, state is reset ready for a reattempt. */
> > -		merge = vma_merge_new_range(vmg);
> > -
> > -		if (merge) {
> > -			/*
> > -			 * ->mmap() can change vma->vm_file and fput
> > -			 * the original file. So fput the vma->vm_file
> > -			 * here or we would add an extra fput for file
> > -			 * and cause general protection fault
> > -			 * ultimately.
> > -			 */
> > -			fput(vma->vm_file);
> > -			vm_area_free(vma);
> > -			vma = merge;
> > -			*mergedp = true;
> > -		} else {
> > -			vma_iter_config(vmi, map->addr, map->end);
> > -		}
> > -	}
> > +	/* If the flags change (and are mergeable), let's retry later. */
> > +	map->retry_merge = vma->vm_flags != map->flags && !(vma->vm_flags & VM_SPECIAL);
> >
> > +	vma_iter_config(vmi, map->addr, map->end);
>
> Do we need this still? __mmap_new_vma() did that and nothing changed since
> in the non-error case, AFAICS?

You're right, this change really highlights that, will remove thanks!

>
> >  	map->flags = vma->vm_flags;
> > -	*vmap = vma;
> >  	return 0;
> >  }
> >
diff mbox series

Patch

diff --git a/mm/vma.c b/mm/vma.c
index 065f5e1f65be..c493ecebf394 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -19,6 +19,7 @@  struct mmap_state {
 	struct file *file;
 
 	unsigned long charged;
+	bool retry_merge;
 
 	struct vm_area_struct *prev;
 	struct vm_area_struct *next;
@@ -2280,9 +2281,9 @@  static int __mmap_prepare(struct mmap_state *map, struct vma_merge_struct *vmg,
 	return 0;
 }
 
+
 static int __mmap_new_file_vma(struct mmap_state *map,
-			       struct vma_merge_struct *vmg,
-			       struct vm_area_struct **vmap, bool *mergedp)
+			       struct vm_area_struct **vmap)
 {
 	struct vma_iterator *vmi = map->vmi;
 	struct vm_area_struct *vma = *vmap;
@@ -2311,37 +2312,11 @@  static int __mmap_new_file_vma(struct mmap_state *map,
 			!(map->flags & VM_MAYWRITE) &&
 			(vma->vm_flags & VM_MAYWRITE));
 
-	vma_iter_config(vmi, map->addr, map->end);
-	/*
-	 * If flags changed after mmap_file(), we should try merge
-	 * vma again as we may succeed this time.
-	 */
-	if (unlikely(map->flags != vma->vm_flags && map->prev)) {
-		struct vm_area_struct *merge;
-
-		vmg->flags = vma->vm_flags;
-		/* If this fails, state is reset ready for a reattempt. */
-		merge = vma_merge_new_range(vmg);
-
-		if (merge) {
-			/*
-			 * ->mmap() can change vma->vm_file and fput
-			 * the original file. So fput the vma->vm_file
-			 * here or we would add an extra fput for file
-			 * and cause general protection fault
-			 * ultimately.
-			 */
-			fput(vma->vm_file);
-			vm_area_free(vma);
-			vma = merge;
-			*mergedp = true;
-		} else {
-			vma_iter_config(vmi, map->addr, map->end);
-		}
-	}
+	/* If the flags change (and are mergeable), let's retry later. */
+	map->retry_merge = vma->vm_flags != map->flags && !(vma->vm_flags & VM_SPECIAL);
 
+	vma_iter_config(vmi, map->addr, map->end);
 	map->flags = vma->vm_flags;
-	*vmap = vma;
 	return 0;
 }
 
@@ -2349,22 +2324,15 @@  static int __mmap_new_file_vma(struct mmap_state *map,
  * __mmap_new_vma() - Allocate a new VMA for the region, as merging was not
  * possible.
  *
- * An exception to this is if the mapping is file-backed, and the underlying
- * driver changes the VMA flags, permitting a subsequent merge of the VMA, in
- * which case the returned VMA is one that was merged on a second attempt.
- *
  * @map:  Mapping state.
- * @vmg:  VMA merge state.
  * @vmap: Output pointer for the new VMA.
  *
  * Returns: Zero on success, or an error.
  */
-static int __mmap_new_vma(struct mmap_state *map, struct vma_merge_struct *vmg,
-			  struct vm_area_struct **vmap)
+static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
 {
 	struct vma_iterator *vmi = map->vmi;
 	int error = 0;
-	bool merged = false;
 	struct vm_area_struct *vma;
 
 	/*
@@ -2387,7 +2355,7 @@  static int __mmap_new_vma(struct mmap_state *map, struct vma_merge_struct *vmg,
 	}
 
 	if (map->file)
-		error = __mmap_new_file_vma(map, vmg, &vma, &merged);
+		error = __mmap_new_file_vma(map, &vma);
 	else if (map->flags & VM_SHARED)
 		error = shmem_zero_setup(vma);
 	else
@@ -2396,9 +2364,6 @@  static int __mmap_new_vma(struct mmap_state *map, struct vma_merge_struct *vmg,
 	if (error)
 		goto free_iter_vma;
 
-	if (merged)
-		goto file_expanded;
-
 #ifdef CONFIG_SPARC64
 	/* TODO: Fix SPARC ADI! */
 	WARN_ON_ONCE(!arch_validate_flags(map->flags));
@@ -2415,8 +2380,6 @@  static int __mmap_new_vma(struct mmap_state *map, struct vma_merge_struct *vmg,
 	 * call covers the non-merge case.
 	 */
 	khugepaged_enter_vma(vma, map->flags);
-
-file_expanded:
 	ksm_add_vma(vma);
 	*vmap = vma;
 	return 0;
@@ -2430,13 +2393,17 @@  static int __mmap_new_vma(struct mmap_state *map, struct vma_merge_struct *vmg,
 
 /*
  * __mmap_complete() - Unmap any VMAs we overlap, account memory mapping
- *                     statistics, handle locking and finalise the VMA.
+ *                     statistics, handle locking and finalise the VMA,
+ *                     attempt a final merge if required.
  *
  * @map: Mapping state.
  * @vma: Merged or newly allocated VMA for the mmap()'d region.
+ * @vmg: VMA merge state.
  */
-static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
+static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma,
+			    struct vma_merge_struct *vmg)
 {
+
 	struct mm_struct *mm = map->mm;
 	unsigned long vm_flags = vma->vm_flags;
 
@@ -2468,6 +2435,16 @@  static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
 	vm_flags_set(vma, VM_SOFTDIRTY);
 
 	vma_set_page_prot(vma);
+
+	/* OK VMA flags changed in __mmap_new_vma(), try a merge again. */
+	if (map->retry_merge) {
+		vma_iter_config(map->vmi, map->addr, map->end);
+		vmg->vma = vma;
+		vmg->flags = map->flags;
+		vmg->next = NULL; /* Must be set by merge logic. */
+
+		vma_merge_existing_range(vmg);
+	}
 }
 
 unsigned long __mmap_region(struct file *file, unsigned long addr,
@@ -2490,12 +2467,12 @@  unsigned long __mmap_region(struct file *file, unsigned long addr,
 	vma = vma_merge_new_range(&vmg);
 	if (!vma) {
 		/* ...but if we can't, allocate a new VMA. */
-		error = __mmap_new_vma(&map, &vmg, &vma);
+		error = __mmap_new_vma(&map, &vma);
 		if (error)
 			goto unacct_error;
 	}
 
-	__mmap_complete(&map, vma);
+	__mmap_complete(&map, vma, &vmg);
 
 	return addr;