diff mbox series

[3/3] mm/mmap: Regression fix for unmapped_area{_topdown}

Message ID 20230414145728.4067069-3-Liam.Howlett@oracle.com (mailing list archive)
State New
Headers show
Series [1/3] maple_tree: Make maple state reusable after mas_empty_area_rev() | expand

Commit Message

Liam R. Howlett April 14, 2023, 2:57 p.m. UTC
The maple tree limits the gap returned to a window that specifically
fits what was asked.  This may not be optimal in the case of switching
search directions or a gap that does not satisfy the requested space for
other reasons.  Fix the search by retrying the operation and limiting
the search window in the rare occasion that a conflict occurs.

Reported-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Fixes: 3499a13168da ("mm/mmap: use maple tree for unmapped_area{_topdown}")
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
---
 mm/mmap.c | 50 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 5 deletions(-)

Comments

Edgecombe, Rick P April 14, 2023, 4:27 p.m. UTC | #1
On Fri, 2023-04-14 at 10:57 -0400, Liam R. Howlett wrote:<br>
> +       tmp = mas_next(&mas, ULONG_MAX);
> +       if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) {

Why also check VM_GROWSDOWN here (and VM_GROWSUP below)?
vm_start/end_gap() already have checks inside.

> +               if (vm_start_gap(tmp) < gap + length - 1) {
> +                       low_limit = tmp->vm_end;
> +                       mas_reset(&mas);
> +                       goto retry;
> +               }
> +       } else {
> +               tmp = mas_prev(&mas, 0);
> +               if (tmp && (tmp->vm_flags & VM_GROWSUP) &&
> +                   vm_end_gap(tmp) > gap) {
> +                       low_limit = vm_end_gap(tmp); 
> +                       mas_reset(&mas);
> +                       goto retry; 
> +               }
> +       } 
> +

Could it be like this?

tmp = mas_next(&mas, ULONG_MAX);
if (tmp && vm_start_gap(tmp) < gap + length - 1) {
		low_limit = tmp->vm_end;
		mas_reset(&mas);
		goto retry;
	}
} else {
	tmp = mas_prev(&mas, 0);
	if (tmp && vm_end_gap(tmp) > gap) {
		low_limit = vm_end_gap(tmp);
		mas_reset(&mas);
		goto retry;
	}
}
Liam R. Howlett April 14, 2023, 5:26 p.m. UTC | #2
* Edgecombe, Rick P <rick.p.edgecombe@intel.com> [230414 12:27]:
> On Fri, 2023-04-14 at 10:57 -0400, Liam R. Howlett wrote:<br>
> > +       tmp = mas_next(&mas, ULONG_MAX);
> > +       if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) {
> 
> Why also check VM_GROWSDOWN here (and VM_GROWSUP below)?
> vm_start/end_gap() already have checks inside.

An artifact of a plan that was later abandoned.

> 
> > +               if (vm_start_gap(tmp) < gap + length - 1) {
> > +                       low_limit = tmp->vm_end;
> > +                       mas_reset(&mas);
> > +                       goto retry;
> > +               }
> > +       } else {
> > +               tmp = mas_prev(&mas, 0);
> > +               if (tmp && (tmp->vm_flags & VM_GROWSUP) &&
> > +                   vm_end_gap(tmp) > gap) {
> > +                       low_limit = vm_end_gap(tmp); 
> > +                       mas_reset(&mas);
> > +                       goto retry; 
> > +               }
> > +       } 
> > +
> 
> Could it be like this?

Yes, I'll make this change.  Thanks for the suggestion.

> 
> tmp = mas_next(&mas, ULONG_MAX);
> if (tmp && vm_start_gap(tmp) < gap + length - 1) {
> 		low_limit = tmp->vm_end;
> 		mas_reset(&mas);
> 		goto retry;
> 	}
> } else {
> 	tmp = mas_prev(&mas, 0);
> 	if (tmp && vm_end_gap(tmp) > gap) {
> 		low_limit = vm_end_gap(tmp);
> 		mas_reset(&mas);
> 		goto retry;
> 	}
> }
>
Liam R. Howlett April 14, 2023, 5:29 p.m. UTC | #3
* Liam R. Howlett <Liam.Howlett@Oracle.com> [230414 13:26]:
> * Edgecombe, Rick P <rick.p.edgecombe@intel.com> [230414 12:27]:
> > On Fri, 2023-04-14 at 10:57 -0400, Liam R. Howlett wrote:<br>
> > > +       tmp = mas_next(&mas, ULONG_MAX);
> > > +       if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) {
> > 
> > Why also check VM_GROWSDOWN here (and VM_GROWSUP below)?
> > vm_start/end_gap() already have checks inside.
> 
> An artifact of a plan that was later abandoned.
> 
> > 
> > > +               if (vm_start_gap(tmp) < gap + length - 1) {
> > > +                       low_limit = tmp->vm_end;
> > > +                       mas_reset(&mas);
> > > +                       goto retry;
> > > +               }
> > > +       } else {
> > > +               tmp = mas_prev(&mas, 0);
> > > +               if (tmp && (tmp->vm_flags & VM_GROWSUP) &&
> > > +                   vm_end_gap(tmp) > gap) {
> > > +                       low_limit = vm_end_gap(tmp); 
> > > +                       mas_reset(&mas);
> > > +                       goto retry; 
> > > +               }
> > > +       } 
> > > +
> > 
> > Could it be like this?
> 
> Yes, I'll make this change.  Thanks for the suggestion.


Wait, I like how it is.

In my version, if there is a stack that is VM_GROWSDOWN there, but does
not intercept the gap, then I won't check the prev.. in yours, we will
never avoid checking prev.

> 
> > 
> > tmp = mas_next(&mas, ULONG_MAX);
> > if (tmp && vm_start_gap(tmp) < gap + length - 1) {
> > 		low_limit = tmp->vm_end;
> > 		mas_reset(&mas);
> > 		goto retry;
> > 	}
> > } else {
> > 	tmp = mas_prev(&mas, 0);
> > 	if (tmp && vm_end_gap(tmp) > gap) {
> > 		low_limit = vm_end_gap(tmp);
> > 		mas_reset(&mas);
> > 		goto retry;
> > 	}
> > }
> >
Edgecombe, Rick P April 14, 2023, 5:53 p.m. UTC | #4
On Fri, 2023-04-14 at 13:29 -0400, Liam R. Howlett wrote:
> * Liam R. Howlett <Liam.Howlett@Oracle.com> [230414 13:26]:
> > * Edgecombe, Rick P <rick.p.edgecombe@intel.com> [230414 12:27]:
> > > On Fri, 2023-04-14 at 10:57 -0400, Liam R. Howlett wrote:<br>
> > > > +       tmp = mas_next(&mas, ULONG_MAX);
> > > > +       if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) {
> > > 
> > > Why also check VM_GROWSDOWN here (and VM_GROWSUP below)?
> > > vm_start/end_gap() already have checks inside.
> > 
> > An artifact of a plan that was later abandoned.
> > 
> > > 
> > > > +               if (vm_start_gap(tmp) < gap + length - 1) {
> > > > +                       low_limit = tmp->vm_end;
> > > > +                       mas_reset(&mas);
> > > > +                       goto retry;
> > > > +               }
> > > > +       } else {
> > > > +               tmp = mas_prev(&mas, 0);
> > > > +               if (tmp && (tmp->vm_flags & VM_GROWSUP) &&
> > > > +                   vm_end_gap(tmp) > gap) {
> > > > +                       low_limit = vm_end_gap(tmp); 
> > > > +                       mas_reset(&mas);
> > > > +                       goto retry; 
> > > > +               }
> > > > +       } 
> > > > +
> > > 
> > > Could it be like this?
> > 
> > Yes, I'll make this change.  Thanks for the suggestion.
> 
> 
> Wait, I like how it is.
> 
> In my version, if there is a stack that is VM_GROWSDOWN there, but
> does
> not intercept the gap, then I won't check the prev.. in yours, we
> will
> never avoid checking prev.

Hmm, I see. I guess I'm thinking ahead a bit to adding the shadow stack
guard gap, but I can always add to these vm_flags checks.

But are you sure this optimization is even possible? The old
vma_compute_gap() had this comment:
/*
 * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
 * allow two stack_guard_gaps between them here, and when choosing
 * an unmapped area; whereas when expanding we only require one.
 * That's a little inconsistent, but keeps the code here simpler.
 */

Assuming this is a real scenario, if you have VM_GROWSDOWN above and
VM_GROWSUP below, don't you need to check the gaps for above and below?
Again thinking about adding shadow stack guard pages, something like
that could be a more common scenario. Not that you need to fix my out
of tree issues, but I would probably need to adjust it to check both
directions.

I guess there is no way to embed this inside maple tree search so we
don't need to retry? (sorry if this is a dumb question, it's an opaque
box to me).
Liam R. Howlett April 14, 2023, 6:07 p.m. UTC | #5
* Edgecombe, Rick P <rick.p.edgecombe@intel.com> [230414 13:53]:
> On Fri, 2023-04-14 at 13:29 -0400, Liam R. Howlett wrote:
> > * Liam R. Howlett <Liam.Howlett@Oracle.com> [230414 13:26]:
> > > * Edgecombe, Rick P <rick.p.edgecombe@intel.com> [230414 12:27]:
> > > > On Fri, 2023-04-14 at 10:57 -0400, Liam R. Howlett wrote:<br>
> > > > > +       tmp = mas_next(&mas, ULONG_MAX);
> > > > > +       if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) {
> > > > 
> > > > Why also check VM_GROWSDOWN here (and VM_GROWSUP below)?
> > > > vm_start/end_gap() already have checks inside.
> > > 
> > > An artifact of a plan that was later abandoned.
> > > 
> > > > 
> > > > > +               if (vm_start_gap(tmp) < gap + length - 1) {
> > > > > +                       low_limit = tmp->vm_end;
> > > > > +                       mas_reset(&mas);
> > > > > +                       goto retry;
> > > > > +               }
> > > > > +       } else {
> > > > > +               tmp = mas_prev(&mas, 0);
> > > > > +               if (tmp && (tmp->vm_flags & VM_GROWSUP) &&
> > > > > +                   vm_end_gap(tmp) > gap) {
> > > > > +                       low_limit = vm_end_gap(tmp); 
> > > > > +                       mas_reset(&mas);
> > > > > +                       goto retry; 
> > > > > +               }
> > > > > +       } 
> > > > > +
> > > > 
> > > > Could it be like this?
> > > 
> > > Yes, I'll make this change.  Thanks for the suggestion.
> > 
> > 
> > Wait, I like how it is.
> > 
> > In my version, if there is a stack that is VM_GROWSDOWN there, but
> > does
> > not intercept the gap, then I won't check the prev.. in yours, we
> > will
> > never avoid checking prev.
> 
> Hmm, I see. I guess I'm thinking ahead a bit to adding the shadow stack
> guard gap, but I can always add to these vm_flags checks.
> 
> But are you sure this optimization is even possible? The old
> vma_compute_gap() had this comment:
> /*
>  * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
>  * allow two stack_guard_gaps between them here, and when choosing
>  * an unmapped area; whereas when expanding we only require one.
>  * That's a little inconsistent, but keeps the code here simpler.
>  */

I didn't think this was possible.  ia64 (orphaned in 96ec72a3425d) did
this.

> 
> Assuming this is a real scenario, if you have VM_GROWSDOWN above and
> VM_GROWSUP below, don't you need to check the gaps for above and below?
> Again thinking about adding shadow stack guard pages, something like
> that could be a more common scenario. Not that you need to fix my out
> of tree issues, but I would probably need to adjust it to check both
> directions.
> 
> I guess there is no way to embed this inside maple tree search so we
> don't need to retry? (sorry if this is a dumb question, it's an opaque
> box to me).

Absolutely, and I'm working on this as well, but right now I'm trying
to fix my regression for this and past releases.  Handling this in the
maple tree is more involved and so there's more risk.
Edgecombe, Rick P April 14, 2023, 6:21 p.m. UTC | #6
On Fri, 2023-04-14 at 14:07 -0400, Liam R. Howlett wrote:
> * Edgecombe, Rick P <rick.p.edgecombe@intel.com> [230414 13:53]:
> > On Fri, 2023-04-14 at 13:29 -0400, Liam R. Howlett wrote:
> > > * Liam R. Howlett <Liam.Howlett@Oracle.com> [230414 13:26]:
> > > > * Edgecombe, Rick P <rick.p.edgecombe@intel.com> [230414
> > > > 12:27]:
> > > > > On Fri, 2023-04-14 at 10:57 -0400, Liam R. Howlett wrote:<br>
> > > > > > +       tmp = mas_next(&mas, ULONG_MAX);
> > > > > > +       if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) {
> > > > > 
> > > > > Why also check VM_GROWSDOWN here (and VM_GROWSUP below)?
> > > > > vm_start/end_gap() already have checks inside.
> > > > 
> > > > An artifact of a plan that was later abandoned.
> > > > 
> > > > > 
> > > > > > +               if (vm_start_gap(tmp) < gap + length - 1) {
> > > > > > +                       low_limit = tmp->vm_end;
> > > > > > +                       mas_reset(&mas);
> > > > > > +                       goto retry;
> > > > > > +               }
> > > > > > +       } else {
> > > > > > +               tmp = mas_prev(&mas, 0);
> > > > > > +               if (tmp && (tmp->vm_flags & VM_GROWSUP) &&
> > > > > > +                   vm_end_gap(tmp) > gap) {
> > > > > > +                       low_limit = vm_end_gap(tmp); 
> > > > > > +                       mas_reset(&mas);
> > > > > > +                       goto retry; 
> > > > > > +               }
> > > > > > +       } 
> > > > > > +
> > > > > 
> > > > > Could it be like this?
> > > > 
> > > > Yes, I'll make this change.  Thanks for the suggestion.
> > > 
> > > 
> > > Wait, I like how it is.
> > > 
> > > In my version, if there is a stack that is VM_GROWSDOWN there,
> > > but
> > > does
> > > not intercept the gap, then I won't check the prev.. in yours, we
> > > will
> > > never avoid checking prev.
> > 
> > Hmm, I see. I guess I'm thinking ahead a bit to adding the shadow
> > stack
> > guard gap, but I can always add to these vm_flags checks.
> > 
> > But are you sure this optimization is even possible? The old
> > vma_compute_gap() had this comment:
> > /*
> >   * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
> >   * allow two stack_guard_gaps between them here, and when choosing
> >   * an unmapped area; whereas when expanding we only require one.
> >   * That's a little inconsistent, but keeps the code here simpler.
> >   */
> 
> I didn't think this was possible.  ia64 (orphaned in 96ec72a3425d)
> did
> this.

Ah, ok.

> 
> > 
> > Assuming this is a real scenario, if you have VM_GROWSDOWN above
> > and
> > VM_GROWSUP below, don't you need to check the gaps for above and
> > below?
> > Again thinking about adding shadow stack guard pages, something
> > like
> > that could be a more common scenario. Not that you need to fix my
> > out
> > of tree issues, but I would probably need to adjust it to check
> > both
> > directions.
> > 
> > I guess there is no way to embed this inside maple tree search so
> > we
> > don't need to retry? (sorry if this is a dumb question, it's an
> > opaque
> > box to me).
> 
> Absolutely, and I'm working on this as well, but right now I'm trying
> to fix my regression for this and past releases.  Handling this in
> the
> maple tree is more involved and so there's more risk.

Ok, thanks. It looks good to me in that respect.
diff mbox series

Patch

diff --git a/mm/mmap.c b/mm/mmap.c
index 055fbd5ed762..c3b269260138 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1548,7 +1548,8 @@  static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
  */
 static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 {
-	unsigned long length, gap;
+	unsigned long length, gap, low_limit;
+	struct vm_area_struct *tmp;
 
 	MA_STATE(mas, &current->mm->mm_mt, 0, 0);
 
@@ -1557,12 +1558,30 @@  static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 	if (length < info->length)
 		return -ENOMEM;
 
-	if (mas_empty_area(&mas, info->low_limit, info->high_limit - 1,
-				  length))
+	low_limit = info->low_limit;
+retry:
+	if (mas_empty_area(&mas, low_limit, info->high_limit - 1, length))
 		return -ENOMEM;
 
 	gap = mas.index;
 	gap += (info->align_offset - gap) & info->align_mask;
+	tmp = mas_next(&mas, ULONG_MAX);
+	if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) {
+		if (vm_start_gap(tmp) < gap + length - 1) {
+			low_limit = tmp->vm_end;
+			mas_reset(&mas);
+			goto retry;
+		}
+	} else {
+		tmp = mas_prev(&mas, 0);
+		if (tmp && (tmp->vm_flags & VM_GROWSUP) &&
+		    vm_end_gap(tmp) > gap) {
+			low_limit = vm_end_gap(tmp);
+			mas_reset(&mas);
+			goto retry;
+		}
+	}
+
 	return gap;
 }
 
@@ -1578,7 +1597,8 @@  static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
  */
 static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 {
-	unsigned long length, gap;
+	unsigned long length, gap, high_limit, gap_end;
+	struct vm_area_struct *tmp;
 
 	MA_STATE(mas, &current->mm->mm_mt, 0, 0);
 	/* Adjust search length to account for worst case alignment overhead */
@@ -1586,12 +1606,32 @@  static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 	if (length < info->length)
 		return -ENOMEM;
 
-	if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1,
+	high_limit = info->high_limit;
+retry:
+	if (mas_empty_area_rev(&mas, info->low_limit, high_limit - 1,
 				length))
 		return -ENOMEM;
 
 	gap = mas.last + 1 - info->length;
 	gap -= (gap - info->align_offset) & info->align_mask;
+	gap_end = mas.last;
+	tmp = mas_next(&mas, ULONG_MAX);
+	if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) {
+		if (vm_start_gap(tmp) <= gap_end) {
+			high_limit = vm_start_gap(tmp);
+			mas_reset(&mas);
+			goto retry;
+		}
+	} else {
+		tmp = mas_prev(&mas, 0);
+		if (tmp && (tmp->vm_flags & VM_GROWSUP) &&
+		    vm_end_gap(tmp) > gap) {
+			high_limit = tmp->vm_start;
+			mas_reset(&mas);
+			goto retry;
+		}
+	}
+
 	return gap;
 }