[v3,09/16] mm/mmap: Expand mmap_region() munmap call

Message ID: 20240704182718.2653918-10-Liam.Howlett@oracle.com
State: New
Series: Avoid MAP_FIXED gap exposure

Commit Message

Liam R. Howlett July 4, 2024, 6:27 p.m. UTC
From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>

Open code the do_vmi_align_munmap() call so that it can be broken up
later in the series.

Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
---
 mm/mmap.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

Comments

Lorenzo Stoakes July 5, 2024, 8:06 p.m. UTC | #1
On Thu, Jul 04, 2024 at 02:27:11PM GMT, Liam R. Howlett wrote:
> From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>
>
> Open code the do_vmi_align_munmap() call so that it can be broken up
> later in the series.
>
> Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
> ---
>  mm/mmap.c | 22 +++++++++++++++++++---
>  1 file changed, 19 insertions(+), 3 deletions(-)
>
> diff --git a/mm/mmap.c b/mm/mmap.c
> index e9858ca8bbd4..f5b33de4e717 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2915,6 +2915,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
>  	struct vm_area_struct *next, *prev, *merge;
>  	pgoff_t pglen = len >> PAGE_SHIFT;
>  	unsigned long charged = 0;
> +	struct vma_munmap_struct vms;
> +	struct ma_state mas_detach;
> +	struct maple_tree mt_detach;
>  	unsigned long end = addr + len;
>  	unsigned long merge_start = addr, merge_end = end;
>  	bool writable_file_mapping = false;
> @@ -2947,9 +2950,24 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
>  	/* Find the first overlapping VMA */
>  	vma = vma_find(&vmi, end);
>  	if (vma) {
> -		if (do_vmi_align_munmap(&vmi, vma, mm, addr, end, uf, false))
> +		mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
> +		mt_on_stack(mt_detach);
> +		mas_init(&mas_detach, &mt_detach, /* addr = */ 0);

I'm guessing this is exactly equivalent (or equivalent enough for our purposes
here) to the MA_STATE() call in do_vmi_align_munmap()?

Checking the two against each other it seems that it is indeed.
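
For reference, a rough side-by-side of the two initialisations (a sketch
from my reading of the maple tree API; not part of the patch):

	/* do_vmi_align_munmap() sets up the detach state with the macro: */
	MA_STATE(mas_detach, &mt_detach, 0, 0);

	/* The open-coded version above does the same in two steps: */
	struct ma_state mas_detach;
	mas_init(&mas_detach, &mt_detach, /* addr = */ 0);

Both leave mas_detach pointing at mt_detach at index 0, so the detached
VMAs get stored from the start of the tree either way.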

> +		init_vma_munmap(&vms, &vmi, vma, addr, end, uf, /* unlock = */ false);
> +		if (vms_gather_munmap_vmas(&vms, &mas_detach))
> +			return -ENOMEM;

In do_vmi_align_munmap() we also invoke validate_mm(); why aren't we doing that
here?

> +
> +		if (vma_iter_clear_gfp(&vmi, addr, end, GFP_KERNEL))
>  			return -ENOMEM;

Same here.

> +
> +		vms_complete_munmap_vmas(&vms, &mas_detach);
> +		next = vms.next;
> +		prev = vms.prev;
> +		vma_prev(&vmi);

I'm sure this is correct, but just to double-check - we want to set the VMI to
prev here right?

It might be worth adding a small comment saying '/* vmi now points at prev */'
or similar; I've found it can get quite hard to follow where the iterator is
sometimes.

>  		vma = NULL;
> +	} else {
> +		next = vma_next(&vmi);
> +		prev = vma_prev(&vmi);

But here we move forward to the next VMA and set this to next, then go back to
the _original_ one and this is prev?

Actually I guess if vma == NULL, next gets you to the next, and prev jumps back
to prev, with nothing between, and so that makes sense.
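
Spelling the else-branch out with comments (annotations mine, they are not
in the patch):

	} else {
		/* No VMA overlaps [addr, end); vmi still sits in the gap. */
		next = vma_next(&vmi);	/* first VMA after the gap */
		prev = vma_prev(&vmi);	/* back across the gap to the VMA before */
	}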


>  	}
>
>  	/*
> @@ -2962,8 +2980,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
>  		vm_flags |= VM_ACCOUNT;
>  	}
>
> -	next = vma_next(&vmi);
> -	prev = vma_prev(&vmi);
>  	if (vm_flags & VM_SPECIAL) {
>  		if (prev)
>  			vma_iter_next_range(&vmi);
> --
> 2.43.0
>
>
Liam R. Howlett July 5, 2024, 8:30 p.m. UTC | #2
* Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [240705 16:06]:
> On Thu, Jul 04, 2024 at 02:27:11PM GMT, Liam R. Howlett wrote:
> > From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>
> >
> > Open code the do_vmi_align_munmap() call so that it can be broken up
> > later in the series.
> >
> > Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
> > ---
> >  mm/mmap.c | 22 +++++++++++++++++++---
> >  1 file changed, 19 insertions(+), 3 deletions(-)
> >
> > diff --git a/mm/mmap.c b/mm/mmap.c
> > index e9858ca8bbd4..f5b33de4e717 100644
> > --- a/mm/mmap.c
> > +++ b/mm/mmap.c
> > @@ -2915,6 +2915,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> >  	struct vm_area_struct *next, *prev, *merge;
> >  	pgoff_t pglen = len >> PAGE_SHIFT;
> >  	unsigned long charged = 0;
> > +	struct vma_munmap_struct vms;
> > +	struct ma_state mas_detach;
> > +	struct maple_tree mt_detach;
> >  	unsigned long end = addr + len;
> >  	unsigned long merge_start = addr, merge_end = end;
> >  	bool writable_file_mapping = false;
> > @@ -2947,9 +2950,24 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> >  	/* Find the first overlapping VMA */
> >  	vma = vma_find(&vmi, end);
> >  	if (vma) {
> > -		if (do_vmi_align_munmap(&vmi, vma, mm, addr, end, uf, false))
> > +		mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
> > +		mt_on_stack(mt_detach);
> > +		mas_init(&mas_detach, &mt_detach, /* addr = */ 0);
> 
> I'm guessing this is exactly equivalent (or equivalent enough for our purposes
> here) to the MA_STATE() call in do_vmi_align_munmap()?

Yes, what we are doing is inlining the function call so that it can be
split into parts.  So for reviewing, I just made it inline.

> 
> Checking the two against each other it seems that it is indeed.
> 
> > +		init_vma_munmap(&vms, &vmi, vma, addr, end, uf, /* unlock = */ false);
> > +		if (vms_gather_munmap_vmas(&vms, &mas_detach))
> > +			return -ENOMEM;
> 
> In do_vmi_align_munmap() we also invoke validate_mm(); why aren't we doing that
> here?

I don't see the validate_mm() call in do_vmi_align_munmap(); it is called
in the vms_complete_munmap_vmas() function, though.

> 
> > +
> > +		if (vma_iter_clear_gfp(&vmi, addr, end, GFP_KERNEL))
> >  			return -ENOMEM;
> 
> Same here.

It would fail here; our count would be wrong.

> 
> > +
> > +		vms_complete_munmap_vmas(&vms, &mas_detach);
> > +		next = vms.next;
> > +		prev = vms.prev;
> > +		vma_prev(&vmi);
> 
> I'm sure this is correct, but just to double-check - we want to set the VMI to
> prev here right?

Yes.  In the current function, the vmi points to prev when we are done with
this dance, so I put it there.
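
In other words, the open-coded sequence ends with the iterator at prev
(annotated here for illustration; the comment is not in the patch):

	vms_complete_munmap_vmas(&vms, &mas_detach);
	next = vms.next;
	prev = vms.prev;
	vma_prev(&vmi);		/* vmi now points at prev */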
> 
> It might be worth adding a small comment saying '/* vmi now points at prev */'
> or similar; I've found it can get quite hard to follow where the iterator is
> sometimes.

So this is about to change...

> 
> >  		vma = NULL;
> > +	} else {
> > +		next = vma_next(&vmi);
> > +		prev = vma_prev(&vmi);
> 
> But here we move forward to the next VMA and set this to next, then go back to
> the _original_ one and this is prev?
> 
> Actually I guess if vma == NULL, next gets you to the next, and prev jumps back
> to prev, with nothing between, and so that makes sense.

Yes, this is what I'm doing.

> 
> 
> >  	}
> >
> >  	/*
> > @@ -2962,8 +2980,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> >  		vm_flags |= VM_ACCOUNT;
> >  	}
> >
> > -	next = vma_next(&vmi);
> > -	prev = vma_prev(&vmi);
> >  	if (vm_flags & VM_SPECIAL) {
> >  		if (prev)
> >  			vma_iter_next_range(&vmi);
> > --
> > 2.43.0
> >
> >
Lorenzo Stoakes July 5, 2024, 8:36 p.m. UTC | #3
On Fri, Jul 05, 2024 at 04:30:46PM GMT, Liam R. Howlett wrote:
> * Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [240705 16:06]:
> > On Thu, Jul 04, 2024 at 02:27:11PM GMT, Liam R. Howlett wrote:
> > > From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>
> > >
> > > Open code the do_vmi_align_munmap() call so that it can be broken up
> > > later in the series.
> > >
> > > Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
> > > ---
> > >  mm/mmap.c | 22 +++++++++++++++++++---
> > >  1 file changed, 19 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/mm/mmap.c b/mm/mmap.c
> > > index e9858ca8bbd4..f5b33de4e717 100644
> > > --- a/mm/mmap.c
> > > +++ b/mm/mmap.c
> > > @@ -2915,6 +2915,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > >  	struct vm_area_struct *next, *prev, *merge;
> > >  	pgoff_t pglen = len >> PAGE_SHIFT;
> > >  	unsigned long charged = 0;
> > > +	struct vma_munmap_struct vms;
> > > +	struct ma_state mas_detach;
> > > +	struct maple_tree mt_detach;
> > >  	unsigned long end = addr + len;
> > >  	unsigned long merge_start = addr, merge_end = end;
> > >  	bool writable_file_mapping = false;
> > > @@ -2947,9 +2950,24 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > >  	/* Find the first overlapping VMA */
> > >  	vma = vma_find(&vmi, end);
> > >  	if (vma) {
> > > -		if (do_vmi_align_munmap(&vmi, vma, mm, addr, end, uf, false))
> > > +		mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
> > > +		mt_on_stack(mt_detach);
> > > +		mas_init(&mas_detach, &mt_detach, /* addr = */ 0);
> >
> > I'm guessing this is exactly equivalent (or equivalent enough for our purposes
> > here) to the MA_STATE() call in do_vmi_align_munmap()?
>
> Yes, what we are doing is inlining the function call so that it can be
> split into parts.  So for reviewing, I just made it inline.
>
> >
> > Checking the two against each other it seems that it is indeed.
> >
> > > +		init_vma_munmap(&vms, &vmi, vma, addr, end, uf, /* unlock = */ false);
> > > +		if (vms_gather_munmap_vmas(&vms, &mas_detach))
> > > +			return -ENOMEM;
> >
> > In do_vmi_align_munmap() we also invoke validate_mm(); why aren't we doing that
> > here?
>
> I don't see the validate_mm() call in do_vmi_align_munmap(); it is called
> in the vms_complete_munmap_vmas() function, though.

In do_vmi_align_munmap() you have:

	error = vms_gather_munmap_vmas(&vms, &mas_detach);
	if (error)
		goto gather_failed;

	...

gather_failed:
	validate_mm(mm);
	return error;

>
> >
> > > +
> > > +		if (vma_iter_clear_gfp(&vmi, addr, end, GFP_KERNEL))
> > >  			return -ENOMEM;
> >
> > Same here.
>
> It would fail here; our count would be wrong.

Right, but in do_vmi_align_munmap() you have:

	error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL);
	if (error)
		goto clear_tree_failed;

	...

clear_tree_failed:
	abort_munmap_vmas(&mas_detach);
gather_failed:
	validate_mm(mm);


>
> >
> > > +
> > > +		vms_complete_munmap_vmas(&vms, &mas_detach);
> > > +		next = vms.next;
> > > +		prev = vms.prev;
> > > +		vma_prev(&vmi);
> >
> > I'm sure this is correct, but just to double-check - we want to set the VMI to
> > prev here right?
>
> Yes.  In the current function, the vmi points to prev when we are done with
> this dance, so I put it there.
> >
> It might be worth adding a small comment saying '/* vmi now points at prev */'
> or similar; I've found it can get quite hard to follow where the iterator is
> sometimes.
>
> So this is about to change...

Yeah, I saw :)

>
> >
> > >  		vma = NULL;
> > > +	} else {
> > > +		next = vma_next(&vmi);
> > > +		prev = vma_prev(&vmi);
> >
> > But here we move forward to the next VMA and set this to next, then go back to
> > the _original_ one and this is prev?
> >
> > Actually I guess if vma == NULL, next gets you to the next, and prev jumps back
> > to prev, with nothing between, and so that makes sense.
>
> Yes, this is what I'm doing.
>
> >
> >
> > >  	}
> > >
> > >  	/*
> > > @@ -2962,8 +2980,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > >  		vm_flags |= VM_ACCOUNT;
> > >  	}
> > >
> > > -	next = vma_next(&vmi);
> > > -	prev = vma_prev(&vmi);
> > >  	if (vm_flags & VM_SPECIAL) {
> > >  		if (prev)
> > >  			vma_iter_next_range(&vmi);
> > > --
> > > 2.43.0
> > >
> > >

Patch

diff --git a/mm/mmap.c b/mm/mmap.c
index e9858ca8bbd4..f5b33de4e717 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2915,6 +2915,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	struct vm_area_struct *next, *prev, *merge;
 	pgoff_t pglen = len >> PAGE_SHIFT;
 	unsigned long charged = 0;
+	struct vma_munmap_struct vms;
+	struct ma_state mas_detach;
+	struct maple_tree mt_detach;
 	unsigned long end = addr + len;
 	unsigned long merge_start = addr, merge_end = end;
 	bool writable_file_mapping = false;
@@ -2947,9 +2950,24 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	/* Find the first overlapping VMA */
 	vma = vma_find(&vmi, end);
 	if (vma) {
-		if (do_vmi_align_munmap(&vmi, vma, mm, addr, end, uf, false))
+		mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
+		mt_on_stack(mt_detach);
+		mas_init(&mas_detach, &mt_detach, /* addr = */ 0);
+		init_vma_munmap(&vms, &vmi, vma, addr, end, uf, /* unlock = */ false);
+		if (vms_gather_munmap_vmas(&vms, &mas_detach))
+			return -ENOMEM;
+
+		if (vma_iter_clear_gfp(&vmi, addr, end, GFP_KERNEL))
 			return -ENOMEM;
+
+		vms_complete_munmap_vmas(&vms, &mas_detach);
+		next = vms.next;
+		prev = vms.prev;
+		vma_prev(&vmi);
 		vma = NULL;
+	} else {
+		next = vma_next(&vmi);
+		prev = vma_prev(&vmi);
 	}
 
 	/*
@@ -2962,8 +2980,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		vm_flags |= VM_ACCOUNT;
 	}
 
-	next = vma_next(&vmi);
-	prev = vma_prev(&vmi);
 	if (vm_flags & VM_SPECIAL) {
 		if (prev)
 			vma_iter_next_range(&vmi);