diff mbox series

[v2,3/4] mm/mmap/vma_merge: extend invariants, avoid invalid res, vma

Message ID 17b6fc3edc46c4b33aa93b9ef17a63a3a76f4b5f.1679431180.git.lstoakes@gmail.com (mailing list archive)
State New
Headers show
Series further cleanup of vma_merge() | expand

Commit Message

Lorenzo Stoakes March 21, 2023, 8:45 p.m. UTC
Previously, vma was an uninitialised variable which was only definitely
assigned as a result of the logic covering all possible input cases - for
it to have remained uninitialised, prev would have to be NULL, and next
would _have_ to be mergeable.

We now reuse vma to assign curr and next, so to be absolutely explicit,
ensure this variable is _always_ assigned, and while we're at it remove the
redundant assignment of both res and vma (if prev is NULL then we simply
assign to NULL).

In addition, we absolutely do rely on addr == curr->vm_start should curr
exist, so assert as much.

Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
---
 mm/mmap.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

Comments

Vlastimil Babka March 22, 2023, 9:16 a.m. UTC | #1
On 3/21/23 21:45, Lorenzo Stoakes wrote:
> Previously, vma was an uninitialised variable which was only definitely
> assigned as a result of the logic covering all possible input cases - for
> it to have remained uninitialised, prev would have to be NULL, and next
> would _have_ to be mergeable.
> 
> We now reuse vma to assign curr and next, so to be absolutely explicit,
> ensure this variable is _always_ assigned, and while we're at it remove the
> redundant assignment of both res and vma (if prev is NULL then we simply
> assign to NULL).
> 
> In addition, we absolutely do rely on addr == curr->vm_start should curr
> exist, so assert as much.
> 
> Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>

Reviewed-by: Vlastimil Babka <vbabka@suse.cz>

Nit suggestion below.

> ---
>  mm/mmap.c | 19 ++++++++++++++-----
>  1 file changed, 14 insertions(+), 5 deletions(-)
> 
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 6361baf75601..7aec49c3bc74 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -911,7 +911,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
>  {
>  	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
>  	pgoff_t vma_pgoff;
> -	struct vm_area_struct *curr, *next, *res = NULL;
> +	struct vm_area_struct *curr, *next, *res;
>  	struct vm_area_struct *vma, *adjust, *remove, *remove2;
>  	int err = -1;
>  	bool merge_prev = false;
> @@ -940,14 +940,18 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
>  		/* Is there a VMA next to a hole (case 1 - 3) or prev (4)? */
>  		next = vma_lookup(mm, end);
>  
> -	/* verify some invariant that must be enforced by the caller */
> +	/*
> +	 * By default, we return prev. Cases 3, 4, 8 will instead return next
> +	 * and cases 3, 8 will also update vma to point at next.
> +	 */
> +	res = vma = prev;

Later in the function there's a line:

	remove = remove2 = adjust = NULL;

Now it would make sense to move it up here?

> +
> +	/* Verify some invariant that must be enforced by the caller. */
>  	VM_WARN_ON(prev && addr <= prev->vm_start);
> -	VM_WARN_ON(curr && end > curr->vm_end);
> +	VM_WARN_ON(curr && (addr != curr->vm_start || end > curr->vm_end));
>  	VM_WARN_ON(addr >= end);
>  
>  	if (prev) {
> -		res = prev;
> -		vma = prev;
>  		vma_start = prev->vm_start;
>  		vma_pgoff = prev->vm_pgoff;
>  		/* Can we merge the predecessor? */
> @@ -958,6 +962,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
>  			vma_prev(vmi);
>  		}
>  	}
> +
>  	/* Can we merge the successor? */
>  	if (next && mpol_equal(policy, vma_policy(next)) &&
>  	    can_vma_merge_before(next, vm_flags,
> @@ -998,6 +1003,10 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
>  			adj_start = -(prev->vm_end - addr);
>  			err = dup_anon_vma(next, prev);
>  		} else {
> +			/*
> +			 * Note that cases 3 and 8 are the ONLY ones where prev
> +			 * is permitted to be (but is not necessarily) NULL.
> +			 */
>  			vma = next;			/* case 3 */
>  			vma_start = addr;
>  			vma_end = next->vm_end;
Marek Szyprowski March 23, 2023, 5 p.m. UTC | #2
On 21.03.2023 21:45, Lorenzo Stoakes wrote:
> Previously, vma was an uninitialised variable which was only definitely
> assigned as a result of the logic covering all possible input cases - for
> it to have remained uninitialised, prev would have to be NULL, and next
> would _have_ to be mergeable.
>
> We now reuse vma to assign curr and next, so to be absolutely explicit,
> ensure this variable is _always_ assigned, and while we're at it remove the
> redundant assignment of both res and vma (if prev is NULL then we simply
> assign to NULL).
>
> In addition, we absolutely do rely on addr == curr->vm_start should curr
> exist, so assert as much.
>
> Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>

This patch has been merged into today's linux next-20230323 as commit 
6426bbcc76be ("mm/mmap/vma_merge: extend invariants, avoid invalid res, 
vma").

Unfortunately it breaks booting of some ARM 32bit machines, like Samsung 
Exynos5422-based Odroid XU3 board. This shortened log shows the issue:

Run /sbin/init as init process
   with arguments:
     /sbin/init
   with environment:
     HOME=/
     TERM=linux
8<--- cut here ---
Unhandled fault: page domain fault (0x01b) at 0xb6f03010
[b6f03010] *pgd=b5e84835
Internal error: : 1b [#1] PREEMPT SMP ARM
Modules linked in:
CPU: 2 PID: 1 Comm: init Not tainted 6.3.0-rc1-00296-g6426bbcc76be #6445
Hardware name: Samsung Exynos (Flattened Device Tree)
PC is at vma_merge+0xa0/0x728
LR is at vma_merge+0x294/0x728
pc : [<c02b08a8>]    lr : [<c02b0a9c>]    psr: a0000013
...
Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
Control: 10c5387d  Table: 4f11406a  DAC: 00000051
...
Process init (pid: 1, stack limit = 0x5219a5c7)
Stack: (0xf0835e30 to 0xf0836000)
...
  vma_merge from mprotect_fixup+0xc8/0x290
  mprotect_fixup from do_mprotect_pkey.constprop.0+0x210/0x338
  do_mprotect_pkey.constprop.0 from ret_fast_syscall+0x0/0x1c
Exception stack(0xf0835fa8 to 0xf0835ff0)
...
---[ end trace 0000000000000000 ]---
note: init[1] exited with irqs disabled
Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b

Reverting it on top of linux-next, together with 183b2bced4c9 
("mm/mmap/vma_merge: init cleanup, be explicit about the non-mergeable 
case"), which depends on this patch, fixes the boot issue.


> ---
>   mm/mmap.c | 19 ++++++++++++++-----
>   1 file changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 6361baf75601..7aec49c3bc74 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -911,7 +911,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
>   {
>   	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
>   	pgoff_t vma_pgoff;
> -	struct vm_area_struct *curr, *next, *res = NULL;
> +	struct vm_area_struct *curr, *next, *res;
>   	struct vm_area_struct *vma, *adjust, *remove, *remove2;
>   	int err = -1;
>   	bool merge_prev = false;
> @@ -940,14 +940,18 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
>   		/* Is there a VMA next to a hole (case 1 - 3) or prev (4)? */
>   		next = vma_lookup(mm, end);
>   
> -	/* verify some invariant that must be enforced by the caller */
> +	/*
> +	 * By default, we return prev. Cases 3, 4, 8 will instead return next
> +	 * and cases 3, 8 will also update vma to point at next.
> +	 */
> +	res = vma = prev;
> +
> +	/* Verify some invariant that must be enforced by the caller. */
>   	VM_WARN_ON(prev && addr <= prev->vm_start);
> -	VM_WARN_ON(curr && end > curr->vm_end);
> +	VM_WARN_ON(curr && (addr != curr->vm_start || end > curr->vm_end));
>   	VM_WARN_ON(addr >= end);
>   
>   	if (prev) {
> -		res = prev;
> -		vma = prev;
>   		vma_start = prev->vm_start;
>   		vma_pgoff = prev->vm_pgoff;
>   		/* Can we merge the predecessor? */
> @@ -958,6 +962,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
>   			vma_prev(vmi);
>   		}
>   	}
> +
>   	/* Can we merge the successor? */
>   	if (next && mpol_equal(policy, vma_policy(next)) &&
>   	    can_vma_merge_before(next, vm_flags,
> @@ -998,6 +1003,10 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
>   			adj_start = -(prev->vm_end - addr);
>   			err = dup_anon_vma(next, prev);
>   		} else {
> +			/*
> +			 * Note that cases 3 and 8 are the ONLY ones where prev
> +			 * is permitted to be (but is not necessarily) NULL.
> +			 */
>   			vma = next;			/* case 3 */
>   			vma_start = addr;
>   			vma_end = next->vm_end;

Best regards
Lorenzo Stoakes March 23, 2023, 5:08 p.m. UTC | #3
On Thu, Mar 23, 2023 at 06:00:45PM +0100, Marek Szyprowski wrote:
> On 21.03.2023 21:45, Lorenzo Stoakes wrote:
> > Previously, vma was an uninitialised variable which was only definitely
> > assigned as a result of the logic covering all possible input cases - for
> > it to have remained uninitialised, prev would have to be NULL, and next
> > would _have_ to be mergeable.
> >
> > We now reuse vma to assign curr and next, so to be absolutely explicit,
> > ensure this variable is _always_ assigned, and while we're at it remove the
> > redundant assignment of both res and vma (if prev is NULL then we simply
> > assign to NULL).
> >
> > In addition, we absolutely do rely on addr == curr->vm_start should curr
> > exist, so assert as much.
> >
> > Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
>
> This patch has been merged into today's linux next-20230323 as commit
> 6426bbcc76be ("mm/mmap/vma_merge: extend invariants, avoid invalid res,
> vma").
>
> Unfortunately it breaks booting of some ARM 32bit machines, like Samsung
> Exynos5422-based Odroid XU3 board. This shortened log shows the issue:

I'm very sorry about this. This was due to a bug with uninitiased
state. This was fixed in v3 of this series, which is now at v5 [1]. This
has already been taken to mm-unstable in Andrew's repo, hopefully it will
be taken to -next soon!

I have made sure to reply quickly whenever this is raised to ensure people
are aware, and I will also buy anybody affected a pint if I meet them in
person :)

[1]:https://lore.kernel.org/all/over.1679516210.git.lstoakes@gmail.com/

>
> Run /sbin/init as init process
>    with arguments:
>      /sbin/init
>    with environment:
>      HOME=/
>      TERM=linux
> 8<--- cut here ---
> Unhandled fault: page domain fault (0x01b) at 0xb6f03010
> [b6f03010] *pgd=b5e84835
> Internal error: : 1b [#1] PREEMPT SMP ARM
> Modules linked in:
> CPU: 2 PID: 1 Comm: init Not tainted 6.3.0-rc1-00296-g6426bbcc76be #6445
> Hardware name: Samsung Exynos (Flattened Device Tree)
> PC is at vma_merge+0xa0/0x728
> LR is at vma_merge+0x294/0x728
> pc : [<c02b08a8>]    lr : [<c02b0a9c>]    psr: a0000013
> ...
> Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> Control: 10c5387d  Table: 4f11406a  DAC: 00000051
> ...
> Process init (pid: 1, stack limit = 0x5219a5c7)
> Stack: (0xf0835e30 to 0xf0836000)
> ...
>   vma_merge from mprotect_fixup+0xc8/0x290
>   mprotect_fixup from do_mprotect_pkey.constprop.0+0x210/0x338
>   do_mprotect_pkey.constprop.0 from ret_fast_syscall+0x0/0x1c
> Exception stack(0xf0835fa8 to 0xf0835ff0)
> ...
> ---[ end trace 0000000000000000 ]---
> note: init[1] exited with irqs disabled
> Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>
> Reverting it on top of linux-next, together with 183b2bced4c9
> ("mm/mmap/vma_merge: init cleanup, be explicit about the non-mergeable
> case"), which depends on this patch, fixes the boot issue.
>
>
> > ---
> >   mm/mmap.c | 19 ++++++++++++++-----
> >   1 file changed, 14 insertions(+), 5 deletions(-)
> >
> > diff --git a/mm/mmap.c b/mm/mmap.c
> > index 6361baf75601..7aec49c3bc74 100644
> > --- a/mm/mmap.c
> > +++ b/mm/mmap.c
> > @@ -911,7 +911,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
> >   {
> >   	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
> >   	pgoff_t vma_pgoff;
> > -	struct vm_area_struct *curr, *next, *res = NULL;
> > +	struct vm_area_struct *curr, *next, *res;
> >   	struct vm_area_struct *vma, *adjust, *remove, *remove2;
> >   	int err = -1;
> >   	bool merge_prev = false;
> > @@ -940,14 +940,18 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
> >   		/* Is there a VMA next to a hole (case 1 - 3) or prev (4)? */
> >   		next = vma_lookup(mm, end);
> >
> > -	/* verify some invariant that must be enforced by the caller */
> > +	/*
> > +	 * By default, we return prev. Cases 3, 4, 8 will instead return next
> > +	 * and cases 3, 8 will also update vma to point at next.
> > +	 */
> > +	res = vma = prev;
> > +
> > +	/* Verify some invariant that must be enforced by the caller. */
> >   	VM_WARN_ON(prev && addr <= prev->vm_start);
> > -	VM_WARN_ON(curr && end > curr->vm_end);
> > +	VM_WARN_ON(curr && (addr != curr->vm_start || end > curr->vm_end));
> >   	VM_WARN_ON(addr >= end);
> >
> >   	if (prev) {
> > -		res = prev;
> > -		vma = prev;
> >   		vma_start = prev->vm_start;
> >   		vma_pgoff = prev->vm_pgoff;
> >   		/* Can we merge the predecessor? */
> > @@ -958,6 +962,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
> >   			vma_prev(vmi);
> >   		}
> >   	}
> > +
> >   	/* Can we merge the successor? */
> >   	if (next && mpol_equal(policy, vma_policy(next)) &&
> >   	    can_vma_merge_before(next, vm_flags,
> > @@ -998,6 +1003,10 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
> >   			adj_start = -(prev->vm_end - addr);
> >   			err = dup_anon_vma(next, prev);
> >   		} else {
> > +			/*
> > +			 * Note that cases 3 and 8 are the ONLY ones where prev
> > +			 * is permitted to be (but is not necessarily) NULL.
> > +			 */
> >   			vma = next;			/* case 3 */
> >   			vma_start = addr;
> >   			vma_end = next->vm_end;
>
> Best regards
> --
> Marek Szyprowski, PhD
> Samsung R&D Institute Poland
>
Lorenzo Stoakes March 23, 2023, 5:10 p.m. UTC | #4
On Thu, Mar 23, 2023 at 05:08:19PM +0000, Lorenzo Stoakes wrote:
> [1]:https://lore.kernel.org/all/over.1679516210.git.lstoakes@gmail.com/

Whoops! Typo - https://lore.kernel.org/all/cover.1679516210.git.lstoakes@gmail.com/

:)
diff mbox series

Patch

diff --git a/mm/mmap.c b/mm/mmap.c
index 6361baf75601..7aec49c3bc74 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -911,7 +911,7 @@  struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
 {
 	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
 	pgoff_t vma_pgoff;
-	struct vm_area_struct *curr, *next, *res = NULL;
+	struct vm_area_struct *curr, *next, *res;
 	struct vm_area_struct *vma, *adjust, *remove, *remove2;
 	int err = -1;
 	bool merge_prev = false;
@@ -940,14 +940,18 @@  struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
 		/* Is there a VMA next to a hole (case 1 - 3) or prev (4)? */
 		next = vma_lookup(mm, end);
 
-	/* verify some invariant that must be enforced by the caller */
+	/*
+	 * By default, we return prev. Cases 3, 4, 8 will instead return next
+	 * and cases 3, 8 will also update vma to point at next.
+	 */
+	res = vma = prev;
+
+	/* Verify some invariant that must be enforced by the caller. */
 	VM_WARN_ON(prev && addr <= prev->vm_start);
-	VM_WARN_ON(curr && end > curr->vm_end);
+	VM_WARN_ON(curr && (addr != curr->vm_start || end > curr->vm_end));
 	VM_WARN_ON(addr >= end);
 
 	if (prev) {
-		res = prev;
-		vma = prev;
 		vma_start = prev->vm_start;
 		vma_pgoff = prev->vm_pgoff;
 		/* Can we merge the predecessor? */
@@ -958,6 +962,7 @@  struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
 			vma_prev(vmi);
 		}
 	}
+
 	/* Can we merge the successor? */
 	if (next && mpol_equal(policy, vma_policy(next)) &&
 	    can_vma_merge_before(next, vm_flags,
@@ -998,6 +1003,10 @@  struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
 			adj_start = -(prev->vm_end - addr);
 			err = dup_anon_vma(next, prev);
 		} else {
+			/*
+			 * Note that cases 3 and 8 are the ONLY ones where prev
+			 * is permitted to be (but is not necessarily) NULL.
+			 */
 			vma = next;			/* case 3 */
 			vma_start = addr;
 			vma_end = next->vm_end;