
[v4,65/66] mm: Remove the vma linked list

Message ID: 20211201142918.921493-66-Liam.Howlett@oracle.com
State: New
Series: Introducing the Maple Tree

Commit Message

Liam R. Howlett Dec. 1, 2021, 2:30 p.m. UTC
From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>

Replace any vm_next use with vma_find().

Update free_pgtables(), unmap_vmas(), and zap_page_range() to use the
maple tree.

Use the new free_pgtables() and unmap_vmas() in do_mas_align_munmap().
At the same time, alter the loop to be more compact.

Now that free_pgtables() and unmap_vmas() take a maple tree as an
argument, rearrange do_mas_align_munmap() to use the new table to hold
the lock.

Remove __vma_link_list() and __vma_unlink_list() as they are exclusively
used to update the linked list.

Rework validation of tree as it was depending on the linked list.

Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
---
 include/linux/mm.h       |   5 +-
 include/linux/mm_types.h |   4 -
 kernel/fork.c            |  13 +-
 mm/debug.c               |  14 +-
 mm/gup.c                 |   2 +-
 mm/internal.h            |  10 +-
 mm/memory.c              |  33 ++-
 mm/mmap.c                | 497 ++++++++++++++++++---------------------
 mm/nommu.c               |   2 -
 mm/util.c                |  40 ----
 10 files changed, 269 insertions(+), 351 deletions(-)
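
The conversion pattern is the same in every spot touched by the diff: a walk
that used to follow vma->vm_next becomes a maple tree iteration driven by an
ma_state.  As a minimal sketch (adapted from the unmap_vmas() hunk below and
shown only for orientation - it is not an additional change):

	/* before: follow the per-mm linked list up to end_addr */
	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
		unmap_single_vma(tlb, vma, start_addr, end_addr, NULL);

	/* after: iterate the maple tree over the same address range */
	MA_STATE(mas, mt, vma->vm_end, vma->vm_end);

	do {
		unmap_single_vma(tlb, vma, start_addr, end_addr, NULL);
	} while ((vma = mas_find(&mas, end_addr - 1)) != NULL);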

Comments

Vlastimil Babka Jan. 20, 2022, 5:41 p.m. UTC | #1
On 12/1/21 15:30, Liam Howlett wrote:
> From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>
> 
> Replace any vm_next use with vma_find().
> 
> Update free_pgtables(), unmap_vmas(), and zap_page_range() to use the
> maple tree.

> Use the new free_pgtables() and unmap_vmas() in do_mas_align_munmap().
> At the same time, alter the loop to be more compact.
> 
> Now that free_pgtables() and unmap_vmas() take a maple tree as an
> argument, rearrange do_mas_align_munmap() to use the new table to hold
> the lock

table or tree?

> Remove __vma_link_list() and __vma_unlink_list() as they are exclusively
> used to update the linked list
> 
> Rework validation of tree as it was depending on the linked list.
> 
> Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>

git grep shows that some usages of 'vm_next' and 'vm_prev' remain after this
patch, including some exotic arch code.

> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -398,12 +398,21 @@ void free_pgd_range(struct mmu_gather *tlb,
>  	} while (pgd++, addr = next, addr != end);
>  }
>  
> -void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
> -		unsigned long floor, unsigned long ceiling)
> +void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
> +		   struct vm_area_struct *vma, unsigned long floor,
> +		   unsigned long ceiling)
>  {
> -	while (vma) {
> -		struct vm_area_struct *next = vma->vm_next;
> +	MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
> +
> +	do {
>  		unsigned long addr = vma->vm_start;
> +		struct vm_area_struct *next;
> +
> +		/*
> +		 * Note: USER_PGTABLES_CEILING may be passed as ceiling and may
> +		 * be 0.  This will underflow and is okay.
> +		 */
> +		next = mas_find(&mas, ceiling - 1);
>  
>  		/*
>  		 * Hide vma from rmap and truncate_pagecache before freeing
> @@ -422,7 +431,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  			while (next && next->vm_start <= vma->vm_end + PMD_SIZE
>  			       && !is_vm_hugetlb_page(next)) {
>  				vma = next;
> -				next = vma->vm_next;
> +				next = mas_find(&mas, ceiling - 1);
>  				unlink_anon_vmas(vma);
>  				unlink_file_vma(vma);
>  			}
> @@ -430,7 +439,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
>  				floor, next ? next->vm_start : ceiling);
>  		}
>  		vma = next;
> -	}
> +	} while (vma);
>  }
>  
>  void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte)
> @@ -1602,17 +1611,19 @@ static void unmap_single_vma(struct mmu_gather *tlb,
>   * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
>   * drops the lock and schedules.
>   */
> -void unmap_vmas(struct mmu_gather *tlb,
> +void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
>  		struct vm_area_struct *vma, unsigned long start_addr,
>  		unsigned long end_addr)
>  {
>  	struct mmu_notifier_range range;
> +	MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
>  
>  	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
>  				start_addr, end_addr);
>  	mmu_notifier_invalidate_range_start(&range);
> -	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
> +	do {
>  		unmap_single_vma(tlb, vma, start_addr, end_addr, NULL);
> +	} while ((vma = mas_find(&mas, end_addr - 1)) != NULL);
>  	mmu_notifier_invalidate_range_end(&range);
>  }
>  
> @@ -1627,8 +1638,11 @@ void unmap_vmas(struct mmu_gather *tlb,
>  void zap_page_range(struct vm_area_struct *vma, unsigned long start,
>  		unsigned long size)
>  {
> +	struct maple_tree *mt = &vma->vm_mm->mm_mt;

Well looks like that's also an option to avoid a new parameter :)

> +	unsigned long end = start + size;
>  	struct mmu_notifier_range range;
>  	struct mmu_gather tlb;
> +	MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
>  
>  	lru_add_drain();
>  	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
> @@ -1636,8 +1650,9 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
>  	tlb_gather_mmu(&tlb, vma->vm_mm);
>  	update_hiwater_rss(vma->vm_mm);
>  	mmu_notifier_invalidate_range_start(&range);
> -	for ( ; vma && vma->vm_start < range.end; vma = vma->vm_next)
> +	do {
>  		unmap_single_vma(&tlb, vma, start, range.end, NULL);
> +	} while ((vma = mas_find(&mas, end - 1)) != NULL);
>  	mmu_notifier_invalidate_range_end(&range);
>  	tlb_finish_mmu(&tlb);
>  }
> diff --git a/mm/mmap.c b/mm/mmap.c
> index dde74e0b195d..e13c6ef76697 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -74,9 +74,10 @@ int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
>  static bool ignore_rlimit_data;
>  core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
>  
> -static void unmap_region(struct mm_struct *mm,
> +static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
>  		struct vm_area_struct *vma, struct vm_area_struct *prev,
> -		unsigned long start, unsigned long end);
> +		struct vm_area_struct *next, unsigned long start,
> +		unsigned long end);
>  
>  /* description of effects of mapping type and prot in current implementation.
>   * this is due to the limited x86 page protection hardware.  The expected
> @@ -173,10 +174,8 @@ void unlink_file_vma(struct vm_area_struct *vma)
>  /*
>   * Close a vm structure and free it, returning the next.

No longer returning the next.

>   */
> -static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
> +static void remove_vma(struct vm_area_struct *vma)
>  {
> -	struct vm_area_struct *next = vma->vm_next;
> -
>  	might_sleep();
>  	if (vma->vm_ops && vma->vm_ops->close)
>  		vma->vm_ops->close(vma);

<snip>

>   */
>  struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
>  {
> +	MA_STATE(mas, &vma->vm_mm->mm_mt, vma->vm_end, vma->vm_end);
>  	struct anon_vma *anon_vma = NULL;
> +	struct vm_area_struct *prev, *next;
>  
>  	/* Try next first. */
> -	if (vma->vm_next) {
> -		anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next);
> +	next = mas_walk(&mas);
> +	if (next) {
> +		anon_vma = reusable_anon_vma(next, vma, next);
>  		if (anon_vma)
>  			return anon_vma;
>  	}
>  
> +	prev = mas_prev(&mas, 0);
> +	VM_BUG_ON_VMA(prev != vma, vma);
> +	prev = mas_prev(&mas, 0);
>  	/* Try prev next. */
> -	if (vma->vm_prev)
> -		anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma);
> +	if (prev)
> +		anon_vma = reusable_anon_vma(prev, prev, vma);
>  
>  	/*
>  	 * We might reach here with anon_vma == NULL if we can't find
> @@ -1906,10 +1825,10 @@ struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
>  					     unsigned long start_addr,
>  					     unsigned long end_addr)
>  {
> -	MA_STATE(mas, &mm->mm_mt, start_addr, start_addr);
> +	unsigned long index = start_addr;
>  
>  	mmap_assert_locked(mm);
> -	return mas_find(&mas, end_addr - 1);
> +	return mt_find(&mm->mm_mt, &index, end_addr - 1);

Why is this now changed again?

>  }
>  EXPORT_SYMBOL(find_vma_intersection);
>  
> @@ -1923,8 +1842,10 @@ EXPORT_SYMBOL(find_vma_intersection);
>   */
>  inline struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
>  {
> -	// Note find_vma_intersection will decrease 0 to underflow to ULONG_MAX
> -	return find_vma_intersection(mm, addr, 0);
> +	unsigned long index = addr;
> +
> +	mmap_assert_locked(mm);
> +	return mt_find(&mm->mm_mt, &index, ULONG_MAX);

And here.

>  }
>  EXPORT_SYMBOL(find_vma);
>  
> @@ -2026,7 +1947,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
>  	if (gap_addr < address || gap_addr > TASK_SIZE)
>  		gap_addr = TASK_SIZE;
>  
> -	next = vma->vm_next;
> +	next = vma_find(mm, vma->vm_end);
>  	if (next && next->vm_start < gap_addr && vma_is_accessible(next)) {
>  		if (!(next->vm_flags & VM_GROWSUP))
>  			return -ENOMEM;
> @@ -2072,8 +1993,6 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
>  				vma->vm_end = address;
>  				vma_store(mm, vma);
>  				anon_vma_interval_tree_post_update_vma(vma);
> -				if (!vma->vm_next)
> -					mm->highest_vm_end = vm_end_gap(vma);
>  				spin_unlock(&mm->page_table_lock);
>  
>  				perf_event_mmap(vma);
> @@ -2100,7 +2019,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
>  		return -EPERM;
>  
>  	/* Enforce stack_guard_gap */
> -	prev = vma->vm_prev;
> +	find_vma_prev(mm, vma->vm_start, &prev);
>  	/* Check that both stack segments have the same anon_vma? */
>  	if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
>  			vma_is_accessible(prev)) {
> @@ -2235,20 +2154,22 @@ EXPORT_SYMBOL_GPL(find_extend_vma);
>   *
>   * Called with the mm semaphore held.

Above this, the comment talks about vma list, update?

>   */
> -static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
> +static inline void remove_mt(struct mm_struct *mm, struct maple_tree *detached)
>  {
>  	unsigned long nr_accounted = 0;
> +	unsigned long index = 0;
> +	struct vm_area_struct *vma;
>  
>  	/* Update high watermark before we lower total_vm */
>  	update_hiwater_vm(mm);
> -	do {
> +	mt_for_each(detached, vma, index, ULONG_MAX) {
>  		long nrpages = vma_pages(vma);
>  
>  		if (vma->vm_flags & VM_ACCOUNT)
>  			nr_accounted += nrpages;
>  		vm_stat_account(mm, vma->vm_flags, -nrpages);
> -		vma = remove_vma(vma);
> -	} while (vma);
> +		remove_vma(vma);
> +	}
>  	vm_unacct_memory(nr_accounted);
>  	validate_mm(mm);
>  }
Liam R. Howlett Jan. 26, 2022, 8:29 p.m. UTC | #2
* Vlastimil Babka <vbabka@suse.cz> [220120 12:41]:
> On 12/1/21 15:30, Liam Howlett wrote:
> > From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>
> > 
> > Replace any vm_next use with vma_find().
> > 
> > Update free_pgtables(), unmap_vmas(), and zap_page_range() to use the
> > maple tree.
> 
> > Use the new free_pgtables() and unmap_vmas() in do_mas_align_munmap().
> > At the same time, alter the loop to be more compact.
> > 
> > Now that free_pgtables() and unmap_vmas() take a maple tree as an
> > argument, rearrange do_mas_align_munmap() to use the new table to hold
> > the lock
> 
> table or tree?

tree, thanks.

> 
> > Remove __vma_link_list() and __vma_unlink_list() as they are exclusively
> > used to update the linked list
> > 
> > Rework validation of tree as it was depending on the linked list.
> > 
> > Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
> 
> git grep shows that some usages of 'vm_next' and 'vm_prev' remain after this
> patch, including some exotic arch code.

I must have missed them being added during the development cycle of the
maple tree... except parisc; parisc has a block of code left in an #if 0,
so it's not lost - good thing it's in CVS now so it's safe :)

Thanks. riscv will require a new patch.

The damon test code will also require a new patch - I will add that to the
damon conversion patch.
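
Those leftover walkers convert the same way as the code in this patch: drop
the vm_next chain and switch to the VMA iterator (VMA_ITERATOR /
for_each_vma_range, as used in the remap_file_pages() hunk here).  Roughly,
as a sketch only - do_something_with() is just a stand-in for whatever the
remaining user does per VMA:

	VMA_ITERATOR(vmi, mm, 0);
	struct vm_area_struct *vma;

	/* was: for (vma = mm->mmap; vma; vma = vma->vm_next) */
	for_each_vma_range(vmi, vma, ULONG_MAX)
		do_something_with(vma);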

> 
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -398,12 +398,21 @@ void free_pgd_range(struct mmu_gather *tlb,
> >  	} while (pgd++, addr = next, addr != end);
> >  }
> >  
> > -void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
> > -		unsigned long floor, unsigned long ceiling)
> > +void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
> > +		   struct vm_area_struct *vma, unsigned long floor,
> > +		   unsigned long ceiling)
> >  {
> > -	while (vma) {
> > -		struct vm_area_struct *next = vma->vm_next;
> > +	MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
> > +
> > +	do {
> >  		unsigned long addr = vma->vm_start;
> > +		struct vm_area_struct *next;
> > +
> > +		/*
> > +		 * Note: USER_PGTABLES_CEILING may be passed as ceiling and may
> > +		 * be 0.  This will underflow and is okay.
> > +		 */
> > +		next = mas_find(&mas, ceiling - 1);
> >  
> >  		/*
> >  		 * Hide vma from rmap and truncate_pagecache before freeing
> > @@ -422,7 +431,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
> >  			while (next && next->vm_start <= vma->vm_end + PMD_SIZE
> >  			       && !is_vm_hugetlb_page(next)) {
> >  				vma = next;
> > -				next = vma->vm_next;
> > +				next = mas_find(&mas, ceiling - 1);
> >  				unlink_anon_vmas(vma);
> >  				unlink_file_vma(vma);
> >  			}
> > @@ -430,7 +439,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
> >  				floor, next ? next->vm_start : ceiling);
> >  		}
> >  		vma = next;
> > -	}
> > +	} while (vma);
> >  }
> >  
> >  void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte)
> > @@ -1602,17 +1611,19 @@ static void unmap_single_vma(struct mmu_gather *tlb,
> >   * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
> >   * drops the lock and schedules.
> >   */
> > -void unmap_vmas(struct mmu_gather *tlb,
> > +void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
> >  		struct vm_area_struct *vma, unsigned long start_addr,
> >  		unsigned long end_addr)
> >  {
> >  	struct mmu_notifier_range range;
> > +	MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
> >  
> >  	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
> >  				start_addr, end_addr);
> >  	mmu_notifier_invalidate_range_start(&range);
> > -	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
> > +	do {
> >  		unmap_single_vma(tlb, vma, start_addr, end_addr, NULL);
> > +	} while ((vma = mas_find(&mas, end_addr - 1)) != NULL);
> >  	mmu_notifier_invalidate_range_end(&range);
> >  }
> >  
> > @@ -1627,8 +1638,11 @@ void unmap_vmas(struct mmu_gather *tlb,
> >  void zap_page_range(struct vm_area_struct *vma, unsigned long start,
> >  		unsigned long size)
> >  {
> > +	struct maple_tree *mt = &vma->vm_mm->mm_mt;
> 
> Well looks like that's also an option to avoid a new parameter :)
> 
> > +	unsigned long end = start + size;
> >  	struct mmu_notifier_range range;
> >  	struct mmu_gather tlb;
> > +	MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
> >  
> >  	lru_add_drain();
> >  	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
> > @@ -1636,8 +1650,9 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
> >  	tlb_gather_mmu(&tlb, vma->vm_mm);
> >  	update_hiwater_rss(vma->vm_mm);
> >  	mmu_notifier_invalidate_range_start(&range);
> > -	for ( ; vma && vma->vm_start < range.end; vma = vma->vm_next)
> > +	do {
> >  		unmap_single_vma(&tlb, vma, start, range.end, NULL);
> > +	} while ((vma = mas_find(&mas, end - 1)) != NULL);
> >  	mmu_notifier_invalidate_range_end(&range);
> >  	tlb_finish_mmu(&tlb);
> >  }
> > diff --git a/mm/mmap.c b/mm/mmap.c
> > index dde74e0b195d..e13c6ef76697 100644
> > --- a/mm/mmap.c
> > +++ b/mm/mmap.c
> > @@ -74,9 +74,10 @@ int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
> >  static bool ignore_rlimit_data;
> >  core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
> >  
> > -static void unmap_region(struct mm_struct *mm,
> > +static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
> >  		struct vm_area_struct *vma, struct vm_area_struct *prev,
> > -		unsigned long start, unsigned long end);
> > +		struct vm_area_struct *next, unsigned long start,
> > +		unsigned long end);
> >  
> >  /* description of effects of mapping type and prot in current implementation.
> >   * this is due to the limited x86 page protection hardware.  The expected
> > @@ -173,10 +174,8 @@ void unlink_file_vma(struct vm_area_struct *vma)
> >  /*
> >   * Close a vm structure and free it, returning the next.
> 
> No longer returning the next.

ack

> 
> >   */
> > -static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
> > +static void remove_vma(struct vm_area_struct *vma)
> >  {
> > -	struct vm_area_struct *next = vma->vm_next;
> > -
> >  	might_sleep();
> >  	if (vma->vm_ops && vma->vm_ops->close)
> >  		vma->vm_ops->close(vma);
> 
> <snip>
> 
> >   */
> >  struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
> >  {
> > +	MA_STATE(mas, &vma->vm_mm->mm_mt, vma->vm_end, vma->vm_end);
> >  	struct anon_vma *anon_vma = NULL;
> > +	struct vm_area_struct *prev, *next;
> >  
> >  	/* Try next first. */
> > -	if (vma->vm_next) {
> > -		anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next);
> > +	next = mas_walk(&mas);
> > +	if (next) {
> > +		anon_vma = reusable_anon_vma(next, vma, next);
> >  		if (anon_vma)
> >  			return anon_vma;
> >  	}
> >  
> > +	prev = mas_prev(&mas, 0);
> > +	VM_BUG_ON_VMA(prev != vma, vma);
> > +	prev = mas_prev(&mas, 0);
> >  	/* Try prev next. */
> > -	if (vma->vm_prev)
> > -		anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma);
> > +	if (prev)
> > +		anon_vma = reusable_anon_vma(prev, prev, vma);
> >  
> >  	/*
> >  	 * We might reach here with anon_vma == NULL if we can't find
> > @@ -1906,10 +1825,10 @@ struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
> >  					     unsigned long start_addr,
> >  					     unsigned long end_addr)
> >  {
> > -	MA_STATE(mas, &mm->mm_mt, start_addr, start_addr);
> > +	unsigned long index = start_addr;
> >  
> >  	mmap_assert_locked(mm);
> > -	return mas_find(&mas, end_addr - 1);
> > +	return mt_find(&mm->mm_mt, &index, end_addr - 1);
> 
> Why is this now changed again?

I found this with one of your previous comments; I have a fix.

> 
> >  }
> >  EXPORT_SYMBOL(find_vma_intersection);
> >  
> > @@ -1923,8 +1842,10 @@ EXPORT_SYMBOL(find_vma_intersection);
> >   */
> >  inline struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
> >  {
> > -	// Note find_vma_intersection will decrease 0 to underflow to ULONG_MAX
> > -	return find_vma_intersection(mm, addr, 0);
> > +	unsigned long index = addr;
> > +
> > +	mmap_assert_locked(mm);
> > +	return mt_find(&mm->mm_mt, &index, ULONG_MAX);
> 
> And here.

Ditto.

> 
> >  }
> >  EXPORT_SYMBOL(find_vma);
> >  
> > @@ -2026,7 +1947,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
> >  	if (gap_addr < address || gap_addr > TASK_SIZE)
> >  		gap_addr = TASK_SIZE;
> >  
> > -	next = vma->vm_next;
> > +	next = vma_find(mm, vma->vm_end);
> >  	if (next && next->vm_start < gap_addr && vma_is_accessible(next)) {
> >  		if (!(next->vm_flags & VM_GROWSUP))
> >  			return -ENOMEM;
> > @@ -2072,8 +1993,6 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
> >  				vma->vm_end = address;
> >  				vma_store(mm, vma);
> >  				anon_vma_interval_tree_post_update_vma(vma);
> > -				if (!vma->vm_next)
> > -					mm->highest_vm_end = vm_end_gap(vma);
> >  				spin_unlock(&mm->page_table_lock);
> >  
> >  				perf_event_mmap(vma);
> > @@ -2100,7 +2019,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
> >  		return -EPERM;
> >  
> >  	/* Enforce stack_guard_gap */
> > -	prev = vma->vm_prev;
> > +	find_vma_prev(mm, vma->vm_start, &prev);
> >  	/* Check that both stack segments have the same anon_vma? */
> >  	if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
> >  			vma_is_accessible(prev)) {
> > @@ -2235,20 +2154,22 @@ EXPORT_SYMBOL_GPL(find_extend_vma);
> >   *
> >   * Called with the mm semaphore held.
> 
> Above this, the comment talks about vma list, update?

I will update the comment.

> 
> >   */
> > -static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
> > +static inline void remove_mt(struct mm_struct *mm, struct maple_tree *detached)
> >  {
> >  	unsigned long nr_accounted = 0;
> > +	unsigned long index = 0;
> > +	struct vm_area_struct *vma;
> >  
> >  	/* Update high watermark before we lower total_vm */
> >  	update_hiwater_vm(mm);
> > -	do {
> > +	mt_for_each(detached, vma, index, ULONG_MAX) {
> >  		long nrpages = vma_pages(vma);
> >  
> >  		if (vma->vm_flags & VM_ACCOUNT)
> >  			nr_accounted += nrpages;
> >  		vm_stat_account(mm, vma->vm_flags, -nrpages);
> > -		vma = remove_vma(vma);
> > -	} while (vma);
> > +		remove_vma(vma);
> > +	}
> >  	vm_unacct_memory(nr_accounted);
> >  	validate_mm(mm);
> >  }

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 199f66716eef..f5a1394bad61 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1883,8 +1883,9 @@  void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		  unsigned long size);
 void zap_page_range(struct vm_area_struct *vma, unsigned long address,
 		    unsigned long size);
-void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
-		unsigned long start, unsigned long end);
+void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
+		struct vm_area_struct *start_vma, unsigned long start,
+		unsigned long end);
 
 struct mmu_notifier_range;
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 1ab6260fb164..1d41cef68f73 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -400,8 +400,6 @@  struct vm_area_struct {
 	unsigned long vm_end;		/* The first byte after our end address
 					   within vm_mm. */
 
-	/* linked list of VM areas per task, sorted by address */
-	struct vm_area_struct *vm_next, *vm_prev;
 	struct mm_struct *vm_mm;	/* The address space we belong to. */
 
 	/*
@@ -454,7 +452,6 @@  struct vm_area_struct {
 struct kioctx_table;
 struct mm_struct {
 	struct {
-		struct vm_area_struct *mmap;		/* list of VMAs */
 		struct maple_tree mm_mt;
 #ifdef CONFIG_MMU
 		unsigned long (*get_unmapped_area) (struct file *filp,
@@ -469,7 +466,6 @@  struct mm_struct {
 		unsigned long mmap_compat_legacy_base;
 #endif
 		unsigned long task_size;	/* size of task vm space */
-		unsigned long highest_vm_end;	/* highest vma end address */
 		pgd_t * pgd;
 
 #ifdef CONFIG_MEMBARRIER
diff --git a/kernel/fork.c b/kernel/fork.c
index 6de302e93519..c748b7457693 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -363,7 +363,6 @@  struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
 		 */
 		*new = data_race(*orig);
 		INIT_LIST_HEAD(&new->anon_vma_chain);
-		new->vm_next = new->vm_prev = NULL;
 	}
 	return new;
 }
@@ -497,7 +496,7 @@  static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm)
 static __latent_entropy int dup_mmap(struct mm_struct *mm,
 					struct mm_struct *oldmm)
 {
-	struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
+	struct vm_area_struct *mpnt, *tmp, *prev;
 	int retval;
 	unsigned long charge = 0;
 	MA_STATE(old_mas, &oldmm->mm_mt, 0, 0);
@@ -524,7 +523,6 @@  static __latent_entropy int dup_mmap(struct mm_struct *mm,
 	mm->exec_vm = oldmm->exec_vm;
 	mm->stack_vm = oldmm->stack_vm;
 
-	pprev = &mm->mmap;
 	retval = ksm_fork(mm, oldmm);
 	if (retval)
 		goto out;
@@ -605,14 +603,6 @@  static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		if (is_vm_hugetlb_page(tmp))
 			reset_vma_resv_huge_pages(tmp);
 
-		/*
-		 * Link in the new vma and copy the page table entries.
-		 */
-		*pprev = tmp;
-		pprev = &tmp->vm_next;
-		tmp->vm_prev = prev;
-		prev = tmp;
-
 		/* Link the vma into the MT */
 		mas.index = tmp->vm_start;
 		mas.last = tmp->vm_end - 1;
@@ -1052,7 +1042,6 @@  static void mm_init_uprobes_state(struct mm_struct *mm)
 static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	struct user_namespace *user_ns)
 {
-	mm->mmap = NULL;
 	mt_init_flags(&mm->mm_mt, MM_MT_FLAGS);
 	mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock);
 	atomic_set(&mm->mm_users, 1);
diff --git a/mm/debug.c b/mm/debug.c
index 440a0614d14a..3b6fb26934c1 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -191,13 +191,11 @@  EXPORT_SYMBOL(dump_page);
 
 void dump_vma(const struct vm_area_struct *vma)
 {
-	pr_emerg("vma %px start %px end %px\n"
-		"next %px prev %px mm %px\n"
+	pr_emerg("vma %px start %px end %px mm %px\n"
 		"prot %lx anon_vma %px vm_ops %px\n"
 		"pgoff %lx file %px private_data %px\n"
 		"flags: %#lx(%pGv)\n",
-		vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next,
-		vma->vm_prev, vma->vm_mm,
+		vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_mm,
 		(unsigned long)pgprot_val(vma->vm_page_prot),
 		vma->anon_vma, vma->vm_ops, vma->vm_pgoff,
 		vma->vm_file, vma->vm_private_data,
@@ -207,11 +205,11 @@  EXPORT_SYMBOL(dump_vma);
 
 void dump_mm(const struct mm_struct *mm)
 {
-	pr_emerg("mm %px mmap %px task_size %lu\n"
+	pr_emerg("mm %px task_size %lu\n"
 #ifdef CONFIG_MMU
 		"get_unmapped_area %px\n"
 #endif
-		"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
+		"mmap_base %lu mmap_legacy_base %lu\n"
 		"pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
 		"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
 		"pinned_vm %llx data_vm %lx exec_vm %lx stack_vm %lx\n"
@@ -235,11 +233,11 @@  void dump_mm(const struct mm_struct *mm)
 		"tlb_flush_pending %d\n"
 		"def_flags: %#lx(%pGv)\n",
 
-		mm, mm->mmap, mm->task_size,
+		mm, mm->task_size,
 #ifdef CONFIG_MMU
 		mm->get_unmapped_area,
 #endif
-		mm->mmap_base, mm->mmap_legacy_base, mm->highest_vm_end,
+		mm->mmap_base, mm->mmap_legacy_base,
 		mm->pgd, atomic_read(&mm->mm_users),
 		atomic_read(&mm->mm_count),
 		mm_pgtables_bytes(mm),
diff --git a/mm/gup.c b/mm/gup.c
index 60892e5df6a2..ba6249fab982 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1731,7 +1731,7 @@  size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
 			mmap_read_lock(mm);
 			vma = find_vma(mm, nstart);
 		} else if (nstart >= vma->vm_end)
-			vma = vma->vm_next;
+			vma = find_vma(mm, vma->vm_end);
 		if (!vma || vma->vm_start >= end)
 			break;
 		nend = end ? min(end, vma->vm_end) : vma->vm_end;
diff --git a/mm/internal.h b/mm/internal.h
index 7ec79cef3ea9..c617291f6d5f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -65,8 +65,9 @@  vm_fault_t do_swap_page(struct vm_fault *vmf);
 void folio_rotate_reclaimable(struct folio *folio);
 bool __folio_end_writeback(struct folio *folio);
 
-void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
-		unsigned long floor, unsigned long ceiling);
+void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
+		   struct vm_area_struct *start_vma, unsigned long floor,
+		   unsigned long ceiling);
 void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
 
 static inline bool can_madv_lru_vma(struct vm_area_struct *vma)
@@ -455,11 +456,6 @@  static inline int vma_mas_remove(struct vm_area_struct *vma, struct ma_state *ma
 	return ret;
 }
 
-/* mm/util.c */
-void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
-		struct vm_area_struct *prev);
-void __vma_unlink_list(struct mm_struct *mm, struct vm_area_struct *vma);
-
 #ifdef CONFIG_MMU
 extern long populate_vma_page_range(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end, int *locked);
diff --git a/mm/memory.c b/mm/memory.c
index 8f1de811a1dc..445c4a06d7f7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -398,12 +398,21 @@  void free_pgd_range(struct mmu_gather *tlb,
 	} while (pgd++, addr = next, addr != end);
 }
 
-void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
-		unsigned long floor, unsigned long ceiling)
+void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
+		   struct vm_area_struct *vma, unsigned long floor,
+		   unsigned long ceiling)
 {
-	while (vma) {
-		struct vm_area_struct *next = vma->vm_next;
+	MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
+
+	do {
 		unsigned long addr = vma->vm_start;
+		struct vm_area_struct *next;
+
+		/*
+		 * Note: USER_PGTABLES_CEILING may be passed as ceiling and may
+		 * be 0.  This will underflow and is okay.
+		 */
+		next = mas_find(&mas, ceiling - 1);
 
 		/*
 		 * Hide vma from rmap and truncate_pagecache before freeing
@@ -422,7 +431,7 @@  void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			while (next && next->vm_start <= vma->vm_end + PMD_SIZE
 			       && !is_vm_hugetlb_page(next)) {
 				vma = next;
-				next = vma->vm_next;
+				next = mas_find(&mas, ceiling - 1);
 				unlink_anon_vmas(vma);
 				unlink_file_vma(vma);
 			}
@@ -430,7 +439,7 @@  void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 				floor, next ? next->vm_start : ceiling);
 		}
 		vma = next;
-	}
+	} while (vma);
 }
 
 void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte)
@@ -1602,17 +1611,19 @@  static void unmap_single_vma(struct mmu_gather *tlb,
  * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
  * drops the lock and schedules.
  */
-void unmap_vmas(struct mmu_gather *tlb,
+void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
 		struct vm_area_struct *vma, unsigned long start_addr,
 		unsigned long end_addr)
 {
 	struct mmu_notifier_range range;
+	MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
 
 	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
 				start_addr, end_addr);
 	mmu_notifier_invalidate_range_start(&range);
-	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
+	do {
 		unmap_single_vma(tlb, vma, start_addr, end_addr, NULL);
+	} while ((vma = mas_find(&mas, end_addr - 1)) != NULL);
 	mmu_notifier_invalidate_range_end(&range);
 }
 
@@ -1627,8 +1638,11 @@  void unmap_vmas(struct mmu_gather *tlb,
 void zap_page_range(struct vm_area_struct *vma, unsigned long start,
 		unsigned long size)
 {
+	struct maple_tree *mt = &vma->vm_mm->mm_mt;
+	unsigned long end = start + size;
 	struct mmu_notifier_range range;
 	struct mmu_gather tlb;
+	MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
 
 	lru_add_drain();
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
@@ -1636,8 +1650,9 @@  void zap_page_range(struct vm_area_struct *vma, unsigned long start,
 	tlb_gather_mmu(&tlb, vma->vm_mm);
 	update_hiwater_rss(vma->vm_mm);
 	mmu_notifier_invalidate_range_start(&range);
-	for ( ; vma && vma->vm_start < range.end; vma = vma->vm_next)
+	do {
 		unmap_single_vma(&tlb, vma, start, range.end, NULL);
+	} while ((vma = mas_find(&mas, end - 1)) != NULL);
 	mmu_notifier_invalidate_range_end(&range);
 	tlb_finish_mmu(&tlb);
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index dde74e0b195d..e13c6ef76697 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -74,9 +74,10 @@  int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
 static bool ignore_rlimit_data;
 core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
 
-static void unmap_region(struct mm_struct *mm,
+static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
-		unsigned long start, unsigned long end);
+		struct vm_area_struct *next, unsigned long start,
+		unsigned long end);
 
 /* description of effects of mapping type and prot in current implementation.
  * this is due to the limited x86 page protection hardware.  The expected
@@ -173,10 +174,8 @@  void unlink_file_vma(struct vm_area_struct *vma)
 /*
  * Close a vm structure and free it, returning the next.
  */
-static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
+static void remove_vma(struct vm_area_struct *vma)
 {
-	struct vm_area_struct *next = vma->vm_next;
-
 	might_sleep();
 	if (vma->vm_ops && vma->vm_ops->close)
 		vma->vm_ops->close(vma);
@@ -184,15 +183,14 @@  static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 		fput(vma->vm_file);
 	mpol_put(vma_policy(vma));
 	vm_area_free(vma);
-	return next;
 }
 
 static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
 			 unsigned long newbrk, unsigned long oldbrk,
 			 struct list_head *uf);
-static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *brkvma,
-			unsigned long addr, unsigned long request,
-			unsigned long flags);
+static int do_brk_flags(struct ma_state *mas, struct ma_state *ma_prev,
+		struct vm_area_struct *brkvma, unsigned long addr,
+		unsigned long request, unsigned long flags);
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
 	unsigned long newbrk, oldbrk, origbrk;
@@ -203,6 +201,7 @@  SYSCALL_DEFINE1(brk, unsigned long, brk)
 	bool downgraded = false;
 	LIST_HEAD(uf);
 	MA_STATE(mas, &mm->mm_mt, 0, 0);
+	struct ma_state ma_neighbour;
 
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
@@ -259,7 +258,6 @@  SYSCALL_DEFINE1(brk, unsigned long, brk)
 		 * before calling do_brk_munmap().
 		 */
 		mm->brk = brk;
-		mas.last = oldbrk - 1;
 		ret = do_brk_munmap(&mas, brkvma, newbrk, oldbrk, &uf);
 		if (ret == 1)  {
 			downgraded = true;
@@ -270,26 +268,26 @@  SYSCALL_DEFINE1(brk, unsigned long, brk)
 		mm->brk = origbrk;
 		goto out;
 	}
+	ma_neighbour = mas;
+	next = mas_next(&ma_neighbour, newbrk + PAGE_SIZE + stack_guard_gap);
 	/* Only check if the next VMA is within the stack_guard_gap of the
 	 * expansion area */
-	next = mas_next(&mas, newbrk + PAGE_SIZE + stack_guard_gap);
 	/* Check against existing mmap mappings. */
 	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
 		goto out;
 
-	brkvma = mas_prev(&mas, mm->start_brk);
+	brkvma = mas_prev(&ma_neighbour, mm->start_brk);
 	if (brkvma) {
 		if (brkvma->vm_start >= oldbrk)
 			goto out; // Trying to map over another vma.
 
-		if (brkvma->vm_end <= min_brk) {
+		if (brkvma->vm_end <= min_brk)
 			brkvma = NULL;
-			mas_reset(&mas);
-		}
 	}
 
 	/* Ok, looks good - let it rip. */
-	if (do_brk_flags(&mas, brkvma, oldbrk, newbrk - oldbrk, 0) < 0)
+	if (do_brk_flags(&mas, &ma_neighbour, brkvma, oldbrk,
+			 newbrk - oldbrk, 0) < 0)
 		goto out;
 
 	mm->brk = brk;
@@ -300,6 +298,7 @@  SYSCALL_DEFINE1(brk, unsigned long, brk)
 		mmap_read_unlock(mm);
 	else
 		mmap_write_unlock(mm);
+
 	userfaultfd_unmap_complete(mm, &uf);
 	if (populate)
 		mm_populate(oldbrk, newbrk - oldbrk);
@@ -318,44 +317,22 @@  extern void mt_dump(const struct maple_tree *mt);
 static void validate_mm_mt(struct mm_struct *mm)
 {
 	struct maple_tree *mt = &mm->mm_mt;
-	struct vm_area_struct *vma_mt, *vma = mm->mmap;
+	struct vm_area_struct *vma_mt;
 
 	MA_STATE(mas, mt, 0, 0);
-	mas_for_each(&mas, vma_mt, ULONG_MAX) {
-		if (xa_is_zero(vma_mt))
-			continue;
 
-		if (!vma)
-			break;
-
-		if ((vma != vma_mt) ||
-		    (vma->vm_start != vma_mt->vm_start) ||
-		    (vma->vm_end != vma_mt->vm_end) ||
-		    (vma->vm_start != mas.index) ||
-		    (vma->vm_end - 1 != mas.last)) {
+	mas_for_each(&mas, vma_mt, ULONG_MAX) {
+		if ((vma_mt->vm_start != mas.index) ||
+		    (vma_mt->vm_end - 1 != mas.last)) {
 			pr_emerg("issue in %s\n", current->comm);
 			dump_stack();
 #ifdef CONFIG_DEBUG_VM
 			dump_vma(vma_mt);
-			pr_emerg("and vm_next\n");
-			dump_vma(vma->vm_next);
-#endif
+#endif // CONFIG_DEBUG_VM
 			pr_emerg("mt piv: %px %lu - %lu\n", vma_mt,
 				 mas.index, mas.last);
 			pr_emerg("mt vma: %px %lu - %lu\n", vma_mt,
 				 vma_mt->vm_start, vma_mt->vm_end);
-			if (vma->vm_prev) {
-				pr_emerg("ll prev: %px %lu - %lu\n",
-					 vma->vm_prev, vma->vm_prev->vm_start,
-					 vma->vm_prev->vm_end);
-			}
-			pr_emerg("ll vma: %px %lu - %lu\n", vma,
-				 vma->vm_start, vma->vm_end);
-			if (vma->vm_next) {
-				pr_emerg("ll next: %px %lu - %lu\n",
-					 vma->vm_next, vma->vm_next->vm_start,
-					 vma->vm_next->vm_end);
-			}
 
 			mt_dump(mas.tree);
 			if (vma_mt->vm_end != mas.last + 1) {
@@ -372,11 +349,7 @@  static void validate_mm_mt(struct mm_struct *mm)
 			}
 			VM_BUG_ON_MM(vma_mt->vm_start != mas.index, mm);
 		}
-		VM_BUG_ON(vma != vma_mt);
-		vma = vma->vm_next;
-
 	}
-	VM_BUG_ON(vma);
 	mt_validate(&mm->mm_mt);
 }
 
@@ -384,12 +357,12 @@  static void validate_mm(struct mm_struct *mm)
 {
 	int bug = 0;
 	int i = 0;
-	unsigned long highest_address = 0;
-	struct vm_area_struct *vma = mm->mmap;
+	struct vm_area_struct *vma;
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
 
 	validate_mm_mt(mm);
 
-	while (vma) {
+	mas_for_each(&mas, vma, ULONG_MAX) {
 #ifdef CONFIG_DEBUG_VM_RB
 		struct anon_vma *anon_vma = vma->anon_vma;
 		struct anon_vma_chain *avc;
@@ -401,20 +374,12 @@  static void validate_mm(struct mm_struct *mm)
 			anon_vma_unlock_read(anon_vma);
 		}
 #endif
-
-		highest_address = vm_end_gap(vma);
-		vma = vma->vm_next;
 		i++;
 	}
 	if (i != mm->map_count) {
 		pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
 		bug = 1;
 	}
-	if (highest_address != mm->highest_vm_end) {
-		pr_emerg("mm->highest_vm_end %lx, found %lx\n",
-			  mm->highest_vm_end, highest_address);
-		bug = 1;
-	}
 	VM_BUG_ON_MM(bug, mm);
 }
 
@@ -472,29 +437,13 @@  bool range_has_overlap(struct mm_struct *mm, unsigned long start,
 	struct vm_area_struct *existing;
 
 	MA_STATE(mas, &mm->mm_mt, start, start);
+	rcu_read_lock();
 	existing = mas_find(&mas, end - 1);
 	*pprev = mas_prev(&mas, 0);
+	rcu_read_unlock();
 	return existing ? true : false;
 }
 
-/*
- * __vma_next() - Get the next VMA.
- * @mm: The mm_struct.
- * @vma: The current vma.
- *
- * If @vma is NULL, return the first vma in the mm.
- *
- * Returns: The next VMA after @vma.
- */
-static inline struct vm_area_struct *__vma_next(struct mm_struct *mm,
-					 struct vm_area_struct *vma)
-{
-	if (!vma)
-		return mm->mmap;
-
-	return vma->vm_next;
-}
-
 static unsigned long count_vma_pages_range(struct mm_struct *mm,
 		unsigned long addr, unsigned long end)
 {
@@ -559,8 +508,7 @@  void vma_store(struct mm_struct *mm, struct vm_area_struct *vma)
 	mas_store_gfp(&mas, vma, GFP_KERNEL);
 }
 
-static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
-			struct vm_area_struct *prev)
+static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
 {
 	struct address_space *mapping = NULL;
 
@@ -570,7 +518,6 @@  static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	vma_store(mm, vma);
-	__vma_link_list(mm, vma, prev);
 	__vma_link_file(vma);
 
 	if (mapping)
@@ -586,12 +533,7 @@  static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
  */
 static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-	struct vm_area_struct *prev;
-	MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_start);
-
-	prev = mas_prev(&mas, 0);
 	vma_store(mm, vma);
-	__vma_link_list(mm, vma, prev);
 	mm->map_count++;
 }
 
@@ -650,15 +592,8 @@  inline int vma_expand(struct ma_state *mas, struct vm_area_struct *vma,
 	}
 
 	/* Expanding over the next vma */
-	if (remove_next) {
-		/* Remove from mm linked list - also updates highest_vm_end */
-		__vma_unlink_list(mm, next);
-
-		if (file)
-			__remove_shared_vm_struct(next, file, mapping);
-
-	} else if (!next) {
-		mm->highest_vm_end = vm_end_gap(vma);
+	if (remove_next && file) {
+		__remove_shared_vm_struct(next, file, mapping);
 	}
 
 	if (anon_vma) {
@@ -699,7 +634,8 @@  int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	struct vm_area_struct *expand)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
+	struct vm_area_struct *next = find_vma(mm, vma->vm_end);
+	struct vm_area_struct *orig_vma = vma;
 	struct address_space *mapping = NULL;
 	struct rb_root_cached *root = NULL;
 	struct anon_vma *anon_vma = NULL;
@@ -741,7 +677,7 @@  int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 				 */
 				remove_next = 1 + (end > next->vm_end);
 				VM_WARN_ON(remove_next == 2 &&
-					   end != next->vm_next->vm_end);
+					   end != find_vma(mm, next->vm_end)->vm_end);
 				/* trim end to next, for case 6 first pass */
 				end = next->vm_end;
 			}
@@ -754,7 +690,7 @@  int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 			 * next, if the vma overlaps with it.
 			 */
 			if (remove_next == 2 && !next->anon_vma)
-				exporter = next->vm_next;
+				exporter = find_vma(mm, next->vm_end);
 
 		} else if (end > next->vm_start) {
 			/*
@@ -845,8 +781,6 @@  int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 		} else
 			vma_changed = true;
 		vma->vm_end = end;
-		if (!next)
-			mm->highest_vm_end = vm_end_gap(vma);
 	}
 
 	if (vma_changed) {
@@ -871,10 +805,8 @@  int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 		flush_dcache_mmap_unlock(mapping);
 	}
 
-	if (remove_next) {
-		__vma_unlink_list(mm, next);
-		if (file)
-			__remove_shared_vm_struct(next, file, mapping);
+	if (remove_next && file) {
+		__remove_shared_vm_struct(next, file, mapping);
 	} else if (insert) {
 		/*
 		 * split_vma has split insert from vma, and needs
@@ -921,7 +853,7 @@  int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 			 * "next->vm_prev->vm_end" changed and the
 			 * "vma->vm_next" gap must be updated.
 			 */
-			next = vma->vm_next;
+			next = find_vma(mm, vma->vm_end);
 		} else {
 			/*
 			 * For the scope of the comment "next" and
@@ -939,33 +871,14 @@  int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 			remove_next = 1;
 			end = next->vm_end;
 			goto again;
-		} else if (!next) {
-			/*
-			 * If remove_next == 2 we obviously can't
-			 * reach this path.
-			 *
-			 * If remove_next == 3 we can't reach this
-			 * path because pre-swap() next is always not
-			 * NULL. pre-swap() "next" is not being
-			 * removed and its next->vm_end is not altered
-			 * (and furthermore "end" already matches
-			 * next->vm_end in remove_next == 3).
-			 *
-			 * We reach this only in the remove_next == 1
-			 * case if the "next" vma that was removed was
-			 * the highest vma of the mm. However in such
-			 * case next->vm_end == "end" and the extended
-			 * "vma" has vma->vm_end == next->vm_end so
-			 * mm->highest_vm_end doesn't need any update
-			 * in remove_next == 1 case.
-			 */
-			VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
 		}
 	}
-	if (insert && file)
+	if (insert && file) {
 		uprobe_mmap(insert);
+	}
 
 	validate_mm(mm);
+
 	return 0;
 }
 
@@ -1119,10 +1032,10 @@  struct vm_area_struct *vma_merge(struct mm_struct *mm,
 	if (vm_flags & VM_SPECIAL)
 		return NULL;
 
-	next = __vma_next(mm, prev);
+	next = find_vma(mm, prev ? prev->vm_end : 0);
 	area = next;
 	if (area && area->vm_end == end)		/* cases 6, 7, 8 */
-		next = next->vm_next;
+		next = find_vma(mm, next->vm_end);
 
 	/* verify some invariant that must be enforced by the caller */
 	VM_WARN_ON(prev && addr <= prev->vm_start);
@@ -1256,18 +1169,24 @@  static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_
  */
 struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
 {
+	MA_STATE(mas, &vma->vm_mm->mm_mt, vma->vm_end, vma->vm_end);
 	struct anon_vma *anon_vma = NULL;
+	struct vm_area_struct *prev, *next;
 
 	/* Try next first. */
-	if (vma->vm_next) {
-		anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next);
+	next = mas_walk(&mas);
+	if (next) {
+		anon_vma = reusable_anon_vma(next, vma, next);
 		if (anon_vma)
 			return anon_vma;
 	}
 
+	prev = mas_prev(&mas, 0);
+	VM_BUG_ON_VMA(prev != vma, vma);
+	prev = mas_prev(&mas, 0);
 	/* Try prev next. */
-	if (vma->vm_prev)
-		anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma);
+	if (prev)
+		anon_vma = reusable_anon_vma(prev, prev, vma);
 
 	/*
 	 * We might reach here with anon_vma == NULL if we can't find
@@ -1906,10 +1825,10 @@  struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
 					     unsigned long start_addr,
 					     unsigned long end_addr)
 {
-	MA_STATE(mas, &mm->mm_mt, start_addr, start_addr);
+	unsigned long index = start_addr;
 
 	mmap_assert_locked(mm);
-	return mas_find(&mas, end_addr - 1);
+	return mt_find(&mm->mm_mt, &index, end_addr - 1);
 }
 EXPORT_SYMBOL(find_vma_intersection);
 
@@ -1923,8 +1842,10 @@  EXPORT_SYMBOL(find_vma_intersection);
  */
 inline struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 {
-	// Note find_vma_intersection will decrease 0 to underflow to ULONG_MAX
-	return find_vma_intersection(mm, addr, 0);
+	unsigned long index = addr;
+
+	mmap_assert_locked(mm);
+	return mt_find(&mm->mm_mt, &index, ULONG_MAX);
 }
 EXPORT_SYMBOL(find_vma);
 
@@ -2026,7 +1947,7 @@  int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 	if (gap_addr < address || gap_addr > TASK_SIZE)
 		gap_addr = TASK_SIZE;
 
-	next = vma->vm_next;
+	next = vma_find(mm, vma->vm_end);
 	if (next && next->vm_start < gap_addr && vma_is_accessible(next)) {
 		if (!(next->vm_flags & VM_GROWSUP))
 			return -ENOMEM;
@@ -2072,8 +1993,6 @@  int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 				vma->vm_end = address;
 				vma_store(mm, vma);
 				anon_vma_interval_tree_post_update_vma(vma);
-				if (!vma->vm_next)
-					mm->highest_vm_end = vm_end_gap(vma);
 				spin_unlock(&mm->page_table_lock);
 
 				perf_event_mmap(vma);
@@ -2100,7 +2019,7 @@  int expand_downwards(struct vm_area_struct *vma, unsigned long address)
 		return -EPERM;
 
 	/* Enforce stack_guard_gap */
-	prev = vma->vm_prev;
+	find_vma_prev(mm, vma->vm_start, &prev);
 	/* Check that both stack segments have the same anon_vma? */
 	if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
 			vma_is_accessible(prev)) {
@@ -2235,20 +2154,22 @@  EXPORT_SYMBOL_GPL(find_extend_vma);
  *
  * Called with the mm semaphore held.
  */
-static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
+static inline void remove_mt(struct mm_struct *mm, struct maple_tree *detached)
 {
 	unsigned long nr_accounted = 0;
+	unsigned long index = 0;
+	struct vm_area_struct *vma;
 
 	/* Update high watermark before we lower total_vm */
 	update_hiwater_vm(mm);
-	do {
+	mt_for_each(detached, vma, index, ULONG_MAX) {
 		long nrpages = vma_pages(vma);
 
 		if (vma->vm_flags & VM_ACCOUNT)
 			nr_accounted += nrpages;
 		vm_stat_account(mm, vma->vm_flags, -nrpages);
-		vma = remove_vma(vma);
-	} while (vma);
+		remove_vma(vma);
+	}
 	vm_unacct_memory(nr_accounted);
 	validate_mm(mm);
 }
@@ -2258,18 +2179,18 @@  static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
  *
  * Called with the mm semaphore held.
  */
-static void unmap_region(struct mm_struct *mm,
+static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
+		struct vm_area_struct *next,
 		unsigned long start, unsigned long end)
 {
-	struct vm_area_struct *next = __vma_next(mm, prev);
 	struct mmu_gather tlb;
 
 	lru_add_drain();
 	tlb_gather_mmu(&tlb, mm);
 	update_hiwater_rss(mm);
-	unmap_vmas(&tlb, vma, start, end);
-	free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
+	unmap_vmas(&tlb, mt, vma, start, end);
+	free_pgtables(&tlb, mt, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
 				 next ? next->vm_start : USER_PGTABLES_CEILING);
 	tlb_finish_mmu(&tlb);
 }
@@ -2310,8 +2231,9 @@  int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (err)
 		goto out_free_mpol;
 
-	if (new->vm_file)
+	if (new->vm_file) {
 		get_file(new->vm_file);
+	}
 
 	if (new->vm_ops && new->vm_ops->open)
 		new->vm_ops->open(new);
@@ -2353,28 +2275,6 @@  int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	return __split_vma(mm, vma, addr, new_below);
 }
 
-static inline int
-unlock_range(struct vm_area_struct *start, struct vm_area_struct **tail,
-	     unsigned long limit)
-{
-	struct mm_struct *mm = start->vm_mm;
-	struct vm_area_struct *tmp = start;
-	int count = 0;
-
-	while (tmp && tmp->vm_start < limit) {
-		*tail = tmp;
-		count++;
-		if (tmp->vm_flags & VM_LOCKED) {
-			mm->locked_vm -= vma_pages(tmp);
-			munlock_vma_pages_all(tmp);
-		}
-
-		tmp = tmp->vm_next;
-	}
-
-	return count;
-}
-
 /*
  * do_mas_align_munmap() - munmap the aligned region from @start to @end.
  * @mas: The maple_state, ideally set up to alter the correct tree location.
@@ -2393,9 +2293,14 @@  do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
 		    struct mm_struct *mm, unsigned long start,
 		    unsigned long end, struct list_head *uf, bool downgrade)
 {
-	struct vm_area_struct *prev, *last;
-	/* we have start < vma->vm_end  */
+	struct vm_area_struct *prev, *next;
+	struct maple_tree mt_detach;
+	int count = 0;
+	MA_STATE(mas_detach, &mt_detach, start, end - 1);
+	mt_init_flags(&mt_detach, MM_MT_FLAGS);
+	mt_set_external_lock(&mt_detach, &mm->mmap_lock);
 
+	prev = next = NULL;
 	/*
 	 * If we need to split any vma, do it now to save pain later.
 	 *
@@ -2403,6 +2308,8 @@  do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
 	 * unmapped vm_area_struct will remain in use: so lower split_vma
 	 * places tmp vma above, and higher split_vma places tmp vma below.
 	 */
+
+	/* Does it split the first one? */
 	if (start > vma->vm_start) {
 		int error;
 
@@ -2414,31 +2321,49 @@  do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
 		if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
 			return -ENOMEM;
 
-		error = __split_vma(mm, vma, start, 0);
+		/*
+		 * mas_pause() is not needed since mas->index needs to be set
+		 * differently than vma->vm_end anyways.
+		 */
+		error = __split_vma(mm, vma, start, 1);
 		if (error)
 			return error;
-		prev = vma;
-		vma = __vma_next(mm, prev);
-		mas->index = start;
-		mas_reset(mas);
+
+		mas_set(mas, start - 1);
+		prev = mas_walk(mas);
 	} else {
-		prev = vma->vm_prev;
+		prev = mas_prev(mas, 0);
+		if (unlikely((!prev)))
+			mas_set(mas, start);
 	}
 
-	if (vma->vm_end >= end)
-		last = vma;
-	else
-		last = find_vma_intersection(mm, end - 1, end);
+	/*
+	 * Detach a range of VMAs from the mm. Using next as a temp variable as
+	 * it is always overwritten.
+	 */
+	mas_for_each(mas, next, end - 1) {
+		/* Does it split the end? */
+		if (next->vm_end > end) {
+			int error;
 
-	/* Does it split the last one? */
-	if (last && end < last->vm_end) {
-		int error = __split_vma(mm, last, end, 1);
-		if (error)
-			return error;
-		vma = __vma_next(mm, prev);
-		mas_reset(mas);
+			error = __split_vma(mm, next, end, 0);
+			if (error)
+				return error;
+			mas_set(mas, end);
+		}
+		count++;
+#ifdef CONFIG_DEBUG_MAPLE_TREE
+		BUG_ON(next->vm_start < start);
+		BUG_ON(next->vm_start > end);
+#endif
+		vma_mas_store(next, &mas_detach);
+		if (next->vm_flags & VM_LOCKED) {
+			mm->locked_vm -= vma_pages(next);
+			munlock_vma_pages_all(next);
+		}
 	}
 
+	next = mas_find(mas, ULONG_MAX);
 	if (unlikely(uf)) {
 		/*
 		 * If userfaultfd_unmap_prep returns an error the vmas
@@ -2455,47 +2380,48 @@  do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
 			return error;
 	}
 
-	/*
-	 * unlock any mlock()ed ranges before detaching vmas, count the number
-	 * of VMAs to be dropped, and return the tail entry of the affected
-	 * area.
-	 */
-	mm->map_count -= unlock_range(vma, &last, end);
-	/* Drop removed area from the tree */
+	/* Point of no return */
+	mas_set_range(mas, start, end - 1);
+#if defined(CONFIG_DEBUG_MAPLE_TREE)
+	/* Make sure no VMAs are about to be lost. */
+	{
+		MA_STATE(test, &mt_detach, start, end - 1);
+		struct vm_area_struct *vma_mas, *vma_test;
+		int test_count = 0;
+
+		rcu_read_lock();
+		vma_test = mas_find(&test, end - 1);
+		mas_for_each(mas, vma_mas, end - 1) {
+			BUG_ON(vma_mas != vma_test);
+			test_count++;
+			vma_test = mas_next(&test, end - 1);
+		}
+		rcu_read_unlock();
+		BUG_ON(count != test_count);
+		mas_set_range(mas, start, end - 1);
+	}
+#endif
 	mas_store_gfp(mas, NULL, GFP_KERNEL);
-
-	/* Detach vmas from the MM linked list */
-	vma->vm_prev = NULL;
-	if (prev)
-		prev->vm_next = last->vm_next;
-	else
-		mm->mmap = last->vm_next;
-
-	if (last->vm_next) {
-		last->vm_next->vm_prev = prev;
-		last->vm_next = NULL;
-	} else
-		mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
-
+	mm->map_count -= count;
 	/*
 	 * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
 	 * VM_GROWSUP VMA. Such VMAs can change their size under
 	 * down_read(mmap_lock) and collide with the VMA we are about to unmap.
 	 */
 	if (downgrade) {
-		if (last && (last->vm_flags & VM_GROWSDOWN))
+		if (next && (next->vm_flags & VM_GROWSDOWN))
 			downgrade = false;
 		else if (prev && (prev->vm_flags & VM_GROWSUP))
 			downgrade = false;
-		else {
+		else
 			mmap_write_downgrade(mm);
-		}
 	}
 
-	unmap_region(mm, vma, prev, start, end);
-
-	/* Fix up all other VM information */
-	remove_vma_list(mm, vma);
+	unmap_region(mm, &mt_detach, vma, prev, next, start, end);
+	/* Statistics and freeing VMAs */
+	remove_mt(mm, &mt_detach);
+	validate_mm(mm);
+	__mt_destroy(&mt_detach);
 
 	return downgrade ? 1 : 0;
 }
@@ -2661,6 +2587,11 @@  unsigned long mmap_region(struct file *file, unsigned long addr,
 		}
 
 		vma->vm_file = get_file(file);
+		/*
+		 * call_mmap() may sleep, but will not alter the maple tree
+		 * since the mmap_lock is held.  This is safe and will be
+		 * changed later.
+		 */
 		error = call_mmap(file, vma);
 		if (error)
 			goto unmap_and_free_vma;
@@ -2724,7 +2655,6 @@  unsigned long mmap_region(struct file *file, unsigned long addr,
 		i_mmap_lock_write(vma->vm_file->f_mapping);
 
 	vma_mas_store(vma, &mas);
-	__vma_link_list(mm, vma, prev);
 	mm->map_count++;
 	if (vma->vm_file) {
 		if (vma->vm_flags & VM_SHARED)
@@ -2775,7 +2705,7 @@  unsigned long mmap_region(struct file *file, unsigned long addr,
 	vma->vm_file = NULL;
 
 	/* Undo any partial mapping done by a device driver. */
-	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
+	unmap_region(mm, mas.tree, vma, prev, next, vma->vm_start, vma->vm_end);
 	charged = 0;
 	if (vm_flags & VM_SHARED)
 		mapping_unmap_writable(file->f_mapping);
@@ -2864,11 +2794,12 @@  SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 		goto out;
 
 	if (start + size > vma->vm_end) {
-		struct vm_area_struct *next;
+		VMA_ITERATOR(vmi, mm, vma->vm_end);
+		struct vm_area_struct *next, *prev = vma;
 
-		for (next = vma->vm_next; next; next = next->vm_next) {
+		for_each_vma_range(vmi, next, start + size) {
 			/* hole between vmas ? */
-			if (next->vm_start != next->vm_prev->vm_end)
+			if (next->vm_start != prev->vm_end)
 				goto out;
 
 			if (next->vm_file != vma->vm_file)
@@ -2877,8 +2808,7 @@  SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 			if (next->vm_flags != vma->vm_flags)
 				goto out;
 
-			if (start + size <= next->vm_end)
-				break;
+			prev = next;
 		}
 
 		if (!next)
@@ -2924,7 +2854,7 @@  static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
 			 struct list_head *uf)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	struct vm_area_struct unmap;
+	struct vm_area_struct unmap, *next;
 	unsigned long unmap_pages;
 	int ret;
 
@@ -2941,6 +2871,7 @@  static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
 	ret = userfaultfd_unmap_prep(mm, newbrk, oldbrk, uf);
 	if (ret)
 		return ret;
+
 	ret = 1;
 
 	// Change the oldbrk of vma to the newbrk of the munmap area
@@ -2954,9 +2885,12 @@  static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
 	vma_init(&unmap, mm);
 	unmap.vm_start = newbrk;
 	unmap.vm_end = oldbrk;
+	if (vma->anon_vma)
+		vma_set_anonymous(&unmap);
 	if (vma_mas_remove(&unmap, mas))
 		goto mas_store_fail;
 
+	vma->vm_end = newbrk;
 	if (vma->anon_vma) {
 		anon_vma_interval_tree_post_update_vma(vma);
 		anon_vma_unlock_write(vma->anon_vma);
@@ -2968,8 +2902,9 @@  static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
 		munlock_vma_pages_range(&unmap, newbrk, oldbrk);
 	}
 
+	next = mas_next(mas, ULONG_MAX);
 	mmap_write_downgrade(mm);
-	unmap_region(mm, &unmap, vma, newbrk, oldbrk);
+	unmap_region(mm, mas->tree, &unmap, vma, next, newbrk, oldbrk);
 	/* Statistics */
 	vm_stat_account(mm, unmap.vm_flags, -unmap_pages);
 	if (unmap.vm_flags & VM_ACCOUNT)
@@ -2980,6 +2915,7 @@  static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
 	return ret;
 
 mas_store_fail:
+	mas_unlock(mas);
 	vma->vm_end = oldbrk;
 	if (vma->anon_vma) {
 		anon_vma_interval_tree_post_update_vma(vma);
@@ -3000,15 +2936,14 @@  static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
  * do not match then create a new anonymous VMA.  Eventually we may be able to
  * do some brk-specific accounting here.
  */
-static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
-			unsigned long addr, unsigned long len,
-			unsigned long flags)
+static int do_brk_flags(struct ma_state *mas, struct ma_state *ma_prev,
+		struct vm_area_struct *vma, unsigned long addr,
+		unsigned long len, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *prev = NULL;
 	int error;
 	unsigned long mapped_addr;
-	validate_mm_mt(mm);
+	validate_mm(mm);
 
 	/* Until we need other flags, refuse anything except VM_EXEC. */
 	if ((flags & (~VM_EXEC)) != 0)
@@ -3033,7 +2968,6 @@  static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
 	if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
 		return -ENOMEM;
 
-	mas->last = addr + len - 1;
 	if (vma) {
 		/* Expand the existing vma if possible; almost never a singular
 		 * list, so this will almost always fail. */
@@ -3041,7 +2975,8 @@  static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
 		if ((!vma->anon_vma ||
 		     list_is_singular(&vma->anon_vma_chain)) &&
 		     ((vma->vm_flags & ~VM_SOFTDIRTY) == flags)){
-			mas->index = vma->vm_start;
+			ma_prev->index = vma->vm_start;
+			ma_prev->last = addr + len - 1;
 
 			vma_adjust_trans_huge(vma, addr, addr + len, 0);
 			if (vma->anon_vma) {
@@ -3050,7 +2985,24 @@  static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
 			}
 			vma->vm_end = addr + len;
 			vma->vm_flags |= VM_SOFTDIRTY;
-			if (mas_store_gfp(mas, vma, GFP_KERNEL))
+
+#if defined(CONFIG_DEBUG_MAPLE_TREE)
+			/* Make sure no VMAs are about to be lost. */
+			{
+				MA_STATE(test, ma_prev->tree, vma->vm_start,
+					 vma->vm_end - 1);
+				struct vm_area_struct *vma_mas;
+				int count = 0;
+
+				mas_for_each(&test, vma_mas, vma->vm_end - 1)
+					count++;
+
+				BUG_ON(count != 1);
+				mas_set_range(ma_prev, vma->vm_start,
+					      vma->vm_end - 1);
+			}
+#endif
+			if (mas_store_gfp(ma_prev, vma, GFP_KERNEL))
 				goto mas_mod_fail;
 
 			if (vma->anon_vma) {
@@ -3060,10 +3012,7 @@  static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
 			khugepaged_enter_vma_merge(vma, flags);
 			goto out;
 		}
-		prev = vma;
 	}
-	mas->index = addr;
-	mas_walk(mas);
 
 	/* create a vma struct for an anonymous mapping */
 	vma = vm_area_alloc(mm);
@@ -3076,14 +3025,15 @@  static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
 	vma->vm_pgoff = addr >> PAGE_SHIFT;
 	vma->vm_flags = flags;
 	vma->vm_page_prot = vm_get_page_prot(flags);
-	if (vma_mas_store(vma, mas))
-		goto mas_store_fail;
-
-	if (!prev)
-		prev = mas_prev(mas, 0);
-
-	__vma_link_list(mm, vma, prev);
+	if (vma->vm_file)
+		i_mmap_lock_write(vma->vm_file->f_mapping);
+	vma_mas_store(vma, mas);
 	mm->map_count++;
+	if (vma->vm_file) {
+		__vma_link_file(vma);
+		i_mmap_unlock_write(vma->vm_file->f_mapping);
+	}
+
 out:
 	perf_event_mmap(vma);
 	mm->total_vm += len >> PAGE_SHIFT;
@@ -3091,10 +3041,9 @@  static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
 	if (flags & VM_LOCKED)
 		mm->locked_vm += (len >> PAGE_SHIFT);
 	vma->vm_flags |= VM_SOFTDIRTY;
-	validate_mm_mt(mm);
+	validate_mm(mm);
 	return 0;
 
-mas_store_fail:
 	vm_area_free(vma);
 vma_alloc_fail:
 	vm_unacct_memory(len >> PAGE_SHIFT);
@@ -3128,9 +3077,7 @@  int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
-	// This vma left intentionally blank.
-	mas_walk(&mas);
-	ret = do_brk_flags(&mas, vma, addr, len, flags);
+	ret = do_brk_flags(&mas, &mas, vma, addr, len, flags);
 	populate = ((mm->def_flags & VM_LOCKED) != 0);
 	mmap_write_unlock(mm);
 	if (populate && !ret)
@@ -3151,6 +3098,8 @@  void exit_mmap(struct mm_struct *mm)
 	struct mmu_gather tlb;
 	struct vm_area_struct *vma;
 	unsigned long nr_accounted = 0;
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
+	int count = 0;
 
 	/* mm's last user has gone, and its about to be pulled down */
 	mmu_notifier_release(mm);
@@ -3186,12 +3135,19 @@  void exit_mmap(struct mm_struct *mm)
 	 * Lockdep will complain about not holding the mmap_lock, so we lie.
 	 */
 	rwsem_acquire(&mm->mmap_lock.dep_map, 0, 0, _THIS_IP_);
-	if (mm->locked_vm)
-		unlock_range(mm->mmap, &vma, ULONG_MAX);
+	if (mm->locked_vm) {
+		mas_for_each(&mas, vma, ULONG_MAX) {
+			if (vma->vm_flags & VM_LOCKED) {
+				mm->locked_vm -= vma_pages(vma);
+				munlock_vma_pages_all(vma);
+			}
+		}
+		mas_set(&mas, 0);
+	}
 
 	arch_exit_mmap(mm);
 
-	vma = mm->mmap;
+	vma = mas_find(&mas, ULONG_MAX);
 	if (!vma)	/* Can happen if dup_mmap() received an OOM */
 		return;
 
@@ -3200,20 +3156,24 @@  void exit_mmap(struct mm_struct *mm)
 	tlb_gather_mmu_fullmm(&tlb, mm);
 	/* update_hiwater_rss(mm) here? but nobody should be looking */
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
-	unmap_vmas(&tlb, vma, 0, -1);
-	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
+	unmap_vmas(&tlb, &mm->mm_mt, vma, 0, ULONG_MAX);
+	free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
 	tlb_finish_mmu(&tlb);
 
 	/*
-	 * Walk the list again, actually closing and freeing it,
-	 * with preemption enabled, without holding any MM locks.
+	 * Walk the list again, actually closing and freeing it, with preemption
+	 * enabled, without holding any MM locks besides the unreachable
+	 * mmap_write_lock.
 	 */
-	while (vma) {
+	do {
 		if (vma->vm_flags & VM_ACCOUNT)
 			nr_accounted += vma_pages(vma);
-		vma = remove_vma(vma);
+		remove_vma(vma);
+		count++;
 		cond_resched();
-	}
+	} while ((vma = mas_find(&mas, ULONG_MAX)) != NULL);
+
+	BUG_ON(count != mm->map_count);
 
 	trace_exit_mmap(mm);
 	__mt_destroy(&mm->mm_mt);
@@ -3254,7 +3214,7 @@  int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 		vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
 	}
 
-	vma_link(mm, vma, prev);
+	vma_link(mm, vma);
 	return 0;
 }
 
@@ -3282,7 +3242,8 @@  struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		faulted_in_anon_vma = false;
 	}
 
-	if (range_has_overlap(mm, addr, addr + len, &prev))
+	new_vma = find_vma_prev(mm, addr, &prev);
+	if (new_vma->vm_start < addr + len)
 		return NULL;	/* should never get here */
 
 	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
@@ -3325,7 +3286,7 @@  struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 			get_file(new_vma->vm_file);
 		if (new_vma->vm_ops && new_vma->vm_ops->open)
 			new_vma->vm_ops->open(new_vma);
-		vma_link(mm, new_vma, prev);
+		vma_link(mm, new_vma);
 		*need_rmap_locks = false;
 	}
 	validate_mm_mt(mm);
@@ -3625,12 +3586,13 @@  int mm_take_all_locks(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
 	struct anon_vma_chain *avc;
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
 
 	BUG_ON(mmap_read_trylock(mm));
 
 	mutex_lock(&mm_all_locks_mutex);
 
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	mas_for_each(&mas, vma, ULONG_MAX) {
 		if (signal_pending(current))
 			goto out_unlock;
 		if (vma->vm_file && vma->vm_file->f_mapping &&
@@ -3638,7 +3600,8 @@  int mm_take_all_locks(struct mm_struct *mm)
 			vm_lock_mapping(mm, vma->vm_file->f_mapping);
 	}
 
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	mas_set(&mas, 0);
+	mas_for_each(&mas, vma, ULONG_MAX) {
 		if (signal_pending(current))
 			goto out_unlock;
 		if (vma->vm_file && vma->vm_file->f_mapping &&
@@ -3646,7 +3609,8 @@  int mm_take_all_locks(struct mm_struct *mm)
 			vm_lock_mapping(mm, vma->vm_file->f_mapping);
 	}
 
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	mas_set(&mas, 0);
+	mas_for_each(&mas, vma, ULONG_MAX) {
 		if (signal_pending(current))
 			goto out_unlock;
 		if (vma->anon_vma)
@@ -3705,11 +3669,12 @@  void mm_drop_all_locks(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
 	struct anon_vma_chain *avc;
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
 
 	BUG_ON(mmap_read_trylock(mm));
 	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
 
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	mas_for_each(&mas, vma, ULONG_MAX) {
 		if (vma->anon_vma)
 			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
 				vm_unlock_anon_vma(avc->anon_vma);
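
The mm/mmap.c conversion above is mechanical in most places: each open-coded walk of the form "for (vma = mm->mmap; vma; vma = vma->vm_next)" becomes a maple tree walk. A minimal, illustrative sketch of the two iterator forms used in these hunks; visit() is a placeholder for the loop body, not a real helper:

/* Sketch only: the two VMA iteration patterns introduced by this patch. */
static void walk_all_vmas(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	MA_STATE(mas, &mm->mm_mt, 0, 0);

	mas_for_each(&mas, vma, ULONG_MAX)
		visit(vma);			/* placeholder loop body */

	mas_set(&mas, 0);			/* rewind before a second pass */
	mas_for_each(&mas, vma, ULONG_MAX)
		visit(vma);
}

static void walk_vma_range(struct mm_struct *mm, unsigned long addr,
			   unsigned long end)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, addr);

	for_each_vma_range(vmi, vma, end)	/* VMAs overlapping [addr, end) */
		visit(vma);			/* placeholder loop body */
}
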
diff --git a/mm/nommu.c b/mm/nommu.c
index acb9aafb0afc..cb03f9c304d6 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -574,7 +574,6 @@  static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
 	mas_reset(&mas);
 	/* add the VMA to the tree */
 	vma_mas_store(vma, &mas);
-	__vma_link_list(mm, vma, prev);
 }
 
 /*
@@ -599,7 +598,6 @@  static void delete_vma_from_mm(struct vm_area_struct *vma)
 
 	/* remove from the MM's tree and list */
 	vma_mas_remove(vma, &mas);
-	__vma_unlink_list(vma->vm_mm, vma);
 }
 
 /*
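
On nommu the same rule applies: with the list gone, storing into or erasing from the maple tree is the entire linkage step. A hedged sketch of the resulting add/delete shape, using only the calls visible in the hunks above; the MA_STATE ranges and surrounding bookkeeping are assumptions, not the patch's exact code:

/* Sketch only: nommu VMA add/delete once the linked list is removed. */
static void add_vma_to_mm_sketch(struct mm_struct *mm, struct vm_area_struct *vma)
{
	MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end - 1);

	mm->map_count++;
	/* file-mapping and region bookkeeping elided */
	vma_mas_store(vma, &mas);	/* the tree is now the only VMA index */
}

static void delete_vma_from_mm_sketch(struct vm_area_struct *vma)
{
	MA_STATE(mas, &vma->vm_mm->mm_mt, vma->vm_start, vma->vm_end - 1);

	vma->vm_mm->map_count--;
	/* file-mapping unlink elided */
	vma_mas_remove(vma, &mas);	/* nothing else to unlink anymore */
}
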
diff --git a/mm/util.c b/mm/util.c
index ab02382c2d57..934a120d1c96 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -271,46 +271,6 @@  void *memdup_user_nul(const void __user *src, size_t len)
 }
 EXPORT_SYMBOL(memdup_user_nul);
 
-void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
-		struct vm_area_struct *prev)
-{
-	struct vm_area_struct *next;
-
-	vma->vm_prev = prev;
-	if (prev) {
-		next = prev->vm_next;
-		prev->vm_next = vma;
-	} else {
-		next = mm->mmap;
-		mm->mmap = vma;
-	}
-	vma->vm_next = next;
-	if (next)
-		next->vm_prev = vma;
-	else
-		mm->highest_vm_end = vm_end_gap(vma);
-}
-
-void __vma_unlink_list(struct mm_struct *mm, struct vm_area_struct *vma)
-{
-	struct vm_area_struct *prev, *next;
-
-	next = vma->vm_next;
-	prev = vma->vm_prev;
-	if (prev)
-		prev->vm_next = next;
-	else
-		mm->mmap = next;
-	if (next)
-		next->vm_prev = prev;
-	else {
-		if (prev)
-			mm->highest_vm_end = vm_end_gap(prev);
-		else
-			mm->highest_vm_end = 0;
-	}
-}
-
 /* Check if the vma is being used as a stack by this task */
 int vma_is_stack_for_current(struct vm_area_struct *vma)
 {