diff mbox series

[v4,07/66] mm: Add VMA iterator

Message ID 20211201142918.921493-8-Liam.Howlett@oracle.com (mailing list archive)
State New
Headers show
Series Introducing the Maple Tree | expand

Commit Message

Liam R. Howlett Dec. 1, 2021, 2:29 p.m. UTC
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

This thin layer of abstraction over the maple tree state is for
iterating over VMAs.  You can go forwards, go backwards or ask where
the iterator is.  Rename the existing vma_next() to __vma_next() --
it will be removed by the end of this series.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
---
 include/linux/mm.h       | 27 +++++++++++++++++++++++++++
 include/linux/mm_types.h | 21 +++++++++++++++++++++
 mm/mmap.c                | 10 +++++-----
 3 files changed, 53 insertions(+), 5 deletions(-)

Comments

Vlastimil Babka Dec. 9, 2021, 3:26 p.m. UTC | #1
On 12/1/21 15:29, Liam Howlett wrote:
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> 
> This thin layer of abstraction over the maple tree state is for
> iterating over VMAs.  You can go forwards, go backwards or ask where
> the iterator is.  Rename the existing vma_next() to __vma_next() --
> it will be removed by the end of this series.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>

Acked-by: Vlastimil Babka <vbabka@suse.cz>

With a question below.

> ---
>  include/linux/mm.h       | 27 +++++++++++++++++++++++++++
>  include/linux/mm_types.h | 21 +++++++++++++++++++++
>  mm/mmap.c                | 10 +++++-----
>  3 files changed, 53 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 9eae78a155be..acdccbe9b96b 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -696,6 +696,33 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
>  	return vma->vm_flags & VM_ACCESS_FLAGS;
>  }
>  
> +static inline
> +struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
> +{
> +	return mas_find(&vmi->mas, max);
> +}
> +
> +static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
> +{
> +	return vma_find(vmi, ULONG_MAX);

Why not mas_next()?

> +}
> +
> +static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
> +{
> +	return mas_prev(&vmi->mas, 0);
> +}
> +
> +static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
> +{
> +	return vmi->mas.index;
> +}
> +
> +#define for_each_vma(vmi, vma)		while ((vma = vma_next(&vmi)) != NULL)
> +
Liam R. Howlett Dec. 10, 2021, 2:02 a.m. UTC | #2
* Vlastimil Babka <vbabka@suse.cz> [211209 10:26]:
> On 12/1/21 15:29, Liam Howlett wrote:
> > From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> > 
> > This thin layer of abstraction over the maple tree state is for
> > iterating over VMAs.  You can go forwards, go backwards or ask where
> > the iterator is.  Rename the existing vma_next() to __vma_next() --
> > it will be removed by the end of this series.
> > 
> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
> 
> Acked-by: Vlastimil Babka <vbabka@suse.cz>
> 
> With a question below.
> 
> > ---
> >  include/linux/mm.h       | 27 +++++++++++++++++++++++++++
> >  include/linux/mm_types.h | 21 +++++++++++++++++++++
> >  mm/mmap.c                | 10 +++++-----
> >  3 files changed, 53 insertions(+), 5 deletions(-)
> > 
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index 9eae78a155be..acdccbe9b96b 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -696,6 +696,33 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
> >  	return vma->vm_flags & VM_ACCESS_FLAGS;
> >  }
> >  
> > +static inline
> > +struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
> > +{
> > +	return mas_find(&vmi->mas, max);
> > +}
> > +
> > +static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
> > +{
> > +	return vma_find(vmi, ULONG_MAX);
> 
> Why not mas_next()?

vma_find() uses mas_find() which was created to implement find_vma().
As a replacement, the search looks for an entry at the address and if
nothing exists, it will continue the search upwards.  The result is that
the first entry can be found at the address passed.  Every subsequent
call to vma_find() would search from the end of the previous range - as
saved in the maple state, or the vma iterator in this case.

mas_next(), however, is more of a traditional linked list operation that
finds the next entry _after_ the one containing the index in the maple
state.  The only difference is on the start when the maple state is not
currently pointing at an entry at all (the node is set to MAS_START).

mas_find() can be thought of as:

entry = mas_walk();
if (!entry)
	entry = mas_next_entry();

return entry;


mas_next() can be thought of as:

if (mas_is_start())
	mas_walk();

return mas_next_entry();


Matthew uses mas_find() for his implementation of the vma iterator so
that the first entry is not skipped.


> 
> > +}
> > +
> > +static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
> > +{
> > +	return mas_prev(&vmi->mas, 0);
> > +}
> > +
> > +static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
> > +{
> > +	return vmi->mas.index;
> > +}
> > +
> > +#define for_each_vma(vmi, vma)		while ((vma = vma_next(&vmi)) != NULL)
> > +
Vlastimil Babka Dec. 10, 2021, 3:08 p.m. UTC | #3
On 12/10/21 03:02, Liam Howlett wrote:
> 
> vma_find() uses mas_find() which was created to implement find_vma().
> As a replacement, the search looks for an entry at the address and if
> nothing exists, it will continue the search upwards.  The result is that
> the first entry can be found at the address passed.  Every subsequent
> call to vma_find() would search from the end of the previous range - as
> saved in the maple state, or the vma iterator in this case.
> 
> mas_next(), however is more of a traditional linked list operation that
> finds the next entry _after_ the one containing the index in the maple
> state.  The only difference is on the start when the maple state is not
> currently pointing at an entry at all (the node is set to MAS_START).
> 
> mas_find() can be thought of as:
> 
> entry = mas_walk();
> if (!entry)
> 	entry = mas_next_entry();
> 
> return entry;
> 
> 
> mas_next can be though to as:
> 
> if (mas_is_start())
> 	mas_walk();
> 
> return mas_next_entry();
> 
> 
> Matthew uses mas_find() for his implementation of the vma iterator so
> that the first entry is not skipped.

Yeah, but if vma_next() is going to replace the cases where we already have
a vma and use vma->vm_next to get the next one, then mas_next() would be a
better fit?

Do I understand correctly that e.g. after a mas_pause(), vma_next() done via
mas_find() might return the same vma again, while vma_prev() will not, and
vma_next() implemented by mas_next() also wouldn't? Isn't that unexpected
semantics?

>> 
>> > +}
>> > +
>> > +static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
>> > +{
>> > +	return mas_prev(&vmi->mas, 0);
>> > +}
>> > +
>> > +static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
>> > +{
>> > +	return vmi->mas.index;
>> > +}
>> > +
>> > +#define for_each_vma(vmi, vma)		while ((vma = vma_next(&vmi)) != NULL)
>> > +
Liam R. Howlett Dec. 10, 2021, 6:24 p.m. UTC | #4
* Vlastimil Babka <vbabka@suse.cz> [211210 10:08]:
> On 12/10/21 03:02, Liam Howlett wrote:
> > 
> > vma_find() uses mas_find() which was created to implement find_vma().
> > As a replacement, the search looks for an entry at the address and if
> > nothing exists, it will continue the search upwards.  The result is that
> > the first entry can be found at the address passed.  Every subsequent
> > call to vma_find() would search from the end of the previous range - as
> > saved in the maple state, or the vma iterator in this case.
> > 
> > mas_next(), however is more of a traditional linked list operation that
> > finds the next entry _after_ the one containing the index in the maple
> > state.  The only difference is on the start when the maple state is not
> > currently pointing at an entry at all (the node is set to MAS_START).
> > 
> > mas_find() can be thought of as:
> > 
> > entry = mas_walk();
> > if (!entry)
> > 	entry = mas_next_entry();
> > 
> > return entry;
> > 
> > 
> > mas_next can be though to as:
> > 
> > if (mas_is_start())
> > 	mas_walk();
> > 
> > return mas_next_entry();
> > 
> > 
> > Matthew uses mas_find() for his implementation of the vma iterator so
> > that the first entry is not skipped.
> 
> Yeah, but if vma_next() is going to replace the cases where we already have
> a vma and use vma->vm_next to get the next one, then mas_next() would be a
> better fit?
> 
> Do I understand correctly that e.g. after a mas_pause(), vma_next() done via
> max_next() might return the same vma again, while vma_prev() will not, and
> vma_next() implemented by mas_next() also wouldn't? Isn't that unexpected
> semantics?
> 

No, mas_pause() will set the mas->node to MAS_PAUSE, which causes
mas_find() to start searching for mas->last + 1 and up so a duplicate
should not occur.

...Unless the VMA you found was expanded while paused but this, I think,
only happens on stack expansion.  During stack expansion the VMA can
grow while holding the mmap_lock in read mode, so if something is
iterating over the VMAs and pauses on the stack VMA then the stack grows
and the iterator resumes, it could return the stack vma twice if
expanding upwards... Is this actually a potential issue or did I miss
something?

> >> 
> >> > +}
> >> > +
> >> > +static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
> >> > +{
> >> > +	return mas_prev(&vmi->mas, 0);
> >> > +}
> >> > +
> >> > +static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
> >> > +{
> >> > +	return vmi->mas.index;
> >> > +}
> >> > +
> >> > +#define for_each_vma(vmi, vma)		while ((vma = vma_next(&vmi)) != NULL)
> >> > +
>
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9eae78a155be..acdccbe9b96b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -696,6 +696,33 @@  static inline bool vma_is_accessible(struct vm_area_struct *vma)
 	return vma->vm_flags & VM_ACCESS_FLAGS;
 }
 
+static inline
+struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
+{
+	return mas_find(&vmi->mas, max);
+}
+
+static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
+{
+	return vma_find(vmi, ULONG_MAX);
+}
+
+static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
+{
+	return mas_prev(&vmi->mas, 0);
+}
+
+static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
+{
+	return vmi->mas.index;
+}
+
+#define for_each_vma(vmi, vma)		while ((vma = vma_next(&vmi)) != NULL)
+
+/* The MM code likes to work with exclusive end addresses */
+#define for_each_vma_range(vmi, vma, end)				\
+	while ((vma = vma_find(&vmi, end - 1)) != NULL)
+
 #ifdef CONFIG_SHMEM
 /*
  * The vma_is_shmem is not inline because it is used only by slow
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d9ce412fca04..b0c3494011bb 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -675,6 +675,27 @@  static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 	return (struct cpumask *)&mm->cpu_bitmap;
 }
 
+struct vma_iterator {
+	struct ma_state mas;
+};
+
+#define VMA_ITERATOR(name, mm, addr) 					\
+	struct vma_iterator name = {					\
+		.mas = {						\
+			.tree = &mm->mm_mt,				\
+			.index = addr,					\
+			.node = MAS_START,				\
+		},							\
+	}
+
+static inline void vma_iter_init(struct vma_iterator *vmi,
+		struct mm_struct *mm, unsigned long addr)
+{
+	vmi->mas.tree = &mm->mm_mt;
+	vmi->mas.index = addr;
+	vmi->mas.node = MAS_START;
+}
+
 struct mmu_gather;
 extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
 extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
diff --git a/mm/mmap.c b/mm/mmap.c
index c2f1431886d4..9fee6e6b276f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -626,7 +626,7 @@  static int find_vma_links(struct mm_struct *mm, unsigned long addr,
 }
 
 /*
- * vma_next() - Get the next VMA.
+ * __vma_next() - Get the next VMA.
  * @mm: The mm_struct.
  * @vma: The current vma.
  *
@@ -634,7 +634,7 @@  static int find_vma_links(struct mm_struct *mm, unsigned long addr,
  *
  * Returns: The next VMA after @vma.
  */
-static inline struct vm_area_struct *vma_next(struct mm_struct *mm,
+static inline struct vm_area_struct *__vma_next(struct mm_struct *mm,
 					 struct vm_area_struct *vma)
 {
 	if (!vma)
@@ -1286,7 +1286,7 @@  struct vm_area_struct *vma_merge(struct mm_struct *mm,
 	if (vm_flags & VM_SPECIAL)
 		return NULL;
 
-	next = vma_next(mm, prev);
+	next = __vma_next(mm, prev);
 	area = next;
 	if (area && area->vm_end == end)		/* cases 6, 7, 8 */
 		next = next->vm_next;
@@ -2815,7 +2815,7 @@  static void unmap_region(struct mm_struct *mm,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
 		unsigned long start, unsigned long end)
 {
-	struct vm_area_struct *next = vma_next(mm, prev);
+	struct vm_area_struct *next = __vma_next(mm, prev);
 	struct mmu_gather tlb;
 
 	lru_add_drain();
@@ -3028,7 +3028,7 @@  int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
 		if (error)
 			return error;
 	}
-	vma = vma_next(mm, prev);
+	vma = __vma_next(mm, prev);
 
 	if (unlikely(uf)) {
 		/*