diff mbox series

[v4,38/66] coredump: Remove vma linked list walk

Message ID 20211201142918.921493-39-Liam.Howlett@oracle.com (mailing list archive)
State New
Headers show
Series Introducing the Maple Tree | expand

Commit Message

Liam R. Howlett Dec. 1, 2021, 2:30 p.m. UTC
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

Use the Maple Tree iterator instead.  This is too complicated for the
VMA iterator to handle, so let's open-code it for now.  If this turns
out to be a common pattern, we can migrate it to common code.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
---
 fs/coredump.c | 33 ++++++++++++---------------------
 1 file changed, 12 insertions(+), 21 deletions(-)

Comments

Vlastimil Babka Jan. 19, 2022, 10:31 a.m. UTC | #1
On 12/1/21 15:30, Liam Howlett wrote:
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> 
> Use the Maple Tree iterator instead.  This is too complicated for the
> VMA iterator to handle, so let's open-code it for now.  If this turns
> out to be a common pattern, we can migrate it to common code.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
> ---
>  fs/coredump.c | 33 ++++++++++++---------------------
>  1 file changed, 12 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/coredump.c b/fs/coredump.c
> index a6b3c196cdef..59347e42048d 100644
> --- a/fs/coredump.c
> +++ b/fs/coredump.c
> @@ -997,30 +997,20 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
>  	return vma->vm_end - vma->vm_start;
>  }
>  
> -static struct vm_area_struct *first_vma(struct task_struct *tsk,
> -					struct vm_area_struct *gate_vma)
> -{
> -	struct vm_area_struct *ret = tsk->mm->mmap;
> -
> -	if (ret)
> -		return ret;
> -	return gate_vma;
> -}
> -
>  /*
>   * Helper function for iterating across a vma list.  It ensures that the caller
>   * will visit `gate_vma' prior to terminating the search.
>   */
> -static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
> +static struct vm_area_struct *coredump_next_vma(struct ma_state *mas,
> +				       struct vm_area_struct *vma,
>  				       struct vm_area_struct *gate_vma)
>  {
> -	struct vm_area_struct *ret;
> -
> -	ret = this_vma->vm_next;
> -	if (ret)
> -		return ret;
> -	if (this_vma == gate_vma)
> +	if (vma == gate_vma)
>  		return NULL;
> +
> +	vma = mas_next(mas, ULONG_MAX);
> +	if (vma)
> +		return vma;

This looks suspicious. Before this patch if gate_vma was part of the linked
list, it was returned. Even more than once if it was not the last vma in the
list. After this patch, if it's part of the maple tree, it will not be
returned and the iteration will stop.

But I don't know what are the rules for gate_vma being part of linked
list/maple tree, thus whether this is a bug.

>  	return gate_vma;
>  }
>  
> @@ -1032,9 +1022,10 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
>  		      struct core_vma_metadata **vma_meta,
>  		      size_t *vma_data_size_ptr)
>  {
> -	struct vm_area_struct *vma, *gate_vma;
> +	struct vm_area_struct *gate_vma, *vma = NULL;
>  	struct mm_struct *mm = current->mm;
> -	int i;
> +	MA_STATE(mas, &mm->mm_mt, 0, 0);
> +	int i = 0;
>  	size_t vma_data_size = 0;
>  
>  	/*
> @@ -1054,8 +1045,7 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
>  		return -ENOMEM;
>  	}
>  
> -	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
> -			vma = next_vma(vma, gate_vma), i++) {
> +	while ((vma = coredump_next_vma(&mas, vma, gate_vma)) != NULL) {
>  		struct core_vma_metadata *m = (*vma_meta) + i;
>  
>  		m->start = vma->vm_start;
> @@ -1064,6 +1054,7 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
>  		m->dump_size = vma_dump_size(vma, cprm->mm_flags);
>  
>  		vma_data_size += m->dump_size;
> +		i++;
>  	}
>  
>  	mmap_write_unlock(mm);
Matthew Wilcox Jan. 25, 2022, 5 p.m. UTC | #2
On Wed, Jan 19, 2022 at 11:31:45AM +0100, Vlastimil Babka wrote:
> On 12/1/21 15:30, Liam Howlett wrote:
> > From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> > 
> > Use the Maple Tree iterator instead.  This is too complicated for the
> > VMA iterator to handle, so let's open-code it for now.  If this turns
> > out to be a common pattern, we can migrate it to common code.
> > 
> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
> > ---
> >  fs/coredump.c | 33 ++++++++++++---------------------
> >  1 file changed, 12 insertions(+), 21 deletions(-)
> > 
> > diff --git a/fs/coredump.c b/fs/coredump.c
> > index a6b3c196cdef..59347e42048d 100644
> > --- a/fs/coredump.c
> > +++ b/fs/coredump.c
> > @@ -997,30 +997,20 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
> >  	return vma->vm_end - vma->vm_start;
> >  }
> >  
> > -static struct vm_area_struct *first_vma(struct task_struct *tsk,
> > -					struct vm_area_struct *gate_vma)
> > -{
> > -	struct vm_area_struct *ret = tsk->mm->mmap;
> > -
> > -	if (ret)
> > -		return ret;
> > -	return gate_vma;
> > -}
> > -
> >  /*
> >   * Helper function for iterating across a vma list.  It ensures that the caller
> >   * will visit `gate_vma' prior to terminating the search.
> >   */
> > -static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
> > +static struct vm_area_struct *coredump_next_vma(struct ma_state *mas,
> > +				       struct vm_area_struct *vma,
> >  				       struct vm_area_struct *gate_vma)
> >  {
> > -	struct vm_area_struct *ret;
> > -
> > -	ret = this_vma->vm_next;
> > -	if (ret)
> > -		return ret;
> > -	if (this_vma == gate_vma)
> > +	if (vma == gate_vma)
> >  		return NULL;
> > +
> > +	vma = mas_next(mas, ULONG_MAX);
> > +	if (vma)
> > +		return vma;
> 
> This looks suspicious. Before this patch if gate_vma was part of the linked
> list, it was returned. Even more than once if it was not the last vma in the
> list. After this patch, if it's part of the maple tree, it will not be
> returned and the iteration will stop.
> 
> But I don't know what are the rules for gate_vma being part of linked
> list/maple tree, thus whether this is a bug.

As I understand it, and let's be clear that I'm tampering with things
that I do not fully understand, the gate VMA is not part of the linked
list today, and will not be part of the maple tree in future (*).  I
don't think it can be part of the linked list because there's one
gate VMA that's part of every process, so it can't be part of any
MM's linked list.

Code before my patch:
 - If the current VMA has a next VMA, return it
 - If the current VMA is the gate VMA, we're done, return NULL.
 - Return the gate VMA (after walking all the other VMAs)

Code after my patch:
 - If the current VMA is the gate VMA, we're done, return NULL.
 - If the next VMA is not NULL, return it.
 - Return the gate VMA (after walking all the other VMAs)

ie, I've switched the order of the two tests.
I think the linked list code could be rewritten to be in this order
too, but that seems like an unnecessary additional patch?

(*) Although it could be added to the maple tree; that's one of the
nice things about using external storage; you can simply point to
things; there's no restrictions on "must be part of exactly one list".
We have not made that change as part of this patch set, and I believe it
should be a future patch set that's evaluated on its own merits.

> >  	return gate_vma;
> >  }
> >  
> > @@ -1032,9 +1022,10 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
> >  		      struct core_vma_metadata **vma_meta,
> >  		      size_t *vma_data_size_ptr)
> >  {
> > -	struct vm_area_struct *vma, *gate_vma;
> > +	struct vm_area_struct *gate_vma, *vma = NULL;
> >  	struct mm_struct *mm = current->mm;
> > -	int i;
> > +	MA_STATE(mas, &mm->mm_mt, 0, 0);
> > +	int i = 0;
> >  	size_t vma_data_size = 0;
> >  
> >  	/*
> > @@ -1054,8 +1045,7 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
> >  		return -ENOMEM;
> >  	}
> >  
> > -	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
> > -			vma = next_vma(vma, gate_vma), i++) {
> > +	while ((vma = coredump_next_vma(&mas, vma, gate_vma)) != NULL) {
> >  		struct core_vma_metadata *m = (*vma_meta) + i;
> >  
> >  		m->start = vma->vm_start;
> > @@ -1064,6 +1054,7 @@ int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
> >  		m->dump_size = vma_dump_size(vma, cprm->mm_flags);
> >  
> >  		vma_data_size += m->dump_size;
> > +		i++;
> >  	}
> >  
> >  	mmap_write_unlock(mm);
>
diff mbox series

Patch

diff --git a/fs/coredump.c b/fs/coredump.c
index a6b3c196cdef..59347e42048d 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -997,30 +997,20 @@  static unsigned long vma_dump_size(struct vm_area_struct *vma,
 	return vma->vm_end - vma->vm_start;
 }
 
-static struct vm_area_struct *first_vma(struct task_struct *tsk,
-					struct vm_area_struct *gate_vma)
-{
-	struct vm_area_struct *ret = tsk->mm->mmap;
-
-	if (ret)
-		return ret;
-	return gate_vma;
-}
-
 /*
  * Helper function for iterating across a vma list.  It ensures that the caller
  * will visit `gate_vma' prior to terminating the search.
  */
-static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
+static struct vm_area_struct *coredump_next_vma(struct ma_state *mas,
+				       struct vm_area_struct *vma,
 				       struct vm_area_struct *gate_vma)
 {
-	struct vm_area_struct *ret;
-
-	ret = this_vma->vm_next;
-	if (ret)
-		return ret;
-	if (this_vma == gate_vma)
+	if (vma == gate_vma)
 		return NULL;
+
+	vma = mas_next(mas, ULONG_MAX);
+	if (vma)
+		return vma;
 	return gate_vma;
 }
 
@@ -1032,9 +1022,10 @@  int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
 		      struct core_vma_metadata **vma_meta,
 		      size_t *vma_data_size_ptr)
 {
-	struct vm_area_struct *vma, *gate_vma;
+	struct vm_area_struct *gate_vma, *vma = NULL;
 	struct mm_struct *mm = current->mm;
-	int i;
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
+	int i = 0;
 	size_t vma_data_size = 0;
 
 	/*
@@ -1054,8 +1045,7 @@  int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
 		return -ENOMEM;
 	}
 
-	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
-			vma = next_vma(vma, gate_vma), i++) {
+	while ((vma = coredump_next_vma(&mas, vma, gate_vma)) != NULL) {
 		struct core_vma_metadata *m = (*vma_meta) + i;
 
 		m->start = vma->vm_start;
@@ -1064,6 +1054,7 @@  int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
 		m->dump_size = vma_dump_size(vma, cprm->mm_flags);
 
 		vma_data_size += m->dump_size;
+		i++;
 	}
 
 	mmap_write_unlock(mm);