
[hotfix,6.12,2/8] mm: unconditionally close VMAs on error

Message ID: 9a84bad9fdebbdb0adca2b5b43ed63afceb5bacc.1729628198.git.lorenzo.stoakes@oracle.com
State: New
Series: fix error handling in mmap_region() and refactor

Commit Message

Lorenzo Stoakes Oct. 22, 2024, 8:40 p.m. UTC
Incorrect invocation of VMA callbacks when the VMA is no longer in a
consistent state is bug-prone and risky to perform.

With regard to the important vm_ops->close() callback, we have gone to
great lengths to try to track whether or not we ought to close VMAs.

Rather than doing so and risking making a mistake somewhere, instead
unconditionally close and reset vma->vm_ops to an empty dummy operations
set with a NULL .close operator.

We introduce a new function to do so - vma_close() - and simplify existing
vms logic which tracked whether we needed to close or not.

This simplifies the logic, avoids incorrect double-calling of the .close()
callback and allows us to update error paths to simply call vma_close()
unconditionally - making VMA closure idempotent.

Reported-by: Jann Horn <jannh@google.com>
Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
Cc: stable <stable@kernel.org>
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 mm/internal.h | 17 +++++++++++++++++
 mm/mmap.c     |  5 ++---
 mm/nommu.c    |  3 +--
 mm/vma.c      | 14 +++++---------
 mm/vma.h      |  4 +---
 5 files changed, 26 insertions(+), 17 deletions(-)

--
2.47.0
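
For illustration, a minimal sketch (not part of the patch) of the idempotency
this buys: once vma_close() has run, vma->vm_ops points at vma_dummy_vm_ops,
whose .close is NULL, so a second invocation (for instance remove_vma() running
after vms_clean_up_area() has already closed the VMA) degrades to a no-op.

	vma_close(vma);		/* driver's ->close() runs, vm_ops reset to dummy ops */
	/* ... */
	vma_close(vma);		/* vm_ops->close is now NULL: nothing happens */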

Comments

Jann Horn Oct. 22, 2024, 9:15 p.m. UTC | #1
On Tue, Oct 22, 2024 at 10:41 PM Lorenzo Stoakes
<lorenzo.stoakes@oracle.com> wrote:
> Incorrect invocation of VMA callbacks when the VMA is no longer in a
> consistent state is bug prone and risky to perform.
>
> With regards to the important vm_ops->close() callback We have gone to
> great lengths to try to track whether or not we ought to close VMAs.
>
> Rather than doing so and risking making a mistake somewhere, instead
> unconditionally close and reset vma->vm_ops to an empty dummy operations
> set with a NULL .close operator.
>
> We introduce a new function to do so - vma_close() - and simplify existing
> vms logic which tracked whether we needed to close or not.
>
> This simplifies the logic, avoids incorrect double-calling of the .close()
> callback and allows us to update error paths to simply call vma_close()
> unconditionally - making VMA closure idempotent.
>
> Reported-by: Jann Horn <jannh@google.com>
> Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
> Cc: stable <stable@kernel.org>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

Reviewed-by: Jann Horn <jannh@google.com>

[...]
> diff --git a/mm/vma.h b/mm/vma.h
> index 55457cb68200..75558b5e9c8c 100644
> --- a/mm/vma.h
> +++ b/mm/vma.h
> @@ -42,7 +42,6 @@ struct vma_munmap_struct {
>         int vma_count;                  /* Number of vmas that will be removed */
>         bool unlock;                    /* Unlock after the munmap */
>         bool clear_ptes;                /* If there are outstanding PTE to be cleared */
> -       bool closed_vm_ops;             /* call_mmap() was encountered, so vmas may be closed */
>         /* 1 byte hole */

nit: outdated comment, this hole is 2 bytes now



>         unsigned long nr_pages;         /* Number of pages being removed */
>         unsigned long locked_vm;        /* Number of locked pages */
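
For context on the nit above, a sketch of the relevant tail of struct
vma_munmap_struct (assuming a 64-bit build and that vma_count starts at an
8-byte-aligned offset, as the original "1 byte hole" comment implies):

	int vma_count;			/* 4 bytes */
	bool unlock;			/* 1 byte */
	bool clear_ptes;		/* 1 byte */
					/* was 7 bytes with closed_vm_ops: 1 byte hole;
					 * now 6 bytes: 2 byte hole before nr_pages */
	unsigned long nr_pages;		/* 8-byte aligned */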
Vlastimil Babka Oct. 23, 2024, 9:24 a.m. UTC | #2
On 10/22/24 22:40, Lorenzo Stoakes wrote:
> Incorrect invocation of VMA callbacks when the VMA is no longer in a
> consistent state is bug prone and risky to perform.
> 
> With regards to the important vm_ops->close() callback We have gone to
> great lengths to try to track whether or not we ought to close VMAs.
> 
> Rather than doing so and risking making a mistake somewhere, instead
> unconditionally close and reset vma->vm_ops to an empty dummy operations
> set with a NULL .close operator.
> 
> We introduce a new function to do so - vma_close() - and simplify existing
> vms logic which tracked whether we needed to close or not.
> 
> This simplifies the logic, avoids incorrect double-calling of the .close()
> callback and allows us to update error paths to simply call vma_close()
> unconditionally - making VMA closure idempotent.
> 
> Reported-by: Jann Horn <jannh@google.com>
> Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
> Cc: stable <stable@kernel.org>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

Nice simplification. Nit below.

Reviewed-by: Vlastimil Babka <vbabka@suse.cz>

> +/*
> + * Unconditionally close the VMA if it has a close hook and prevent hooks from
> + * being invoked after close. VMA hooks are mutated.
> + */
> +static inline void vma_close(struct vm_area_struct *vma)
> +{
> +	if (vma->vm_ops && vma->vm_ops->close) {
> +		vma->vm_ops->close(vma);
> +
> +		/*
> +		 * The mapping is in an inconsistent state, and no further hooks
> +		 * may be invoked upon it.
> +		 */
> +		vma->vm_ops = &vma_dummy_vm_ops;
> +	}

Nit: if we want to "prevent hooks" as in "any hooks" then we should be
replacing existing vm_ops even if it has no close hook? If it's enough to
prevent further close() hooks (as commit log suggests) then the
implementation is fine but the comment might be misleading.

> +}
> +
>  #ifdef CONFIG_MMU
> 
>  /* Flags for folio_pte_batch(). */
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 10f4ccaf491b..d55c58e99a54 100644
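
For clarity, the stricter variant Vlastimil is describing would look roughly
like this (a sketch only - not what the patch does, which only resets vm_ops
once a close hook has actually been invoked):

	static inline void vma_close(struct vm_area_struct *vma)
	{
		if (vma->vm_ops && vma->vm_ops->close)
			vma->vm_ops->close(vma);

		/* hypothetical: disarm all hooks whether or not .close existed */
		vma->vm_ops = &vma_dummy_vm_ops;
	}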
Liam R. Howlett Oct. 23, 2024, 2:26 p.m. UTC | #3
* Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [241022 16:41]:
> Incorrect invocation of VMA callbacks when the VMA is no longer in a
> consistent state is bug prone and risky to perform.
> 
> With regards to the important vm_ops->close() callback We have gone to
> great lengths to try to track whether or not we ought to close VMAs.
> 
> Rather than doing so and risking making a mistake somewhere, instead
> unconditionally close and reset vma->vm_ops to an empty dummy operations
> set with a NULL .close operator.
> 
> We introduce a new function to do so - vma_close() - and simplify existing
> vms logic which tracked whether we needed to close or not.
> 
> This simplifies the logic, avoids incorrect double-calling of the .close()
> callback and allows us to update error paths to simply call vma_close()
> unconditionally - making VMA closure idempotent.
> 
> Reported-by: Jann Horn <jannh@google.com>
> Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
> Cc: stable <stable@kernel.org>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>

> ---
>  mm/internal.h | 17 +++++++++++++++++
>  mm/mmap.c     |  5 ++---
>  mm/nommu.c    |  3 +--
>  mm/vma.c      | 14 +++++---------
>  mm/vma.h      |  4 +---
>  5 files changed, 26 insertions(+), 17 deletions(-)
> 
> diff --git a/mm/internal.h b/mm/internal.h
> index af032e76dfd4..3a45cc592fd0 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -135,6 +135,23 @@ static inline int mmap_file(struct file *file, struct vm_area_struct *vma)
>  	return err;
>  }
> 
> +/*
> + * Unconditionally close the VMA if it has a close hook and prevent hooks from
> + * being invoked after close. VMA hooks are mutated.
> + */
> +static inline void vma_close(struct vm_area_struct *vma)
> +{
> +	if (vma->vm_ops && vma->vm_ops->close) {
> +		vma->vm_ops->close(vma);
> +
> +		/*
> +		 * The mapping is in an inconsistent state, and no further hooks
> +		 * may be invoked upon it.
> +		 */
> +		vma->vm_ops = &vma_dummy_vm_ops;
> +	}
> +}
> +
>  #ifdef CONFIG_MMU
> 
>  /* Flags for folio_pte_batch(). */
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 10f4ccaf491b..d55c58e99a54 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1576,8 +1576,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
>  	return addr;
> 
>  close_and_free_vma:
> -	if (file && !vms.closed_vm_ops && vma->vm_ops && vma->vm_ops->close)
> -		vma->vm_ops->close(vma);
> +	vma_close(vma);
> 
>  	if (file || vma->vm_file) {
>  unmap_and_free_vma:
> @@ -1937,7 +1936,7 @@ void exit_mmap(struct mm_struct *mm)
>  	do {
>  		if (vma->vm_flags & VM_ACCOUNT)
>  			nr_accounted += vma_pages(vma);
> -		remove_vma(vma, /* unreachable = */ true, /* closed = */ false);
> +		remove_vma(vma, /* unreachable = */ true);
>  		count++;
>  		cond_resched();
>  		vma = vma_next(&vmi);
> diff --git a/mm/nommu.c b/mm/nommu.c
> index f9ccc02458ec..635d028d647b 100644
> --- a/mm/nommu.c
> +++ b/mm/nommu.c
> @@ -589,8 +589,7 @@ static int delete_vma_from_mm(struct vm_area_struct *vma)
>   */
>  static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
>  {
> -	if (vma->vm_ops && vma->vm_ops->close)
> -		vma->vm_ops->close(vma);
> +	vma_close(vma);
>  	if (vma->vm_file)
>  		fput(vma->vm_file);
>  	put_nommu_region(vma->vm_region);
> diff --git a/mm/vma.c b/mm/vma.c
> index 3c5a80876725..bb7cfa2dc282 100644
> --- a/mm/vma.c
> +++ b/mm/vma.c
> @@ -323,11 +323,10 @@ static bool can_vma_merge_right(struct vma_merge_struct *vmg,
>  /*
>   * Close a vm structure and free it.
>   */
> -void remove_vma(struct vm_area_struct *vma, bool unreachable, bool closed)
> +void remove_vma(struct vm_area_struct *vma, bool unreachable)
>  {
>  	might_sleep();
> -	if (!closed && vma->vm_ops && vma->vm_ops->close)
> -		vma->vm_ops->close(vma);
> +	vma_close(vma);
>  	if (vma->vm_file)
>  		fput(vma->vm_file);
>  	mpol_put(vma_policy(vma));
> @@ -1115,9 +1114,7 @@ void vms_clean_up_area(struct vma_munmap_struct *vms,
>  	vms_clear_ptes(vms, mas_detach, true);
>  	mas_set(mas_detach, 0);
>  	mas_for_each(mas_detach, vma, ULONG_MAX)
> -		if (vma->vm_ops && vma->vm_ops->close)
> -			vma->vm_ops->close(vma);
> -	vms->closed_vm_ops = true;
> +		vma_close(vma);
>  }
> 
>  /*
> @@ -1160,7 +1157,7 @@ void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
>  	/* Remove and clean up vmas */
>  	mas_set(mas_detach, 0);
>  	mas_for_each(mas_detach, vma, ULONG_MAX)
> -		remove_vma(vma, /* = */ false, vms->closed_vm_ops);
> +		remove_vma(vma, /* unreachable = */ false);
> 
>  	vm_unacct_memory(vms->nr_accounted);
>  	validate_mm(mm);
> @@ -1684,8 +1681,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
>  	return new_vma;
> 
>  out_vma_link:
> -	if (new_vma->vm_ops && new_vma->vm_ops->close)
> -		new_vma->vm_ops->close(new_vma);
> +	vma_close(new_vma);
> 
>  	if (new_vma->vm_file)
>  		fput(new_vma->vm_file);
> diff --git a/mm/vma.h b/mm/vma.h
> index 55457cb68200..75558b5e9c8c 100644
> --- a/mm/vma.h
> +++ b/mm/vma.h
> @@ -42,7 +42,6 @@ struct vma_munmap_struct {
>  	int vma_count;                  /* Number of vmas that will be removed */
>  	bool unlock;                    /* Unlock after the munmap */
>  	bool clear_ptes;                /* If there are outstanding PTE to be cleared */
> -	bool closed_vm_ops;		/* call_mmap() was encountered, so vmas may be closed */
>  	/* 1 byte hole */
>  	unsigned long nr_pages;         /* Number of pages being removed */
>  	unsigned long locked_vm;        /* Number of locked pages */
> @@ -198,7 +197,6 @@ static inline void init_vma_munmap(struct vma_munmap_struct *vms,
>  	vms->unmap_start = FIRST_USER_ADDRESS;
>  	vms->unmap_end = USER_PGTABLES_CEILING;
>  	vms->clear_ptes = false;
> -	vms->closed_vm_ops = false;
>  }
>  #endif
> 
> @@ -269,7 +267,7 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
>  		  unsigned long start, size_t len, struct list_head *uf,
>  		  bool unlock);
> 
> -void remove_vma(struct vm_area_struct *vma, bool unreachable, bool closed);
> +void remove_vma(struct vm_area_struct *vma, bool unreachable);
> 
>  void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
>  		struct vm_area_struct *prev, struct vm_area_struct *next);
> --
> 2.47.0
Lorenzo Stoakes Oct. 23, 2024, 4:58 p.m. UTC | #5
On Wed, Oct 23, 2024 at 11:24:40AM +0200, Vlastimil Babka wrote:
> On 10/22/24 22:40, Lorenzo Stoakes wrote:
> > Incorrect invocation of VMA callbacks when the VMA is no longer in a
> > consistent state is bug prone and risky to perform.
> >
> > With regards to the important vm_ops->close() callback We have gone to
> > great lengths to try to track whether or not we ought to close VMAs.
> >
> > Rather than doing so and risking making a mistake somewhere, instead
> > unconditionally close and reset vma->vm_ops to an empty dummy operations
> > set with a NULL .close operator.
> >
> > We introduce a new function to do so - vma_close() - and simplify existing
> > vms logic which tracked whether we needed to close or not.
> >
> > This simplifies the logic, avoids incorrect double-calling of the .close()
> > callback and allows us to update error paths to simply call vma_close()
> > unconditionally - making VMA closure idempotent.
> >
> > Reported-by: Jann Horn <jannh@google.com>
> > Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
> > Cc: stable <stable@kernel.org>
> > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>
> Nice simplification. Nit below.

Thanks!

>
> Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
>
> > +/*
> > + * Unconditionally close the VMA if it has a close hook and prevent hooks from
> > + * being invoked after close. VMA hooks are mutated.
> > + */
> > +static inline void vma_close(struct vm_area_struct *vma)
> > +{
> > +	if (vma->vm_ops && vma->vm_ops->close) {
> > +		vma->vm_ops->close(vma);
> > +
> > +		/*
> > +		 * The mapping is in an inconsistent state, and no further hooks
> > +		 * may be invoked upon it.
> > +		 */
> > +		vma->vm_ops = &vma_dummy_vm_ops;
> > +	}
>
> Nit: if we want to "prevent hooks" as in "any hooks" then we should be
> replacing existing vm_ops even if it has no close hook? If it's enough to
> prevent further close() hooks (as commit log suggests) then the
> implementation is fine but the comment might be misleading.

We prevent hooks _after close_; if it has no close hook, then no. But I'll
update the comment to be crystal clear.

>
> > +}
> > +
> >  #ifdef CONFIG_MMU
> >
> >  /* Flags for folio_pte_batch(). */
> > diff --git a/mm/mmap.c b/mm/mmap.c
> > index 10f4ccaf491b..d55c58e99a54 100644
Lorenzo Stoakes Oct. 23, 2024, 5 p.m. UTC | #6
On Tue, Oct 22, 2024 at 11:15:10PM +0200, Jann Horn wrote:
> On Tue, Oct 22, 2024 at 10:41 PM Lorenzo Stoakes
> <lorenzo.stoakes@oracle.com> wrote:
> > Incorrect invocation of VMA callbacks when the VMA is no longer in a
> > consistent state is bug prone and risky to perform.
> >
> > With regards to the important vm_ops->close() callback We have gone to
> > great lengths to try to track whether or not we ought to close VMAs.
> >
> > Rather than doing so and risking making a mistake somewhere, instead
> > unconditionally close and reset vma->vm_ops to an empty dummy operations
> > set with a NULL .close operator.
> >
> > We introduce a new function to do so - vma_close() - and simplify existing
> > vms logic which tracked whether we needed to close or not.
> >
> > This simplifies the logic, avoids incorrect double-calling of the .close()
> > callback and allows us to update error paths to simply call vma_close()
> > unconditionally - making VMA closure idempotent.
> >
> > Reported-by: Jann Horn <jannh@google.com>
> > Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
> > Cc: stable <stable@kernel.org>
> > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>
> Reviewed-by: Jann Horn <jannh@google.com>

Thanks!

>
> [...]
> > diff --git a/mm/vma.h b/mm/vma.h
> > index 55457cb68200..75558b5e9c8c 100644
> > --- a/mm/vma.h
> > +++ b/mm/vma.h
> > @@ -42,7 +42,6 @@ struct vma_munmap_struct {
> >         int vma_count;                  /* Number of vmas that will be removed */
> >         bool unlock;                    /* Unlock after the munmap */
> >         bool clear_ptes;                /* If there are outstanding PTE to be cleared */
> > -       bool closed_vm_ops;             /* call_mmap() was encountered, so vmas may be closed */
> >         /* 1 byte hole */
>
> nit: outdated comment, this hole is 2 bytes now

Ack, will update.

>
>
>
> >         unsigned long nr_pages;         /* Number of pages being removed */
> >         unsigned long locked_vm;        /* Number of locked pages */

Patch

diff --git a/mm/internal.h b/mm/internal.h
index af032e76dfd4..3a45cc592fd0 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -135,6 +135,23 @@  static inline int mmap_file(struct file *file, struct vm_area_struct *vma)
 	return err;
 }

+/*
+ * Unconditionally close the VMA if it has a close hook and prevent hooks from
+ * being invoked after close. VMA hooks are mutated.
+ */
+static inline void vma_close(struct vm_area_struct *vma)
+{
+	if (vma->vm_ops && vma->vm_ops->close) {
+		vma->vm_ops->close(vma);
+
+		/*
+		 * The mapping is in an inconsistent state, and no further hooks
+		 * may be invoked upon it.
+		 */
+		vma->vm_ops = &vma_dummy_vm_ops;
+	}
+}
+
 #ifdef CONFIG_MMU

 /* Flags for folio_pte_batch(). */
diff --git a/mm/mmap.c b/mm/mmap.c
index 10f4ccaf491b..d55c58e99a54 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1576,8 +1576,7 @@  unsigned long mmap_region(struct file *file, unsigned long addr,
 	return addr;

 close_and_free_vma:
-	if (file && !vms.closed_vm_ops && vma->vm_ops && vma->vm_ops->close)
-		vma->vm_ops->close(vma);
+	vma_close(vma);

 	if (file || vma->vm_file) {
 unmap_and_free_vma:
@@ -1937,7 +1936,7 @@  void exit_mmap(struct mm_struct *mm)
 	do {
 		if (vma->vm_flags & VM_ACCOUNT)
 			nr_accounted += vma_pages(vma);
-		remove_vma(vma, /* unreachable = */ true, /* closed = */ false);
+		remove_vma(vma, /* unreachable = */ true);
 		count++;
 		cond_resched();
 		vma = vma_next(&vmi);
diff --git a/mm/nommu.c b/mm/nommu.c
index f9ccc02458ec..635d028d647b 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -589,8 +589,7 @@  static int delete_vma_from_mm(struct vm_area_struct *vma)
  */
 static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-	if (vma->vm_ops && vma->vm_ops->close)
-		vma->vm_ops->close(vma);
+	vma_close(vma);
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	put_nommu_region(vma->vm_region);
diff --git a/mm/vma.c b/mm/vma.c
index 3c5a80876725..bb7cfa2dc282 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -323,11 +323,10 @@  static bool can_vma_merge_right(struct vma_merge_struct *vmg,
 /*
  * Close a vm structure and free it.
  */
-void remove_vma(struct vm_area_struct *vma, bool unreachable, bool closed)
+void remove_vma(struct vm_area_struct *vma, bool unreachable)
 {
 	might_sleep();
-	if (!closed && vma->vm_ops && vma->vm_ops->close)
-		vma->vm_ops->close(vma);
+	vma_close(vma);
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	mpol_put(vma_policy(vma));
@@ -1115,9 +1114,7 @@  void vms_clean_up_area(struct vma_munmap_struct *vms,
 	vms_clear_ptes(vms, mas_detach, true);
 	mas_set(mas_detach, 0);
 	mas_for_each(mas_detach, vma, ULONG_MAX)
-		if (vma->vm_ops && vma->vm_ops->close)
-			vma->vm_ops->close(vma);
-	vms->closed_vm_ops = true;
+		vma_close(vma);
 }

 /*
@@ -1160,7 +1157,7 @@  void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
 	/* Remove and clean up vmas */
 	mas_set(mas_detach, 0);
 	mas_for_each(mas_detach, vma, ULONG_MAX)
-		remove_vma(vma, /* = */ false, vms->closed_vm_ops);
+		remove_vma(vma, /* unreachable = */ false);

 	vm_unacct_memory(vms->nr_accounted);
 	validate_mm(mm);
@@ -1684,8 +1681,7 @@  struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	return new_vma;

 out_vma_link:
-	if (new_vma->vm_ops && new_vma->vm_ops->close)
-		new_vma->vm_ops->close(new_vma);
+	vma_close(new_vma);

 	if (new_vma->vm_file)
 		fput(new_vma->vm_file);
diff --git a/mm/vma.h b/mm/vma.h
index 55457cb68200..75558b5e9c8c 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -42,7 +42,6 @@  struct vma_munmap_struct {
 	int vma_count;                  /* Number of vmas that will be removed */
 	bool unlock;                    /* Unlock after the munmap */
 	bool clear_ptes;                /* If there are outstanding PTE to be cleared */
-	bool closed_vm_ops;		/* call_mmap() was encountered, so vmas may be closed */
 	/* 1 byte hole */
 	unsigned long nr_pages;         /* Number of pages being removed */
 	unsigned long locked_vm;        /* Number of locked pages */
@@ -198,7 +197,6 @@  static inline void init_vma_munmap(struct vma_munmap_struct *vms,
 	vms->unmap_start = FIRST_USER_ADDRESS;
 	vms->unmap_end = USER_PGTABLES_CEILING;
 	vms->clear_ptes = false;
-	vms->closed_vm_ops = false;
 }
 #endif

@@ -269,7 +267,7 @@  int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
 		  unsigned long start, size_t len, struct list_head *uf,
 		  bool unlock);

-void remove_vma(struct vm_area_struct *vma, bool unreachable, bool closed);
+void remove_vma(struct vm_area_struct *vma, bool unreachable);

 void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
 		struct vm_area_struct *prev, struct vm_area_struct *next);