diff mbox series

mm: Close race between munmap() and expand_upwards()/downwards()

Message ID 20200709105309.42495-1-kirill.shutemov@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series mm: Close race between munmap() and expand_upwards()/downwards() | expand

Commit Message

kirill.shutemov@linux.intel.com July 9, 2020, 10:53 a.m. UTC
VMA with VM_GROWSDOWN or VM_GROWSUP flag set can change their size under
mmap_read_lock(). It can lead to race with __do_munmap():

	Thread A			Thread B
__do_munmap()
  detach_vmas_to_be_unmapped()
  mmap_write_downgrade()
				expand_downwards()
				  vma->vm_start = address;
				  // The VMA now overlaps with
				  // VMAs detached by the Thread A
				// page fault populates expanded part
				// of the VMA
  unmap_region()
    // Zaps pagetables partly
    // populated by Thread B

Similar race exists for expand_upwards().

The fix is to avoid downgrading mmap_lock in __do_munmap() if detached
VMAs are next to VM_GROWSDOWN or VM_GROWSUP VMA.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: Jann Horn <jannh@google.com>
Fixes: dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap")
Cc: <stable@vger.kernel.org> # 4.20
Cc: Yang Shi <yang.shi@linux.alibaba.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
---
 mm/mmap.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

Comments

Vlastimil Babka July 10, 2020, 10:58 a.m. UTC | #1
On 7/9/20 12:53 PM, Kirill A. Shutemov wrote:
> VMA with VM_GROWSDOWN or VM_GROWSUP flag set can change their size under
> mmap_read_lock(). It can lead to race with __do_munmap():
> 
> 	Thread A			Thread B
> __do_munmap()
>   detach_vmas_to_be_unmapped()
>   mmap_write_downgrade()
> 				expand_downwards()
> 				  vma->vm_start = address;
> 				  // The VMA now overlaps with
> 				  // VMAs detached by the Thread A
> 				// page fault populates expanded part
> 				// of the VMA
>   unmap_region()
>     // Zaps pagetables partly
>     // populated by Thread B
> 
> Similar race exists for expand_upwards().
> 
> The fix is to avoid downgrading mmap_lock in __do_munmap() if detached
> VMAs are next to VM_GROWSDOWN or VM_GROWSUP VMA.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Reported-by: Jann Horn <jannh@google.com>
> Fixes: dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap")
> Cc: <stable@vger.kernel.org> # 4.20
> Cc: Yang Shi <yang.shi@linux.alibaba.com>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Cc: Oleg Nesterov <oleg@redhat.com>
> Cc: Matthew Wilcox <willy@infradead.org>

Acked-by: Vlastimil Babka <vbabka@suse.cz>
Thanks!

> ---
>  mm/mmap.c | 16 ++++++++++++++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 59a4682ebf3f..71df4b36b42a 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2620,7 +2620,7 @@ static void unmap_region(struct mm_struct *mm,
>   * Create a list of vma's touched by the unmap, removing them from the mm's
>   * vma list as we go..
>   */
> -static void
> +static bool
>  detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
>  	struct vm_area_struct *prev, unsigned long end)
>  {
> @@ -2645,6 +2645,17 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
>  
>  	/* Kill the cache */
>  	vmacache_invalidate(mm);
> +
> +	/*
> +	 * Do not downgrade mmap_sem if we are next to VM_GROWSDOWN or
> +	 * VM_GROWSUP VMA. Such VMAs can change their size under
> +	 * down_read(mmap_sem) and collide with the VMA we are about to unmap.
> +	 */
> +	if (vma && (vma->vm_flags & VM_GROWSDOWN))
> +		return false;
> +	if (prev && (prev->vm_flags & VM_GROWSUP))
> +		return false;
> +	return true;
>  }
>  
>  /*
> @@ -2825,7 +2836,8 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
>  	}
>  
>  	/* Detach vmas from rbtree */
> -	detach_vmas_to_be_unmapped(mm, vma, prev, end);
> +	if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
> +		downgrade = false;
>  
>  	if (downgrade)
>  		mmap_write_downgrade(mm);
>
Yang Shi July 10, 2020, 5:35 p.m. UTC | #2
On 7/9/20 3:53 AM, Kirill A. Shutemov wrote:
> VMA with VM_GROWSDOWN or VM_GROWSUP flag set can change their size under
> mmap_read_lock(). It can lead to race with __do_munmap():
>
> 	Thread A			Thread B
> __do_munmap()
>    detach_vmas_to_be_unmapped()
>    mmap_write_downgrade()
> 				expand_downwards()
> 				  vma->vm_start = address;
> 				  // The VMA now overlaps with
> 				  // VMAs detached by the Thread A
> 				// page fault populates expanded part
> 				// of the VMA
>    unmap_region()
>      // Zaps pagetables partly
>      // populated by Thread B
>
> Similar race exists for expand_upwards().
>
> The fix is to avoid downgrading mmap_lock in __do_munmap() if detached
> VMAs are next to VM_GROWSDOWN or VM_GROWSUP VMA.

Thanks for catching this. The fix makes sense to me. Reviewed-by: Yang 
Shi <yang.shi@linux.alibaba.com>

>
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Reported-by: Jann Horn <jannh@google.com>
> Fixes: dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap")
> Cc: <stable@vger.kernel.org> # 4.20
> Cc: Yang Shi <yang.shi@linux.alibaba.com>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Cc: Oleg Nesterov <oleg@redhat.com>
> Cc: Matthew Wilcox <willy@infradead.org>
> ---
>   mm/mmap.c | 16 ++++++++++++++--
>   1 file changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 59a4682ebf3f..71df4b36b42a 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2620,7 +2620,7 @@ static void unmap_region(struct mm_struct *mm,
>    * Create a list of vma's touched by the unmap, removing them from the mm's
>    * vma list as we go..
>    */
> -static void
> +static bool
>   detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
>   	struct vm_area_struct *prev, unsigned long end)
>   {
> @@ -2645,6 +2645,17 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
>   
>   	/* Kill the cache */
>   	vmacache_invalidate(mm);
> +
> +	/*
> +	 * Do not downgrade mmap_sem if we are next to VM_GROWSDOWN or
> +	 * VM_GROWSUP VMA. Such VMAs can change their size under
> +	 * down_read(mmap_sem) and collide with the VMA we are about to unmap.
> +	 */
> +	if (vma && (vma->vm_flags & VM_GROWSDOWN))
> +		return false;
> +	if (prev && (prev->vm_flags & VM_GROWSUP))
> +		return false;
> +	return true;
>   }
>   
>   /*
> @@ -2825,7 +2836,8 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
>   	}
>   
>   	/* Detach vmas from rbtree */
> -	detach_vmas_to_be_unmapped(mm, vma, prev, end);
> +	if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
> +		downgrade = false;
>   
>   	if (downgrade)
>   		mmap_write_downgrade(mm);
diff mbox series

Patch

diff --git a/mm/mmap.c b/mm/mmap.c
index 59a4682ebf3f..71df4b36b42a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2620,7 +2620,7 @@  static void unmap_region(struct mm_struct *mm,
  * Create a list of vma's touched by the unmap, removing them from the mm's
  * vma list as we go..
  */
-static void
+static bool
 detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct vm_area_struct *prev, unsigned long end)
 {
@@ -2645,6 +2645,17 @@  detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	/* Kill the cache */
 	vmacache_invalidate(mm);
+
+	/*
+	 * Do not downgrade mmap_sem if we are next to VM_GROWSDOWN or
+	 * VM_GROWSUP VMA. Such VMAs can change their size under
+	 * down_read(mmap_sem) and collide with the VMA we are about to unmap.
+	 */
+	if (vma && (vma->vm_flags & VM_GROWSDOWN))
+		return false;
+	if (prev && (prev->vm_flags & VM_GROWSUP))
+		return false;
+	return true;
 }
 
 /*
@@ -2825,7 +2836,8 @@  int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
 	}
 
 	/* Detach vmas from rbtree */
-	detach_vmas_to_be_unmapped(mm, vma, prev, end);
+	if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
+		downgrade = false;
 
 	if (downgrade)
 		mmap_write_downgrade(mm);