diff mbox series

[RFC,v7,4/4] mm: unmap special vmas with regular do_munmap()

Message ID 1533857763-43527-5-git-send-email-yang.shi@linux.alibaba.com (mailing list archive)
State New, archived
Headers show
Series mm: zap pages with read mmap_sem in munmap for large mapping | expand

Commit Message

Yang Shi Aug. 9, 2018, 11:36 p.m. UTC
Unmapping vmas which have the VM_HUGETLB or VM_PFNMAP flag set, or
which have uprobes set, needs to be done with write mmap_sem held,
since the unmap may update vm_flags.

So it may not be safe to deal with these kinds of special mappings
under read mmap_sem. Deal with such mappings with a regular
do_munmap() call.

Michal suggested making this a separate patch for safety and easier
bisection.

Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
---
 mm/mmap.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

Comments

Vlastimil Babka Aug. 10, 2018, 9:51 a.m. UTC | #1
On 08/10/2018 01:36 AM, Yang Shi wrote:
> Unmapping vmas which have the VM_HUGETLB or VM_PFNMAP flag set, or
> which have uprobes set, needs to be done with write mmap_sem held,
> since the unmap may update vm_flags.
> 
> So it may not be safe to deal with these kinds of special mappings
> under read mmap_sem. Deal with such mappings with a regular
> do_munmap() call.
> 
> Michal suggested making this a separate patch for safety and easier
> bisection.

Hm I believe Michal meant the opposite "evolution" though. Patch 2/4
should be done in a way that special mappings keep using the regular
path, and this patch would convert them to the new path. Possibly even
each special case separately.

> Cc: Michal Hocko <mhocko@kernel.org>
> Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
> ---
>  mm/mmap.c | 24 ++++++++++++++++++++++++
>  1 file changed, 24 insertions(+)
> 
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 2234d5a..06cb83c 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2766,6 +2766,16 @@ static inline void munlock_vmas(struct vm_area_struct *vma,
>  	}
>  }
>  
> +static inline bool can_zap_with_rlock(struct vm_area_struct *vma)
> +{
> +	if ((vma->vm_file &&
> +	     vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) ||
> +	     (vma->vm_flags | (VM_HUGETLB | VM_PFNMAP)))
> +		return false;
> +
> +	return true;
> +}
> +
>  /*
>   * Zap pages with read mmap_sem held
>   *
> @@ -2808,6 +2818,17 @@ static int do_munmap_zap_rlock(struct mm_struct *mm, unsigned long start,
>  			goto out;
>  	}
>  
> +	/*
> +	 * Unmapping vmas which have the VM_HUGETLB or VM_PFNMAP flag set
> +	 * or have uprobes set needs to be done with write mmap_sem held,
> +	 * since they may update vm_flags. Deal with such mappings with a
> +	 * regular do_munmap() call.
> +	 */
> +	for (vma = start_vma; vma && vma->vm_start < end; vma = vma->vm_next) {
> +		if (!can_zap_with_rlock(vma))
> +			goto regular_path;
> +	}
> +
>  	/* Handle mlocked vmas */
>  	if (mm->locked_vm) {
>  		vma = start_vma;
> @@ -2828,6 +2849,9 @@ static int do_munmap_zap_rlock(struct mm_struct *mm, unsigned long start,
>  
>  	return 0;
>  
> +regular_path:
> +	ret = do_munmap(mm, start, len, uf);
> +
>  out:
>  	up_write(&mm->mmap_sem);
>  	return ret;
>
Vlastimil Babka Aug. 10, 2018, 10:46 a.m. UTC | #2
On 08/10/2018 01:36 AM, Yang Shi wrote:
> Unmapping vmas which have the VM_HUGETLB or VM_PFNMAP flag set, or
> which have uprobes set, needs to be done with write mmap_sem held,
> since the unmap may update vm_flags.
> 
> So it may not be safe to deal with these kinds of special mappings
> under read mmap_sem. Deal with such mappings with a regular
> do_munmap() call.
> 
> Michal suggested making this a separate patch for safety and easier
> bisection.
> 
> Cc: Michal Hocko <mhocko@kernel.org>
> Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
> ---
>  mm/mmap.c | 24 ++++++++++++++++++++++++
>  1 file changed, 24 insertions(+)
> 
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 2234d5a..06cb83c 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2766,6 +2766,16 @@ static inline void munlock_vmas(struct vm_area_struct *vma,
>  	}
>  }
>  
> +static inline bool can_zap_with_rlock(struct vm_area_struct *vma)
> +{
> +	if ((vma->vm_file &&
> +	     vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) ||

vma_has_uprobes() seems to be a rather expensive check, with e.g. an
unconditional spinlock. uprobe_munmap() has some cheaper precondition
checks, e.g. for the case when there are no uprobes in the system at
all (which should be the common one?).

BTW, uprobe_munmap() touches mm->flags, not vma->flags, so whether it
can be called under mmap_sem held for reading should be evaluated more
carefully; having the vmas already detached is no guarantee.

> +	     (vma->vm_flags | (VM_HUGETLB | VM_PFNMAP)))

			    ^ I think replace '|' with '&' here?

> +		return false;
> +
> +	return true;
> +}
> +
>  /*
>   * Zap pages with read mmap_sem held
>   *
> @@ -2808,6 +2818,17 @@ static int do_munmap_zap_rlock(struct mm_struct *mm, unsigned long start,
>  			goto out;
>  	}
>  
> +	/*
> +	 * Unmapping vmas which have the VM_HUGETLB or VM_PFNMAP flag set
> +	 * or have uprobes set needs to be done with write mmap_sem held,
> +	 * since they may update vm_flags. Deal with such mappings with a
> +	 * regular do_munmap() call.
> +	 */
> +	for (vma = start_vma; vma && vma->vm_start < end; vma = vma->vm_next) {
> +		if (!can_zap_with_rlock(vma))
> +			goto regular_path;
> +	}
> +
>  	/* Handle mlocked vmas */
>  	if (mm->locked_vm) {
>  		vma = start_vma;
> @@ -2828,6 +2849,9 @@ static int do_munmap_zap_rlock(struct mm_struct *mm, unsigned long start,
>  
>  	return 0;
>  
> +regular_path:

I think it's missing a down_write_* here.

> +	ret = do_munmap(mm, start, len, uf);
> +
>  out:
>  	up_write(&mm->mmap_sem);
>  	return ret;
>
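
Putting Vlastimil's two points together, the helper could be restructured
along the lines of the sketch below. This is only an illustration, not
code from the series: it assumes the '|' -> '&' fix, and it borrows the
idea of cheap preconditions (VM_EXEC, per Yang's reply further down, and
the per-mm MMF_HAS_UPROBES bit that uprobe_munmap() itself tests) before
paying for vma_has_uprobes():

static inline bool can_zap_with_rlock(struct vm_area_struct *vma)
{
	/* These mappings may update vm_flags on unmap: take the slow path. */
	if (vma->vm_flags & (VM_HUGETLB | VM_PFNMAP))
		return false;

	/*
	 * Cheap checks first: uprobes only live in executable, file-backed
	 * vmas, and MMF_HAS_UPROBES says whether this mm has any uprobes
	 * at all. Only then take the spinlock in vma_has_uprobes().
	 */
	if (vma->vm_file && (vma->vm_flags & VM_EXEC) &&
	    test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags) &&
	    vma_has_uprobes(vma, vma->vm_start, vma->vm_end))
		return false;

	return true;
}

Note this only addresses the cost concern; the second point, that
uprobe_munmap() writes mm->flags, still needs a separate safety argument
before uprobe vmas could stay on the read-locked path at all.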
Michal Hocko Aug. 10, 2018, 11:59 a.m. UTC | #3
On Fri 10-08-18 11:51:54, Vlastimil Babka wrote:
> On 08/10/2018 01:36 AM, Yang Shi wrote:
> > Unmapping vmas which have the VM_HUGETLB or VM_PFNMAP flag set, or
> > which have uprobes set, needs to be done with write mmap_sem held,
> > since the unmap may update vm_flags.
> > 
> > So it may not be safe to deal with these kinds of special mappings
> > under read mmap_sem. Deal with such mappings with a regular
> > do_munmap() call.
> > 
> > Michal suggested making this a separate patch for safety and easier
> > bisection.
> 
> Hm I believe Michal meant the opposite "evolution" though. Patch 2/4
> should be done in a way that special mappings keep using the regular
> path, and this patch would convert them to the new path. Possibly even
> each special case separately.

yes, that is what I meant. Each of the special cases should have its
own patch and a changelog explaining why it is safe.
Yang Shi Aug. 10, 2018, 4:51 p.m. UTC | #4
On 8/10/18 2:51 AM, Vlastimil Babka wrote:
> On 08/10/2018 01:36 AM, Yang Shi wrote:
>> Unmapping vmas which have the VM_HUGETLB or VM_PFNMAP flag set, or
>> which have uprobes set, needs to be done with write mmap_sem held,
>> since the unmap may update vm_flags.
>>
>> So it may not be safe to deal with these kinds of special mappings
>> under read mmap_sem. Deal with such mappings with a regular
>> do_munmap() call.
>>
>> Michal suggested making this a separate patch for safety and easier
>> bisection.
> Hm I believe Michal meant the opposite "evolution" though. Patch 2/4
> should be done in a way that special mappings keep using the regular
> path, and this patch would convert them to the new path. Possibly even
> each special case separately.

Aha, thanks. I understood it the opposite way.

Yang

>
>> Cc: Michal Hocko <mhocko@kernel.org>
>> Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
>> ---
>>   mm/mmap.c | 24 ++++++++++++++++++++++++
>>   1 file changed, 24 insertions(+)
>>
>> diff --git a/mm/mmap.c b/mm/mmap.c
>> index 2234d5a..06cb83c 100644
>> --- a/mm/mmap.c
>> +++ b/mm/mmap.c
>> @@ -2766,6 +2766,16 @@ static inline void munlock_vmas(struct vm_area_struct *vma,
>>   	}
>>   }
>>   
>> +static inline bool can_zap_with_rlock(struct vm_area_struct *vma)
>> +{
>> +	if ((vma->vm_file &&
>> +	     vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) ||
>> +	     (vma->vm_flags | (VM_HUGETLB | VM_PFNMAP)))
>> +		return false;
>> +
>> +	return true;
>> +}
>> +
>>   /*
>>    * Zap pages with read mmap_sem held
>>    *
>> @@ -2808,6 +2818,17 @@ static int do_munmap_zap_rlock(struct mm_struct *mm, unsigned long start,
>>   			goto out;
>>   	}
>>   
>> +	/*
>> +	 * Unmapping vmas which have the VM_HUGETLB or VM_PFNMAP flag set
>> +	 * or have uprobes set needs to be done with write mmap_sem held,
>> +	 * since they may update vm_flags. Deal with such mappings with a
>> +	 * regular do_munmap() call.
>> +	 */
>> +	for (vma = start_vma; vma && vma->vm_start < end; vma = vma->vm_next) {
>> +		if (!can_zap_with_rlock(vma))
>> +			goto regular_path;
>> +	}
>> +
>>   	/* Handle mlocked vmas */
>>   	if (mm->locked_vm) {
>>   		vma = start_vma;
>> @@ -2828,6 +2849,9 @@ static int do_munmap_zap_rlock(struct mm_struct *mm, unsigned long start,
>>   
>>   	return 0;
>>   
>> +regular_path:
>> +	ret = do_munmap(mm, start, len, uf);
>> +
>>   out:
>>   	up_write(&mm->mmap_sem);
>>   	return ret;
>>
Yang Shi Aug. 10, 2018, 5 p.m. UTC | #5
On 8/10/18 3:46 AM, Vlastimil Babka wrote:
> On 08/10/2018 01:36 AM, Yang Shi wrote:
>> Unmapping vmas which have the VM_HUGETLB or VM_PFNMAP flag set, or
>> which have uprobes set, needs to be done with write mmap_sem held,
>> since the unmap may update vm_flags.
>>
>> So it may not be safe to deal with these kinds of special mappings
>> under read mmap_sem. Deal with such mappings with a regular
>> do_munmap() call.
>>
>> Michal suggested making this a separate patch for safety and easier
>> bisection.
>>
>> Cc: Michal Hocko <mhocko@kernel.org>
>> Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
>> ---
>>   mm/mmap.c | 24 ++++++++++++++++++++++++
>>   1 file changed, 24 insertions(+)
>>
>> diff --git a/mm/mmap.c b/mm/mmap.c
>> index 2234d5a..06cb83c 100644
>> --- a/mm/mmap.c
>> +++ b/mm/mmap.c
>> @@ -2766,6 +2766,16 @@ static inline void munlock_vmas(struct vm_area_struct *vma,
>>   	}
>>   }
>>   
>> +static inline bool can_zap_with_rlock(struct vm_area_struct *vma)
>> +{
>> +	if ((vma->vm_file &&
>> +	     vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) ||
> vma_has_uprobes() seems to be a rather expensive check, with e.g. an
> unconditional spinlock. uprobe_munmap() has some cheaper precondition
> checks, e.g. for the case when there are no uprobes in the system at
> all (which should be the common one?).

I think those cases are common, i.e. uprobe_munmap() checks the vm prot,
since uprobes are typically installed only for VM_EXEC vmas. We could
use those cheaper checks to save some cycles.

>
> BTW, uprobe_munmap() touches mm->flags, not vma->flags, so whether it
> can be called under mmap_sem held for reading should be evaluated more
> carefully; having the vmas already detached is no guarantee.

We might just leave uprobe vmas to the regular do_munmap()? I suppose
they should not be very common. And uprobes can only be installed for
VM_EXEC vmas; even though there may be large text segments, VM_EXEC
vmas are typically unmapped at process exit, so the latency should be
fine.

>
>> +	     (vma->vm_flags | (VM_HUGETLB | VM_PFNMAP)))
> 			    ^ I think replace '|' with '&' here?

Yes, thanks for catching this.

>
>> +		return false;
>> +
>> +	return true;
>> +}
>> +
>>   /*
>>    * Zap pages with read mmap_sem held
>>    *
>> @@ -2808,6 +2818,17 @@ static int do_munmap_zap_rlock(struct mm_struct *mm, unsigned long start,
>>   			goto out;
>>   	}
>>   
>> +	/*
>> +	 * Unmapping vmas which have the VM_HUGETLB or VM_PFNMAP flag set
>> +	 * or have uprobes set needs to be done with write mmap_sem held,
>> +	 * since they may update vm_flags. Deal with such mappings with a
>> +	 * regular do_munmap() call.
>> +	 */
>> +	for (vma = start_vma; vma && vma->vm_start < end; vma = vma->vm_next) {
>> +		if (!can_zap_with_rlock(vma))
>> +			goto regular_path;
>> +	}
>> +
>>   	/* Handle mlocked vmas */
>>   	if (mm->locked_vm) {
>>   		vma = start_vma;
>> @@ -2828,6 +2849,9 @@ static int do_munmap_zap_rlock(struct mm_struct *mm, unsigned long start,
>>   
>>   	return 0;
>>   
>> +regular_path:
> I think it's missing a down_write_* here.

No, the jump happens before downgrade_write(), so mmap_sem is still
held for write at that point.

Thanks,
Yang

>
>> +	ret = do_munmap(mm, start, len, uf);
>> +
>>   out:
>>   	up_write(&mm->mmap_sem);
>>   	return ret;
>>
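
Yang's answer is easier to follow with the lock transitions spelled out.
Schematically (reconstructed from the hunks quoted in this thread, with
the elided parts marked; this is not the literal patch 2/4 code),
do_munmap_zap_rlock() does:

static int do_munmap_zap_rlock(struct mm_struct *mm, unsigned long start,
			       size_t len, struct list_head *uf)
{
	struct vm_area_struct *vma;
	int ret = 0;

	down_write(&mm->mmap_sem);	/* taken for write up front */

	/* ... validate the range, detach vmas; errors "goto out" ... */

	for (vma = start_vma; vma && vma->vm_start < end; vma = vma->vm_next)
		if (!can_zap_with_rlock(vma))
			goto regular_path;	/* still held for write */

	downgrade_write(&mm->mmap_sem);	/* write -> read, atomically */

	/* ... zap pages with only read mmap_sem held ... */

	up_read(&mm->mmap_sem);
	return 0;

regular_path:
	/*
	 * Reached only before downgrade_write(), so mmap_sem is still
	 * held for write, which is what do_munmap() expects.
	 */
	ret = do_munmap(mm, start, len, uf);

out:
	up_write(&mm->mmap_sem);	/* pairs with the down_write() above */
	return ret;
}

So regular_path and out: both sit on the write-locked side of the
downgrade, and no extra down_write_*() is needed there.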
diff mbox series

Patch

diff --git a/mm/mmap.c b/mm/mmap.c
index 2234d5a..06cb83c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2766,6 +2766,16 @@  static inline void munlock_vmas(struct vm_area_struct *vma,
 	}
 }
 
+static inline bool can_zap_with_rlock(struct vm_area_struct *vma)
+{
+	if ((vma->vm_file &&
+	     vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) ||
+	     (vma->vm_flags | (VM_HUGETLB | VM_PFNMAP)))
+		return false;
+
+	return true;
+}
+
 /*
  * Zap pages with read mmap_sem held
  *
@@ -2808,6 +2818,17 @@  static int do_munmap_zap_rlock(struct mm_struct *mm, unsigned long start,
 			goto out;
 	}
 
+	/*
+	 * Unmapping vmas which have the VM_HUGETLB or VM_PFNMAP flag set
+	 * or have uprobes set needs to be done with write mmap_sem held,
+	 * since they may update vm_flags. Deal with such mappings with a
+	 * regular do_munmap() call.
+	 */
+	for (vma = start_vma; vma && vma->vm_start < end; vma = vma->vm_next) {
+		if (!can_zap_with_rlock(vma))
+			goto regular_path;
+	}
+
 	/* Handle mlocked vmas */
 	if (mm->locked_vm) {
 		vma = start_vma;
@@ -2828,6 +2849,9 @@  static int do_munmap_zap_rlock(struct mm_struct *mm, unsigned long start,
 
 	return 0;
 
+regular_path:
+	ret = do_munmap(mm, start, len, uf);
+
 out:
 	up_write(&mm->mmap_sem);
 	return ret;