diff mbox

mm: thp: passing correct vm_flags to hugepage_vma_check

Message ID 20180629181752.792831-1-songliubraving@fb.com (mailing list archive)
State New, archived
Headers show

Commit Message

Song Liu June 29, 2018, 6:17 p.m. UTC
Back in May, I sent patch similar to 02b75dc8160d:

https://patchwork.kernel.org/patch/10416233/  (v1)

This patch got positive feedback. However, I realized there is a problem,
that vma->vm_flags in khugepaged_enter_vma_merge() is stale. The separate
argument vm_flags contains the latest value. Therefore, it is
necessary to pass this vm_flags into hugepage_vma_check(). To fix this
problem, I resent v2 and v3 of the work:

https://patchwork.kernel.org/patch/10419527/   (v2)
https://patchwork.kernel.org/patch/10433937/   (v3)

To my surprise, after I thought we had all agreed on v3 of the work,
Yang's patch, which is similar to the correct-looking (but wrong) v1, got
applied. So we still have the issue of stale vma->vm_flags. This patch
fixes this issue. Please apply.

Fixes: 02b75dc8160d ("mm: thp: register mm for khugepaged when merging vma for shmem")
Cc: Yang Shi <yang.shi@linux.alibaba.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
 mm/khugepaged.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

Comments

Rik van Riel June 29, 2018, 6:26 p.m. UTC | #1
On Fri, 2018-06-29 at 11:17 -0700, Song Liu wrote:

> To my surprise, after I thought we all agreed on v3 of the work.
> Yang's
> patch, which is similar to correct looking (but wrong) v1, got
> applied.
> So we still have the issue of stale vma->vm_flags. This patch fixes
> this
> issue. Please apply.
> 
> Fixes: 02b75dc8160d ("mm: thp: register mm for khugepaged when
> merging vma for shmem")
> Cc: Yang Shi <yang.shi@linux.alibaba.com>
> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Cc: Hugh Dickins <hughd@google.com>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Rik van Riel <riel@surriel.com>
> Signed-off-by: Song Liu <songliubraving@fb.com>
> 
Reviewed-by: Rik van Riel <riel@surriel.com>
Yang Shi June 29, 2018, 7:05 p.m. UTC | #2
On 6/29/18 11:17 AM, Song Liu wrote:
> Back in May, I sent patch similar to 02b75dc8160d:
>
> https://patchwork.kernel.org/patch/10416233/  (v1)
>
> This patch got positive feedback. However, I realized there is a problem,
> that vma->vm_flags in khugepaged_enter_vma_merge() is stale. The separate
> argument vm_flags contains the latest value. Therefore, it is
> necessary to pass this vm_flags into hugepage_vma_check(). To fix this
> problem,  I resent v2 and v3 of the work:
>
> https://patchwork.kernel.org/patch/10419527/   (v2)
> https://patchwork.kernel.org/patch/10433937/   (v3)
>
> To my surprise, after I thought we all agreed on v3 of the work. Yang's
> patch, which is similar to correct looking (but wrong) v1, got applied.
> So we still have the issue of stale vma->vm_flags. This patch fixes this
> issue. Please apply.

Thanks for catching this.

Reviewed-by: Yang Shi <yang.shi@linux.alibaba.com>

>
> Fixes: 02b75dc8160d ("mm: thp: register mm for khugepaged when merging vma for shmem")
> Cc: Yang Shi <yang.shi@linux.alibaba.com>
> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Cc: Hugh Dickins <hughd@google.com>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Rik van Riel <riel@surriel.com>
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>   mm/khugepaged.c | 15 ++++++++-------
>   1 file changed, 8 insertions(+), 7 deletions(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index b2c328030aa2..38b7db1933a3 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -397,10 +397,11 @@ static inline int khugepaged_test_exit(struct mm_struct *mm)
>   	return atomic_read(&mm->mm_users) == 0;
>   }
>   
> -static bool hugepage_vma_check(struct vm_area_struct *vma)
> +static bool hugepage_vma_check(struct vm_area_struct *vma,
> +			       unsigned long vm_flags)
>   {
> -	if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
> -	    (vma->vm_flags & VM_NOHUGEPAGE) ||
> +	if ((!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
> +	    (vm_flags & VM_NOHUGEPAGE) ||
>   	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
>   		return false;
>   	if (shmem_file(vma->vm_file)) {
> @@ -413,7 +414,7 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
>   		return false;
>   	if (is_vma_temporary_stack(vma))
>   		return false;
> -	return !(vma->vm_flags & VM_NO_KHUGEPAGED);
> +	return !(vm_flags & VM_NO_KHUGEPAGED);
>   }
>   
>   int __khugepaged_enter(struct mm_struct *mm)
> @@ -458,7 +459,7 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
>   	 * khugepaged does not yet work on non-shmem files or special
>   	 * mappings. And file-private shmem THP is not supported.
>   	 */
> -	if (!hugepage_vma_check(vma))
> +	if (!hugepage_vma_check(vma, vm_flags))
>   		return 0;
>   
>   	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
> @@ -861,7 +862,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
>   	hend = vma->vm_end & HPAGE_PMD_MASK;
>   	if (address < hstart || address + HPAGE_PMD_SIZE > hend)
>   		return SCAN_ADDRESS_RANGE;
> -	if (!hugepage_vma_check(vma))
> +	if (!hugepage_vma_check(vma, vma->vm_flags))
>   		return SCAN_VMA_CHECK;
>   	return 0;
>   }
> @@ -1660,7 +1661,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
>   			progress++;
>   			break;
>   		}
> -		if (!hugepage_vma_check(vma)) {
> +		if (!hugepage_vma_check(vma, vma->vm_flags)) {
>   skip:
>   			progress++;
>   			continue;
Andrew Morton June 30, 2018, 2:25 a.m. UTC | #3
On Fri, 29 Jun 2018 11:17:52 -0700 Song Liu <songliubraving@fb.com> wrote:

> Back in May, I sent patch similar to 02b75dc8160d:
> 
> https://patchwork.kernel.org/patch/10416233/  (v1)
> 
> This patch got positive feedback. However, I realized there is a problem,
> that vma->vm_flags in khugepaged_enter_vma_merge() is stale. The separate
> argument vm_flags contains the latest value. Therefore, it is
> necessary to pass this vm_flags into hugepage_vma_check(). To fix this
> problem,  I resent v2 and v3 of the work:
> 
> https://patchwork.kernel.org/patch/10419527/   (v2)
> https://patchwork.kernel.org/patch/10433937/   (v3)
> 
> To my surprise, after I thought we all agreed on v3 of the work. Yang's
> patch, which is similar to correct looking (but wrong) v1, got applied.
> So we still have the issue of stale vma->vm_flags. This patch fixes this
> issue. Please apply.

That's a useful history lesson but most of it isn't relevant to this
change.  So I'd suggest this changelog:

: khugepaged_enter_vma_merge() passes a stale vma->vm_flags to
: hugepage_vma_check().  The argument vm_flags contains the latest value. 
: Therefore, it is necessary to pass this vm_flags into
: hugepage_vma_check().

Also, please (as always) tell us the user-visible runtime effects of
this bug so that others can decide which kernel(s) need the fix?
Song Liu July 1, 2018, 6:31 a.m. UTC | #4
> On Jun 29, 2018, at 7:25 PM, Andrew Morton <akpm@linux-foundation.org> wrote:
> 
> On Fri, 29 Jun 2018 11:17:52 -0700 Song Liu <songliubraving@fb.com> wrote:
> 
>> Back in May, I sent patch similar to 02b75dc8160d:
>> 
>> https://patchwork.kernel.org/patch/10416233/  (v1)
>> 
>> This patch got positive feedback. However, I realized there is a problem,
>> that vma->vm_flags in khugepaged_enter_vma_merge() is stale. The separate
>> argument vm_flags contains the latest value. Therefore, it is
>> necessary to pass this vm_flags into hugepage_vma_check(). To fix this
>> problem,  I resent v2 and v3 of the work:
>> 
>> https://patchwork.kernel.org/patch/10419527/   (v2)
>> https://patchwork.kernel.org/patch/10433937/   (v3)
>> 
>> To my surprise, after I thought we all agreed on v3 of the work. Yang's
>> patch, which is similar to correct looking (but wrong) v1, got applied.
>> So we still have the issue of stale vma->vm_flags. This patch fixes this
>> issue. Please apply.
> 
> That's a useful history lesson but most of it isn't relevant to this
> change.  So I'd suggest this changelog:
> 
> : khugepaged_enter_vma_merge() passes a stale vma->vm_flags to
> : hugepage_vma_check().  The argument vm_flags contains the latest value. 
> : Therefore, it is necessary to pass this vm_flags into
> : hugepage_vma_check().

This looks good. Thanks!

> Also, please (as always) tell us the user-visible runtime effects of
> this bug so that others can decide which kernel(s) need the fix?

With this bug, madvise(MADV_HUGEPAGE) for mmap files in shmem fails to
put memory in huge pages. Here is an example of failed madvise():

   /* mount /dev/shm with huge=advise:
    *     mount -o remount,huge=advise /dev/shm */
   /* create file /dev/shm/huge */
   #define HUGE_FILE "/dev/shm/huge"

   fd = open(HUGE_FILE, O_RDONLY);
   ptr = mmap(NULL, FILE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0);
   ret = madvise(ptr, FILE_SIZE, MADV_HUGEPAGE);

madvise() will return 0, but this memory region is never put in huge
page (check from /proc/meminfo: ShmemHugePages).

Thanks,
Song
diff mbox

Patch

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b2c328030aa2..38b7db1933a3 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -397,10 +397,11 @@  static inline int khugepaged_test_exit(struct mm_struct *mm)
 	return atomic_read(&mm->mm_users) == 0;
 }
 
-static bool hugepage_vma_check(struct vm_area_struct *vma)
+static bool hugepage_vma_check(struct vm_area_struct *vma,
+			       unsigned long vm_flags)
 {
-	if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
-	    (vma->vm_flags & VM_NOHUGEPAGE) ||
+	if ((!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
+	    (vm_flags & VM_NOHUGEPAGE) ||
 	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
 		return false;
 	if (shmem_file(vma->vm_file)) {
@@ -413,7 +414,7 @@  static bool hugepage_vma_check(struct vm_area_struct *vma)
 		return false;
 	if (is_vma_temporary_stack(vma))
 		return false;
-	return !(vma->vm_flags & VM_NO_KHUGEPAGED);
+	return !(vm_flags & VM_NO_KHUGEPAGED);
 }
 
 int __khugepaged_enter(struct mm_struct *mm)
@@ -458,7 +459,7 @@  int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 	 * khugepaged does not yet work on non-shmem files or special
 	 * mappings. And file-private shmem THP is not supported.
 	 */
-	if (!hugepage_vma_check(vma))
+	if (!hugepage_vma_check(vma, vm_flags))
 		return 0;
 
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
@@ -861,7 +862,7 @@  static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (address < hstart || address + HPAGE_PMD_SIZE > hend)
 		return SCAN_ADDRESS_RANGE;
-	if (!hugepage_vma_check(vma))
+	if (!hugepage_vma_check(vma, vma->vm_flags))
 		return SCAN_VMA_CHECK;
 	return 0;
 }
@@ -1660,7 +1661,7 @@  static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 			progress++;
 			break;
 		}
-		if (!hugepage_vma_check(vma)) {
+		if (!hugepage_vma_check(vma, vma->vm_flags)) {
 skip:
 			progress++;
 			continue;