diff mbox series

[3/3] mm,thp,shmem: make khugepaged obey tmpfs mount flags

Message ID 20201124194925.623931-4-riel@surriel.com (mailing list archive)
State New, archived
Headers show
Series mm,thp,shm: limit shmem THP alloc gfp_mask | expand

Commit Message

Rik van Riel Nov. 24, 2020, 7:49 p.m. UTC
Currently if thp enabled=[madvise], mounting a tmpfs filesystem
with huge=always and mmapping files from that tmpfs does not
result in khugepaged collapsing those mappings, despite the
mount flag indicating that it should.

Fix that by breaking up the blocks of tests in hugepage_vma_check
a little bit, and testing things in the correct order.

Signed-off-by: Rik van Riel <riel@surriel.com>
Fixes: c2231020ea7b ("mm: thp: register mm for khugepaged when merging vma for shmem")
---
 include/linux/khugepaged.h |  2 ++
 mm/khugepaged.c            | 22 ++++++++++++++++------
 2 files changed, 18 insertions(+), 6 deletions(-)

Comments

Vlastimil Babka Nov. 26, 2020, 5:18 p.m. UTC | #1
On 11/24/20 8:49 PM, Rik van Riel wrote:
> Currently if thp enabled=[madvise], mounting a tmpfs filesystem
> with huge=always and mmapping files from that tmpfs does not
> result in khugepaged collapsing those mappings, despite the
> mount flag indicating that it should.
> 
> Fix that by breaking up the blocks of tests in hugepage_vma_check
> a little bit, and testing things in the correct order.
> 
> Signed-off-by: Rik van Riel <riel@surriel.com>
> Fixes: c2231020ea7b ("mm: thp: register mm for khugepaged when merging vma for shmem")

Looks ok. But, if we have sysfs thp enabled=never, and shmem mount explicitly
thp enabled, then shmem mount overrides the global sysfs setting and thp's will
be allocated there, right? However, khugepaged_enabled() will be false and thus
khugepaged won't run at all? So a similar situation to what you're fixing here.

> ---
>   include/linux/khugepaged.h |  2 ++
>   mm/khugepaged.c            | 22 ++++++++++++++++------
>   2 files changed, 18 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
> index c941b7377321..2fcc01891b47 100644
> --- a/include/linux/khugepaged.h
> +++ b/include/linux/khugepaged.h
> @@ -3,6 +3,7 @@
>   #define _LINUX_KHUGEPAGED_H
>   
>   #include <linux/sched/coredump.h> /* MMF_VM_HUGEPAGE */
> +#include <linux/shmem_fs.h>
>   
>   
>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> @@ -57,6 +58,7 @@ static inline int khugepaged_enter(struct vm_area_struct *vma,
>   {
>   	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
>   		if ((khugepaged_always() ||
> +		     (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) ||
>   		     (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
>   		    !(vm_flags & VM_NOHUGEPAGE) &&
>   		    !test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 4e3dff13eb70..abab394c4206 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -440,18 +440,28 @@ static inline int khugepaged_test_exit(struct mm_struct *mm)
>   static bool hugepage_vma_check(struct vm_area_struct *vma,
>   			       unsigned long vm_flags)
>   {
> -	if ((!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
> -	    (vm_flags & VM_NOHUGEPAGE) ||
> +	/* Explicitly disabled through madvise. */
> +	if ((vm_flags & VM_NOHUGEPAGE) ||
>   	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
>   		return false;
>   
> -	if (shmem_file(vma->vm_file) ||
> -	    (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
> -	     vma->vm_file &&
> -	     (vm_flags & VM_DENYWRITE))) {
> +	/* Enabled via shmem mount options or sysfs settings. */
> +	if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) {
>   		return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
>   				HPAGE_PMD_NR);
>   	}
> +
> +	/* THP settings require madvise. */
> +	if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always())
> +		return false;
> +
> +	/* Read-only file mappings need to be aligned for THP to work. */
> +	if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
> +	    (vm_flags & VM_DENYWRITE)) {
> +		return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
> +				HPAGE_PMD_NR);
> +	}
> +
>   	if (!vma->anon_vma || vma->vm_ops)
>   		return false;
>   	if (vma_is_temporary_stack(vma))
>
Rik van Riel Nov. 26, 2020, 6:14 p.m. UTC | #2
On Thu, 2020-11-26 at 18:18 +0100, Vlastimil Babka wrote:
> On 11/24/20 8:49 PM, Rik van Riel wrote:
> > Currently if thp enabled=[madvise], mounting a tmpfs filesystem
> > with huge=always and mmapping files from that tmpfs does not
> > result in khugepaged collapsing those mappings, despite the
> > mount flag indicating that it should.
> > 
> > Fix that by breaking up the blocks of tests in hugepage_vma_check
> > a little bit, and testing things in the correct order.
> > 
> > Signed-off-by: Rik van Riel <riel@surriel.com>
> > Fixes: c2231020ea7b ("mm: thp: register mm for khugepaged when
> > merging vma for shmem")
> 
> Looks ok. But, it we have sysfs thp enabled=never, and shmem mount
> explicitly 
> thp enabled, then shmem mount overrides the global sysfs setting and
> thp's will 
> be allocated there, right? However, khugepaged_enabled() will be
> false and thus 
> khugepaged won't run at all? So a similar situation than what you're
> fixing here.

Indeed, that is somewhat similar. Whether or not shmem
allocations attempt huge pages is controlled by both
the file /sys/kernel/mm/transparent_hugepage/shmem_enabled
and mount options.

This patch makes khugepaged treat the mount options and/or
sysfs flag as enabling collapsing of huge pages in case
enabled = [always] for regular THPs.

Should I send another patch on top of this that causes
khugepaged to be enabled when regular THPs are disabled,
but shmem THPs are enabled in any way?
Vlastimil Babka Nov. 26, 2020, 7:42 p.m. UTC | #3
On 11/26/20 7:14 PM, Rik van Riel wrote:
> On Thu, 2020-11-26 at 18:18 +0100, Vlastimil Babka wrote:
>> On 11/24/20 8:49 PM, Rik van Riel wrote:
>>> Currently if thp enabled=[madvise], mounting a tmpfs filesystem
>>> with huge=always and mmapping files from that tmpfs does not
>>> result in khugepaged collapsing those mappings, despite the
>>> mount flag indicating that it should.
>>>
>>> Fix that by breaking up the blocks of tests in hugepage_vma_check
>>> a little bit, and testing things in the correct order.
>>>
>>> Signed-off-by: Rik van Riel <riel@surriel.com>
>>> Fixes: c2231020ea7b ("mm: thp: register mm for khugepaged when
>>> merging vma for shmem")
>>
>> Looks ok. But, it we have sysfs thp enabled=never, and shmem mount
>> explicitly 
>> thp enabled, then shmem mount overrides the global sysfs setting and
>> thp's will 
>> be allocated there, right? However, khugepaged_enabled() will be
>> false and thus 
>> khugepaged won't run at all? So a similar situation than what you're
>> fixing here.
> 
> Indeed, that is somewhat similar. Whether or not shmem
> allocations attempt huge pages is controlled by both
> the file /sys/kernel/mm/transparent_hugepage/shmem_enabled

Ah right, there's also that sysfs file.

> and mount options.
> 
> This patch makes khugepaged treat the mount options
> and/or
> sysfs flag as enabling collapsing of huge pages in case
> enabled = [always] for regular THPs.
> 
> Should I send another patch on top
> of this that causes
> khugepaged to be enabled when regular THPs are disabled,
> but shmem THPs are enabled in any way?

I think it would make sense. Although it might involve counting
thp-enabled shmem mounts and only running khugepaged when there are >0 of them.
Rik van Riel Nov. 26, 2020, 8:14 p.m. UTC | #4
On Thu, 2020-11-26 at 20:42 +0100, Vlastimil Babka wrote:
> On 11/26/20 7:14 PM, Rik van Riel wrote:
> > On Thu, 2020-11-26 at 18:18 +0100, Vlastimil Babka wrote:
> > > 
> > This patch makes khugepaged treat the mount options
> > and/or
> > sysfs flag as enabling collapsing of huge pages in case
> > enabled = [always] for regular THPs.
> > 
> > Should I send another patch on top
> > of this that causes
> > khugepaged to be enabled when regular THPs are disabled,
> > but shmem THPs are enabled in any way?
> 
> I think it would make sense. Although it might involve counting
> thp-enabled shmem mounts and only run khugepaged when there are >0 of
> them.

That seems feasible. I can do that as a follow-up 4/3
patch after the Thanksgiving weekend :)
diff mbox series

Patch

diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index c941b7377321..2fcc01891b47 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -3,6 +3,7 @@ 
 #define _LINUX_KHUGEPAGED_H
 
 #include <linux/sched/coredump.h> /* MMF_VM_HUGEPAGE */
+#include <linux/shmem_fs.h>
 
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -57,6 +58,7 @@  static inline int khugepaged_enter(struct vm_area_struct *vma,
 {
 	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
 		if ((khugepaged_always() ||
+		     (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) ||
 		     (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
 		    !(vm_flags & VM_NOHUGEPAGE) &&
 		    !test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 4e3dff13eb70..abab394c4206 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -440,18 +440,28 @@  static inline int khugepaged_test_exit(struct mm_struct *mm)
 static bool hugepage_vma_check(struct vm_area_struct *vma,
 			       unsigned long vm_flags)
 {
-	if ((!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
-	    (vm_flags & VM_NOHUGEPAGE) ||
+	/* Explicitly disabled through madvise. */
+	if ((vm_flags & VM_NOHUGEPAGE) ||
 	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
 		return false;
 
-	if (shmem_file(vma->vm_file) ||
-	    (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
-	     vma->vm_file &&
-	     (vm_flags & VM_DENYWRITE))) {
+	/* Enabled via shmem mount options or sysfs settings. */
+	if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) {
 		return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
 				HPAGE_PMD_NR);
 	}
+
+	/* THP settings require madvise. */
+	if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always())
+		return false;
+
+	/* Read-only file mappings need to be aligned for THP to work. */
+	if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
+	    (vm_flags & VM_DENYWRITE)) {
+		return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
+				HPAGE_PMD_NR);
+	}
+
 	if (!vma->anon_vma || vma->vm_ops)
 		return false;
 	if (vma_is_temporary_stack(vma))