diff mbox series

mm: shmem: fix khugepaged activation policy for shmem

Message ID 7c796904528e21152ba5aa639e963e0ae45b7040.1725600217.git.baolin.wang@linux.alibaba.com (mailing list archive)
State New
Headers show
Series mm: shmem: fix khugepaged activation policy for shmem | expand

Commit Message

Baolin Wang Sept. 6, 2024, 5:28 a.m. UTC
Shmem has a separate interface (different from anonymous pages) to control
huge page allocation, that means shmem THP can be enabled while anonymous
THP is disabled. However, in this case, khugepaged will not start to collapse
shmem THP, which is unreasonable.

To fix this issue, we should call start_stop_khugepaged() to activate or
deactivate the khugepaged thread when setting shmem mTHP interfaces.
Moreover, add a new helper shmem_hpage_pmd_enabled() to help to check
whether shmem THP is enabled, which will determine if khugepaged should
be activated.

Reported-by: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
Hi Ryan,

I remember we discussed the shmem khugepaged activation issue before, but
I haven’t seen any follow-up patches to fix it. Recently, I am trying to
fix the shmem mTHP collapse issue in the series [1], and I also addressed
this activation issue. Please correct me if you have a better idea. Thanks.

[1] https://lore.kernel.org/all/cover.1724140601.git.baolin.wang@linux.alibaba.com/T/#u
---
 include/linux/shmem_fs.h |  6 ++++++
 mm/khugepaged.c          |  2 ++
 mm/shmem.c               | 29 +++++++++++++++++++++++++++--
 3 files changed, 35 insertions(+), 2 deletions(-)

Comments

Ryan Roberts Sept. 6, 2024, 8:55 a.m. UTC | #1
On 06/09/2024 06:28, Baolin Wang wrote:
> Shmem has a separate interface (different from anonymous pages) to control
> huge page allocation, that means shmem THP can be enabled while anonymous
> THP is disabled. However, in this case, khugepaged will not start to collapse
> shmem THP, which is unreasonable.
> 
> To fix this issue, we should call start_stop_khugepaged() to activate or
> deactivate the khugepaged thread when setting shmem mTHP interfaces.
> Moreover, add a new helper shmem_hpage_pmd_enabled() to help to check
> whether shmem THP is enabled, which will determine if khugepaged should
> be activated.
> 
> Reported-by: Ryan Roberts <ryan.roberts@arm.com>
> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
> ---
> Hi Ryan,
> 
> I remember we discussed the shmem khugepaged activation issue before, but
> I haven’t seen any follow-up patches to fix it. Recently, I am trying to
> fix the shmem mTHP collapse issue in the series [1], and I also addressed
> this activation issue. Please correct me if you have a better idea. Thanks.

Thanks for for sorting this - it looks like a good approach to me! Just a couple
of nits. Regardless:

Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>

> 
> [1] https://lore.kernel.org/all/cover.1724140601.git.baolin.wang@linux.alibaba.com/T/#u
> ---
>  include/linux/shmem_fs.h |  6 ++++++
>  mm/khugepaged.c          |  2 ++
>  mm/shmem.c               | 29 +++++++++++++++++++++++++++--
>  3 files changed, 35 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
> index 515a9a6a3c6f..ee6635052383 100644
> --- a/include/linux/shmem_fs.h
> +++ b/include/linux/shmem_fs.h
> @@ -114,6 +114,7 @@ int shmem_unuse(unsigned int type);
>  unsigned long shmem_allowable_huge_orders(struct inode *inode,
>  				struct vm_area_struct *vma, pgoff_t index,
>  				loff_t write_end, bool shmem_huge_force);
> +bool shmem_hpage_pmd_enabled(void);
>  #else
>  static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
>  				struct vm_area_struct *vma, pgoff_t index,
> @@ -121,6 +122,11 @@ static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
>  {
>  	return 0;
>  }
> +
> +static inline bool shmem_hpage_pmd_enabled(void)
> +{
> +	return false;
> +}
>  #endif
>  
>  #ifdef CONFIG_SHMEM
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index f9c39898eaff..caf10096d4d1 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -430,6 +430,8 @@ static bool hugepage_pmd_enabled(void)
>  	if (test_bit(PMD_ORDER, &huge_anon_orders_inherit) &&
>  	    hugepage_global_enabled())
>  		return true;
> +	if (shmem_hpage_pmd_enabled())
> +		return true;

nit: There is a comment at the top of this function, perhaps that could be
extended to cover shmem too?

>  	return false;
>  }
>  
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 74f093d88c78..d7c342ae2b37 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1653,6 +1653,23 @@ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
>  }
>  
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +bool shmem_hpage_pmd_enabled(void)
> +{
> +	if (shmem_huge == SHMEM_HUGE_DENY)
> +		return false;
> +	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_always))

question: When is it correct to use HPAGE_PMD_ORDER vs PMD_ORDER? I tend to use
PMD_ORDER (in hugepage_pmd_enabled() for example).

> +		return true;
> +	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_madvise))
> +		return true;
> +	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_within_size))
> +		return true;
> +	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_inherit) &&
> +	    shmem_huge != SHMEM_HUGE_NEVER)
> +		return true;
> +
> +	return false;
> +}
> +
>  unsigned long shmem_allowable_huge_orders(struct inode *inode,
>  				struct vm_area_struct *vma, pgoff_t index,
>  				loff_t write_end, bool shmem_huge_force)
> @@ -5036,7 +5053,7 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
>  		struct kobj_attribute *attr, const char *buf, size_t count)
>  {
>  	char tmp[16];
> -	int huge;
> +	int huge, err;
>  
>  	if (count + 1 > sizeof(tmp))
>  		return -EINVAL;
> @@ -5060,7 +5077,9 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
>  	shmem_huge = huge;
>  	if (shmem_huge > SHMEM_HUGE_DENY)
>  		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
> -	return count;
> +
> +	err = start_stop_khugepaged();
> +	return err ? err : count;
>  }
>  
>  struct kobj_attribute shmem_enabled_attr = __ATTR_RW(shmem_enabled);
> @@ -5137,6 +5156,12 @@ static ssize_t thpsize_shmem_enabled_store(struct kobject *kobj,
>  		ret = -EINVAL;
>  	}
>  
> +	if (ret > 0) {
> +		int err = start_stop_khugepaged();
> +
> +		if (err)
> +			ret = err;
> +	}
>  	return ret;
>  }
>
Baolin Wang Sept. 9, 2024, 1:22 a.m. UTC | #2
On 2024/9/6 16:55, Ryan Roberts wrote:
> On 06/09/2024 06:28, Baolin Wang wrote:
>> Shmem has a separate interface (different from anonymous pages) to control
>> huge page allocation, that means shmem THP can be enabled while anonymous
>> THP is disabled. However, in this case, khugepaged will not start to collapse
>> shmem THP, which is unreasonable.
>>
>> To fix this issue, we should call start_stop_khugepaged() to activate or
>> deactivate the khugepaged thread when setting shmem mTHP interfaces.
>> Moreover, add a new helper shmem_hpage_pmd_enabled() to help to check
>> whether shmem THP is enabled, which will determine if khugepaged should
>> be activated.
>>
>> Reported-by: Ryan Roberts <ryan.roberts@arm.com>
>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>> ---
>> Hi Ryan,
>>
>> I remember we discussed the shmem khugepaged activation issue before, but
>> I haven’t seen any follow-up patches to fix it. Recently, I am trying to
>> fix the shmem mTHP collapse issue in the series [1], and I also addressed
>> this activation issue. Please correct me if you have a better idea. Thanks.
> 
> Thanks for for sorting this - it looks like a good approach to me! Just a couple
> of nits. Regardless:
> 
> Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>

Thanks for reviewing.

> 
>>
>> [1] https://lore.kernel.org/all/cover.1724140601.git.baolin.wang@linux.alibaba.com/T/#u
>> ---
>>   include/linux/shmem_fs.h |  6 ++++++
>>   mm/khugepaged.c          |  2 ++
>>   mm/shmem.c               | 29 +++++++++++++++++++++++++++--
>>   3 files changed, 35 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
>> index 515a9a6a3c6f..ee6635052383 100644
>> --- a/include/linux/shmem_fs.h
>> +++ b/include/linux/shmem_fs.h
>> @@ -114,6 +114,7 @@ int shmem_unuse(unsigned int type);
>>   unsigned long shmem_allowable_huge_orders(struct inode *inode,
>>   				struct vm_area_struct *vma, pgoff_t index,
>>   				loff_t write_end, bool shmem_huge_force);
>> +bool shmem_hpage_pmd_enabled(void);
>>   #else
>>   static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
>>   				struct vm_area_struct *vma, pgoff_t index,
>> @@ -121,6 +122,11 @@ static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
>>   {
>>   	return 0;
>>   }
>> +
>> +static inline bool shmem_hpage_pmd_enabled(void)
>> +{
>> +	return false;
>> +}
>>   #endif
>>   
>>   #ifdef CONFIG_SHMEM
>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>> index f9c39898eaff..caf10096d4d1 100644
>> --- a/mm/khugepaged.c
>> +++ b/mm/khugepaged.c
>> @@ -430,6 +430,8 @@ static bool hugepage_pmd_enabled(void)
>>   	if (test_bit(PMD_ORDER, &huge_anon_orders_inherit) &&
>>   	    hugepage_global_enabled())
>>   		return true;
>> +	if (shmem_hpage_pmd_enabled())
>> +		return true;
> 
> nit: There is a comment at the top of this function, perhaps that could be
> extended to cover shmem too?

Sure. How about the following changes?

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 945c0f2aff81..b0ac46ae561b 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -416,9 +416,11 @@ static inline int 
hpage_collapse_test_exit_or_disable(struct mm_struct *mm)
  static bool hugepage_pmd_enabled(void)
  {
         /*
-        * We cover both the anon and the file-backed case here; file-backed
+        * We cover the anon, shmem and the file-backed case here; 
file-backed
          * hugepages, when configured in, are determined by the global 
control.
          * Anon pmd-sized hugepages are determined by the pmd-size control.
+        * Shmem pmd-sized hugepages are also determined by its pmd-size 
control,
+        * except when the global shmem_huge is set to SHMEM_HUGE_DENY.
          */
         if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
             hugepage_global_enabled())

>>   	return false;
>>   }
>>   
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index 74f093d88c78..d7c342ae2b37 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -1653,6 +1653,23 @@ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
>>   }
>>   
>>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +bool shmem_hpage_pmd_enabled(void)
>> +{
>> +	if (shmem_huge == SHMEM_HUGE_DENY)
>> +		return false;
>> +	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_always))
> 
> question: When is it correct to use HPAGE_PMD_ORDER vs PMD_ORDER? I tend to use
> PMD_ORDER (in hugepage_pmd_enabled() for example).

In shmem, the HPAGE_PMD_* related macros are all controlled by 
CONFIG_TRANSPARENT_HUGEPAGE, and at this point, HPAGE_PMD_ORDER and 
PMD_ORDER are equal. I would like to keep consistency in the shmem code 
by using the HPAGE_PMD_* related macros.
David Hildenbrand Sept. 9, 2024, 6:52 a.m. UTC | #3
On 06.09.24 10:55, Ryan Roberts wrote:
> On 06/09/2024 06:28, Baolin Wang wrote:
>> Shmem has a separate interface (different from anonymous pages) to control
>> huge page allocation, that means shmem THP can be enabled while anonymous
>> THP is disabled. However, in this case, khugepaged will not start to collapse
>> shmem THP, which is unreasonable.
>>
>> To fix this issue, we should call start_stop_khugepaged() to activate or
>> deactivate the khugepaged thread when setting shmem mTHP interfaces.
>> Moreover, add a new helper shmem_hpage_pmd_enabled() to help to check
>> whether shmem THP is enabled, which will determine if khugepaged should
>> be activated.
>>
>> Reported-by: Ryan Roberts <ryan.roberts@arm.com>
>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>> ---
>> Hi Ryan,
>>
>> I remember we discussed the shmem khugepaged activation issue before, but
>> I haven’t seen any follow-up patches to fix it. Recently, I am trying to
>> fix the shmem mTHP collapse issue in the series [1], and I also addressed
>> this activation issue. Please correct me if you have a better idea. Thanks.
> 
> Thanks for for sorting this - it looks like a good approach to me! Just a couple
> of nits. Regardless:
> 
> Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
> 
>>
>> [1] https://lore.kernel.org/all/cover.1724140601.git.baolin.wang@linux.alibaba.com/T/#u
>> ---
>>   include/linux/shmem_fs.h |  6 ++++++
>>   mm/khugepaged.c          |  2 ++
>>   mm/shmem.c               | 29 +++++++++++++++++++++++++++--
>>   3 files changed, 35 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
>> index 515a9a6a3c6f..ee6635052383 100644
>> --- a/include/linux/shmem_fs.h
>> +++ b/include/linux/shmem_fs.h
>> @@ -114,6 +114,7 @@ int shmem_unuse(unsigned int type);
>>   unsigned long shmem_allowable_huge_orders(struct inode *inode,
>>   				struct vm_area_struct *vma, pgoff_t index,
>>   				loff_t write_end, bool shmem_huge_force);
>> +bool shmem_hpage_pmd_enabled(void);
>>   #else
>>   static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
>>   				struct vm_area_struct *vma, pgoff_t index,
>> @@ -121,6 +122,11 @@ static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
>>   {
>>   	return 0;
>>   }
>> +
>> +static inline bool shmem_hpage_pmd_enabled(void)
>> +{
>> +	return false;
>> +}
>>   #endif
>>   
>>   #ifdef CONFIG_SHMEM
>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>> index f9c39898eaff..caf10096d4d1 100644
>> --- a/mm/khugepaged.c
>> +++ b/mm/khugepaged.c
>> @@ -430,6 +430,8 @@ static bool hugepage_pmd_enabled(void)
>>   	if (test_bit(PMD_ORDER, &huge_anon_orders_inherit) &&
>>   	    hugepage_global_enabled())
>>   		return true;
>> +	if (shmem_hpage_pmd_enabled())
>> +		return true;
> 
> nit: There is a comment at the top of this function, perhaps that could be
> extended to cover shmem too?
> 
>>   	return false;
>>   }
>>   
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index 74f093d88c78..d7c342ae2b37 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -1653,6 +1653,23 @@ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
>>   }
>>   
>>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +bool shmem_hpage_pmd_enabled(void)
>> +{
>> +	if (shmem_huge == SHMEM_HUGE_DENY)
>> +		return false;
>> +	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_always))
> 
> question: When is it correct to use HPAGE_PMD_ORDER vs PMD_ORDER? I tend to use
> PMD_ORDER (in hugepage_pmd_enabled() for example).

I think the HPAGE_PMD_ORDER thingy originally was introduced to detect 
code that is supposed to handle THPs, but really cannot handle them 
properly -- e.g., no arch support, not enabled in the kernel config.

My take is that we should start removing this HPAGE_* stuff.
diff mbox series

Patch

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 515a9a6a3c6f..ee6635052383 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -114,6 +114,7 @@  int shmem_unuse(unsigned int type);
 unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
 				loff_t write_end, bool shmem_huge_force);
+bool shmem_hpage_pmd_enabled(void);
 #else
 static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
@@ -121,6 +122,11 @@  static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
 {
 	return 0;
 }
+
+static inline bool shmem_hpage_pmd_enabled(void)
+{
+	return false;
+}
 #endif
 
 #ifdef CONFIG_SHMEM
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index f9c39898eaff..caf10096d4d1 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -430,6 +430,8 @@  static bool hugepage_pmd_enabled(void)
 	if (test_bit(PMD_ORDER, &huge_anon_orders_inherit) &&
 	    hugepage_global_enabled())
 		return true;
+	if (shmem_hpage_pmd_enabled())
+		return true;
 	return false;
 }
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 74f093d88c78..d7c342ae2b37 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1653,6 +1653,23 @@  static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+bool shmem_hpage_pmd_enabled(void)
+{
+	if (shmem_huge == SHMEM_HUGE_DENY)
+		return false;
+	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_always))
+		return true;
+	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_madvise))
+		return true;
+	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_within_size))
+		return true;
+	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_inherit) &&
+	    shmem_huge != SHMEM_HUGE_NEVER)
+		return true;
+
+	return false;
+}
+
 unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
 				loff_t write_end, bool shmem_huge_force)
@@ -5036,7 +5053,7 @@  static ssize_t shmem_enabled_store(struct kobject *kobj,
 		struct kobj_attribute *attr, const char *buf, size_t count)
 {
 	char tmp[16];
-	int huge;
+	int huge, err;
 
 	if (count + 1 > sizeof(tmp))
 		return -EINVAL;
@@ -5060,7 +5077,9 @@  static ssize_t shmem_enabled_store(struct kobject *kobj,
 	shmem_huge = huge;
 	if (shmem_huge > SHMEM_HUGE_DENY)
 		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
-	return count;
+
+	err = start_stop_khugepaged();
+	return err ? err : count;
 }
 
 struct kobj_attribute shmem_enabled_attr = __ATTR_RW(shmem_enabled);
@@ -5137,6 +5156,12 @@  static ssize_t thpsize_shmem_enabled_store(struct kobject *kobj,
 		ret = -EINVAL;
 	}
 
+	if (ret > 0) {
+		int err = start_stop_khugepaged();
+
+		if (err)
+			ret = err;
+	}
 	return ret;
 }