diff mbox series

[v2,3/3] mm: hugetlb: add hugetlb_free_vmemmap sysctl

Message ID 20220302083758.32528-4-songmuchun@bytedance.com (mailing list archive)
State New
Headers show
Series add hugetlb_free_vmemmap sysctl | expand

Commit Message

Muchun Song March 2, 2022, 8:37 a.m. UTC
We must add "hugetlb_free_vmemmap=on" to boot cmdline and reboot the
server to enable the feature of freeing vmemmap pages of HugeTLB
pages. Rebooting usually takes a long time. Add a sysctl to enable
the feature at runtime so that a reboot is not needed.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 Documentation/admin-guide/sysctl/vm.rst | 13 ++++++++++
 include/linux/memory_hotplug.h          |  9 +++++++
 mm/hugetlb_vmemmap.c                    | 42 ++++++++++++++++++++++++++++-----
 mm/hugetlb_vmemmap.h                    |  4 +++-
 mm/memory_hotplug.c                     |  5 ++++
 5 files changed, 66 insertions(+), 7 deletions(-)

Comments

Luis Chamberlain March 2, 2022, 9:25 p.m. UTC | #1
On Wed, Mar 02, 2022 at 04:37:58PM +0800, Muchun Song wrote:
> We must add "hugetlb_free_vmemmap=on" to boot cmdline and reboot the
> server to enable the feature of freeing vmemmap pages of HugeTLB
> pages. Rebooting usually takes a long time. Add a sysctl to enable
> the feature at runtime and do not need to reboot.
> 
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> ---
>  Documentation/admin-guide/sysctl/vm.rst | 13 ++++++++++
>  include/linux/memory_hotplug.h          |  9 +++++++
>  mm/hugetlb_vmemmap.c                    | 42 ++++++++++++++++++++++++++++-----
>  mm/hugetlb_vmemmap.h                    |  4 +++-
>  mm/memory_hotplug.c                     |  5 ++++
>  5 files changed, 66 insertions(+), 7 deletions(-)
> 
> diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
> index f4804ce37c58..01f18e6cc227 100644
> --- a/Documentation/admin-guide/sysctl/vm.rst
> +++ b/Documentation/admin-guide/sysctl/vm.rst
> @@ -561,6 +561,19 @@ Change the minimum size of the hugepage pool.
>  See Documentation/admin-guide/mm/hugetlbpage.rst
>  
>  
> +hugetlb_free_vmemmap
> +====================
> +
> +A toggle value indicating if vmemmap pages are allowed to be optimized.
> +If it is off (0), then it can be set true (1).  Once true, the vmemmap
> +pages associated with each HugeTLB page will be optimized, and the toggle
> +cannot be set back to false.  It only optimizes the subsequent allocation
> +of HugeTLB pages from buddy system, while already allocated HugeTLB pages
> +will not be optimized.

The commit log or documentation does not describe why it's safe to toggle
one way and not the other?

  Luis

> +
> +See Documentation/admin-guide/mm/hugetlbpage.rst
> +
> +
>  nr_hugepages_mempolicy
>  ======================
>  
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index e0b2209ab71c..20d7edf62a6a 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -351,4 +351,13 @@ void arch_remove_linear_mapping(u64 start, u64 size);
>  extern bool mhp_supports_memmap_on_memory(unsigned long size);
>  #endif /* CONFIG_MEMORY_HOTPLUG */
>  
> +#ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
> +bool mhp_memmap_on_memory(void);
> +#else
> +static inline bool mhp_memmap_on_memory(void)
> +{
> +	return false;
> +}
> +#endif
> +
>  #endif /* __LINUX_MEMORY_HOTPLUG_H */
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index 836d1117f08b..3bcc8f25bd50 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -10,6 +10,7 @@
>  
>  #define pr_fmt(fmt)	"HugeTLB: " fmt
>  
> +#include <linux/memory_hotplug.h>
>  #include "hugetlb_vmemmap.h"
>  
>  /*
> @@ -118,17 +119,14 @@ void __init hugetlb_vmemmap_init(struct hstate *h)
>  	BUILD_BUG_ON(__NR_USED_SUBPAGE >=
>  		     RESERVE_VMEMMAP_SIZE / sizeof(struct page));
>  
> -	if (!hugetlb_free_vmemmap_enabled())
> -		return;
> -
> -	if (IS_ENABLED(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON) &&
> -	    !is_power_of_2(sizeof(struct page))) {
> +	if (!is_power_of_2(sizeof(struct page))) {
>  		/*
>  		 * The hugetlb_free_vmemmap_enabled_key can be enabled when
>  		 * CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON. It should
>  		 * be disabled if "struct page" crosses page boundaries.
>  		 */
> -		static_branch_disable(&hugetlb_free_vmemmap_enabled_key);
> +		if (IS_ENABLED(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON))
> +			static_branch_disable(&hugetlb_free_vmemmap_enabled_key);
>  		return;
>  	}
>  
> @@ -147,3 +145,35 @@ void __init hugetlb_vmemmap_init(struct hstate *h)
>  	pr_info("can free %d vmemmap pages for %s\n", h->nr_free_vmemmap_pages,
>  		h->name);
>  }
> +
> +static struct ctl_table hugetlb_vmemmap_sysctls[] = {
> +	{
> +		.procname	= "hugetlb_free_vmemmap",
> +		.data		= &hugetlb_free_vmemmap_enabled_key.key,
> +		.mode		= 0644,
> +		/* only handle a transition from default "0" to "1" */
> +		.proc_handler	= proc_do_static_key,
> +		.extra1		= SYSCTL_ONE,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +	{ }
> +};
> +
> +static __init int hugetlb_vmemmap_sysctls_init(void)
> +{
> +	/*
> +	 * The vmemmap pages cannot be optimized if
> +	 * "memory_hotplug.memmap_on_memory" is enabled unless
> +	 * "hugetlb_free_vmemmap" is enabled as well since
> +	 * "hugetlb_free_vmemmap" takes precedence over
> +	 * "memory_hotplug.memmap_on_memory".
> +	 */
> +	if (mhp_memmap_on_memory() && !hugetlb_free_vmemmap_enabled())
> +		return 0;
> +
> +	if (is_power_of_2(sizeof(struct page)))
> +		register_sysctl_init("vm", hugetlb_vmemmap_sysctls);
> +
> +	return 0;
> +}
> +late_initcall(hugetlb_vmemmap_sysctls_init);
> diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
> index cb2bef8f9e73..b67a159027f4 100644
> --- a/mm/hugetlb_vmemmap.h
> +++ b/mm/hugetlb_vmemmap.h
> @@ -21,7 +21,9 @@ void hugetlb_vmemmap_init(struct hstate *h);
>   */
>  static inline unsigned int free_vmemmap_pages_per_hpage(struct hstate *h)
>  {
> -	return h->nr_free_vmemmap_pages;
> +	if (hugetlb_free_vmemmap_enabled())
> +		return h->nr_free_vmemmap_pages;
> +	return 0;
>  }
>  #else
>  static inline int alloc_huge_page_vmemmap(struct hstate *h, struct page *head)
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index c226a337c1ef..c2115e566abc 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -50,6 +50,11 @@ static bool memmap_on_memory __ro_after_init;
>  #ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
>  module_param(memmap_on_memory, bool, 0444);
>  MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug");
> +
> +bool mhp_memmap_on_memory(void)
> +{
> +	return memmap_on_memory;
> +}
>  #endif
>  
>  enum {
> -- 
> 2.11.0
>
Muchun Song March 3, 2022, 11:15 a.m. UTC | #2
On Thu, Mar 3, 2022 at 5:25 AM Luis Chamberlain <mcgrof@kernel.org> wrote:
>
> On Wed, Mar 02, 2022 at 04:37:58PM +0800, Muchun Song wrote:
> > We must add "hugetlb_free_vmemmap=on" to boot cmdline and reboot the
> > server to enable the feature of freeing vmemmap pages of HugeTLB
> > pages. Rebooting usually takes a long time. Add a sysctl to enable
> > the feature at runtime and do not need to reboot.
> >
> > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > ---
> >  Documentation/admin-guide/sysctl/vm.rst | 13 ++++++++++
> >  include/linux/memory_hotplug.h          |  9 +++++++
> >  mm/hugetlb_vmemmap.c                    | 42 ++++++++++++++++++++++++++++-----
> >  mm/hugetlb_vmemmap.h                    |  4 +++-
> >  mm/memory_hotplug.c                     |  5 ++++
> >  5 files changed, 66 insertions(+), 7 deletions(-)
> >
> > diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
> > index f4804ce37c58..01f18e6cc227 100644
> > --- a/Documentation/admin-guide/sysctl/vm.rst
> > +++ b/Documentation/admin-guide/sysctl/vm.rst
> > @@ -561,6 +561,19 @@ Change the minimum size of the hugepage pool.
> >  See Documentation/admin-guide/mm/hugetlbpage.rst
> >
> >
> > +hugetlb_free_vmemmap
> > +====================
> > +
> > +A toggle value indicating if vmemmap pages are allowed to be optimized.
> > +If it is off (0), then it can be set true (1).  Once true, the vmemmap
> > +pages associated with each HugeTLB page will be optimized, and the toggle
> > +cannot be set back to false.  It only optimizes the subsequent allocation
> > +of HugeTLB pages from buddy system, while already allocated HugeTLB pages
> > +will not be optimized.
>
> The commit log or documentation does not describe why it's safe to toggle
> one way and not the other?
>

I thought it was easy to handle the transition from disabled to enabled
(the code is simple).  I might be wrong. I'll try to handle the other
direction in the next version if it is not hard to handle.

Thanks Luis.
Luis Chamberlain March 3, 2022, 2:59 p.m. UTC | #3
On Thu, Mar 03, 2022 at 07:15:05PM +0800, Muchun Song wrote:
> On Thu, Mar 3, 2022 at 5:25 AM Luis Chamberlain <mcgrof@kernel.org> wrote:
> >
> > On Wed, Mar 02, 2022 at 04:37:58PM +0800, Muchun Song wrote:
> > > We must add "hugetlb_free_vmemmap=on" to boot cmdline and reboot the
> > > server to enable the feature of freeing vmemmap pages of HugeTLB
> > > pages. Rebooting usually takes a long time. Add a sysctl to enable
> > > the feature at runtime and do not need to reboot.
> > >
> > > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > > ---
> > >  Documentation/admin-guide/sysctl/vm.rst | 13 ++++++++++
> > >  include/linux/memory_hotplug.h          |  9 +++++++
> > >  mm/hugetlb_vmemmap.c                    | 42 ++++++++++++++++++++++++++++-----
> > >  mm/hugetlb_vmemmap.h                    |  4 +++-
> > >  mm/memory_hotplug.c                     |  5 ++++
> > >  5 files changed, 66 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
> > > index f4804ce37c58..01f18e6cc227 100644
> > > --- a/Documentation/admin-guide/sysctl/vm.rst
> > > +++ b/Documentation/admin-guide/sysctl/vm.rst
> > > @@ -561,6 +561,19 @@ Change the minimum size of the hugepage pool.
> > >  See Documentation/admin-guide/mm/hugetlbpage.rst
> > >
> > >
> > > +hugetlb_free_vmemmap
> > > +====================
> > > +
> > > +A toggle value indicating if vmemmap pages are allowed to be optimized.
> > > +If it is off (0), then it can be set true (1).  Once true, the vmemmap
> > > +pages associated with each HugeTLB page will be optimized, and the toggle
> > > +cannot be set back to false.  It only optimizes the subsequent allocation
> > > +of HugeTLB pages from buddy system, while already allocated HugeTLB pages
> > > +will not be optimized.
> >
> > The commit log or documentation does not describe why it's safe to toggle
> > one way and not the other?
> >
> 
> I thought it was easy to handle the transition from disable to enable
> (code is simple).  I might be wrong. I'll try to handle the other side in
> the next version if it is not hard to handle.

You should do the homework and explain why something is not possible.
And if you are enabling to disable something why is it safe to do so
at runtime?

  Luis
diff mbox series

Patch

diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index f4804ce37c58..01f18e6cc227 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -561,6 +561,19 @@  Change the minimum size of the hugepage pool.
 See Documentation/admin-guide/mm/hugetlbpage.rst
 
 
+hugetlb_free_vmemmap
+====================
+
+A toggle value indicating if vmemmap pages are allowed to be optimized.
+If it is off (0), then it can be set true (1).  Once true, the vmemmap
+pages associated with each HugeTLB page will be optimized, and the toggle
+cannot be set back to false.  It only optimizes the subsequent allocation
+of HugeTLB pages from buddy system, while already allocated HugeTLB pages
+will not be optimized.
+
+See Documentation/admin-guide/mm/hugetlbpage.rst
+
+
 nr_hugepages_mempolicy
 ======================
 
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index e0b2209ab71c..20d7edf62a6a 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -351,4 +351,13 @@  void arch_remove_linear_mapping(u64 start, u64 size);
 extern bool mhp_supports_memmap_on_memory(unsigned long size);
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
+#ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
+bool mhp_memmap_on_memory(void);
+#else
+static inline bool mhp_memmap_on_memory(void)
+{
+	return false;
+}
+#endif
+
 #endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 836d1117f08b..3bcc8f25bd50 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -10,6 +10,7 @@ 
 
 #define pr_fmt(fmt)	"HugeTLB: " fmt
 
+#include <linux/memory_hotplug.h>
 #include "hugetlb_vmemmap.h"
 
 /*
@@ -118,17 +119,14 @@  void __init hugetlb_vmemmap_init(struct hstate *h)
 	BUILD_BUG_ON(__NR_USED_SUBPAGE >=
 		     RESERVE_VMEMMAP_SIZE / sizeof(struct page));
 
-	if (!hugetlb_free_vmemmap_enabled())
-		return;
-
-	if (IS_ENABLED(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON) &&
-	    !is_power_of_2(sizeof(struct page))) {
+	if (!is_power_of_2(sizeof(struct page))) {
 		/*
 		 * The hugetlb_free_vmemmap_enabled_key can be enabled when
 		 * CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON. It should
 		 * be disabled if "struct page" crosses page boundaries.
 		 */
-		static_branch_disable(&hugetlb_free_vmemmap_enabled_key);
+		if (IS_ENABLED(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON))
+			static_branch_disable(&hugetlb_free_vmemmap_enabled_key);
 		return;
 	}
 
@@ -147,3 +145,35 @@  void __init hugetlb_vmemmap_init(struct hstate *h)
 	pr_info("can free %d vmemmap pages for %s\n", h->nr_free_vmemmap_pages,
 		h->name);
 }
+
+static struct ctl_table hugetlb_vmemmap_sysctls[] = {
+	{
+		.procname	= "hugetlb_free_vmemmap",
+		.data		= &hugetlb_free_vmemmap_enabled_key.key,
+		.mode		= 0644,
+		/* only handle a transition from default "0" to "1" */
+		.proc_handler	= proc_do_static_key,
+		.extra1		= SYSCTL_ONE,
+		.extra2		= SYSCTL_ONE,
+	},
+	{ }
+};
+
+static __init int hugetlb_vmemmap_sysctls_init(void)
+{
+	/*
+	 * The vmemmap pages cannot be optimized if
+	 * "memory_hotplug.memmap_on_memory" is enabled unless
+	 * "hugetlb_free_vmemmap" is enabled as well since
+	 * "hugetlb_free_vmemmap" takes precedence over
+	 * "memory_hotplug.memmap_on_memory".
+	 */
+	if (mhp_memmap_on_memory() && !hugetlb_free_vmemmap_enabled())
+		return 0;
+
+	if (is_power_of_2(sizeof(struct page)))
+		register_sysctl_init("vm", hugetlb_vmemmap_sysctls);
+
+	return 0;
+}
+late_initcall(hugetlb_vmemmap_sysctls_init);
diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
index cb2bef8f9e73..b67a159027f4 100644
--- a/mm/hugetlb_vmemmap.h
+++ b/mm/hugetlb_vmemmap.h
@@ -21,7 +21,9 @@  void hugetlb_vmemmap_init(struct hstate *h);
  */
 static inline unsigned int free_vmemmap_pages_per_hpage(struct hstate *h)
 {
-	return h->nr_free_vmemmap_pages;
+	if (hugetlb_free_vmemmap_enabled())
+		return h->nr_free_vmemmap_pages;
+	return 0;
 }
 #else
 static inline int alloc_huge_page_vmemmap(struct hstate *h, struct page *head)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c226a337c1ef..c2115e566abc 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -50,6 +50,11 @@  static bool memmap_on_memory __ro_after_init;
 #ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
 module_param(memmap_on_memory, bool, 0444);
 MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug");
+
+bool mhp_memmap_on_memory(void)
+{
+	return memmap_on_memory;
+}
 #endif
 
 enum {