diff mbox series

mm: thp: Add new kernel parameters transparent_hugepage_defrag/khugepaged_defrag

Message ID 20200603065049.11598-1-gavin.guo@canonical.com (mailing list archive)
State New, archived
Headers show
Series mm: thp: Add new kernel parameters transparent_hugepage_defrag/khugepaged_defrag | expand

Commit Message

Gavin Guo June 3, 2020, 6:50 a.m. UTC
There is no way to set up the defrag options at boot time. It is
useful to set them up by default instead of relying on a
systemd/upstart service or putting the command to set up defrag inside
/etc/rc.local.

Signed-off-by: Gavin Guo <gavin.guo@canonical.com>
---
 .../admin-guide/kernel-parameters.txt         | 18 ++++++++
 mm/huge_memory.c                              | 43 +++++++++++++++++++
 mm/khugepaged.c                               | 21 +++++++++
 3 files changed, 82 insertions(+)

Comments

Vlastimil Babka June 3, 2020, 11:17 a.m. UTC | #1
On 6/3/20 8:50 AM, Gavin Guo wrote:
> There is no way to set up the defrag options at boot time. It is
> useful to set them up by default instead of relying on a
> systemd/upstart service or putting the command to set up defrag inside
> /etc/rc.local.
> 
> Signed-off-by: Gavin Guo <gavin.guo@canonical.com>

Well, maybe instead of adding these handlers, we could extend the new boot
parameter sysctl support (handling procfs /proc/sys/) to sysfs (/sys) as well,
as Eric already suggested? [1]

[1] https://lore.kernel.org/linux-api/87bloj2skm.fsf@x220.int.ebiederm.org/

> ---
>  .../admin-guide/kernel-parameters.txt         | 18 ++++++++
>  mm/huge_memory.c                              | 43 +++++++++++++++++++
>  mm/khugepaged.c                               | 21 +++++++++
>  3 files changed, 82 insertions(+)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 6253849afac2..a9fd020d78db 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -2149,6 +2149,16 @@
>  	kgdbwait	[KGDB] Stop kernel execution and enter the
>  			kernel debugger at the earliest opportunity.
>  
> +	khugepaged_defrag=
> +			[KNL]
> +			Format: { "0" | "1" }
> +			0 - disable the defrag
> +			1 - enable the defrag
> +			Control the defrag efforts when generating the
> +			transparent hugepages through khugepaged.
> +			See Documentation/admin-guide/mm/transhuge.rst
> +			for more details.
> +
>  	kmac=		[MIPS] korina ethernet MAC address.
>  			Configure the RouterBoard 532 series on-chip
>  			Ethernet adapter MAC address.
> @@ -5146,6 +5156,14 @@
>  			See Documentation/admin-guide/mm/transhuge.rst
>  			for more details.
>  
> +	transparent_hugepage_defrag=
> +			[KNL]
> +			Format: [always|defer|defer+madvise|madvise|never]
> +			Control the defrag efforts when generating the
> +			transparent hugepages.
> +			See Documentation/admin-guide/mm/transhuge.rst
> +			for more details.
> +
>  	tsc=		Disable clocksource stability checks for TSC.
>  			Format: <string>
>  			[x86] reliable: mark tsc clocksource as reliable, this
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 8091b780cd7a..86b20a3a1aac 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -481,6 +481,49 @@ static int __init setup_transparent_hugepage(char *str)
>  }
>  __setup("transparent_hugepage=", setup_transparent_hugepage);
>  
> +static int __init setup_transparent_hugepage_defrag(char *str)
> +{
> +	int ret = 0;
> +	if (!str)
> +		goto out;
> +	if (!strcmp(str, "always")) {
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
> +		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
> +		ret = 1;
> +	} else if (!strcmp(str, "defer+madvise")) {
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
> +		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
> +		ret = 1;
> +	} else if (!strcmp(str, "defer")) {
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
> +		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
> +		ret = 1;
> +	} else if (!strcmp(str, "madvise")) {
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
> +		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
> +		ret = 1;
> +	} else if (!strcmp(str, "never")) {
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
> +		ret = 1;
> +	}
> +out:
> +	if (!ret)
> +		pr_warn("transparent_hugepage_defrag= cannot parse, ignored\n");
> +	return ret;
> +}
> +__setup("transparent_hugepage_defrag=", setup_transparent_hugepage_defrag);
> +
>  pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
>  {
>  	if (likely(vma->vm_flags & VM_WRITE))
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index b043c40a21d4..39bbf2107a23 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -394,6 +394,27 @@ int __init khugepaged_init(void)
>  	return 0;
>  }
>  
> +static int __init setup_khugepaged_defrag(char *str)
> +{
> +	int ret = 0;
> +	if (!str)
> +		goto out;
> +	if (!strcmp(str, "0")) {
> +		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
> +			  &transparent_hugepage_flags);
> +		ret = 1;
> +	} else if (!strcmp(str, "1")) {
> +		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
> +			&transparent_hugepage_flags);
> +		ret = 1;
> +	}
> +out:
> +	if (!ret)
> +		pr_warn("khugepaged_defrag= cannot parse, ignored\n");
> +	return ret;
> +}
> +__setup("khugepaged_defrag=", setup_khugepaged_defrag);
> +
>  void __init khugepaged_destroy(void)
>  {
>  	kmem_cache_destroy(mm_slot_cache);
>
David Rientjes June 3, 2020, 7:27 p.m. UTC | #2
On Wed, 3 Jun 2020, Vlastimil Babka wrote:

> > There is no way to set up the defrag options at boot time. It is
> > useful to set them up by default instead of relying on a
> > systemd/upstart service or putting the command to set up defrag inside
> > /etc/rc.local.
> > 
> > Signed-off-by: Gavin Guo <gavin.guo@canonical.com>
> 
> Well, maybe instead of adding these handlers, we could extend the new boot
> parameter sysctl support (handling procfs /proc/sys/) to sysfs (/sys) as well,
> as Eric already suggested? [1]
> 
> [1] https://lore.kernel.org/linux-api/87bloj2skm.fsf@x220.int.ebiederm.org/
> 

Fully agreed, I think the solution needs to be more generic since thp 
defrag isn't special here.  With the generic support to tune sysctls and 
sysfs tunables from the command line it seems like this patch would be 
redundant.
Gavin Guo June 3, 2020, 10:09 p.m. UTC | #3
On Thu, Jun 4, 2020 at 3:27 AM David Rientjes <rientjes@google.com> wrote:
>
> On Wed, 3 Jun 2020, Vlastimil Babka wrote:
>
> > > There is no way to set up the defrag options at boot time. It is
> > > useful to set them up by default instead of relying on a
> > > systemd/upstart service or putting the command to set up defrag inside
> > > /etc/rc.local.
> > >
> > > Signed-off-by: Gavin Guo <gavin.guo@canonical.com>
> >
> > Well, maybe instead of adding these handlers, we could extend the new boot
> > parameter sysctl support (handling procfs /proc/sys/) to sysfs (/sys) as well,
> > as Eric already suggested? [1]
> >
> > [1] https://lore.kernel.org/linux-api/87bloj2skm.fsf@x220.int.ebiederm.org/
> >
>
> Fully agreed, I think the solution needs to be more generic since thp
> defrag isn't special here.  With the generic support to tune sysctls and
> sysfs tunables from the command line it seems like this patch would be
> redundant.

Agreed, I'll try to investigate more on how to do that in a generic way.
diff mbox series

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6253849afac2..a9fd020d78db 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2149,6 +2149,16 @@ 
 	kgdbwait	[KGDB] Stop kernel execution and enter the
 			kernel debugger at the earliest opportunity.
 
+	khugepaged_defrag=
+			[KNL]
+			Format: { "0" | "1" }
+			0 - disable the defrag
+			1 - enable the defrag
+			Control the defrag efforts when generating the
+			transparent hugepages through khugepaged.
+			See Documentation/admin-guide/mm/transhuge.rst
+			for more details.
+
 	kmac=		[MIPS] korina ethernet MAC address.
 			Configure the RouterBoard 532 series on-chip
 			Ethernet adapter MAC address.
@@ -5146,6 +5156,14 @@ 
 			See Documentation/admin-guide/mm/transhuge.rst
 			for more details.
 
+	transparent_hugepage_defrag=
+			[KNL]
+			Format: [always|defer|defer+madvise|madvise|never]
+			Control the defrag efforts when generating the
+			transparent hugepages.
+			See Documentation/admin-guide/mm/transhuge.rst
+			for more details.
+
 	tsc=		Disable clocksource stability checks for TSC.
 			Format: <string>
 			[x86] reliable: mark tsc clocksource as reliable, this
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8091b780cd7a..86b20a3a1aac 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -481,6 +481,49 @@  static int __init setup_transparent_hugepage(char *str)
 }
 __setup("transparent_hugepage=", setup_transparent_hugepage);
 
+static int __init setup_transparent_hugepage_defrag(char *str)
+{
+	int ret = 0;
+	if (!str)
+		goto out;
+	if (!strcmp(str, "always")) {
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
+		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
+		ret = 1;
+	} else if (!strcmp(str, "defer+madvise")) {
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
+		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+		ret = 1;
+	} else if (!strcmp(str, "defer")) {
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
+		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
+		ret = 1;
+	} else if (!strcmp(str, "madvise")) {
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
+		ret = 1;
+	} else if (!strcmp(str, "never")) {
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
+		ret = 1;
+	}
+out:
+	if (!ret)
+		pr_warn("transparent_hugepage_defrag= cannot parse, ignored\n");
+	return ret;
+}
+__setup("transparent_hugepage_defrag=", setup_transparent_hugepage_defrag);
+
 pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 {
 	if (likely(vma->vm_flags & VM_WRITE))
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b043c40a21d4..39bbf2107a23 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -394,6 +394,27 @@  int __init khugepaged_init(void)
 	return 0;
 }
 
+static int __init setup_khugepaged_defrag(char *str)
+{
+	int ret = 0;
+	if (!str)
+		goto out;
+	if (!strcmp(str, "0")) {
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
+			  &transparent_hugepage_flags);
+		ret = 1;
+	} else if (!strcmp(str, "1")) {
+		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
+			&transparent_hugepage_flags);
+		ret = 1;
+	}
+out:
+	if (!ret)
+		pr_warn("khugepaged_defrag= cannot parse, ignored\n");
+	return ret;
+}
+__setup("khugepaged_defrag=", setup_khugepaged_defrag);
+
 void __init khugepaged_destroy(void)
 {
 	kmem_cache_destroy(mm_slot_cache);