Message ID | 20200603065049.11598-1-gavin.guo@canonical.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | mm: thp: Add new kernel parameters transparent_hugepage_defrag/khugepaged_defrag | expand |
On 6/3/20 8:50 AM, Gavin Guo wrote: > There is no way to set up the defrag options in boot time. And it's > useful to set it up by default instead of making it work by a > systemd/upstart service or put the command to set up defrag inside > /etc/rc.local. > > Signed-off-by: Gavin Guo <gavin.guo@canonical.com> Well, maybe isntead of adding these handlers, we could extend the new boot parameter sysctl support (handling procfs /proc/sys/) to sysfs (/sys) as well, as Eric already suggested? [1] [1] https://lore.kernel.org/linux-api/87bloj2skm.fsf@x220.int.ebiederm.org/ > --- > .../admin-guide/kernel-parameters.txt | 18 ++++++++ > mm/huge_memory.c | 43 +++++++++++++++++++ > mm/khugepaged.c | 21 +++++++++ > 3 files changed, 82 insertions(+) > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > index 6253849afac2..a9fd020d78db 100644 > --- a/Documentation/admin-guide/kernel-parameters.txt > +++ b/Documentation/admin-guide/kernel-parameters.txt > @@ -2149,6 +2149,16 @@ > kgdbwait [KGDB] Stop kernel execution and enter the > kernel debugger at the earliest opportunity. > > + khugepaged_defrag= > + [KNL] > + Format: { "0" | "1" } > + 0 - disable the defrag > + 1 - enable the defrag > + Control the defrag efforts when generating the > + transparent hugepages through khugepaged. > + See Documentation/admin-guide/mm/transhuge.rst > + for more details. > + > kmac= [MIPS] korina ethernet MAC address. > Configure the RouterBoard 532 series on-chip > Ethernet adapter MAC address. > @@ -5146,6 +5156,14 @@ > See Documentation/admin-guide/mm/transhuge.rst > for more details. > > + transparent_hugepage_defrag= > + [KNL] > + Format: [always|defer|defer+madvise|madvise|never] > + Control the defrag efforts when generating the > + transparent hugepages. > + See Documentation/admin-guide/mm/transhuge.rst > + for more details. > + > tsc= Disable clocksource stability checks for TSC. 
> Format: <string> > [x86] reliable: mark tsc clocksource as reliable, this > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index 8091b780cd7a..86b20a3a1aac 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -481,6 +481,49 @@ static int __init setup_transparent_hugepage(char *str) > } > __setup("transparent_hugepage=", setup_transparent_hugepage); > > +static int __init setup_transparent_hugepage_defrag(char *str) > +{ > + int ret = 0; > + if (!str) > + goto out; > + if (!strcmp(str, "always")) { > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); > + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); > + ret = 1; > + } else if (!strcmp(str, "defer+madvise")) { > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); > + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); > + ret = 1; > + } else if (!strcmp(str, "defer")) { > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); > + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); > + ret = 1; > + } else if (!strcmp(str, "madvise")) { > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); > + 
set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); > + ret = 1; > + } else if (!strcmp(str, "never")) { > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); > + ret = 1; > + } > +out: > + if (!ret) > + pr_warn("transparent_hugepage_defrag= cannot parse, ignored\n"); > + return ret; > +} > +__setup("transparent_hugepage_defrag=", setup_transparent_hugepage_defrag); > + > pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) > { > if (likely(vma->vm_flags & VM_WRITE)) > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > index b043c40a21d4..39bbf2107a23 100644 > --- a/mm/khugepaged.c > +++ b/mm/khugepaged.c > @@ -394,6 +394,27 @@ int __init khugepaged_init(void) > return 0; > } > > +static int __init setup_khugepaged_defrag(char *str) > +{ > + int ret = 0; > + if (!str) > + goto out; > + if (!strcmp(str, "0")) { > + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, > + &transparent_hugepage_flags); > + ret = 1; > + } else if (!strcmp(str, "1")) { > + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, > + &transparent_hugepage_flags); > + ret = 1; > + } > +out: > + if (!ret) > + pr_warn("khugepaged_defrag= cannot parse, ignored\n"); > + return ret; > +} > +__setup("khugepaged_defrag=", setup_khugepaged_defrag); > + > void __init khugepaged_destroy(void) > { > kmem_cache_destroy(mm_slot_cache); >
On Wed, 3 Jun 2020, Vlastimil Babka wrote: > > There is no way to set up the defrag options in boot time. And it's > > useful to set it up by default instead of making it work by a > > systemd/upstart service or put the command to set up defrag inside > > /etc/rc.local. > > > > Signed-off-by: Gavin Guo <gavin.guo@canonical.com> > > Well, maybe instead of adding these handlers, we could extend the new boot > parameter sysctl support (handling procfs /proc/sys/) to sysfs (/sys) as well, > as Eric already suggested? [1] > > [1] https://lore.kernel.org/linux-api/87bloj2skm.fsf@x220.int.ebiederm.org/ > Fully agreed, I think the solution needs to be more generic since thp defrag isn't special here. With the generic support to tune sysctls and sysfs tunables from the command line it seems like this patch would be redundant.
On Thu, Jun 4, 2020 at 3:27 AM David Rientjes <rientjes@google.com> wrote: > > On Wed, 3 Jun 2020, Vlastimil Babka wrote: > > > > There is no way to set up the defrag options in boot time. And it's > > > useful to set it up by default instead of making it work by a > > > systemd/upstart service or put the command to set up defrag inside > > > /etc/rc.local. > > > > > > Signed-off-by: Gavin Guo <gavin.guo@canonical.com> > > > > Well, maybe instead of adding these handlers, we could extend the new boot > > parameter sysctl support (handling procfs /proc/sys/) to sysfs (/sys) as well, > > as Eric already suggested? [1] > > > > [1] https://lore.kernel.org/linux-api/87bloj2skm.fsf@x220.int.ebiederm.org/ > > > > Fully agreed, I think the solution needs to be more generic since thp > defrag isn't special here. With the generic support to tune sysctls and > sysfs tunables from the command line it seems like this patch would be > redundant. Agreed, I'll try to investigate more on how to do that in a generic way.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6253849afac2..a9fd020d78db 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2149,6 +2149,16 @@ kgdbwait [KGDB] Stop kernel execution and enter the kernel debugger at the earliest opportunity. + khugepaged_defrag= + [KNL] + Format: { "0" | "1" } + 0 - disable the defrag + 1 - enable the defrag + Control the defrag efforts when generating the + transparent hugepages through khugepaged. + See Documentation/admin-guide/mm/transhuge.rst + for more details. + kmac= [MIPS] korina ethernet MAC address. Configure the RouterBoard 532 series on-chip Ethernet adapter MAC address. @@ -5146,6 +5156,14 @@ See Documentation/admin-guide/mm/transhuge.rst for more details. + transparent_hugepage_defrag= + [KNL] + Format: [always|defer|defer+madvise|madvise|never] + Control the defrag efforts when generating the + transparent hugepages. + See Documentation/admin-guide/mm/transhuge.rst + for more details. + tsc= Disable clocksource stability checks for TSC. 
Format: <string> [x86] reliable: mark tsc clocksource as reliable, this diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8091b780cd7a..86b20a3a1aac 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -481,6 +481,49 @@ static int __init setup_transparent_hugepage(char *str) } __setup("transparent_hugepage=", setup_transparent_hugepage); +static int __init setup_transparent_hugepage_defrag(char *str) +{ + int ret = 0; + if (!str) + goto out; + if (!strcmp(str, "always")) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); + ret = 1; + } else if (!strcmp(str, "defer+madvise")) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); + ret = 1; + } else if (!strcmp(str, "defer")) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); + ret = 1; + } else if (!strcmp(str, "madvise")) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, 
&transparent_hugepage_flags); + ret = 1; + } else if (!strcmp(str, "never")) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); + ret = 1; + } +out: + if (!ret) + pr_warn("transparent_hugepage_defrag= cannot parse, ignored\n"); + return ret; +} +__setup("transparent_hugepage_defrag=", setup_transparent_hugepage_defrag); + pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) { if (likely(vma->vm_flags & VM_WRITE)) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b043c40a21d4..39bbf2107a23 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -394,6 +394,27 @@ int __init khugepaged_init(void) return 0; } +static int __init setup_khugepaged_defrag(char *str) +{ + int ret = 0; + if (!str) + goto out; + if (!strcmp(str, "0")) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, + &transparent_hugepage_flags); + ret = 1; + } else if (!strcmp(str, "1")) { + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, + &transparent_hugepage_flags); + ret = 1; + } +out: + if (!ret) + pr_warn("khugepaged_defrag= cannot parse, ignored\n"); + return ret; +} +__setup("khugepaged_defrag=", setup_khugepaged_defrag); + void __init khugepaged_destroy(void) { kmem_cache_destroy(mm_slot_cache);
There is no way to set up the defrag options at boot time. It is useful to set them by default instead of relying on a systemd/upstart service or putting the command to set up defrag inside /etc/rc.local. Signed-off-by: Gavin Guo <gavin.guo@canonical.com> --- .../admin-guide/kernel-parameters.txt | 18 ++++++++ mm/huge_memory.c | 43 +++++++++++++++++++ mm/khugepaged.c | 21 +++++++++ 3 files changed, 82 insertions(+)