Message ID | 20240817045516.58037-1-21cnbao@gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [v5] mm: override mTHP "enabled" defaults at kernel cmdline | expand |
On 17.08.24 06:55, Barry Song wrote: > From: Ryan Roberts <ryan.roberts@arm.com> > > Add thp_anon= cmdline parameter to allow specifying the default enablement > of each supported anon THP size. The parameter accepts the following > format and can be provided multiple times to configure each size: > > thp_anon=<size>,<size>[KMG]:<value>;<size>-<size>[KMG]:<value> > > An example: > > thp_anon=16K-64K:always;128K,512K:inherit;256K:madvise;1M-2M:never > > See Documentation/admin-guide/mm/transhuge.rst for more details. > > Configuring the defaults at boot time is useful to allow early user > space to take advantage of mTHP before its been configured through > sysfs. > > Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> > Co-developed-by: Barry Song <v-songbaohua@oppo.com> > Signed-off-by: Barry Song <v-songbaohua@oppo.com> > Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com> > Tested-by: Baolin Wang <baolin.wang@linux.alibaba.com> > Cc: David Hildenbrand <david@redhat.com> > Cc: Jonathan Corbet <corbet@lwn.net> > Cc: Lance Yang <ioworker0@gmail.com> Acked-by: David Hildenbrand <david@redhat.com> Some nits: > --- > -v5: > * collect Baolin's reviewed-by and tested-by tags, thanks! > * use get_order and check size is power 2, David, Baolin; > * use proper __initdata > > .../admin-guide/kernel-parameters.txt | 9 ++ > Documentation/admin-guide/mm/transhuge.rst | 38 +++++-- > mm/huge_memory.c | 100 +++++++++++++++++- > 3 files changed, 139 insertions(+), 8 deletions(-) > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > index f0057bac20fb..d0d141d50638 100644 > --- a/Documentation/admin-guide/kernel-parameters.txt > +++ b/Documentation/admin-guide/kernel-parameters.txt > @@ -6629,6 +6629,15 @@ > <deci-seconds>: poll all this frequency > 0: no polling (default) > > + thp_anon= [KNL] > + Format: <size>,<size>[KMG]:<state>;<size>-<size>[KMG]:<state> > + state is one of "always", "madvise", "never" or "inherit". > + Can be used to control the default behavior of the s/Can be used to control/Control/ > + system with respect to anonymous transparent hugepages. > + Can be used multiple times for multiple anon THP sizes. > + See Documentation/admin-guide/mm/transhuge.rst for more > + details. > + > threadirqs [KNL,EARLY] > Force threading of all interrupt handlers except those > marked explicitly IRQF_NO_THREAD. > diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst > index 60522f49178b..4468851b6ecb 100644 > --- a/Documentation/admin-guide/mm/transhuge.rst > +++ b/Documentation/admin-guide/mm/transhuge.rst > @@ -284,13 +284,37 @@ that THP is shared. Exceeding the number would block the collapse:: > > A higher value may increase memory footprint for some workloads. [...] > unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, > unsigned long vm_flags, > @@ -756,7 +757,10 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj) > * disable all other sizes. powerpc's PMD_ORDER isn't a compile-time > * constant so we have to do this here. > */ > - huge_anon_orders_inherit = BIT(PMD_ORDER); > + if (!anon_orders_configured) { > + huge_anon_orders_inherit = BIT(PMD_ORDER); > + anon_orders_configured = true; > + } > > *hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj); > if (unlikely(!*hugepage_kobj)) { > @@ -941,6 +945,100 @@ static int __init setup_transparent_hugepage(char *str) > } > __setup("transparent_hugepage=", setup_transparent_hugepage); > > +static inline int get_order_from_str(const char *size_str) > +{ > + unsigned long size; > + char *endptr; > + int order; > + > + size = memparse(size_str, &endptr); > + > + if (!is_power_of_2(size)) > + goto err; > + order = get_order(size); > + if ((1 << order) & ~THP_ORDERS_ALL_ANON) Could do "if (BIT(order) & ~THP_ORDERS_ALL_ANON)" > + goto err; > + > + return order; > +err: > + pr_err("invalid size %s in thp_anon boot parameter\n", size_str); > + return -EINVAL; > +} > + > +static char str_dup[PAGE_SIZE] __initdata; > +static int __init setup_thp_anon(char *str) > +{ > + char *token, *range, *policy, *subtoken; > + unsigned long always, inherit, madvise; > + char *start_size, *end_size; > + int start, end, nr; > + char *p; > + > + if (!str || strlen(str) + 1 > PAGE_SIZE) > + goto err; > + strcpy(str_dup, str); > + > + always = huge_anon_orders_always; > + madvise = huge_anon_orders_madvise; > + inherit = huge_anon_orders_inherit; Should we only pickup these values if "anon_orders_configured", otherwise start with 0? Likely that's implicit right now. > + p = str_dup; > + while ((token = strsep(&p, ";")) != NULL) { > + range = strsep(&token, ":"); > + policy = token; > + > + if (!policy) > + goto err; > + > + while ((subtoken = strsep(&range, ",")) != NULL) { > + if (strchr(subtoken, '-')) { > + start_size = strsep(&subtoken, "-"); > + end_size = subtoken; > + > + start = get_order_from_str(start_size); > + end = get_order_from_str(end_size); > + } else { > + start = end = get_order_from_str(subtoken); > + } > + > + if (start < 0 || end < 0 || start > end) > + goto err; > + > + nr = end - start + 1; This only works as long as we don't have any "Holes", which is the case right now. > + if (!strcmp(policy, "always")) { > + bitmap_set(&always, start, nr); > + bitmap_clear(&inherit, start, nr); > + bitmap_clear(&madvise, start, nr); > + } else if (!strcmp(policy, "madvise")) { > + bitmap_set(&madvise, start, nr); > + bitmap_clear(&inherit, start, nr); > + bitmap_clear(&always, start, nr); > + } else if (!strcmp(policy, "inherit")) { > + bitmap_set(&inherit, start, nr); > + bitmap_clear(&madvise, start, nr); > + bitmap_clear(&always, start, nr); > + } else if (!strcmp(policy, "never")) { > + bitmap_clear(&inherit, start, nr); > + bitmap_clear(&madvise, start, nr); > + bitmap_clear(&always, start, nr); > + } else { > + pr_err("invalid policy %s in thp_anon boot parameter\n", policy); > + goto err; > + } > + } > + } > + > + huge_anon_orders_always = always; > + huge_anon_orders_madvise = madvise; > + huge_anon_orders_inherit = inherit; > + anon_orders_configured = true; > + return 1; > + > +err: > + pr_warn("thp_anon=%s: cannot parse, ignored\n", str); "cannot parse, ignored" -> "error parsing string, ignoring setting" ? > + return 0; > +} > +__setup("thp_anon=", setup_thp_anon); > + > pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) > { > if (likely(vma->vm_flags & VM_WRITE))
On Tue, Aug 20, 2024 at 7:53 PM David Hildenbrand <david@redhat.com> wrote: > > On 17.08.24 06:55, Barry Song wrote: > > From: Ryan Roberts <ryan.roberts@arm.com> > > > > Add thp_anon= cmdline parameter to allow specifying the default enablement > > of each supported anon THP size. The parameter accepts the following > > format and can be provided multiple times to configure each size: > > > > thp_anon=<size>,<size>[KMG]:<value>;<size>-<size>[KMG]:<value> > > > > An example: > > > > thp_anon=16K-64K:always;128K,512K:inherit;256K:madvise;1M-2M:never > > > > See Documentation/admin-guide/mm/transhuge.rst for more details. > > > > Configuring the defaults at boot time is useful to allow early user > > space to take advantage of mTHP before its been configured through > > sysfs. > > > > Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> > > Co-developed-by: Barry Song <v-songbaohua@oppo.com> > > Signed-off-by: Barry Song <v-songbaohua@oppo.com> > > Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com> > > Tested-by: Baolin Wang <baolin.wang@linux.alibaba.com> > > Cc: David Hildenbrand <david@redhat.com> > > Cc: Jonathan Corbet <corbet@lwn.net> > > Cc: Lance Yang <ioworker0@gmail.com> > > > Acked-by: David Hildenbrand <david@redhat.com> > thanks! > Some nits: > > > --- > > -v5: > > * collect Baolin's reviewed-by and tested-by tags, thanks! > > * use get_order and check size is power 2, David, Baolin; > > * use proper __initdata > > > > .../admin-guide/kernel-parameters.txt | 9 ++ > > Documentation/admin-guide/mm/transhuge.rst | 38 +++++-- > > mm/huge_memory.c | 100 +++++++++++++++++- > > 3 files changed, 139 insertions(+), 8 deletions(-) > > > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > > index f0057bac20fb..d0d141d50638 100644 > > --- a/Documentation/admin-guide/kernel-parameters.txt > > +++ b/Documentation/admin-guide/kernel-parameters.txt > > @@ -6629,6 +6629,15 @@ > > <deci-seconds>: poll all this frequency > > 0: no polling (default) > > > > + thp_anon= [KNL] > > + Format: <size>,<size>[KMG]:<state>;<size>-<size>[KMG]:<state> > > + state is one of "always", "madvise", "never" or "inherit". > > + Can be used to control the default behavior of the > > s/Can be used to control/Control/ ack > > > + system with respect to anonymous transparent hugepages. > > + Can be used multiple times for multiple anon THP sizes. > > + See Documentation/admin-guide/mm/transhuge.rst for more > > + details. > > + > > threadirqs [KNL,EARLY] > > Force threading of all interrupt handlers except those > > marked explicitly IRQF_NO_THREAD. > > diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst > > index 60522f49178b..4468851b6ecb 100644 > > --- a/Documentation/admin-guide/mm/transhuge.rst > > +++ b/Documentation/admin-guide/mm/transhuge.rst > > @@ -284,13 +284,37 @@ that THP is shared. Exceeding the number would block the collapse:: > > > > A higher value may increase memory footprint for some workloads. > > [...] > > > unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, > > unsigned long vm_flags, > > @@ -756,7 +757,10 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj) > > * disable all other sizes. powerpc's PMD_ORDER isn't a compile-time > > * constant so we have to do this here. > > */ > > - huge_anon_orders_inherit = BIT(PMD_ORDER); > > + if (!anon_orders_configured) { > > + huge_anon_orders_inherit = BIT(PMD_ORDER); > > + anon_orders_configured = true; I realized this is redundant since anon_orders_configured won't be accessed later. so i would like to also drop "anon_orders_configured = true" in v6. > > + } > > > > *hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj); > > if (unlikely(!*hugepage_kobj)) { > > @@ -941,6 +945,100 @@ static int __init setup_transparent_hugepage(char *str) > > } > > __setup("transparent_hugepage=", setup_transparent_hugepage); > > > > +static inline int get_order_from_str(const char *size_str) > > +{ > > + unsigned long size; > > + char *endptr; > > + int order; > > + > > + size = memparse(size_str, &endptr); > > + > > + if (!is_power_of_2(size)) > > + goto err; > > + order = get_order(size); > > + if ((1 << order) & ~THP_ORDERS_ALL_ANON) > > Could do > > "if (BIT(order) & ~THP_ORDERS_ALL_ANON)" ack > > > + goto err; > > + > > + return order; > > +err: > > + pr_err("invalid size %s in thp_anon boot parameter\n", size_str); > > + return -EINVAL; > > +} > > + > > +static char str_dup[PAGE_SIZE] __initdata; > > +static int __init setup_thp_anon(char *str) > > +{ > > + char *token, *range, *policy, *subtoken; > > + unsigned long always, inherit, madvise; > > + char *start_size, *end_size; > > + int start, end, nr; > > + char *p; > > + > > + if (!str || strlen(str) + 1 > PAGE_SIZE) > > + goto err; > > + strcpy(str_dup, str); > > + > > + always = huge_anon_orders_always; > > + madvise = huge_anon_orders_madvise; > > + inherit = huge_anon_orders_inherit; > > Should we only pickup these values if "anon_orders_configured", > otherwise start with 0? Likely that's implicit right now. My point is that, initially, those values are always 0, so copying them won't cause any issues. Of course, we could add a check like if (anon_orders_configured) copy... but it doesn't seem to be a hot path by any means? in that case, i will have to initialize: unsigned long always = 0, inherit = 0, madvise = 0; > > > + p = str_dup; > > + while ((token = strsep(&p, ";")) != NULL) { > > + range = strsep(&token, ":"); > > + policy = token; > > + > > + if (!policy) > > + goto err; > > + > > + while ((subtoken = strsep(&range, ",")) != NULL) { > > + if (strchr(subtoken, '-')) { > > + start_size = strsep(&subtoken, "-"); > > + end_size = subtoken; > > + > > + start = get_order_from_str(start_size); > > + end = get_order_from_str(end_size); > > + } else { > > + start = end = get_order_from_str(subtoken); > > + } > > + > > + if (start < 0 || end < 0 || start > end) > > + goto err; > > + > > + nr = end - start + 1; > > This only works as long as we don't have any "Holes", which is the case > right now. Right. If a gap appears in the future, I can verify that all bits from start to end are valid against THP_ORDERS_ALL_ANON. > > > + if (!strcmp(policy, "always")) { > > + bitmap_set(&always, start, nr); > > + bitmap_clear(&inherit, start, nr); > > + bitmap_clear(&madvise, start, nr); > > + } else if (!strcmp(policy, "madvise")) { > > + bitmap_set(&madvise, start, nr); > > + bitmap_clear(&inherit, start, nr); > > + bitmap_clear(&always, start, nr); > > + } else if (!strcmp(policy, "inherit")) { > > + bitmap_set(&inherit, start, nr); > > + bitmap_clear(&madvise, start, nr); > > + bitmap_clear(&always, start, nr); > > + } else if (!strcmp(policy, "never")) { > > + bitmap_clear(&inherit, start, nr); > > + bitmap_clear(&madvise, start, nr); > > + bitmap_clear(&always, start, nr); > > + } else { > > + pr_err("invalid policy %s in thp_anon boot parameter\n", policy); > > + goto err; > > + } > > + } > > + } > > + > > + huge_anon_orders_always = always; > > + huge_anon_orders_madvise = madvise; > > + huge_anon_orders_inherit = inherit; > > + anon_orders_configured = true; > > + return 1; > > + > > +err: > > + pr_warn("thp_anon=%s: cannot parse, ignored\n", str); > > "cannot parse, ignored" -> "error parsing string, ignoring setting" ? Ack. > > > + return 0; > > +} > > +__setup("thp_anon=", setup_thp_anon); > > + > > pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) > > { > > if (likely(vma->vm_flags & VM_WRITE)) > > -- > Cheers, > > David / dhildenb > Thanks Barry
long __thp_vma_allowable_orders(struct vm_area_struct *vma,>>> unsigned long vm_flags, >>> @@ -756,7 +757,10 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj) >>> * disable all other sizes. powerpc's PMD_ORDER isn't a compile-time >>> * constant so we have to do this here. >>> */ >>> - huge_anon_orders_inherit = BIT(PMD_ORDER); >>> + if (!anon_orders_configured) { >>> + huge_anon_orders_inherit = BIT(PMD_ORDER); >>> + anon_orders_configured = true; > > I realized this is redundant since anon_orders_configured won't be > accessed later. > so i would like to also drop "anon_orders_configured = true" in v6. Makes sense. >>> +static char str_dup[PAGE_SIZE] __initdata; >>> +static int __init setup_thp_anon(char *str) >>> +{ >>> + char *token, *range, *policy, *subtoken; >>> + unsigned long always, inherit, madvise; >>> + char *start_size, *end_size; >>> + int start, end, nr; >>> + char *p; >>> + >>> + if (!str || strlen(str) + 1 > PAGE_SIZE) >>> + goto err; >>> + strcpy(str_dup, str); >>> + >>> + always = huge_anon_orders_always; >>> + madvise = huge_anon_orders_madvise; >>> + inherit = huge_anon_orders_inherit; >> >> Should we only pickup these values if "anon_orders_configured", >> otherwise start with 0? Likely that's implicit right now. > > My point is that, initially, those values are always 0, so copying > them won't cause any issues. Right, it's more a conceptual thing: on the first cmdline configuration, we start from scratch. Afterwards we start with the state that the previous configuration left behind. I'm fine with leaving it as is as well, whatever you prefer.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f0057bac20fb..d0d141d50638 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6629,6 +6629,15 @@ <deci-seconds>: poll all this frequency 0: no polling (default) + thp_anon= [KNL] + Format: <size>,<size>[KMG]:<state>;<size>-<size>[KMG]:<state> + state is one of "always", "madvise", "never" or "inherit". + Can be used to control the default behavior of the + system with respect to anonymous transparent hugepages. + Can be used multiple times for multiple anon THP sizes. + See Documentation/admin-guide/mm/transhuge.rst for more + details. + threadirqs [KNL,EARLY] Force threading of all interrupt handlers except those marked explicitly IRQF_NO_THREAD. diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst index 60522f49178b..4468851b6ecb 100644 --- a/Documentation/admin-guide/mm/transhuge.rst +++ b/Documentation/admin-guide/mm/transhuge.rst @@ -284,13 +284,37 @@ that THP is shared. Exceeding the number would block the collapse:: A higher value may increase memory footprint for some workloads. -Boot parameter -============== - -You can change the sysfs boot time defaults of Transparent Hugepage -Support by passing the parameter ``transparent_hugepage=always`` or -``transparent_hugepage=madvise`` or ``transparent_hugepage=never`` -to the kernel command line. +Boot parameters +=============== + +You can change the sysfs boot time default for the top-level "enabled" +control by passing the parameter ``transparent_hugepage=always`` or +``transparent_hugepage=madvise`` or ``transparent_hugepage=never`` to the +kernel command line. + +Alternatively, each supported anonymous THP size can be controlled by +passing ``thp_anon=<size>,<size>[KMG]:<state>;<size>-<size>[KMG]:<state>``, +where ``<size>`` is the THP size (must be a power of 2 of PAGE_SIZE and +supported anonymous THP) and ``<state>`` is one of ``always``, ``madvise``, +``never`` or ``inherit``. + +For example, the following will set 16K, 32K, 64K THP to ``always``, +set 128K, 512K to ``inherit``, set 256K to ``madvise`` and 1M, 2M +to ``never``:: + + thp_anon=16K-64K:always;128K,512K:inherit;256K:madvise;1M-2M:never + +``thp_anon=`` may be specified multiple times to configure all THP sizes as +required. If ``thp_anon=`` is specified at least once, any anon THP sizes +not explicitly configured on the command line are implicitly set to +``never``. + +``transparent_hugepage`` setting only affects the global toggle. If +``thp_anon`` is not specified, PMD_ORDER THP will default to ``inherit``. +However, if a valid ``thp_anon`` setting is provided by the user, the +PMD_ORDER THP policy will be overridden. If the policy for PMD_ORDER +is not defined within a valid ``thp_anon``, its policy will default to +``never``. Hugepages in tmpfs/shmem ======================== diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 69cef10ed9aa..c8ca577526cf 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -82,6 +82,7 @@ unsigned long huge_zero_pfn __read_mostly = ~0UL; unsigned long huge_anon_orders_always __read_mostly; unsigned long huge_anon_orders_madvise __read_mostly; unsigned long huge_anon_orders_inherit __read_mostly; +static bool anon_orders_configured __initdata; unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, unsigned long vm_flags, @@ -756,7 +757,10 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj) * disable all other sizes. powerpc's PMD_ORDER isn't a compile-time * constant so we have to do this here. */ - huge_anon_orders_inherit = BIT(PMD_ORDER); + if (!anon_orders_configured) { + huge_anon_orders_inherit = BIT(PMD_ORDER); + anon_orders_configured = true; + } *hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj); if (unlikely(!*hugepage_kobj)) { @@ -941,6 +945,100 @@ static int __init setup_transparent_hugepage(char *str) } __setup("transparent_hugepage=", setup_transparent_hugepage); +static inline int get_order_from_str(const char *size_str) +{ + unsigned long size; + char *endptr; + int order; + + size = memparse(size_str, &endptr); + + if (!is_power_of_2(size)) + goto err; + order = get_order(size); + if ((1 << order) & ~THP_ORDERS_ALL_ANON) + goto err; + + return order; +err: + pr_err("invalid size %s in thp_anon boot parameter\n", size_str); + return -EINVAL; +} + +static char str_dup[PAGE_SIZE] __initdata; +static int __init setup_thp_anon(char *str) +{ + char *token, *range, *policy, *subtoken; + unsigned long always, inherit, madvise; + char *start_size, *end_size; + int start, end, nr; + char *p; + + if (!str || strlen(str) + 1 > PAGE_SIZE) + goto err; + strcpy(str_dup, str); + + always = huge_anon_orders_always; + madvise = huge_anon_orders_madvise; + inherit = huge_anon_orders_inherit; + p = str_dup; + while ((token = strsep(&p, ";")) != NULL) { + range = strsep(&token, ":"); + policy = token; + + if (!policy) + goto err; + + while ((subtoken = strsep(&range, ",")) != NULL) { + if (strchr(subtoken, '-')) { + start_size = strsep(&subtoken, "-"); + end_size = subtoken; + + start = get_order_from_str(start_size); + end = get_order_from_str(end_size); + } else { + start = end = get_order_from_str(subtoken); + } + + if (start < 0 || end < 0 || start > end) + goto err; + + nr = end - start + 1; + if (!strcmp(policy, "always")) { + bitmap_set(&always, start, nr); + bitmap_clear(&inherit, start, nr); + bitmap_clear(&madvise, start, nr); + } else if (!strcmp(policy, "madvise")) { + bitmap_set(&madvise, start, nr); + bitmap_clear(&inherit, start, nr); + bitmap_clear(&always, start, nr); + } else if (!strcmp(policy, "inherit")) { + bitmap_set(&inherit, start, nr); + bitmap_clear(&madvise, start, nr); + bitmap_clear(&always, start, nr); + } else if (!strcmp(policy, "never")) { + bitmap_clear(&inherit, start, nr); + bitmap_clear(&madvise, start, nr); + bitmap_clear(&always, start, nr); + } else { + pr_err("invalid policy %s in thp_anon boot parameter\n", policy); + goto err; + } + } + } + + huge_anon_orders_always = always; + huge_anon_orders_madvise = madvise; + huge_anon_orders_inherit = inherit; + anon_orders_configured = true; + return 1; + +err: + pr_warn("thp_anon=%s: cannot parse, ignored\n", str); + return 0; +} +__setup("thp_anon=", setup_thp_anon); + pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) { if (likely(vma->vm_flags & VM_WRITE))