Message ID | 20241216165105.56185-3-dev.jain@arm.com (mailing list archive)
State      | New
Series     | khugepaged: Asynchronous mTHP collapse
On 2024/12/17 00:50, Dev Jain wrote:
> Pass order to alloc_charge_folio() and update mTHP statistics.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>
> ---
>  include/linux/huge_mm.h |  2 ++
>  mm/huge_memory.c        |  4 ++++
>  mm/khugepaged.c         | 13 +++++++++----
>  3 files changed, 15 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 93e509b6c00e..8b6d0fed99b3 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -119,6 +119,8 @@ enum mthp_stat_item {
>  	MTHP_STAT_ANON_FAULT_ALLOC,
>  	MTHP_STAT_ANON_FAULT_FALLBACK,
>  	MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
> +	MTHP_STAT_ANON_COLLAPSE_ALLOC,
> +	MTHP_STAT_ANON_COLLAPSE_ALLOC_FAILED,
>  	MTHP_STAT_ZSWPOUT,
>  	MTHP_STAT_SWPIN,
>  	MTHP_STAT_SWPIN_FALLBACK,
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 2da5520bfe24..2e582fad4c77 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -615,6 +615,8 @@ static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
>  DEFINE_MTHP_STAT_ATTR(anon_fault_alloc, MTHP_STAT_ANON_FAULT_ALLOC);
>  DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
>  DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
> +DEFINE_MTHP_STAT_ATTR(anon_collapse_alloc, MTHP_STAT_ANON_COLLAPSE_ALLOC);
> +DEFINE_MTHP_STAT_ATTR(anon_collapse_alloc_failed, MTHP_STAT_ANON_COLLAPSE_ALLOC_FAILED);
>  DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
>  DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
>  DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK);
> @@ -636,6 +638,8 @@ static struct attribute *anon_stats_attrs[] = {
>  	&anon_fault_alloc_attr.attr,
>  	&anon_fault_fallback_attr.attr,
>  	&anon_fault_fallback_charge_attr.attr,
> +	&anon_collapse_alloc_attr.attr,
> +	&anon_collapse_alloc_failed_attr.attr,
>  #ifndef CONFIG_SHMEM
>  	&zswpout_attr.attr,
>  	&swpin_attr.attr,
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 95643e6e5f31..02cd424b8e48 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1073,21 +1073,26 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm,
>  }
>
>  static int alloc_charge_folio(struct folio **foliop, struct mm_struct *mm,
> -			      struct collapse_control *cc)
> +			      int order, struct collapse_control *cc)
>  {
>  	gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() :
>  		     GFP_TRANSHUGE);
>  	int node = hpage_collapse_find_target_node(cc);
>  	struct folio *folio;
>
> -	folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, &cc->alloc_nmask);
> +	folio = __folio_alloc(gfp, order, node, &cc->alloc_nmask);
>  	if (!folio) {
>  		*foliop = NULL;
>  		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
> +		if (order != HPAGE_PMD_ORDER)
> +			count_mthp_stat(order, MTHP_STAT_ANON_COLLAPSE_ALLOC_FAILED);
>  		return SCAN_ALLOC_HUGE_PAGE_FAIL;
>  	}
>
>  	count_vm_event(THP_COLLAPSE_ALLOC);
> +	if (order != HPAGE_PMD_ORDER)
> +		count_mthp_stat(order, MTHP_STAT_ANON_COLLAPSE_ALLOC);

File collapse will also call alloc_charge_folio() to allocate a THP, so the term '_ANON_' is not suitable for a path shared by both anon and file collapse.
On Mon, Dec 16, 2024 at 10:20:55PM +0530, Dev Jain wrote:
>  static int alloc_charge_folio(struct folio **foliop, struct mm_struct *mm,
> -			      struct collapse_control *cc)
> +			      int order, struct collapse_control *cc)

unsigned, surely?

>  	if (!folio) {
>  		*foliop = NULL;
>  		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
> +		if (order != HPAGE_PMD_ORDER)
> +			count_mthp_stat(order, MTHP_STAT_ANON_COLLAPSE_ALLOC_FAILED);

i don't understand why we need new statistics here.  we already have a
signal that memory allocation failures are preventing collapse from
being successful, why do we care if it's mthp or actual thp?

>  	count_vm_event(THP_COLLAPSE_ALLOC);
> +	if (order != HPAGE_PMD_ORDER)
> +		count_mthp_stat(order, MTHP_STAT_ANON_COLLAPSE_ALLOC);

similar question
On 17/12/24 8:21 am, Baolin Wang wrote:
> On 2024/12/17 00:50, Dev Jain wrote:
>> Pass order to alloc_charge_folio() and update mTHP statistics.
>>
>> Signed-off-by: Dev Jain <dev.jain@arm.com>
>
> [...]
>
>>  	count_vm_event(THP_COLLAPSE_ALLOC);
>> +	if (order != HPAGE_PMD_ORDER)
>> +		count_mthp_stat(order, MTHP_STAT_ANON_COLLAPSE_ALLOC);
>
> File collapse will also call alloc_charge_folio() to allocate a THP, so
> the term '_ANON_' is not suitable for a path shared by both anon and
> file collapse.

But currently file collapse will only allocate a PMD folio, and I have extended it to mTHP only for anon, so it makes sense? Although I get your point in general...
On 16/12/2024 16:50, Dev Jain wrote:
> Pass order to alloc_charge_folio() and update mTHP statistics.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>
> ---
> [...]
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 95643e6e5f31..02cd424b8e48 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1073,21 +1073,26 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm,
>  }
>
>  static int alloc_charge_folio(struct folio **foliop, struct mm_struct *mm,
> -			      struct collapse_control *cc)
> +			      int order, struct collapse_control *cc)
>  {
>  	gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() :
>  		     GFP_TRANSHUGE);
>  	int node = hpage_collapse_find_target_node(cc);
>  	struct folio *folio;
>
> -	folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, &cc->alloc_nmask);
> +	folio = __folio_alloc(gfp, order, node, &cc->alloc_nmask);
>  	if (!folio) {
>  		*foliop = NULL;
>  		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
> +		if (order != HPAGE_PMD_ORDER)
> +			count_mthp_stat(order, MTHP_STAT_ANON_COLLAPSE_ALLOC_FAILED);

Bug? We should be calling count_mthp_stat() for all orders, but only calling count_vm_event(THP_*) for PMD_ORDER, as per the pattern laid out by the other mTHP stats.

The aim is for existing THP stats (which are implicitly only counting PMD-sized THP) to continue only to count PMD-sized THP. It's a userspace ABI and we were scared of the potential to break things if we changed the existing counters' semantics.

>  		return SCAN_ALLOC_HUGE_PAGE_FAIL;
>  	}
>
>  	count_vm_event(THP_COLLAPSE_ALLOC);
> +	if (order != HPAGE_PMD_ORDER)
> +		count_mthp_stat(order, MTHP_STAT_ANON_COLLAPSE_ALLOC);

Same problem.

Also, I agree with Baolin that we don't want "anon" in the title. This is a generic path used for file-backed memory. So once you fix the bug, the new stats will also be counting the file-backed memory too (although for now, only for PMD_ORDER).

> +
>  	if (unlikely(mem_cgroup_charge(folio, mm, gfp))) {
>  		folio_put(folio);
>  		*foliop = NULL;
> @@ -1124,7 +1129,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
>  	 */
>  	mmap_read_unlock(mm);
>
> -	result = alloc_charge_folio(&folio, mm, cc);
> +	result = alloc_charge_folio(&folio, mm, order, cc);

Where is order coming from? I'm guessing that's added later, so this patch won't compile on its own? Perhaps HPAGE_PMD_ORDER for now?

>  	if (result != SCAN_SUCCEED)
>  		goto out_nolock;
>
> @@ -1850,7 +1855,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
>  	VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
>  	VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
>
> -	result = alloc_charge_folio(&new_folio, mm, cc);
> +	result = alloc_charge_folio(&new_folio, mm, HPAGE_PMD_ORDER, cc);
>  	if (result != SCAN_SUCCEED)
>  		goto out;
>
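[Pulling Ryan's two corrections together, a minimal sketch of what the fixed helper might look like; this is illustrative only, and the generic MTHP_STAT_COLLAPSE_* names are placeholders, since at this point in the thread only the rename away from "_ANON_" has been suggested.]

static int alloc_charge_folio(struct folio **foliop, struct mm_struct *mm,
			      int order, struct collapse_control *cc)
{
	gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() :
		     GFP_TRANSHUGE);
	int node = hpage_collapse_find_target_node(cc);
	struct folio *folio;

	folio = __folio_alloc(gfp, order, node, &cc->alloc_nmask);
	if (!folio) {
		*foliop = NULL;
		/* Legacy counter: PMD-sized THP only, to preserve the ABI. */
		if (order == HPAGE_PMD_ORDER)
			count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
		/* Per-order counter: incremented for every order, PMD included.
		 * MTHP_STAT_COLLAPSE_ALLOC_FAILED is a placeholder name. */
		count_mthp_stat(order, MTHP_STAT_COLLAPSE_ALLOC_FAILED);
		return SCAN_ALLOC_HUGE_PAGE_FAIL;
	}

	if (order == HPAGE_PMD_ORDER)
		count_vm_event(THP_COLLAPSE_ALLOC);
	count_mthp_stat(order, MTHP_STAT_COLLAPSE_ALLOC);

	/* ... memcg charge and the rest of the function as in the patch ... */
}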
On 17/12/2024 04:17, Matthew Wilcox wrote:
> On Mon, Dec 16, 2024 at 10:20:55PM +0530, Dev Jain wrote:
>>  static int alloc_charge_folio(struct folio **foliop, struct mm_struct *mm,
>> -			      struct collapse_control *cc)
>> +			      int order, struct collapse_control *cc)
>
> unsigned, surely?

I'm obviously feeling argumentative this morning...

There are plenty of examples of order being signed and unsigned in the code base... it's a mess. Certainly the mTHP changes up to now have opted for (signed) int. And get_order(), which I would assume to be the authority, returns (signed) int.

Personally I prefer int for small positive integers; it's more compact. But if we're trying to establish a pattern of using unsigned int for all new uses of order, that's fine too; let's just document it somewhere.

>>  	if (!folio) {
>>  		*foliop = NULL;
>>  		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
>> +		if (order != HPAGE_PMD_ORDER)
>> +			count_mthp_stat(order, MTHP_STAT_ANON_COLLAPSE_ALLOC_FAILED);
>
> i don't understand why we need new statistics here.  we already have a
> signal that memory allocation failures are preventing collapse from
> being successful, why do we care if it's mthp or actual thp?

We previously decided that all existing THP stats would continue to only count PMD-sized THP for fear of breaking userspace in subtle ways, and instead would introduce new mTHP stats that can count for each order. We already have MTHP_STAT_ANON_FAULT_ALLOC and MTHP_STAT_ANON_FAULT_FALLBACK (amongst others), so these new stats fit the pattern well, IMHO. (Except for the bug that I called out in the other mail; we should call count_mthp_stat() for all orders and call count_vm_event() only for PMD_ORDER.)

>>  	count_vm_event(THP_COLLAPSE_ALLOC);
>> +	if (order != HPAGE_PMD_ORDER)
>> +		count_mthp_stat(order, MTHP_STAT_ANON_COLLAPSE_ALLOC);
>
> similar question
On 17/12/24 12:23 pm, Ryan Roberts wrote:
> On 16/12/2024 16:50, Dev Jain wrote:
>> Pass order to alloc_charge_folio() and update mTHP statistics.
>
> [...]
>
>> -	folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, &cc->alloc_nmask);
>> +	folio = __folio_alloc(gfp, order, node, &cc->alloc_nmask);
>>  	if (!folio) {
>>  		*foliop = NULL;
>>  		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
>> +		if (order != HPAGE_PMD_ORDER)
>> +			count_mthp_stat(order, MTHP_STAT_ANON_COLLAPSE_ALLOC_FAILED);
>
> Bug? We should be calling count_mthp_stat() for all orders, but only calling
> count_vm_event(THP_*) for PMD_ORDER, as per the pattern laid out by the other
> mTHP stats.

Ah okay.

> The aim is for existing THP stats (which are implicitly only counting PMD-sized
> THP) to continue only to count PMD-sized THP. It's a userspace ABI and we were
> scared of the potential to break things if we changed the existing counters'
> semantics.
>
>>  		return SCAN_ALLOC_HUGE_PAGE_FAIL;
>>  	}
>>
>>  	count_vm_event(THP_COLLAPSE_ALLOC);
>> +	if (order != HPAGE_PMD_ORDER)
>> +		count_mthp_stat(order, MTHP_STAT_ANON_COLLAPSE_ALLOC);
>
> Same problem.
>
> Also, I agree with Baolin that we don't want "anon" in the title. This is a
> generic path used for file-backed memory. So once you fix the bug, the new
> stats will also be counting the file-backed memory too (although for now,
> only for PMD_ORDER).

Sure.

>> +
>>  	if (unlikely(mem_cgroup_charge(folio, mm, gfp))) {
>>  		folio_put(folio);
>>  		*foliop = NULL;
>> @@ -1124,7 +1129,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
>>  	 */
>>  	mmap_read_unlock(mm);
>>
>> -	result = alloc_charge_folio(&folio, mm, cc);
>> +	result = alloc_charge_folio(&folio, mm, order, cc);
>
> Where is order coming from? I'm guessing that's added later, so this patch
> won't compile on its own? Perhaps HPAGE_PMD_ORDER for now?

Okay yes, this won't compile on its own. I'll ensure sequential buildability next time.

>>  	if (result != SCAN_SUCCEED)
>>  		goto out_nolock;
>>
>> @@ -1850,7 +1855,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
>>  	VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
>>  	VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
>>
>> -	result = alloc_charge_folio(&new_folio, mm, cc);
>> +	result = alloc_charge_folio(&new_folio, mm, HPAGE_PMD_ORDER, cc);
>>  	if (result != SCAN_SUCCEED)
>>  		goto out;
>>
On 17 Dec 2024, at 2:09, Ryan Roberts wrote:
> On 17/12/2024 04:17, Matthew Wilcox wrote:
>> On Mon, Dec 16, 2024 at 10:20:55PM +0530, Dev Jain wrote:
>>>  static int alloc_charge_folio(struct folio **foliop, struct mm_struct *mm,
>>> -			      struct collapse_control *cc)
>>> +			      int order, struct collapse_control *cc)
>>
>> unsigned, surely?
>
> I'm obviously feeling argumentative this morning...
>
> There are plenty of examples of order being signed and unsigned in the code
> base... it's a mess. Certainly the mTHP changes up to now have opted for
> (signed) int. And get_order(), which I would assume to be the authority,
> returns (signed) int.
>
> Personally I prefer int for small positive integers; it's more compact. But if
> we're trying to establish a pattern of using unsigned int for all new uses of
> order, that's fine too; let's just document it somewhere.

If unsigned is used, I wonder how we would handle the for (unsigned int order = 9; order >= 0; order--) case. We will need a signed order to make this work, right?

--
Best Regards,
Yan, Zi
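[A minimal userspace sketch of the pitfall Zi Yan is pointing at; illustrative only, not code from the patch. With an unsigned counter, "order >= 0" is a tautology, so the classic descending loop never terminates.]

#include <stdio.h>

int main(void)
{
	/*
	 * Broken: for an unsigned type, "order >= 0" is always true.
	 * When order reaches 0 the decrement wraps to UINT_MAX and the
	 * loop spins forever (most compilers warn about the comparison):
	 *
	 *	for (unsigned int order = 9; order >= 0; order--)
	 */

	/* One unsigned-safe shape: test-then-decrement, visiting 9..0. */
	for (unsigned int order = 10; order-- > 0; )
		printf("order %u\n", order);

	return 0;
}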
On Tue, 17 Dec 2024, Ryan Roberts wrote:
> We previously decided that all existing THP stats would continue to only count
> PMD-sized THP for fear of breaking userspace in subtle ways, and instead would
> introduce new mTHP stats that can count for each order. We already have
> MTHP_STAT_ANON_FAULT_ALLOC and MTHP_STAT_ANON_FAULT_FALLBACK (amongst others),
> so these new stats fit the pattern well, IMHO.

Could we move all the stats somewhere into sysfs where we can get them by page order? /proc/vmstat keeps getting new counters.
On 20/12/2024 17:41, Christoph Lameter (Ampere) wrote:
> On Tue, 17 Dec 2024, Ryan Roberts wrote:
>
>> We previously decided that all existing THP stats would continue to only count
>> PMD-sized THP for fear of breaking userspace in subtle ways, and instead would
>> introduce new mTHP stats that can count for each order. We already have
>> MTHP_STAT_ANON_FAULT_ALLOC and MTHP_STAT_ANON_FAULT_FALLBACK (amongst others),
>> so these new stats fit the pattern well, IMHO.
>
> Could we move all the stats somewhere into sysfs where we can get them by
> page order? /proc/vmstat keeps getting new counters.

This is exactly what has been done already for mthp stats. They live at:

/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/stats/

So there is a directory per size, and there is a file per stat.
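[For example, on a system with 4K base pages the per-order fault-allocation counters can be read like this; the exact set of hugepages-<size>kB directories depends on the architecture and base page size.]

cat /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/anon_fault_alloc
cat /sys/kernel/mm/transparent_hugepage/hugepages-2048kB/stats/anon_fault_alloc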
On Fri, 20 Dec 2024, Ryan Roberts wrote:
>> Could we move all the stats somewhere into sysfs where we can get them by
>> page order? /proc/vmstat keeps getting new counters.
>
> This is exactly what has been done already for mthp stats. They live at:
>
> /sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/stats/
>
> So there is a directory per size, and there is a file per stat.

Then let's drop all THP and huge page counters from vmstat.
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 93e509b6c00e..8b6d0fed99b3 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -119,6 +119,8 @@ enum mthp_stat_item {
 	MTHP_STAT_ANON_FAULT_ALLOC,
 	MTHP_STAT_ANON_FAULT_FALLBACK,
 	MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
+	MTHP_STAT_ANON_COLLAPSE_ALLOC,
+	MTHP_STAT_ANON_COLLAPSE_ALLOC_FAILED,
 	MTHP_STAT_ZSWPOUT,
 	MTHP_STAT_SWPIN,
 	MTHP_STAT_SWPIN_FALLBACK,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2da5520bfe24..2e582fad4c77 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -615,6 +615,8 @@ static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
 DEFINE_MTHP_STAT_ATTR(anon_fault_alloc, MTHP_STAT_ANON_FAULT_ALLOC);
 DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
 DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
+DEFINE_MTHP_STAT_ATTR(anon_collapse_alloc, MTHP_STAT_ANON_COLLAPSE_ALLOC);
+DEFINE_MTHP_STAT_ATTR(anon_collapse_alloc_failed, MTHP_STAT_ANON_COLLAPSE_ALLOC_FAILED);
 DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
 DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
 DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK);
@@ -636,6 +638,8 @@ static struct attribute *anon_stats_attrs[] = {
 	&anon_fault_alloc_attr.attr,
 	&anon_fault_fallback_attr.attr,
 	&anon_fault_fallback_charge_attr.attr,
+	&anon_collapse_alloc_attr.attr,
+	&anon_collapse_alloc_failed_attr.attr,
 #ifndef CONFIG_SHMEM
 	&zswpout_attr.attr,
 	&swpin_attr.attr,
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 95643e6e5f31..02cd424b8e48 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1073,21 +1073,26 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm,
 }
 
 static int alloc_charge_folio(struct folio **foliop, struct mm_struct *mm,
-			      struct collapse_control *cc)
+			      int order, struct collapse_control *cc)
 {
 	gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() :
 		     GFP_TRANSHUGE);
 	int node = hpage_collapse_find_target_node(cc);
 	struct folio *folio;
 
-	folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, &cc->alloc_nmask);
+	folio = __folio_alloc(gfp, order, node, &cc->alloc_nmask);
 	if (!folio) {
 		*foliop = NULL;
 		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
+		if (order != HPAGE_PMD_ORDER)
+			count_mthp_stat(order, MTHP_STAT_ANON_COLLAPSE_ALLOC_FAILED);
 		return SCAN_ALLOC_HUGE_PAGE_FAIL;
 	}
 
 	count_vm_event(THP_COLLAPSE_ALLOC);
+	if (order != HPAGE_PMD_ORDER)
+		count_mthp_stat(order, MTHP_STAT_ANON_COLLAPSE_ALLOC);
+
 	if (unlikely(mem_cgroup_charge(folio, mm, gfp))) {
 		folio_put(folio);
 		*foliop = NULL;
@@ -1124,7 +1129,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 	 */
 	mmap_read_unlock(mm);
 
-	result = alloc_charge_folio(&folio, mm, cc);
+	result = alloc_charge_folio(&folio, mm, order, cc);
 	if (result != SCAN_SUCCEED)
 		goto out_nolock;
 
@@ -1850,7 +1855,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 	VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
 	VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
 
-	result = alloc_charge_folio(&new_folio, mm, cc);
+	result = alloc_charge_folio(&new_folio, mm, HPAGE_PMD_ORDER, cc);
 	if (result != SCAN_SUCCEED)
 		goto out;
Pass order to alloc_charge_folio() and update mTHP statistics.

Signed-off-by: Dev Jain <dev.jain@arm.com>
---
 include/linux/huge_mm.h |  2 ++
 mm/huge_memory.c        |  4 ++++
 mm/khugepaged.c         | 13 +++++++++----
 3 files changed, 15 insertions(+), 4 deletions(-)