Message ID | 20241108212946.2642085-3-joshua.hahnjy@gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | memcg/hugetlb: Rework memcg hugetlb charging | expand |
On Fri, Nov 08, 2024 at 01:29:45PM -0800, Joshua Hahn wrote: > This patch introduces mem_cgroup_charge_hugetlb, which combines the > logic of mem_cgroup{try,commit}_hugetlb. This reduces the footprint of > memcg in hugetlb code, and also consolidates the error path that memcg > can take into just one point. > > Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com> > > --- > include/linux/memcontrol.h | 2 ++ > mm/hugetlb.c | 34 ++++++++++++---------------------- > mm/memcontrol.c | 19 +++++++++++++++++++ > 3 files changed, 33 insertions(+), 22 deletions(-) > > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h > index bb49e0d4b377..d90c1ac791f1 100644 > --- a/include/linux/memcontrol.h > +++ b/include/linux/memcontrol.h > @@ -678,6 +678,8 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, > int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, > long nr_pages); > > +int mem_cgroup_charge_hugetlb(struct folio* folio, gfp_t gfp); > + > int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, > gfp_t gfp, swp_entry_t entry); > > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index fbb10e52d7ea..6f6841483039 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -2967,21 +2967,13 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, > struct hugepage_subpool *spool = subpool_vma(vma); > struct hstate *h = hstate_vma(vma); > struct folio *folio; > - long map_chg, map_commit, nr_pages = pages_per_huge_page(h); > + long map_chg, map_commit; > long gbl_chg; > - int memcg_charge_ret, ret, idx; > + int ret, idx; > struct hugetlb_cgroup *h_cg = NULL; > - struct mem_cgroup *memcg; > bool deferred_reserve; > gfp_t gfp = htlb_alloc_mask(h) | __GFP_RETRY_MAYFAIL; > > - memcg = get_mem_cgroup_from_current(); > - memcg_charge_ret = mem_cgroup_hugetlb_try_charge(memcg, gfp, nr_pages); > - if (memcg_charge_ret == -ENOMEM) { > - mem_cgroup_put(memcg); > - return ERR_PTR(-ENOMEM); > - } > - > idx 
= hstate_index(h); > /* > * Examine the region/reserve map to determine if the process > @@ -2989,12 +2981,8 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, > * code of zero indicates a reservation exists (no change). > */ > map_chg = gbl_chg = vma_needs_reservation(h, vma, addr); > - if (map_chg < 0) { > - if (!memcg_charge_ret) > - mem_cgroup_cancel_charge(memcg, nr_pages); > - mem_cgroup_put(memcg); > + if (map_chg < 0) > return ERR_PTR(-ENOMEM); > - } > > /* > * Processes that did not create the mapping will have no > @@ -3092,10 +3080,15 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, > } > } > > - if (!memcg_charge_ret) > - mem_cgroup_commit_charge(folio, memcg); > - lruvec_stat_mod_folio(folio, NR_HUGETLB, pages_per_huge_page(h)); > - mem_cgroup_put(memcg); > + ret = mem_cgroup_charge_hugetlb(folio, gfp); > + if (ret == -ENOMEM) { > + spin_unlock_irq(&hugetlb_lock); spin_unlock_irq?? > + free_huge_folio(folio); free_huge_folio() will call lruvec_stat_mod_folio() unconditionally but you are only calling it on success. This may underflow the metric. 
> + return ERR_PTR(-ENOMEM); > + } > + else if (!ret) > + lruvec_stat_mod_folio(folio, NR_HUGETLB, > + pages_per_huge_page(h)); > > return folio; > > @@ -3110,9 +3103,6 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, > hugepage_subpool_put_pages(spool, 1); > out_end_reservation: > vma_end_reservation(h, vma, addr); > - if (!memcg_charge_ret) > - mem_cgroup_cancel_charge(memcg, nr_pages); > - mem_cgroup_put(memcg); > return ERR_PTR(-ENOSPC); > } > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index 97f63ec9c9fb..95ee77fe27af 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -4529,6 +4529,25 @@ int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, > return 0; > } > > +int mem_cgroup_charge_hugetlb(struct folio *folio, gfp_t gfp) > +{ > + struct mem_cgroup *memcg = get_mem_cgroup_from_current(); > + int ret = 0; > + > + if (mem_cgroup_disabled() || !memcg_accounts_hugetlb() || > + !memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) { > + ret = -EOPNOTSUPP; why EOPNOTSUPP? You need to return 0 here. We do want lruvec_stat_mod_folio() to be called. > + goto out; > + } > + > + if (charge_memcg(folio, memcg, gfp)) > + ret = -ENOMEM; > + > +out: > + mem_cgroup_put(memcg); > + return ret; > +} > + > /** > * mem_cgroup_swapin_charge_folio - Charge a newly allocated folio for swapin. > * @folio: folio to charge. > -- > 2.43.5 >
Hi Joshua, On Fri, 8 Nov 2024 13:29:45 -0800 Joshua Hahn <joshua.hahnjy@gmail.com> wrote: > This patch introduces mem_cgroup_charge_hugetlb, which combines the > logic of mem_cgroup{try,commit}_hugetlb. This reduces the footprint of Nit. Seems the regular expression is not technically correct? > memcg in hugetlb code, and also consolidates the error path that memcg > can take into just one point. > > Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com> > > --- > include/linux/memcontrol.h | 2 ++ > mm/hugetlb.c | 34 ++++++++++++---------------------- > mm/memcontrol.c | 19 +++++++++++++++++++ > 3 files changed, 33 insertions(+), 22 deletions(-) [...] > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index 97f63ec9c9fb..95ee77fe27af 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -4529,6 +4529,25 @@ int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, > return 0; > } > > +int mem_cgroup_charge_hugetlb(struct folio *folio, gfp_t gfp) Can we add a kernel-doc comment for this function? Maybe that for mem_cgroup_hugetlb_try_charge() can be stolen with only small updates? > +{ > + struct mem_cgroup *memcg = get_mem_cgroup_from_current(); > + int ret = 0; > + > + if (mem_cgroup_disabled() || !memcg_accounts_hugetlb() || > + !memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) { > + ret = -EOPNOTSUPP; > + goto out; > + } > + > + if (charge_memcg(folio, memcg, gfp)) > + ret = -ENOMEM; > + > +out: > + mem_cgroup_put(memcg); > + return ret; > +} > + > /** > * mem_cgroup_swapin_charge_folio - Charge a newly allocated folio for swapin. > * @folio: folio to charge. > -- > 2.43.5 Thanks, SJ
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bb49e0d4b377..d90c1ac791f1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -678,6 +678,8 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, long nr_pages); +int mem_cgroup_charge_hugetlb(struct folio* folio, gfp_t gfp); + int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index fbb10e52d7ea..6f6841483039 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2967,21 +2967,13 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, struct hugepage_subpool *spool = subpool_vma(vma); struct hstate *h = hstate_vma(vma); struct folio *folio; - long map_chg, map_commit, nr_pages = pages_per_huge_page(h); + long map_chg, map_commit; long gbl_chg; - int memcg_charge_ret, ret, idx; + int ret, idx; struct hugetlb_cgroup *h_cg = NULL; - struct mem_cgroup *memcg; bool deferred_reserve; gfp_t gfp = htlb_alloc_mask(h) | __GFP_RETRY_MAYFAIL; - memcg = get_mem_cgroup_from_current(); - memcg_charge_ret = mem_cgroup_hugetlb_try_charge(memcg, gfp, nr_pages); - if (memcg_charge_ret == -ENOMEM) { - mem_cgroup_put(memcg); - return ERR_PTR(-ENOMEM); - } - idx = hstate_index(h); /* * Examine the region/reserve map to determine if the process @@ -2989,12 +2981,8 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, * code of zero indicates a reservation exists (no change). 
*/ map_chg = gbl_chg = vma_needs_reservation(h, vma, addr); - if (map_chg < 0) { - if (!memcg_charge_ret) - mem_cgroup_cancel_charge(memcg, nr_pages); - mem_cgroup_put(memcg); + if (map_chg < 0) return ERR_PTR(-ENOMEM); - } /* * Processes that did not create the mapping will have no @@ -3092,10 +3080,15 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, } } - if (!memcg_charge_ret) - mem_cgroup_commit_charge(folio, memcg); - lruvec_stat_mod_folio(folio, NR_HUGETLB, pages_per_huge_page(h)); - mem_cgroup_put(memcg); + ret = mem_cgroup_charge_hugetlb(folio, gfp); + if (ret == -ENOMEM) { + spin_unlock_irq(&hugetlb_lock); + free_huge_folio(folio); + return ERR_PTR(-ENOMEM); + } + else if (!ret) + lruvec_stat_mod_folio(folio, NR_HUGETLB, + pages_per_huge_page(h)); return folio; @@ -3110,9 +3103,6 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, hugepage_subpool_put_pages(spool, 1); out_end_reservation: vma_end_reservation(h, vma, addr); - if (!memcg_charge_ret) - mem_cgroup_cancel_charge(memcg, nr_pages); - mem_cgroup_put(memcg); return ERR_PTR(-ENOSPC); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 97f63ec9c9fb..95ee77fe27af 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4529,6 +4529,25 @@ int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, return 0; } +int mem_cgroup_charge_hugetlb(struct folio *folio, gfp_t gfp) +{ + struct mem_cgroup *memcg = get_mem_cgroup_from_current(); + int ret = 0; + + if (mem_cgroup_disabled() || !memcg_accounts_hugetlb() || + !memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) { + ret = -EOPNOTSUPP; + goto out; + } + + if (charge_memcg(folio, memcg, gfp)) + ret = -ENOMEM; + +out: + mem_cgroup_put(memcg); + return ret; +} + /** * mem_cgroup_swapin_charge_folio - Charge a newly allocated folio for swapin. * @folio: folio to charge.
This patch introduces mem_cgroup_charge_hugetlb, which combines the logic of mem_cgroup_hugetlb_try_charge() and mem_cgroup_commit_charge(). This reduces the footprint of memcg in hugetlb code, and also consolidates the error path that memcg can take into just one point. Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com> --- include/linux/memcontrol.h | 2 ++ mm/hugetlb.c | 34 ++++++++++++---------------------- mm/memcontrol.c | 19 +++++++++++++++++++ 3 files changed, 33 insertions(+), 22 deletions(-)