diff mbox series

[2/3] memcg/hugetlb: Introduce mem_cgroup_charge_hugetlb

Message ID 20241108212946.2642085-3-joshua.hahnjy@gmail.com (mailing list archive)
State New
Headers show
Series memcg/hugetlb: Rework memcg hugetlb charging | expand

Commit Message

Joshua Hahn Nov. 8, 2024, 9:29 p.m. UTC
This patch introduces mem_cgroup_charge_hugetlb, which combines the
logic of mem_cgroup{try,commit}_hugetlb. This reduces the footprint of
memcg in hugetlb code, and also consolidates the error path that memcg
can take into just one point.

Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>

---
 include/linux/memcontrol.h |  2 ++
 mm/hugetlb.c               | 34 ++++++++++++----------------------
 mm/memcontrol.c            | 19 +++++++++++++++++++
 3 files changed, 33 insertions(+), 22 deletions(-)

Comments

Shakeel Butt Nov. 8, 2024, 10:42 p.m. UTC | #1
On Fri, Nov 08, 2024 at 01:29:45PM -0800, Joshua Hahn wrote:
> This patch introduces mem_cgroup_charge_hugetlb, which combines the
> logic of mem_cgroup{try,commit}_hugetlb. This reduces the footprint of
> memcg in hugetlb code, and also consolidates the error path that memcg
> can take into just one point.
> 
> Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
> 
> ---
>  include/linux/memcontrol.h |  2 ++
>  mm/hugetlb.c               | 34 ++++++++++++----------------------
>  mm/memcontrol.c            | 19 +++++++++++++++++++
>  3 files changed, 33 insertions(+), 22 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index bb49e0d4b377..d90c1ac791f1 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -678,6 +678,8 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
>  int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp,
>  		long nr_pages);
>  
> +int mem_cgroup_charge_hugetlb(struct folio* folio, gfp_t gfp);
> +
>  int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
>  				  gfp_t gfp, swp_entry_t entry);
>  
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index fbb10e52d7ea..6f6841483039 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -2967,21 +2967,13 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
>  	struct hugepage_subpool *spool = subpool_vma(vma);
>  	struct hstate *h = hstate_vma(vma);
>  	struct folio *folio;
> -	long map_chg, map_commit, nr_pages = pages_per_huge_page(h);
> +	long map_chg, map_commit;
>  	long gbl_chg;
> -	int memcg_charge_ret, ret, idx;
> +	int ret, idx;
>  	struct hugetlb_cgroup *h_cg = NULL;
> -	struct mem_cgroup *memcg;
>  	bool deferred_reserve;
>  	gfp_t gfp = htlb_alloc_mask(h) | __GFP_RETRY_MAYFAIL;
>  
> -	memcg = get_mem_cgroup_from_current();
> -	memcg_charge_ret = mem_cgroup_hugetlb_try_charge(memcg, gfp, nr_pages);
> -	if (memcg_charge_ret == -ENOMEM) {
> -		mem_cgroup_put(memcg);
> -		return ERR_PTR(-ENOMEM);
> -	}
> -
>  	idx = hstate_index(h);
>  	/*
>  	 * Examine the region/reserve map to determine if the process
> @@ -2989,12 +2981,8 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
>  	 * code of zero indicates a reservation exists (no change).
>  	 */
>  	map_chg = gbl_chg = vma_needs_reservation(h, vma, addr);
> -	if (map_chg < 0) {
> -		if (!memcg_charge_ret)
> -			mem_cgroup_cancel_charge(memcg, nr_pages);
> -		mem_cgroup_put(memcg);
> +	if (map_chg < 0)
>  		return ERR_PTR(-ENOMEM);
> -	}
>  
>  	/*
>  	 * Processes that did not create the mapping will have no
> @@ -3092,10 +3080,15 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
>  		}
>  	}
>  
> -	if (!memcg_charge_ret)
> -		mem_cgroup_commit_charge(folio, memcg);
> -	lruvec_stat_mod_folio(folio, NR_HUGETLB, pages_per_huge_page(h));
> -	mem_cgroup_put(memcg);
> +	ret = mem_cgroup_charge_hugetlb(folio, gfp);
> +	if (ret == -ENOMEM) {
> +		spin_unlock_irq(&hugetlb_lock);

spin_unlock_irq??

> +		free_huge_folio(folio);

free_huge_folio() will call lruvec_stat_mod_folio() unconditionally but
you are only calling it on success. This may underflow the metric.

> +		return ERR_PTR(-ENOMEM);
> +	}
> +	else if (!ret)
> +		lruvec_stat_mod_folio(folio, NR_HUGETLB,
> +		      pages_per_huge_page(h));
>  
>  	return folio;
>  
> @@ -3110,9 +3103,6 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
>  		hugepage_subpool_put_pages(spool, 1);
>  out_end_reservation:
>  	vma_end_reservation(h, vma, addr);
> -	if (!memcg_charge_ret)
> -		mem_cgroup_cancel_charge(memcg, nr_pages);
> -	mem_cgroup_put(memcg);
>  	return ERR_PTR(-ENOSPC);
>  }
>  
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 97f63ec9c9fb..95ee77fe27af 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4529,6 +4529,25 @@ int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp,
>  	return 0;
>  }
>  
> +int mem_cgroup_charge_hugetlb(struct folio *folio, gfp_t gfp)
> +{
> +	struct mem_cgroup *memcg = get_mem_cgroup_from_current();
> +	int ret = 0;
> +
> +	if (mem_cgroup_disabled() || !memcg_accounts_hugetlb() ||
> +		!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
> +		ret = -EOPNOTSUPP;

why EOPNOTSUPP? You need to return 0 here. We do want
lruvec_stat_mod_folio() to be called.

> +		goto out;
> +	}
> +
> +	if (charge_memcg(folio, memcg, gfp))
> +		ret = -ENOMEM;
> +
> +out:
> +	mem_cgroup_put(memcg);
> +	return ret;
> +}
> +
>  /**
>   * mem_cgroup_swapin_charge_folio - Charge a newly allocated folio for swapin.
>   * @folio: folio to charge.
> -- 
> 2.43.5
>
SeongJae Park Nov. 9, 2024, 1:03 a.m. UTC | #2
Hi Joshua,

On Fri, 8 Nov 2024 13:29:45 -0800 Joshua Hahn <joshua.hahnjy@gmail.com> wrote:

> This patch introduces mem_cgroup_charge_hugetlb, which combines the
> logic of mem_cgroup{try,commit}_hugetlb. This reduces the footprint of

Nit.  Seems the regular expression is not technically correct?

> memcg in hugetlb code, and also consolidates the error path that memcg
> can take into just one point.
> 
> Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
> 
> ---
>  include/linux/memcontrol.h |  2 ++
>  mm/hugetlb.c               | 34 ++++++++++++----------------------
>  mm/memcontrol.c            | 19 +++++++++++++++++++
>  3 files changed, 33 insertions(+), 22 deletions(-)
[...]
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 97f63ec9c9fb..95ee77fe27af 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4529,6 +4529,25 @@ int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp,
>  	return 0;
>  }
>  
> +int mem_cgroup_charge_hugetlb(struct folio *folio, gfp_t gfp)

Can we add a kernel-doc comment for this function?  Maybe that for
mem_cgroup_hugetlb_try_charge() can be stolen with only small updates?

> +{
> +	struct mem_cgroup *memcg = get_mem_cgroup_from_current();
> +	int ret = 0;
> +
> +	if (mem_cgroup_disabled() || !memcg_accounts_hugetlb() ||
> +		!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
> +		ret = -EOPNOTSUPP;
> +		goto out;
> +	}
> +
> +	if (charge_memcg(folio, memcg, gfp))
> +		ret = -ENOMEM;
> +
> +out:
> +	mem_cgroup_put(memcg);
> +	return ret;
> +}
> +
>  /**
>   * mem_cgroup_swapin_charge_folio - Charge a newly allocated folio for swapin.
>   * @folio: folio to charge.
> -- 
> 2.43.5


Thanks,
SJ
diff mbox series

Patch

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index bb49e0d4b377..d90c1ac791f1 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -678,6 +678,8 @@  static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
 int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp,
 		long nr_pages);
 
+int mem_cgroup_charge_hugetlb(struct folio* folio, gfp_t gfp);
+
 int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
 				  gfp_t gfp, swp_entry_t entry);
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index fbb10e52d7ea..6f6841483039 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2967,21 +2967,13 @@  struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
 	struct hugepage_subpool *spool = subpool_vma(vma);
 	struct hstate *h = hstate_vma(vma);
 	struct folio *folio;
-	long map_chg, map_commit, nr_pages = pages_per_huge_page(h);
+	long map_chg, map_commit;
 	long gbl_chg;
-	int memcg_charge_ret, ret, idx;
+	int ret, idx;
 	struct hugetlb_cgroup *h_cg = NULL;
-	struct mem_cgroup *memcg;
 	bool deferred_reserve;
 	gfp_t gfp = htlb_alloc_mask(h) | __GFP_RETRY_MAYFAIL;
 
-	memcg = get_mem_cgroup_from_current();
-	memcg_charge_ret = mem_cgroup_hugetlb_try_charge(memcg, gfp, nr_pages);
-	if (memcg_charge_ret == -ENOMEM) {
-		mem_cgroup_put(memcg);
-		return ERR_PTR(-ENOMEM);
-	}
-
 	idx = hstate_index(h);
 	/*
 	 * Examine the region/reserve map to determine if the process
@@ -2989,12 +2981,8 @@  struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
 	 * code of zero indicates a reservation exists (no change).
 	 */
 	map_chg = gbl_chg = vma_needs_reservation(h, vma, addr);
-	if (map_chg < 0) {
-		if (!memcg_charge_ret)
-			mem_cgroup_cancel_charge(memcg, nr_pages);
-		mem_cgroup_put(memcg);
+	if (map_chg < 0)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	/*
 	 * Processes that did not create the mapping will have no
@@ -3092,10 +3080,15 @@  struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
 		}
 	}
 
-	if (!memcg_charge_ret)
-		mem_cgroup_commit_charge(folio, memcg);
-	lruvec_stat_mod_folio(folio, NR_HUGETLB, pages_per_huge_page(h));
-	mem_cgroup_put(memcg);
+	ret = mem_cgroup_charge_hugetlb(folio, gfp);
+	if (ret == -ENOMEM) {
+		spin_unlock_irq(&hugetlb_lock);
+		free_huge_folio(folio);
+		return ERR_PTR(-ENOMEM);
+	}
+	else if (!ret)
+		lruvec_stat_mod_folio(folio, NR_HUGETLB,
+		      pages_per_huge_page(h));
 
 	return folio;
 
@@ -3110,9 +3103,6 @@  struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
 		hugepage_subpool_put_pages(spool, 1);
 out_end_reservation:
 	vma_end_reservation(h, vma, addr);
-	if (!memcg_charge_ret)
-		mem_cgroup_cancel_charge(memcg, nr_pages);
-	mem_cgroup_put(memcg);
 	return ERR_PTR(-ENOSPC);
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 97f63ec9c9fb..95ee77fe27af 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4529,6 +4529,25 @@  int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp,
 	return 0;
 }
 
+int mem_cgroup_charge_hugetlb(struct folio *folio, gfp_t gfp)
+{
+	struct mem_cgroup *memcg = get_mem_cgroup_from_current();
+	int ret = 0;
+
+	if (mem_cgroup_disabled() || !memcg_accounts_hugetlb() ||
+		!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (charge_memcg(folio, memcg, gfp))
+		ret = -ENOMEM;
+
+out:
+	mem_cgroup_put(memcg);
+	return ret;
+}
+
 /**
  * mem_cgroup_swapin_charge_folio - Charge a newly allocated folio for swapin.
  * @folio: folio to charge.