diff mbox series

[v3,1/5] mm/memcg: Pass both memcg and lruvec to mod_memcg_lruvec_state()

Message ID 20210414012027.5352-2-longman@redhat.com (mailing list archive)
State New
Headers show
Series mm/memcg: Reduce kmemcache memory accounting overhead | expand

Commit Message

Waiman Long April 14, 2021, 1:20 a.m. UTC
The caller of mod_memcg_lruvec_state() has both memcg and lruvec readily
available. So both of them are now passed to mod_memcg_lruvec_state()
and __mod_memcg_lruvec_state(). The __mod_memcg_lruvec_state() is
updated to allow either of the two parameters to be set to null. This
makes mod_memcg_lruvec_state() equivalent to mod_memcg_state() if lruvec
is null.

The new __mod_memcg_lruvec_state() function will be used in the next
patch as a replacement of mod_memcg_state() in mm/percpu.c for the
consolidation of the memory uncharge and vmstat update functions in
the kmem_cache_free() path.

Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
---
 include/linux/memcontrol.h | 12 +++++++-----
 mm/memcontrol.c            | 19 +++++++++++++------
 mm/slab.h                  |  2 +-
 3 files changed, 21 insertions(+), 12 deletions(-)

Comments

Masayoshi Mizuma April 15, 2021, 3:27 a.m. UTC | #1
On Tue, Apr 13, 2021 at 09:20:23PM -0400, Waiman Long wrote:
> The caller of mod_memcg_lruvec_state() has both memcg and lruvec readily
> available. So both of them are now passed to mod_memcg_lruvec_state()
> and __mod_memcg_lruvec_state(). The __mod_memcg_lruvec_state() is
> updated to allow either of the two parameters to be set to null. This
> makes mod_memcg_lruvec_state() equivalent to mod_memcg_state() if lruvec
> is null.
> 
> The new __mod_memcg_lruvec_state() function will be used in the next
> patch as a replacement of mod_memcg_state() in mm/percpu.c for the
> consolidation of the memory uncharge and vmstat update functions in
> the kmem_cache_free() path.
> 
> Signed-off-by: Waiman Long <longman@redhat.com>
> Acked-by: Roman Gushchin <guro@fb.com>
> Reviewed-by: Shakeel Butt <shakeelb@google.com>
> ---
>  include/linux/memcontrol.h | 12 +++++++-----
>  mm/memcontrol.c            | 19 +++++++++++++------
>  mm/slab.h                  |  2 +-
>  3 files changed, 21 insertions(+), 12 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 0c04d39a7967..95f12996e66c 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -955,8 +955,8 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
>  	return x;
>  }
>  
> -void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
> -			      int val);
> +void __mod_memcg_lruvec_state(struct mem_cgroup *memcg, struct lruvec *lruvec,
> +			      enum node_stat_item idx, int val);
>  void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val);
>  
>  static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
> @@ -969,13 +969,14 @@ static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
>  	local_irq_restore(flags);
>  }
>  
> -static inline void mod_memcg_lruvec_state(struct lruvec *lruvec,
> +static inline void mod_memcg_lruvec_state(struct mem_cgroup *memcg,
> +					  struct lruvec *lruvec,
>  					  enum node_stat_item idx, int val)
>  {
>  	unsigned long flags;
>  
>  	local_irq_save(flags);
> -	__mod_memcg_lruvec_state(lruvec, idx, val);
> +	__mod_memcg_lruvec_state(memcg, lruvec, idx, val);
>  	local_irq_restore(flags);
>  }
>  
> @@ -1369,7 +1370,8 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
>  	return node_page_state(lruvec_pgdat(lruvec), idx);
>  }
>  
> -static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
> +static inline void __mod_memcg_lruvec_state(struct mem_cgroup *memcg,
> +					    struct lruvec *lruvec,
>  					    enum node_stat_item idx, int val)
>  {
>  }
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index e064ac0d850a..d66e1e38f8ac 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -799,20 +799,27 @@ parent_nodeinfo(struct mem_cgroup_per_node *pn, int nid)
>  	return mem_cgroup_nodeinfo(parent, nid);
>  }
>  
> -void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
> -			      int val)
> +/*
> + * Either one of memcg or lruvec can be NULL, but not both.
> + */
> +void __mod_memcg_lruvec_state(struct mem_cgroup *memcg, struct lruvec *lruvec,
> +			      enum node_stat_item idx, int val)
>  {
>  	struct mem_cgroup_per_node *pn;
> -	struct mem_cgroup *memcg;
>  	long x, threshold = MEMCG_CHARGE_BATCH;
>  
> +	/* Update lruvec */
>  	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> -	memcg = pn->memcg;
> +
> +	if (!memcg)
> +		memcg = pn->memcg;
>  
>  	/* Update memcg */
>  	__mod_memcg_state(memcg, idx, val);
>  
> -	/* Update lruvec */
> +	if (!lruvec)
> +		return;
> +
>  	__this_cpu_add(pn->lruvec_stat_local->count[idx], val);
>  
>  	if (vmstat_item_in_bytes(idx))
> @@ -848,7 +855,7 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
>  
>  	/* Update memcg and lruvec */
>  	if (!mem_cgroup_disabled())
> -		__mod_memcg_lruvec_state(lruvec, idx, val);
> +		__mod_memcg_lruvec_state(NULL, lruvec, idx, val);
>  }
>  
>  void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx,
> diff --git a/mm/slab.h b/mm/slab.h
> index 076582f58f68..bc6c7545e487 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -293,7 +293,7 @@ static inline void mod_objcg_state(struct obj_cgroup *objcg,
>  	rcu_read_lock();
>  	memcg = obj_cgroup_memcg(objcg);
>  	lruvec = mem_cgroup_lruvec(memcg, pgdat);
> -	mod_memcg_lruvec_state(lruvec, idx, nr);
> +	mod_memcg_lruvec_state(memcg, lruvec, idx, nr);
>  	rcu_read_unlock();
>  }
>  
> -- 
> 2.18.1
> 

Please feel free to add:

Tested-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>

Thanks!
Masa
Johannes Weiner April 15, 2021, 4:40 p.m. UTC | #2
On Tue, Apr 13, 2021 at 09:20:23PM -0400, Waiman Long wrote:
> The caller of mod_memcg_lruvec_state() has both memcg and lruvec readily
> available. So both of them are now passed to mod_memcg_lruvec_state()
> and __mod_memcg_lruvec_state(). The __mod_memcg_lruvec_state() is
> updated to allow either of the two parameters to be set to null. This
> makes mod_memcg_lruvec_state() equivalent to mod_memcg_state() if lruvec
> is null.
> 
> The new __mod_memcg_lruvec_state() function will be used in the next
> patch as a replacement of mod_memcg_state() in mm/percpu.c for the
> consolidation of the memory uncharge and vmstat update functions in
> the kmem_cache_free() path.

This requires users who want both to pass a pgdat that can be derived
from the lruvec. This is error prone, and we just acked a patch that
removes this very thing from mem_cgroup_page_lruvec().

With the suggestion for patch 2, this shouldn't be necessary anymore,
though. And sort of underlines my point around that combined function
creating akwward code above and below it.
Waiman Long April 15, 2021, 4:59 p.m. UTC | #3
On 4/15/21 12:40 PM, Johannes Weiner wrote:
> On Tue, Apr 13, 2021 at 09:20:23PM -0400, Waiman Long wrote:
>> The caller of mod_memcg_lruvec_state() has both memcg and lruvec readily
>> available. So both of them are now passed to mod_memcg_lruvec_state()
>> and __mod_memcg_lruvec_state(). The __mod_memcg_lruvec_state() is
>> updated to allow either of the two parameters to be set to null. This
>> makes mod_memcg_lruvec_state() equivalent to mod_memcg_state() if lruvec
>> is null.
>>
>> The new __mod_memcg_lruvec_state() function will be used in the next
>> patch as a replacement of mod_memcg_state() in mm/percpu.c for the
>> consolidation of the memory uncharge and vmstat update functions in
>> the kmem_cache_free() path.
> This requires users who want both to pass a pgdat that can be derived
> from the lruvec. This is error prone, and we just acked a patch that
> removes this very thing from mem_cgroup_page_lruvec().
>
> With the suggestion for patch 2, this shouldn't be necessary anymore,
> though. And sort of underlines my point around that combined function
> creating akwward code above and below it.
>
The reason of passing in the pgdat is because of the caching of vmstat 
data. lruvec may be gone if the corresponding memory cgroup is removed, 
but pgdat should stay put. That is why I put pgdat in the obj_stock for 
caching. I could also put the node id instead of pgdat.

Cheers,
Longman
Johannes Weiner April 16, 2021, 3:48 p.m. UTC | #4
On Thu, Apr 15, 2021 at 12:59:21PM -0400, Waiman Long wrote:
> On 4/15/21 12:40 PM, Johannes Weiner wrote:
> > On Tue, Apr 13, 2021 at 09:20:23PM -0400, Waiman Long wrote:
> > > The caller of mod_memcg_lruvec_state() has both memcg and lruvec readily
> > > available. So both of them are now passed to mod_memcg_lruvec_state()
> > > and __mod_memcg_lruvec_state(). The __mod_memcg_lruvec_state() is
> > > updated to allow either of the two parameters to be set to null. This
> > > makes mod_memcg_lruvec_state() equivalent to mod_memcg_state() if lruvec
> > > is null.
> > > 
> > > The new __mod_memcg_lruvec_state() function will be used in the next
> > > patch as a replacement of mod_memcg_state() in mm/percpu.c for the
> > > consolidation of the memory uncharge and vmstat update functions in
> > > the kmem_cache_free() path.
> > This requires users who want both to pass a pgdat that can be derived
> > from the lruvec. This is error prone, and we just acked a patch that
> > removes this very thing from mem_cgroup_page_lruvec().
> > 
> > With the suggestion for patch 2, this shouldn't be necessary anymore,
> > though. And sort of underlines my point around that combined function
> > creating akwward code above and below it.
> > 
> The reason of passing in the pgdat is because of the caching of vmstat data.
> lruvec may be gone if the corresponding memory cgroup is removed, but pgdat
> should stay put. That is why I put pgdat in the obj_stock for caching. I
> could also put the node id instead of pgdat.

Internally storing the pgdat is fine.

I was talking about the interface requiring the user to pass redundant
information (pgdat, can be derived from lruvec) because the parameter
is overloaded to modify the function's behavior, which is unintuitive.
diff mbox series

Patch

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0c04d39a7967..95f12996e66c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -955,8 +955,8 @@  static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 	return x;
 }
 
-void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
-			      int val);
+void __mod_memcg_lruvec_state(struct mem_cgroup *memcg, struct lruvec *lruvec,
+			      enum node_stat_item idx, int val);
 void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val);
 
 static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
@@ -969,13 +969,14 @@  static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
 	local_irq_restore(flags);
 }
 
-static inline void mod_memcg_lruvec_state(struct lruvec *lruvec,
+static inline void mod_memcg_lruvec_state(struct mem_cgroup *memcg,
+					  struct lruvec *lruvec,
 					  enum node_stat_item idx, int val)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
-	__mod_memcg_lruvec_state(lruvec, idx, val);
+	__mod_memcg_lruvec_state(memcg, lruvec, idx, val);
 	local_irq_restore(flags);
 }
 
@@ -1369,7 +1370,8 @@  static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 	return node_page_state(lruvec_pgdat(lruvec), idx);
 }
 
-static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
+static inline void __mod_memcg_lruvec_state(struct mem_cgroup *memcg,
+					    struct lruvec *lruvec,
 					    enum node_stat_item idx, int val)
 {
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e064ac0d850a..d66e1e38f8ac 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -799,20 +799,27 @@  parent_nodeinfo(struct mem_cgroup_per_node *pn, int nid)
 	return mem_cgroup_nodeinfo(parent, nid);
 }
 
-void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
-			      int val)
+/*
+ * Either one of memcg or lruvec can be NULL, but not both.
+ */
+void __mod_memcg_lruvec_state(struct mem_cgroup *memcg, struct lruvec *lruvec,
+			      enum node_stat_item idx, int val)
 {
 	struct mem_cgroup_per_node *pn;
-	struct mem_cgroup *memcg;
 	long x, threshold = MEMCG_CHARGE_BATCH;
 
+	/* Update lruvec */
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	memcg = pn->memcg;
+
+	if (!memcg)
+		memcg = pn->memcg;
 
 	/* Update memcg */
 	__mod_memcg_state(memcg, idx, val);
 
-	/* Update lruvec */
+	if (!lruvec)
+		return;
+
 	__this_cpu_add(pn->lruvec_stat_local->count[idx], val);
 
 	if (vmstat_item_in_bytes(idx))
@@ -848,7 +855,7 @@  void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 
 	/* Update memcg and lruvec */
 	if (!mem_cgroup_disabled())
-		__mod_memcg_lruvec_state(lruvec, idx, val);
+		__mod_memcg_lruvec_state(NULL, lruvec, idx, val);
 }
 
 void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx,
diff --git a/mm/slab.h b/mm/slab.h
index 076582f58f68..bc6c7545e487 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -293,7 +293,7 @@  static inline void mod_objcg_state(struct obj_cgroup *objcg,
 	rcu_read_lock();
 	memcg = obj_cgroup_memcg(objcg);
 	lruvec = mem_cgroup_lruvec(memcg, pgdat);
-	mod_memcg_lruvec_state(lruvec, idx, nr);
+	mod_memcg_lruvec_state(memcg, lruvec, idx, nr);
 	rcu_read_unlock();
 }