diff mbox series

[v2,4/5] mm: memcontrol: introduce remote objcg charging API

Message ID 20210303055917.66054-5-songmuchun@bytedance.com (mailing list archive)
State New, archived
Headers show
Series Use obj_cgroup APIs to charge kmem pages | expand

Commit Message

Muchun Song March 3, 2021, 5:59 a.m. UTC
The remote memcg charing APIs is a mechanism to charge pages to a given
memcg. Since all kernel memory are charged by using obj_cgroup APIs.
Actually, we want to charge kernel memory to the remote object cgroup
instead of memory cgroup. So introduce remote objcg charging APIs to
charge the kmem pages by using objcg_cgroup APIs. And if remote memcg
and objcg are both set, objcg takes precedence over memcg to charge
the kmem pages.

In the later patch, we will use those API to charge kernel memory to
the remote objcg.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 include/linux/sched.h    |  4 ++++
 include/linux/sched/mm.h | 38 ++++++++++++++++++++++++++++++++++++++
 kernel/fork.c            |  3 +++
 mm/memcontrol.c          | 44 ++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 85 insertions(+), 4 deletions(-)

Comments

Roman Gushchin March 5, 2021, 7:48 p.m. UTC | #1
On Wed, Mar 03, 2021 at 01:59:16PM +0800, Muchun Song wrote:
> The remote memcg charing APIs is a mechanism to charge pages to a given
> memcg. Since all kernel memory are charged by using obj_cgroup APIs.
> Actually, we want to charge kernel memory to the remote object cgroup
> instead of memory cgroup. So introduce remote objcg charging APIs to
> charge the kmem pages by using objcg_cgroup APIs. And if remote memcg
> and objcg are both set, objcg takes precedence over memcg to charge
> the kmem pages.
> 
> In the later patch, we will use those API to charge kernel memory to
> the remote objcg.

I'd abandon/postpone the rest of the patchset (patches 4 and 5) as now.
They add a lot of new code to solve a theoretical problem (please, fix
me if I'm wrong), which is not a panic or data corruption, but
a sub-optimal garbage collection behavior. I think we need a better
motivation or/and an implementation which makes the code simpler
and smaller.

> 
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> ---
>  include/linux/sched.h    |  4 ++++
>  include/linux/sched/mm.h | 38 ++++++++++++++++++++++++++++++++++++++
>  kernel/fork.c            |  3 +++
>  mm/memcontrol.c          | 44 ++++++++++++++++++++++++++++++++++++++++----
>  4 files changed, 85 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index ee46f5cab95b..8edcc71a0a1d 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1318,6 +1318,10 @@ struct task_struct {
>  	/* Used by memcontrol for targeted memcg charge: */
>  	struct mem_cgroup		*active_memcg;
>  #endif
> +#ifdef CONFIG_MEMCG_KMEM
> +	/* Used by memcontrol for targeted objcg charge: */
> +	struct obj_cgroup		*active_objcg;
> +#endif
>  
>  #ifdef CONFIG_BLK_CGROUP
>  	struct request_queue		*throttle_queue;
> diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
> index 1ae08b8462a4..be1189598b09 100644
> --- a/include/linux/sched/mm.h
> +++ b/include/linux/sched/mm.h
> @@ -330,6 +330,44 @@ set_active_memcg(struct mem_cgroup *memcg)
>  }
>  #endif
>  
> +#ifdef CONFIG_MEMCG_KMEM
> +DECLARE_PER_CPU(struct obj_cgroup *, int_active_objcg);
> +
> +/**
> + * set_active_objcg - Starts the remote objcg kmem pages charging scope.
> + * @objcg: objcg to charge.
> + *
> + * This function marks the beginning of the remote objcg charging scope. All the
> + * __GFP_ACCOUNT kmem page allocations till the end of the scope will be charged
> + * to the given objcg.
> + *
> + * NOTE: This function can nest. Users must save the return value and
> + * reset the previous value after their own charging scope is over.
> + *
> + * If remote memcg and objcg are both set, objcg takes precedence over memcg
> + * to charge the kmem pages.
> + */
> +static inline struct obj_cgroup *set_active_objcg(struct obj_cgroup *objcg)
> +{
> +	struct obj_cgroup *old;
> +
> +	if (in_interrupt()) {
> +		old = this_cpu_read(int_active_objcg);
> +		this_cpu_write(int_active_objcg, objcg);
> +	} else {
> +		old = current->active_objcg;
> +		current->active_objcg = objcg;
> +	}
> +
> +	return old;
> +}
> +#else
> +static inline struct obj_cgroup *set_active_objcg(struct obj_cgroup *objcg)
> +{
> +	return NULL;
> +}
> +#endif
> +
>  #ifdef CONFIG_MEMBARRIER
>  enum {
>  	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY		= (1U << 0),
> diff --git a/kernel/fork.c b/kernel/fork.c
> index d66cd1014211..b4b9dd5d122f 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -945,6 +945,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
>  #ifdef CONFIG_MEMCG
>  	tsk->active_memcg = NULL;
>  #endif
> +#ifdef CONFIG_MEMCG_KMEM
> +	tsk->active_objcg = NULL;
> +#endif
>  	return tsk;
>  
>  free_stack:
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 0cf342d22547..e48d4ab0af76 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -79,6 +79,11 @@ struct mem_cgroup *root_mem_cgroup __read_mostly;
>  /* Active memory cgroup to use from an interrupt context */
>  DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
>  
> +#ifdef CONFIG_MEMCG_KMEM
> +/* Active object cgroup to use from an interrupt context */
> +DEFINE_PER_CPU(struct obj_cgroup *, int_active_objcg);
> +#endif
> +
>  /* Socket memory accounting disabled? */
>  static bool cgroup_memory_nosocket;
>  
> @@ -1076,7 +1081,7 @@ static __always_inline struct mem_cgroup *get_active_memcg(void)
>  	return memcg;
>  }
>  
> -static __always_inline bool memcg_kmem_bypass(void)
> +static __always_inline bool memcg_charge_bypass(void)
>  {
>  	/* Allow remote memcg charging from any context. */
>  	if (unlikely(active_memcg()))
> @@ -1094,7 +1099,7 @@ static __always_inline bool memcg_kmem_bypass(void)
>   */
>  static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
>  {
> -	if (memcg_kmem_bypass())
> +	if (memcg_charge_bypass())
>  		return NULL;
>  
>  	if (unlikely(active_memcg()))
> @@ -1103,6 +1108,29 @@ static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
>  	return get_mem_cgroup_from_mm(current->mm);
>  }
>  
> +#ifdef CONFIG_MEMCG_KMEM
> +static __always_inline struct obj_cgroup *active_objcg(void)
> +{
> +	if (in_interrupt())
> +		return this_cpu_read(int_active_objcg);
> +	else
> +		return current->active_objcg;
> +}
> +
> +static __always_inline bool kmem_charge_bypass(void)
> +{
> +	/* Allow remote charging from any context. */
> +	if (unlikely(active_objcg() || active_memcg()))
> +		return false;
> +
> +	/* Memcg to charge can't be determined. */
> +	if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
> +		return true;
> +
> +	return false;
> +}
> +#endif
> +
>  /**
>   * mem_cgroup_iter - iterate over memory cgroup hierarchy
>   * @root: hierarchy root
> @@ -2997,13 +3025,20 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p)
>  
>  __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
>  {
> -	struct obj_cgroup *objcg = NULL;
> +	struct obj_cgroup *objcg;
>  	struct mem_cgroup *memcg;
>  
> -	if (memcg_kmem_bypass())
> +	if (kmem_charge_bypass())
>  		return NULL;
>  
>  	rcu_read_lock();
> +	objcg = active_objcg();
> +	if (unlikely(objcg)) {
> +		/* remote object cgroup must hold a reference. */
> +		obj_cgroup_get(objcg);
> +		goto out;
> +	}
> +
>  	if (unlikely(active_memcg()))
>  		memcg = active_memcg();
>  	else
> @@ -3015,6 +3050,7 @@ __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
>  			break;
>  		objcg = NULL;
>  	}
> +out:
>  	rcu_read_unlock();
>  
>  	return objcg;
> -- 
> 2.11.0
>
diff mbox series

Patch

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ee46f5cab95b..8edcc71a0a1d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1318,6 +1318,10 @@  struct task_struct {
 	/* Used by memcontrol for targeted memcg charge: */
 	struct mem_cgroup		*active_memcg;
 #endif
+#ifdef CONFIG_MEMCG_KMEM
+	/* Used by memcontrol for targeted objcg charge: */
+	struct obj_cgroup		*active_objcg;
+#endif
 
 #ifdef CONFIG_BLK_CGROUP
 	struct request_queue		*throttle_queue;
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 1ae08b8462a4..be1189598b09 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -330,6 +330,44 @@  set_active_memcg(struct mem_cgroup *memcg)
 }
 #endif
 
+#ifdef CONFIG_MEMCG_KMEM
+DECLARE_PER_CPU(struct obj_cgroup *, int_active_objcg);
+
+/**
+ * set_active_objcg - Starts the remote objcg kmem pages charging scope.
+ * @objcg: objcg to charge.
+ *
+ * This function marks the beginning of the remote objcg charging scope. All the
+ * __GFP_ACCOUNT kmem page allocations till the end of the scope will be charged
+ * to the given objcg.
+ *
+ * NOTE: This function can nest. Users must save the return value and
+ * reset the previous value after their own charging scope is over.
+ *
+ * If remote memcg and objcg are both set, objcg takes precedence over memcg
+ * to charge the kmem pages.
+ */
+static inline struct obj_cgroup *set_active_objcg(struct obj_cgroup *objcg)
+{
+	struct obj_cgroup *old;
+
+	if (in_interrupt()) {
+		old = this_cpu_read(int_active_objcg);
+		this_cpu_write(int_active_objcg, objcg);
+	} else {
+		old = current->active_objcg;
+		current->active_objcg = objcg;
+	}
+
+	return old;
+}
+#else
+static inline struct obj_cgroup *set_active_objcg(struct obj_cgroup *objcg)
+{
+	return NULL;
+}
+#endif
+
 #ifdef CONFIG_MEMBARRIER
 enum {
 	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY		= (1U << 0),
diff --git a/kernel/fork.c b/kernel/fork.c
index d66cd1014211..b4b9dd5d122f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -945,6 +945,9 @@  static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #ifdef CONFIG_MEMCG
 	tsk->active_memcg = NULL;
 #endif
+#ifdef CONFIG_MEMCG_KMEM
+	tsk->active_objcg = NULL;
+#endif
 	return tsk;
 
 free_stack:
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0cf342d22547..e48d4ab0af76 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -79,6 +79,11 @@  struct mem_cgroup *root_mem_cgroup __read_mostly;
 /* Active memory cgroup to use from an interrupt context */
 DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
 
+#ifdef CONFIG_MEMCG_KMEM
+/* Active object cgroup to use from an interrupt context */
+DEFINE_PER_CPU(struct obj_cgroup *, int_active_objcg);
+#endif
+
 /* Socket memory accounting disabled? */
 static bool cgroup_memory_nosocket;
 
@@ -1076,7 +1081,7 @@  static __always_inline struct mem_cgroup *get_active_memcg(void)
 	return memcg;
 }
 
-static __always_inline bool memcg_kmem_bypass(void)
+static __always_inline bool memcg_charge_bypass(void)
 {
 	/* Allow remote memcg charging from any context. */
 	if (unlikely(active_memcg()))
@@ -1094,7 +1099,7 @@  static __always_inline bool memcg_kmem_bypass(void)
  */
 static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
 {
-	if (memcg_kmem_bypass())
+	if (memcg_charge_bypass())
 		return NULL;
 
 	if (unlikely(active_memcg()))
@@ -1103,6 +1108,29 @@  static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
 	return get_mem_cgroup_from_mm(current->mm);
 }
 
+#ifdef CONFIG_MEMCG_KMEM
+static __always_inline struct obj_cgroup *active_objcg(void)
+{
+	if (in_interrupt())
+		return this_cpu_read(int_active_objcg);
+	else
+		return current->active_objcg;
+}
+
+static __always_inline bool kmem_charge_bypass(void)
+{
+	/* Allow remote charging from any context. */
+	if (unlikely(active_objcg() || active_memcg()))
+		return false;
+
+	/* Memcg to charge can't be determined. */
+	if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
+		return true;
+
+	return false;
+}
+#endif
+
 /**
  * mem_cgroup_iter - iterate over memory cgroup hierarchy
  * @root: hierarchy root
@@ -2997,13 +3025,20 @@  struct mem_cgroup *mem_cgroup_from_obj(void *p)
 
 __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
 {
-	struct obj_cgroup *objcg = NULL;
+	struct obj_cgroup *objcg;
 	struct mem_cgroup *memcg;
 
-	if (memcg_kmem_bypass())
+	if (kmem_charge_bypass())
 		return NULL;
 
 	rcu_read_lock();
+	objcg = active_objcg();
+	if (unlikely(objcg)) {
+		/* remote object cgroup must hold a reference. */
+		obj_cgroup_get(objcg);
+		goto out;
+	}
+
 	if (unlikely(active_memcg()))
 		memcg = active_memcg();
 	else
@@ -3015,6 +3050,7 @@  __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
 			break;
 		objcg = NULL;
 	}
+out:
 	rcu_read_unlock();
 
 	return objcg;