diff mbox series

[v2,3/5] mm/memcg: Cache vmstat data in percpu memcg_stock_pcp

Message ID 20210412225503.15119-4-longman@redhat.com (mailing list archive)
State New
Headers show
Series mm/memcg: Reduce kmemcache memory accounting overhead | expand

Commit Message

Waiman Long April 12, 2021, 10:55 p.m. UTC
Before the new slab memory controller with per object byte charging,
charging and vmstat data update happen only when new slab pages are
allocated or freed. Now they are done with every kmem_cache_alloc()
and kmem_cache_free(). This causes additional overhead for workloads
that generate a lot of alloc and free calls.

The memcg_stock_pcp is used to cache byte charge for a specific
obj_cgroup to reduce that overhead. To further reducing it, this patch
makes the vmstat data cached in the memcg_stock_pcp structure as well
until it accumulates a page size worth of update or when other cached
data change.

On a 2-socket Cascade Lake server with instrumentation enabled and this
patch applied, it was found that about 17% (946796 out of 5515184) of the
time when __mod_obj_stock_state() is called leads to an actual call to
mod_objcg_state() after initial boot. When doing parallel kernel build,
the figure was about 16% (21894614 out of 139780628). So caching the
vmstat data reduces the number of calls to mod_objcg_state() by more
than 80%.

Signed-off-by: Waiman Long <longman@redhat.com>
---
 mm/memcontrol.c | 78 +++++++++++++++++++++++++++++++++++++++++++------
 mm/slab.h       | 26 +++++++----------
 2 files changed, 79 insertions(+), 25 deletions(-)

Comments

Shakeel Butt April 12, 2021, 11:03 p.m. UTC | #1
On Mon, Apr 12, 2021 at 3:55 PM Waiman Long <longman@redhat.com> wrote:
>
> Before the new slab memory controller with per object byte charging,
> charging and vmstat data update happen only when new slab pages are
> allocated or freed. Now they are done with every kmem_cache_alloc()
> and kmem_cache_free(). This causes additional overhead for workloads
> that generate a lot of alloc and free calls.
>
> The memcg_stock_pcp is used to cache byte charge for a specific
> obj_cgroup to reduce that overhead. To further reducing it, this patch
> makes the vmstat data cached in the memcg_stock_pcp structure as well
> until it accumulates a page size worth of update or when other cached
> data change.
>
> On a 2-socket Cascade Lake server with instrumentation enabled and this
> patch applied, it was found that about 17% (946796 out of 5515184) of the
> time when __mod_obj_stock_state() is called leads to an actual call to
> mod_objcg_state() after initial boot. When doing parallel kernel build,
> the figure was about 16% (21894614 out of 139780628). So caching the
> vmstat data reduces the number of calls to mod_objcg_state() by more
> than 80%.
>
> Signed-off-by: Waiman Long <longman@redhat.com>

Reviewed-by: Shakeel Butt <shakeelb@google.com>
kernel test robot April 13, 2021, 6:32 p.m. UTC | #2
Hi Waiman,

I love your patch! Perhaps something to improve:

[auto build test WARNING on dennis-percpu/for-next]
[also build test WARNING on linus/master v5.12-rc7 next-20210413]
[cannot apply to hnaz-linux-mm/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Waiman-Long/mm-memcg-Reduce-kmemcache-memory-accounting-overhead/20210413-065659
base:   https://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu.git for-next
config: mips-randconfig-r031-20210413 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 9829f5e6b1bca9b61efc629770d28bb9014dec45)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install mips cross compiling tool for clang build
        # apt-get install binutils-mips-linux-gnu
        # https://github.com/0day-ci/linux/commit/565fa80b02eeb06c48a88ac6354a4fc9e5addd06
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Waiman-Long/mm-memcg-Reduce-kmemcache-memory-accounting-overhead/20210413-065659
        git checkout 565fa80b02eeb06c48a88ac6354a4fc9e5addd06
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=mips 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from mm/mempool.c:22:
>> mm/slab.h:401:13: warning: unused function 'mod_obj_stock_state' [-Wunused-function]
   static void mod_obj_stock_state(struct obj_cgroup *objcg,
               ^
   1 warning generated.
--
   In file included from mm/oom_kill.c:49:
>> mm/slab.h:401:13: warning: unused function 'mod_obj_stock_state'
   static void mod_obj_stock_state(struct obj_cgroup
   ^
   fatal error: error in backend: Nested variants found in inline asm string: ' .set push
   .set mips64r2
   .if ( 0x00 ) != -1)) 0x00 ) != -1)) : ($( static struct ftrace_branch_data __attribute__((__aligned__(4))) __attribute__((__section__("_ftrace_branch"))) __if_trace = $( .func = __func__, .file = "arch/mips/include/asm/bitops.h", .line = 105, $); 0x00 ) != -1)) : $))) ) && ( 0 ); .set push; .set mips64r2; .rept 1; sync 0x00; .endr; .set pop; .else; ; .endif
   1: ll $0, $1
   or $0, $2
   sc $0, $1
   beqz $0, 1b
   .set pop
   '
   clang-13: error: clang frontend command failed with exit code 70 (use -v to see invocation)
   clang version 13.0.0 (git://gitmirror/llvm_project 9829f5e6b1bca9b61efc629770d28bb9014dec45)
   Target: mipsel-unknown-linux-gnu
   Thread model: posix
   InstalledDir: /opt/cross/clang-9829f5e6b1/bin
   clang-13: note: diagnostic msg:
   Makefile arch block certs crypto drivers fs include init ipc kernel lib mm scripts security sound source usr virt
--
   In file included from mm/slab_common.c:33:
>> mm/slab.h:401:13: warning: unused function 'mod_obj_stock_state'
   static void mod_obj_stock_state(struct obj_cgroup
   ^
   fatal error: error in backend: Nested variants found in inline asm string: ' .set push
   .set mips64r2
   .if ( 0x00 ) != -1)) 0x00 ) != -1)) : ($( static struct ftrace_branch_data __attribute__((__aligned__(4))) __attribute__((__section__("_ftrace_branch"))) __if_trace = $( .func = __func__, .file = "arch/mips/include/asm/atomic.h", .line = 153, $); 0x00 ) != -1)) : $))) ) && ( 0 ); .set push; .set mips64r2; .rept 1; sync 0x00; .endr; .set pop; .else; ; .endif
   1: ll $0, $1 # atomic_add
   addu $0, $2
   sc $0, $1
   beqz $0, 1b
   .set pop
   '
   clang-13: error: clang frontend command failed with exit code 70 (use -v to see invocation)
   clang version 13.0.0 (git://gitmirror/llvm_project 9829f5e6b1bca9b61efc629770d28bb9014dec45)
   Target: mipsel-unknown-linux-gnu
   Thread model: posix
   InstalledDir: /opt/cross/clang-9829f5e6b1/bin
   clang-13: note: diagnostic msg:
   Makefile arch block certs crypto drivers fs include init ipc kernel lib mm scripts security sound source usr virt
--
   In file included from mm/slub.c:20:
>> mm/slab.h:401:13: warning: unused function 'mod_obj_stock_state'
   static void mod_obj_stock_state(struct obj_cgroup
   ^
   mm/slub.c:1503:29: warning: unused function 'node_nr_slabs'
   static inline unsigned long node_nr_slabs(struct kmem_cache_node
   ^
   mm/slub.c:1521:21: warning: unused function 'kmalloc_large_node_hook'
   static inline void size_t size, gfp_t flags)
   ^
   fatal error: error in backend: Nested variants found in inline asm string: ' .set push
   .set mips64r2
   .if ( 0x00 ) != -1)) 0x00 ) != -1)) : ($( static struct ftrace_branch_data __attribute__((__aligned__(4))) __attribute__((__section__("_ftrace_branch"))) __if_trace = $( .func = __func__, .file = "arch/mips/include/asm/bitops.h", .line = 192, $); 0x00 ) != -1)) : $))) ) && ( 0 ); .set push; .set mips64r2; .rept 1; sync 0x00; .endr; .set pop; .else; ; .endif
   1: ll $0, $2
   or $1, $0, $3
   sc $1, $2
   beqz $1, 1b
   .set pop
   '
   clang-13: error: clang frontend command failed with exit code 70 (use -v to see invocation)
   clang version 13.0.0 (git://gitmirror/llvm_project 9829f5e6b1bca9b61efc629770d28bb9014dec45)
   Target: mipsel-unknown-linux-gnu
   Thread model: posix
   InstalledDir: /opt/cross/clang-9829f5e6b1/bin
   clang-13: note: diagnostic msg:
   Makefile arch block certs crypto drivers fs include init ipc kernel lib mm scripts security sound source usr virt


vim +/mod_obj_stock_state +401 mm/slab.h

   400	
 > 401	static void mod_obj_stock_state(struct obj_cgroup *objcg,
   402					struct pglist_data *pgdat, int idx, int nr)
   403	{
   404	}
   405	#endif /* CONFIG_MEMCG_KMEM */
   406	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b19100c68aa0..539c3b632e47 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2220,7 +2220,10 @@  struct memcg_stock_pcp {
 
 #ifdef CONFIG_MEMCG_KMEM
 	struct obj_cgroup *cached_objcg;
+	struct pglist_data *cached_pgdat;
 	unsigned int nr_bytes;
+	int vmstat_idx;
+	int vmstat_bytes;
 #endif
 
 	struct work_struct work;
@@ -3157,6 +3160,21 @@  void __memcg_kmem_uncharge_page(struct page *page, int order)
 	css_put(&memcg->css);
 }
 
+static inline void mod_objcg_state(struct obj_cgroup *objcg,
+				   struct pglist_data *pgdat,
+				   enum node_stat_item idx, int nr)
+{
+	struct mem_cgroup *memcg;
+	struct lruvec *lruvec = NULL;
+
+	rcu_read_lock();
+	memcg = obj_cgroup_memcg(objcg);
+	if (pgdat)
+		lruvec = mem_cgroup_lruvec(memcg, pgdat);
+	__mod_memcg_lruvec_state(memcg, lruvec, idx, nr);
+	rcu_read_unlock();
+}
+
 static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 {
 	struct memcg_stock_pcp *stock;
@@ -3207,6 +3225,14 @@  static void drain_obj_stock(struct memcg_stock_pcp *stock)
 		stock->nr_bytes = 0;
 	}
 
+	if (stock->vmstat_bytes) {
+		mod_objcg_state(old, stock->cached_pgdat, stock->vmstat_idx,
+				stock->vmstat_bytes);
+		stock->vmstat_bytes = 0;
+		stock->vmstat_idx = 0;
+		stock->cached_pgdat = NULL;
+	}
+
 	obj_cgroup_put(old);
 	stock->cached_objcg = NULL;
 }
@@ -3251,6 +3277,48 @@  static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 	local_irq_restore(flags);
 }
 
+static void __mod_obj_stock_state(struct obj_cgroup *objcg,
+				  struct pglist_data *pgdat, int idx, int nr)
+{
+	struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock);
+
+	if (stock->cached_objcg != objcg) {
+		/* Output the current data as is */
+	} else if (!stock->vmstat_bytes) {
+		/* Save the current data */
+		stock->vmstat_bytes = nr;
+		stock->vmstat_idx = idx;
+		stock->cached_pgdat = pgdat;
+		nr = 0;
+	} else if ((stock->cached_pgdat != pgdat) ||
+		   (stock->vmstat_idx != idx)) {
+		/* Output the cached data & save the current data */
+		swap(nr, stock->vmstat_bytes);
+		swap(idx, stock->vmstat_idx);
+		swap(pgdat, stock->cached_pgdat);
+	} else {
+		stock->vmstat_bytes += nr;
+		if (abs(nr) > PAGE_SIZE) {
+			nr = stock->vmstat_bytes;
+			stock->vmstat_bytes = 0;
+		} else {
+			nr = 0;
+		}
+	}
+	if (nr)
+		mod_objcg_state(objcg, pgdat, idx, nr);
+}
+
+void mod_obj_stock_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
+			 int idx, int nr)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__mod_obj_stock_state(objcg, pgdat, idx, nr);
+	local_irq_restore(flags);
+}
+
 int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size)
 {
 	struct mem_cgroup *memcg;
@@ -3300,18 +3368,10 @@  void obj_cgroup_uncharge_mod_state(struct obj_cgroup *objcg, size_t size,
 				   struct pglist_data *pgdat, int idx)
 {
 	unsigned long flags;
-	struct mem_cgroup *memcg;
-	struct lruvec *lruvec = NULL;
 
 	local_irq_save(flags);
 	__refill_obj_stock(objcg, size);
-
-	rcu_read_lock();
-	memcg = obj_cgroup_memcg(objcg);
-	if (pgdat)
-		lruvec = mem_cgroup_lruvec(memcg, pgdat);
-	__mod_memcg_lruvec_state(memcg, lruvec, idx, -(int)size);
-	rcu_read_unlock();
+	__mod_obj_stock_state(objcg, pgdat, idx, -(int)size);
 	local_irq_restore(flags);
 }
 
diff --git a/mm/slab.h b/mm/slab.h
index 677cdc52e641..ae971975d9fc 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -239,6 +239,8 @@  static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla
 #ifdef CONFIG_MEMCG_KMEM
 int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
 				 gfp_t gfp, bool new_page);
+void mod_obj_stock_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
+			 int idx, int nr);
 
 static inline void memcg_free_page_obj_cgroups(struct page *page)
 {
@@ -283,20 +285,6 @@  static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
 	return true;
 }
 
-static inline void mod_objcg_state(struct obj_cgroup *objcg,
-				   struct pglist_data *pgdat,
-				   enum node_stat_item idx, int nr)
-{
-	struct mem_cgroup *memcg;
-	struct lruvec *lruvec;
-
-	rcu_read_lock();
-	memcg = obj_cgroup_memcg(objcg);
-	lruvec = mem_cgroup_lruvec(memcg, pgdat);
-	mod_memcg_lruvec_state(memcg, lruvec, idx, nr);
-	rcu_read_unlock();
-}
-
 static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
 					      struct obj_cgroup *objcg,
 					      gfp_t flags, size_t size,
@@ -324,8 +312,9 @@  static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
 			off = obj_to_index(s, page, p[i]);
 			obj_cgroup_get(objcg);
 			page_objcgs(page)[off] = objcg;
-			mod_objcg_state(objcg, page_pgdat(page),
-					cache_vmstat_idx(s), obj_full_size(s));
+			mod_obj_stock_state(objcg, page_pgdat(page),
+					    cache_vmstat_idx(s),
+					    obj_full_size(s));
 		} else {
 			obj_cgroup_uncharge(objcg, obj_full_size(s));
 		}
@@ -408,6 +397,11 @@  static inline void memcg_slab_free_hook(struct kmem_cache *s,
 					void **p, int objects)
 {
 }
+
+static void mod_obj_stock_state(struct obj_cgroup *objcg,
+				struct pglist_data *pgdat, int idx, int nr)
+{
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 static inline struct kmem_cache *virt_to_cache(const void *obj)