diff mbox series

[5/5] mm/memcg: Optimize user context object stock access

Message ID 20210409231842.8840-6-longman@redhat.com (mailing list archive)
State New, archived
Headers show
Series mm/memcg: Reduce kmemcache memory accounting overhead | expand

Commit Message

Waiman Long April 9, 2021, 11:18 p.m. UTC
Most kmem_cache_alloc() calls are from user context. With instrumentation
enabled, the measured amount of kmem_cache_alloc() calls from non-task
context was about 0.01% of the total.

The irq disable/enable sequence used in this case to access content
from object stock is slow.  To optimize for user context access, there
are now two object stocks for task context and interrupt context access
respectively.

The task context object stock can be accessed after disabling preemption
which is cheap in non-preempt kernel. The interrupt context object stock
can only be accessed after disabling interrupt. User context code can
access interrupt object stock, but not vice versa.

The mod_objcg_state() function is also modified to make sure that memcg
and lruvec stat updates are done with interrupts disabled.

The downside of this change is that there are more data stored in local
object stocks and not reflected in the charge counter and the vmstat
arrays.  However, this is a small price to pay for better performance.

Signed-off-by: Waiman Long <longman@redhat.com>
---
 mm/memcontrol.c | 71 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 57 insertions(+), 14 deletions(-)

Comments

kernel test robot April 10, 2021, 6:07 a.m. UTC | #1
Hi Waiman,

I love your patch! Perhaps something to improve:

[auto build test WARNING on dennis-percpu/for-next]
[also build test WARNING on linus/master v5.12-rc6 next-20210409]
[cannot apply to hnaz-linux-mm/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Waiman-Long/mm-memcg-Reduce-kmemcache-memory-accounting-overhead/20210410-071958
base:   https://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu.git for-next
config: arm64-randconfig-r031-20210409 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project dd453a1389b6a7e6d9214b449d3c54981b1a89b6)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm64 cross compiling tool for clang build
        # apt-get install binutils-aarch64-linux-gnu
        # https://github.com/0day-ci/linux/commit/1f4e22fce44599095a55535301ca83adc5d3a4fe
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Waiman-Long/mm-memcg-Reduce-kmemcache-memory-accounting-overhead/20210410-071958
        git checkout 1f4e22fce44599095a55535301ca83adc5d3a4fe
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> mm/memcontrol.c:3234:10: warning: variable 'stock' is uninitialized when used here [-Wuninitialized]
           stock = get_obj_stock(flags);
                   ^~~~~~~~~~~~~~~~~~~~
   mm/memcontrol.c:2284:16: note: expanded from macro 'get_obj_stock'
                   obj_stock = &stock->task_obj;   \
                                ^~~~~
   mm/memcontrol.c:3234:10: note: variable 'stock' is declared here
   mm/memcontrol.c:2278:2: note: expanded from macro 'get_obj_stock'
           struct memcg_stock_pcp *stock;          \
           ^
   mm/memcontrol.c:3329:2: warning: variable 'stock' is uninitialized when used here [-Wuninitialized]
           get_obj_stock(flags);
           ^~~~~~~~~~~~~~~~~~~~
   mm/memcontrol.c:2284:16: note: expanded from macro 'get_obj_stock'
                   obj_stock = &stock->task_obj;   \
                                ^~~~~
   mm/memcontrol.c:3329:2: note: variable 'stock' is declared here
   mm/memcontrol.c:2278:2: note: expanded from macro 'get_obj_stock'
           struct memcg_stock_pcp *stock;          \
           ^
   mm/memcontrol.c:3371:2: warning: variable 'stock' is uninitialized when used here [-Wuninitialized]
           get_obj_stock(flags);
           ^~~~~~~~~~~~~~~~~~~~
   mm/memcontrol.c:2284:16: note: expanded from macro 'get_obj_stock'
                   obj_stock = &stock->task_obj;   \
                                ^~~~~
   mm/memcontrol.c:3371:2: note: variable 'stock' is declared here
   mm/memcontrol.c:2278:2: note: expanded from macro 'get_obj_stock'
           struct memcg_stock_pcp *stock;          \
           ^
   mm/memcontrol.c:3426:2: warning: variable 'stock' is uninitialized when used here [-Wuninitialized]
           get_obj_stock(flags);
           ^~~~~~~~~~~~~~~~~~~~
   mm/memcontrol.c:2284:16: note: expanded from macro 'get_obj_stock'
                   obj_stock = &stock->task_obj;   \
                                ^~~~~
   mm/memcontrol.c:3426:2: note: variable 'stock' is declared here
   mm/memcontrol.c:2278:2: note: expanded from macro 'get_obj_stock'
           struct memcg_stock_pcp *stock;          \
           ^
   4 warnings generated.


vim +/stock +3234 mm/memcontrol.c

  3227	
  3228	static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
  3229	{
  3230		struct obj_stock *stock;
  3231		unsigned long flags;
  3232		bool ret = false;
  3233	
> 3234		stock = get_obj_stock(flags);
  3235	
  3236		stock = current_obj_stock();
  3237		if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
  3238			stock->nr_bytes -= nr_bytes;
  3239			ret = true;
  3240		}
  3241	
  3242		put_obj_stock(flags);
  3243	
  3244		return ret;
  3245	}
  3246	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Waiman Long April 12, 2021, 2:07 p.m. UTC | #2
On 4/10/21 2:07 AM, kernel test robot wrote:
> Hi Waiman,
>
> I love your patch! Perhaps something to improve:
>
> [auto build test WARNING on dennis-percpu/for-next]
> [also build test WARNING on linus/master v5.12-rc6 next-20210409]
> [cannot apply to hnaz-linux-mm/master]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url:    https://github.com/0day-ci/linux/commits/Waiman-Long/mm-memcg-Reduce-kmemcache-memory-accounting-overhead/20210410-071958
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu.git for-next
> config: arm64-randconfig-r031-20210409 (attached as .config)
> compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project dd453a1389b6a7e6d9214b449d3c54981b1a89b6)
> reproduce (this is a W=1 build):
>          wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>          chmod +x ~/bin/make.cross
>          # install arm64 cross compiling tool for clang build
>          # apt-get install binutils-aarch64-linux-gnu
>          # https://github.com/0day-ci/linux/commit/1f4e22fce44599095a55535301ca83adc5d3a4fe
>          git remote add linux-review https://github.com/0day-ci/linux
>          git fetch --no-tags linux-review Waiman-Long/mm-memcg-Reduce-kmemcache-memory-accounting-overhead/20210410-071958
>          git checkout 1f4e22fce44599095a55535301ca83adc5d3a4fe
>          # save the attached .config to linux build tree
>          COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm64
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
>
> All warnings (new ones prefixed by >>):
>
>>> mm/memcontrol.c:3234:10: warning: variable 'stock' is uninitialized when used here [-Wuninitialized]
>             stock = get_obj_stock(flags);
>                     ^~~~~~~~~~~~~~~~~~~~
>     mm/memcontrol.c:2284:16: note: expanded from macro 'get_obj_stock'
>                     obj_stock = &stock->task_obj;   \
>                                  ^~~~~
>     mm/memcontrol.c:3234:10: note: variable 'stock' is declared here
>     mm/memcontrol.c:2278:2: note: expanded from macro 'get_obj_stock'
>             struct memcg_stock_pcp *stock;          \
>             ^
>     mm/memcontrol.c:3329:2: warning: variable 'stock' is uninitialized when used here [-Wuninitialized]
>             get_obj_stock(flags);
>             ^~~~~~~~~~~~~~~~~~~~
>     mm/memcontrol.c:2284:16: note: expanded from macro 'get_obj_stock'
>                     obj_stock = &stock->task_obj;   \
>                                  ^~~~~
>     mm/memcontrol.c:3329:2: note: variable 'stock' is declared here
>     mm/memcontrol.c:2278:2: note: expanded from macro 'get_obj_stock'
>             struct memcg_stock_pcp *stock;          \
>             ^
>     mm/memcontrol.c:3371:2: warning: variable 'stock' is uninitialized when used here [-Wuninitialized]
>             get_obj_stock(flags);
>             ^~~~~~~~~~~~~~~~~~~~
>     mm/memcontrol.c:2284:16: note: expanded from macro 'get_obj_stock'
>                     obj_stock = &stock->task_obj;   \
>                                  ^~~~~
>     mm/memcontrol.c:3371:2: note: variable 'stock' is declared here
>     mm/memcontrol.c:2278:2: note: expanded from macro 'get_obj_stock'
>             struct memcg_stock_pcp *stock;          \
>             ^
>     mm/memcontrol.c:3426:2: warning: variable 'stock' is uninitialized when used here [-Wuninitialized]
>             get_obj_stock(flags);
>             ^~~~~~~~~~~~~~~~~~~~
>     mm/memcontrol.c:2284:16: note: expanded from macro 'get_obj_stock'
>                     obj_stock = &stock->task_obj;   \
>                                  ^~~~~
>     mm/memcontrol.c:3426:2: note: variable 'stock' is declared here
>     mm/memcontrol.c:2278:2: note: expanded from macro 'get_obj_stock'
>             struct memcg_stock_pcp *stock;          \
>             ^
>     4 warnings generated.
>
>
> vim +/stock +3234 mm/memcontrol.c
>
>    3227	
>    3228	static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
>    3229	{
>    3230		struct obj_stock *stock;
>    3231		unsigned long flags;
>    3232		bool ret = false;
>    3233	
>> 3234		stock = get_obj_stock(flags);
>    3235	
>    3236		stock = current_obj_stock();
>    3237		if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
>    3238			stock->nr_bytes -= nr_bytes;
>    3239			ret = true;
>    3240		}
>    3241	
>    3242		put_obj_stock(flags);
>    3243	
>    3244		return ret;
>    3245	}
>    3246	
>
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

My bad, I somehow missed it. I will fix that in the next version.

Thanks,
Longman
Roman Gushchin April 12, 2021, 6:55 p.m. UTC | #3
On Fri, Apr 09, 2021 at 07:18:42PM -0400, Waiman Long wrote:
> Most kmem_cache_alloc() calls are from user context. With instrumentation
> enabled, the measured amount of kmem_cache_alloc() calls from non-task
> context was about 0.01% of the total.
> 
> The irq disable/enable sequence used in this case to access content
> from object stock is slow.  To optimize for user context access, there
> are now two object stocks for task context and interrupt context access
> respectively.
> 
> The task context object stock can be accessed after disabling preemption
> which is cheap in non-preempt kernel. The interrupt context object stock
> can only be accessed after disabling interrupt. User context code can
> access interrupt object stock, but not vice versa.
> 
> The mod_objcg_state() function is also modified to make sure that memcg
> and lruvec stat updates are done with interrupted disabled.
> 
> The downside of this change is that there are more data stored in local
> object stocks and not reflected in the charge counter and the vmstat
> arrays.  However, this is a small price to pay for better performance.

I agree, the extra memory space is not a significant concern.
I'd be more worried about the code complexity, but the result looks
nice to me!

Acked-by: Roman Gushchin <guro@fb.com>

Btw, it seems that the mm tree ran a bit off, so I had to apply this series
on top of Linus's tree to review. Please, rebase.

Thanks!
Waiman Long April 12, 2021, 7:58 p.m. UTC | #4
On 4/12/21 2:55 PM, Roman Gushchin wrote:
> On Fri, Apr 09, 2021 at 07:18:42PM -0400, Waiman Long wrote:
>> Most kmem_cache_alloc() calls are from user context. With instrumentation
>> enabled, the measured amount of kmem_cache_alloc() calls from non-task
>> context was about 0.01% of the total.
>>
>> The irq disable/enable sequence used in this case to access content
>> from object stock is slow.  To optimize for user context access, there
>> are now two object stocks for task context and interrupt context access
>> respectively.
>>
>> The task context object stock can be accessed after disabling preemption
>> which is cheap in non-preempt kernel. The interrupt context object stock
>> can only be accessed after disabling interrupt. User context code can
>> access interrupt object stock, but not vice versa.
>>
>> The mod_objcg_state() function is also modified to make sure that memcg
>> and lruvec stat updates are done with interrupted disabled.
>>
>> The downside of this change is that there are more data stored in local
>> object stocks and not reflected in the charge counter and the vmstat
>> arrays.  However, this is a small price to pay for better performance.
> I agree, the extra memory space is not a significant concern.
> I'd be more worried about the code complexity, but the result looks
> nice to me!
>
> Acked-by: Roman Gushchin <guro@fb.com>
>
> Btw, it seems that the mm tree ran a bit off, so I had to apply this series
> on top of Linus's tree to review. Please, rebase.

This patchset is based on the code in Linus' tree. I had applied the 
patchset to linux-next to see if there were any conflicts. Two of the 
patches had minor fuzzes around the edge but no actual merge conflict 
for now.

Cheers,
Longman
diff mbox series

Patch

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 69f728383efe..00c9074e42e5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2229,7 +2229,8 @@  struct obj_stock {
 struct memcg_stock_pcp {
 	struct mem_cgroup *cached; /* this never be root cgroup */
 	unsigned int nr_pages;
-	struct obj_stock obj;
+	struct obj_stock task_obj;
+	struct obj_stock irq_obj;
 
 	struct work_struct work;
 	unsigned long flags;
@@ -2254,11 +2255,46 @@  static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
 }
 #endif
 
+/*
+ * Most kmem_cache_alloc() calls are from user context. The irq disable/enable
+ * sequence used in this case to access content from object stock is slow.
+ * To optimize for user context access, there are now two object stocks for
+ * task context and interrupt context access respectively.
+ *
+ * The task context object stock can be accessed by disabling preemption only
+ * which is cheap in non-preempt kernel. The interrupt context object stock
+ * can only be accessed after disabling interrupt. User context code can
+ * access interrupt object stock, but not vice versa.
+ */
 static inline struct obj_stock *current_obj_stock(void)
 {
 	struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock);
 
-	return &stock->obj;
+	return in_task() ? &stock->task_obj : &stock->irq_obj;
+}
+
+#define get_obj_stock(flags)						\
+({									\
+	struct obj_stock *obj_stock;					\
+									\
+	/* Pin the CPU before resolving the per-cpu stock pointer. */	\
+	if (in_task()) {						\
+		preempt_disable();					\
+		(flags) = -1L;						\
+		obj_stock = &this_cpu_ptr(&memcg_stock)->task_obj;	\
+	} else {							\
+		local_irq_save(flags);					\
+		obj_stock = &this_cpu_ptr(&memcg_stock)->irq_obj;	\
+	}								\
+	obj_stock;							\
+})
+
+static inline void put_obj_stock(unsigned long flags)
+{
+	if (flags == -1L)
+		preempt_enable();
+	else
+		local_irq_restore(flags);
 }
 
 /**
@@ -2327,7 +2363,9 @@  static void drain_local_stock(struct work_struct *dummy)
 	local_irq_save(flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
-	drain_obj_stock(&stock->obj);
+	drain_obj_stock(&stock->irq_obj);
+	if (in_task())
+		drain_obj_stock(&stock->task_obj);
 	drain_stock(stock);
 	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
 
@@ -3183,7 +3221,7 @@  static inline void mod_objcg_state(struct obj_cgroup *objcg,
 	memcg = obj_cgroup_memcg(objcg);
 	if (pgdat)
 		lruvec = mem_cgroup_lruvec(memcg, pgdat);
-	__mod_memcg_lruvec_state(memcg, lruvec, idx, nr);
+	mod_memcg_lruvec_state(memcg, lruvec, idx, nr);
 	rcu_read_unlock();
 }
 
@@ -3193,7 +3231,7 @@  static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 	unsigned long flags;
 	bool ret = false;
 
-	local_irq_save(flags);
+	stock = get_obj_stock(flags);
 
 	stock = current_obj_stock();
 	if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
@@ -3201,7 +3239,7 @@  static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 		ret = true;
 	}
 
-	local_irq_restore(flags);
+	put_obj_stock(flags);
 
 	return ret;
 }
@@ -3254,8 +3292,13 @@  static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
 {
 	struct mem_cgroup *memcg;
 
-	if (stock->obj.cached_objcg) {
-		memcg = obj_cgroup_memcg(stock->obj.cached_objcg);
+	if (in_task() && stock->task_obj.cached_objcg) {
+		memcg = obj_cgroup_memcg(stock->task_obj.cached_objcg);
+		if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
+			return true;
+	}
+	if (stock->irq_obj.cached_objcg) {
+		memcg = obj_cgroup_memcg(stock->irq_obj.cached_objcg);
 		if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
 			return true;
 	}
@@ -3283,9 +3326,9 @@  static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 {
 	unsigned long flags;
 
-	local_irq_save(flags);
+	get_obj_stock(flags);
 	__refill_obj_stock(objcg, nr_bytes);
-	local_irq_restore(flags);
+	put_obj_stock(flags);
 }
 
 static void __mod_obj_stock_state(struct obj_cgroup *objcg,
@@ -3325,9 +3368,9 @@  void mod_obj_stock_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
 {
 	unsigned long flags;
 
-	local_irq_save(flags);
+	get_obj_stock(flags);
 	__mod_obj_stock_state(objcg, pgdat, idx, nr);
-	local_irq_restore(flags);
+	put_obj_stock(flags);
 }
 
 int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size)
@@ -3380,10 +3423,10 @@  void obj_cgroup_uncharge_mod_state(struct obj_cgroup *objcg, size_t size,
 {
 	unsigned long flags;
 
-	local_irq_save(flags);
+	get_obj_stock(flags);
 	__refill_obj_stock(objcg, size);
 	__mod_obj_stock_state(objcg, pgdat, idx, -(int)size);
-	local_irq_restore(flags);
+	put_obj_stock(flags);
 }
 
 #endif /* CONFIG_MEMCG_KMEM */