
[4/4] mm: memcg: apply proactive reclaim into cgroupv1

Message ID 20231108065818.19932-5-link@vivo.com (mailing list archive)
State: New
Series: Introduce unbalance proactive reclaim

Commit Message

Huan Yang Nov. 8, 2023, 6:58 a.m. UTC
For Android use, apply the proactive reclaim interface to cgroup v1.

Signed-off-by: Huan Yang <link@vivo.com>
---
 .../admin-guide/cgroup-v1/memory.rst          |  38 +++-
 mm/memcontrol.c                               | 170 +++++++++---------
 2 files changed, 124 insertions(+), 84 deletions(-)
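
A minimal usage sketch of the interface this series adds, assuming the
v1 memory controller is mounted at /sys/fs/cgroup/memory and a child
group named "app" exists (mount point and group name are illustrative
assumptions, not part of the patch):

  # Reclaim up to 100M; the kernel picks the anon/file mix.
  echo "100M" > /sys/fs/cgroup/memory/app/memory.reclaim

  # Reclaim up to 100M from anon pages only (swappiness 200, per this series).
  echo "100M" 200 > /sys/fs/cgroup/memory/app/memory.reclaim

  # Reclaim up to 100M from file pages only (swappiness 0).
  echo "100M" 0 > /sys/fs/cgroup/memory/app/memory.reclaim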

Comments

kernel test robot Nov. 8, 2023, 9:06 p.m. UTC | #1
Hi Huan,

kernel test robot noticed the following build warnings:

[auto build test WARNING on akpm-mm/mm-everything]
[also build test WARNING on tj-cgroup/for-next linus/master v6.6 next-20231108]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Huan-Yang/mm-vmscan-LRU-unbalance-cgroup-reclaim/20231108-151757
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20231108065818.19932-5-link%40vivo.com
patch subject: [PATCH 4/4] mm: memcg: apply proactive reclaim into cgroupv1
reproduce: (https://download.01.org/0day-ci/archive/20231109/202311090446.NKFRnuGv-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311090446.NKFRnuGv-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> Documentation/admin-guide/cgroup-v1/memory.rst:93: WARNING: Malformed table.
>> Documentation/admin-guide/cgroup-v1/memory.rst:977: WARNING: Title underline too short.
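
The rst warnings above should also be reproducible locally with the
kernel's Sphinx documentation build; a general invocation (not taken
from the robot's reproduce script) limited to the admin-guide book:

  # Build only Documentation/admin-guide; Sphinx prints the warnings.
  make SPHINXDIRS="admin-guide" htmldocs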

vim +93 Documentation/admin-guide/cgroup-v1/memory.rst

    62	
    63	==================================== ==========================================
    64	 tasks				     attach a task(thread) and show list of
    65					     threads
    66	 cgroup.procs			     show list of processes
    67	 cgroup.event_control		     an interface for event_fd()
    68					     This knob is not available on CONFIG_PREEMPT_RT systems.
    69	 memory.usage_in_bytes		     show current usage for memory
    70					     (See 5.5 for details)
    71	 memory.memsw.usage_in_bytes	     show current usage for memory+Swap
    72					     (See 5.5 for details)
    73	 memory.limit_in_bytes		     set/show limit of memory usage
    74	 memory.memsw.limit_in_bytes	     set/show limit of memory+Swap usage
    75	 memory.failcnt			     show the number of memory usage hits limits
    76	 memory.memsw.failcnt		     show the number of memory+Swap hits limits
    77	 memory.max_usage_in_bytes	     show max memory usage recorded
    78	 memory.memsw.max_usage_in_bytes     show max memory+Swap usage recorded
    79	 memory.soft_limit_in_bytes	     set/show soft limit of memory usage
    80					     This knob is not available on CONFIG_PREEMPT_RT systems.
    81	 memory.stat			     show various statistics
    82	 memory.use_hierarchy		     set/show hierarchical account enabled
    83	                                     This knob is deprecated and shouldn't be
    84	                                     used.
    85	 memory.force_empty		     trigger forced page reclaim
    86	 memory.pressure_level		     set memory pressure notifications
    87	 memory.swappiness		     set/show swappiness parameter of vmscan
    88					     (See sysctl's vm.swappiness)
    89	 memory.move_charge_at_immigrate     set/show controls of moving charges
    90	                                     This knob is deprecated and shouldn't be
    91	                                     used.
    92	 memory.oom_control		     set/show oom controls.
  > 93	 memory.memory		     proactive reclaim.
    94	 memory.numa_stat		     show the number of memory usage per numa
    95					     node
    96	 memory.kmem.limit_in_bytes          Deprecated knob to set and read the kernel
    97	                                     memory hard limit. Kernel hard limit is not
    98	                                     supported since 5.16. Writing any value to
    99	                                     do file will not have any effect same as if
   100	                                     nokmem kernel parameter was specified.
   101	                                     Kernel memory is still charged and reported
   102	                                     by memory.kmem.usage_in_bytes.
   103	 memory.kmem.usage_in_bytes          show current kernel memory allocation
   104	 memory.kmem.failcnt                 show the number of kernel memory usage
   105					     hits limits
   106	 memory.kmem.max_usage_in_bytes      show max kernel memory usage recorded
   107	
   108	 memory.kmem.tcp.limit_in_bytes      set/show hard limit for tcp buf memory
   109	 memory.kmem.tcp.usage_in_bytes      show current tcp buf memory allocation
   110	 memory.kmem.tcp.failcnt             show the number of tcp buf memory usage
   111					     hits limits
   112	 memory.kmem.tcp.max_usage_in_bytes  show max tcp buf memory usage recorded
   113	==================================== ==========================================
   114

Patch

diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
index ca7d9402f6be..600bf26a470a 100644
--- a/Documentation/admin-guide/cgroup-v1/memory.rst
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
@@ -90,6 +90,7 @@  Brief summary of control files.
                                      This knob is deprecated and shouldn't be
                                      used.
  memory.oom_control		     set/show oom controls.
+ memory.reclaim			     trigger proactive memory reclaim
  memory.numa_stat		     show the number of memory usage per numa
 				     node
  memory.kmem.limit_in_bytes          Deprecated knob to set and read the kernel
@@ -972,7 +973,42 @@  Test:
    (Expect a bunch of notifications, and eventually, the oom-killer will
    trigger.)
 
-12. TODO
+12. Proactive Reclaim
+=====================
+memory.reclaim is a write-only file which exists for all cgroups.
+
+This is a simple interface to trigger memory reclaim in the
+target cgroup.
+
+This file accepts a single value, the number of bytes to reclaim,
+optionally followed by a swappiness value as described below.
+
+Example::
+
+  echo "1G" > memory.reclaim
+
+The interface can be extended with an optional swappiness value
+to configure the reclaim behavior, i.e. to select the type of
+memory to reclaim from (anon, file, ..).
+
+Example::
+
+  echo "1G" 200 > memory.reclaim (only reclaim anon)
+  echo "1G" 0  > memory.reclaim (only reclaim file)
+  echo "1G" 1  > memory.reclaim (only reclaim file)
+
+Please note that the kernel can over- or under-reclaim from
+the target cgroup. If fewer bytes are reclaimed than the
+specified amount, -EAGAIN is returned.
+
+Please note that the proactive reclaim (triggered by this
+interface) is not meant to indicate memory pressure on the
+memory cgroup. Therefore socket memory balancing triggered by
+the memory reclaim normally is not exercised in this case.
+This means that the networking layer will not adapt based on
+reclaim induced by memory.reclaim.
+
+13. TODO
 ========
 
 1. Make per-cgroup scanner reclaim not-shared pages first
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a0e460abd41c..03de3387d714 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5209,6 +5209,89 @@  static int mem_cgroup_slab_show(struct seq_file *m, void *p)
 
 static int memory_stat_show(struct seq_file *m, void *v);
 
+static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
+			      size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
+	unsigned long nr_to_reclaim, nr_reclaimed = 0;
+	unsigned int reclaim_options;
+	int swappiness = -1, org_swappiness, n;
+	char *tmpbuf;
+	int err;
+
+	tmpbuf = kvzalloc(nbytes, GFP_KERNEL);
+	if (unlikely(!tmpbuf))
+		return -ENOMEM;
+
+	buf = skip_spaces(buf);
+	n = sscanf(buf, "%s %d", tmpbuf, &swappiness);
+	if (n < 1) {
+		err = -EINVAL;
+		goto out_free;
+	}
+
+	if (n == 2 && (swappiness > 200 || swappiness < 0)) {
+		err = -EINVAL;
+		goto out_free;
+	}
+
+	err = page_counter_memparse(tmpbuf, "", &nr_to_reclaim);
+	if (err)
+		goto out_free;
+
+	reclaim_options	= MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
+	if (swappiness != -1) {
+		org_swappiness = memcg->swappiness;
+		memcg->swappiness = swappiness;
+		if (swappiness == 200)
+			reclaim_options |= MEMCG_RECLAIM_ANON;
+		else if (swappiness == 0 || swappiness == 1)
+			reclaim_options |= MEMCG_RECLAIM_FILE;
+	}
+
+	while (nr_reclaimed < nr_to_reclaim) {
+		unsigned long reclaimed;
+
+		if (signal_pending(current)) {
+			err = -EINTR;
+			goto out;
+		}
+
+		/*
+		 * This is the final attempt, drain percpu lru caches in the
+		 * hope of introducing more evictable pages for
+		 * try_to_free_mem_cgroup_pages().
+		 */
+		if (!nr_retries)
+			lru_add_drain_all();
+
+		reclaimed = try_to_free_mem_cgroup_pages(memcg,
+					min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX),
+					GFP_KERNEL, reclaim_options);
+
+		if (!reclaimed && !nr_retries--) {
+			err = -EAGAIN;
+			goto out;
+		}
+
+		nr_reclaimed += reclaimed;
+	}
+
+	if (swappiness != -1)
+		memcg->swappiness = org_swappiness;
+
+	return nbytes;
+
+out:
+	if (swappiness != -1)
+		memcg->swappiness = org_swappiness;
+
+out_free:
+	kvfree(tmpbuf);
+	return err;
+}
+
 static struct cftype mem_cgroup_legacy_files[] = {
 	{
 		.name = "usage_in_bytes",
@@ -5272,6 +5355,10 @@  static struct cftype mem_cgroup_legacy_files[] = {
 		.seq_show = mem_cgroup_oom_control_read,
 		.write_u64 = mem_cgroup_oom_control_write,
 	},
+	{
+		.name = "reclaim",
+		.write = memory_reclaim,
+	},
 	{
 		.name = "pressure_level",
 		.seq_show = mem_cgroup_dummy_seq_show,
@@ -6946,89 +7033,6 @@  static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
 	return nbytes;
 }
 
-static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
-			      size_t nbytes, loff_t off)
-{
-	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
-	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
-	unsigned long nr_to_reclaim, nr_reclaimed = 0;
-	unsigned int reclaim_options;
-	int swappiness = -1, org_swappiness, n;
-	char *tmpbuf;
-	int err;
-
-	tmpbuf = kvzalloc(nbytes, GFP_KERNEL);
-	if (unlikely(!tmpbuf))
-		return -ENOMEM;
-
-	buf = skip_spaces(buf);
-	n = sscanf(buf, "%s %d", tmpbuf, &swappiness);
-	if (n < 1) {
-		err = -EINVAL;
-		goto out_free;
-	}
-
-	if (n == 2 && (swappiness > 200 || swappiness < 0)) {
-		err = -EINVAL;
-		goto out_free;
-	}
-
-	err = page_counter_memparse(tmpbuf, "", &nr_to_reclaim);
-	if (err)
-		goto out_free;
-
-	reclaim_options	= MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
-	if (swappiness != -1) {
-		org_swappiness = memcg->swappiness;
-		memcg->swappiness = swappiness;
-		if (swappiness == 200)
-			reclaim_options |= MEMCG_RECLAIM_ANON;
-		else if (swappiness == 0 || swappiness == 1)
-			reclaim_options |= MEMCG_RECLAIM_FILE;
-	}
-
-	while (nr_reclaimed < nr_to_reclaim) {
-		unsigned long reclaimed;
-
-		if (signal_pending(current)) {
-			err = -EINTR;
-			goto out;
-		}
-
-		/*
-		 * This is the final attempt, drain percpu lru caches in the
-		 * hope of introducing more evictable pages for
-		 * try_to_free_mem_cgroup_pages().
-		 */
-		if (!nr_retries)
-			lru_add_drain_all();
-
-		reclaimed = try_to_free_mem_cgroup_pages(memcg,
-					min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX),
-					GFP_KERNEL, reclaim_options);
-
-		if (!reclaimed && !nr_retries--) {
-			err = -EAGAIN;
-			goto out;
-		}
-
-		nr_reclaimed += reclaimed;
-	}
-
-	if (swappiness != -1)
-		memcg->swappiness = org_swappiness;
-
-	return nbytes;
-
-out:
-	if (swappiness != -1)
-		memcg->swappiness = org_swappiness;
-
-out_free:
-	kvfree(tmpbuf);
-	return err;
-}
-
 static struct cftype memory_files[] = {
 	{
 		.name = "current",