
[v3] mm: Add nodes= arg to memory.reclaim

Message ID 20221202223533.1785418-1-almasrymina@google.com (mailing list archive)
State New
Series [v3] mm: Add nodes= arg to memory.reclaim

Commit Message

Mina Almasry Dec. 2, 2022, 10:35 p.m. UTC
The nodes= arg instructs the kernel to only scan the given nodes for
proactive reclaim. As an example use case, consider a two-tier memory system:

nodes 0,1 -> top tier
nodes 2,3 -> second tier

$ echo "1m nodes=0" > memory.reclaim

This instructs the kernel to attempt to reclaim 1m of memory from node 0.
Since node 0 is a top tier node, demotion will be attempted first. This
is useful to direct proactive reclaim to specific nodes that are under
pressure.

$ echo "1m nodes=2,3" > memory.reclaim

This instructs the kernel to attempt to reclaim 1m of memory from the second
tier. Since this tier has no demotion targets, the memory will be reclaimed
directly.

$ echo "1m nodes=0,1" > memory.reclaim

This instructs the kernel to reclaim memory from the top tier nodes, which can
be desirable according to the userspace policy if there is pressure on
the top tiers. Since these nodes have demotion targets, the kernel will
attempt demotion first.

Since commit 3f1509c57b1b ("Revert "mm/vmscan: never demote for memcg
reclaim""), the proactive reclaim interface memory.reclaim does both
reclaim and demotion. Reclaim and demotion incur different latency costs
to the jobs in the cgroup. Demoted memory remains addressable by
userspace, albeit at a higher latency, whereas reclaimed memory must
incur a page fault before it can be accessed again.

The 'nodes' arg is useful to allow userspace to control demotion
and reclaim independently according to its policy: if memory.reclaim
is called on a node with demotion targets, it will attempt demotion first;
if it is called on a node without demotion targets, it will only attempt
reclaim.
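
For completeness, a userspace controller drives all of the above with a
plain write; a minimal sketch (the cgroup path is illustrative, and a
real controller would retry on -EAGAIN when the full amount could not
be reclaimed):

  /* Minimal sketch: request proactive reclaim of 1m from node 0 of
   * one cgroup. The cgroup path and amount are illustrative. */
  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  int main(void)
  {
  	const char req[] = "1m nodes=0";
  	int fd = open("/sys/fs/cgroup/job/memory.reclaim", O_WRONLY);

  	if (fd < 0) {
  		perror("open");
  		return 1;
  	}
  	if (write(fd, req, strlen(req)) < 0)
  		perror("write");	/* e.g. EAGAIN: goal not met */
  	close(fd);
  	return 0;
  }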

Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Mina Almasry <almasrymina@google.com>

---

v3:
- Dropped RFC tag from subject.
- Added Michal's Ack.
- Applied some of Bagas's comment suggestions.
- Converted try_to_free_mem_cgroup_pages() to take nodemask_t* instead of
  nodemask_t, as Shakeel and Muchun suggested (see the signature sketch
  below).
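
For reference, the signature change in the last bullet looks roughly
like this (reconstructed from the changelog above; the archive view
elides the diff body, so the exact parameter names are a best guess):

  /* mm/vmscan.c (sketch): passing the nodemask by pointer lets callers
   * without a node preference simply pass NULL. */
  unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
  					     unsigned long nr_pages,
  					     gfp_t gfp_mask,
  					     unsigned int reclaim_options,
  					     nodemask_t *nodemask);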

Cc: bagasdotme@gmail.com

Thanks for the comments and reviews.
---
 Documentation/admin-guide/cgroup-v2.rst | 15 +++---
 include/linux/swap.h                    |  3 +-
 mm/memcontrol.c                         | 67 ++++++++++++++++++++-----
 mm/vmscan.c                             |  4 +-
 4 files changed, 68 insertions(+), 21 deletions(-)
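
The diff body itself is elided in this archive view. Conceptually, the
mm/memcontrol.c hunk boils down to parsing an optional "nodes=" token
into a nodemask_t; a rough sketch of the idea (function name and
validation details are illustrative, not the literal patch):

  /* Sketch only -- not the literal mm/memcontrol.c hunk. Parse one
   * "nodes=<list>" token, e.g. "nodes=0" or "nodes=2,3". */
  static int reclaim_parse_nodes(const char *tok, nodemask_t *mask)
  {
  	if (strncmp(tok, "nodes=", 6))
  		return -EINVAL;
  	if (nodelist_parse(tok + 6, *mask))
  		return -EINVAL;
  	/* Only nodes that actually have memory make sense here. */
  	if (!nodes_subset(*mask, node_states[N_MEMORY]))
  		return -EINVAL;
  	return 0;
  }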

--
2.39.0.rc0.267.gcb52ba06e7-goog

Comments

Shakeel Butt Dec. 2, 2022, 11:51 p.m. UTC | #1
On Fri, Dec 2, 2022 at 2:37 PM Mina Almasry <almasrymina@google.com> wrote:
> [...]

Acked-by: Shakeel Butt <shakeelb@google.com>
Muchun Song Dec. 3, 2022, 3:17 a.m. UTC | #2
> On Dec 3, 2022, at 06:35, Mina Almasry <almasrymina@google.com> wrote:
> [...]

Acked-by: Muchun Song <songmuchun@bytedance.com>

Thanks.
Michal Hocko Dec. 12, 2022, 8:55 a.m. UTC | #3
On Fri 02-12-22 14:35:31, Mina Almasry wrote:
> [...]

After discussion in [1] I have realized that I haven't really thought
through all the consequences of this patch and therefore I am retracting
my ack here. I am not nacking the patch at this stage, but I also think
it shouldn't be merged now and we should really consider all the
consequences first.

Let me summarize my main concerns here as well. The proposed
implementation doesn't apply the provided nodemask to the whole reclaim
process. This means that demotion can happen outside of the mask, so the
user request cannot really control demotion targets, and that limits
the interface should there be any need for finer-grained control in
the future (see an example in [2]).
Another problem is that this can limit future reclaim extensions because
of existing assumptions of the interface [3] - specify only a top-tier
node to force aging without actually reclaiming any charges, and
(ab)use the interface only for aging on a multi-tier system. A change to
reclaim that stops demoting in some cases could break this use case.

My counter proposal would be to define the nodemask for memory.reclaim
as a domain to constrain the charge reclaim. That means both aging and
reclaim, including demotion, which is a part of aging. This will allow
control over where to demote for balancing purposes (e.g. demote to node 2
rather than 3), which is impossible with the proposed scheme.

[1] http://lkml.kernel.org/r/20221206023406.3182800-1-almasrymina@google.com
[2] http://lkml.kernel.org/r/Y5bnRtJ6sojtjgVD@dhcp22.suse.cz
[3] http://lkml.kernel.org/r/CAAPL-u8rgW-JACKUT5ChmGSJiTDABcDRjNzW_QxMjCTk9zO4sg@mail.gmail.com
Mina Almasry Dec. 13, 2022, 12:54 a.m. UTC | #4
On Mon, Dec 12, 2022 at 12:55 AM Michal Hocko <mhocko@suse.com> wrote:
>
> On Fri 02-12-22 14:35:31, Mina Almasry wrote:
> > [...]
>
> After discussion in [1] I have realized that I haven't really thought
> through all the consequences of this patch and therefore I am retracting
> my ack here. I am not nacking the patch at this statge but I also think
> this shouldn't be merged now and we should really consider all the
> consequences.
>
> Let me summarize my main concerns here as well. The proposed
> implementation doesn't apply the provided nodemask to the whole reclaim
> process. This means that demotion can happen outside of the mask so the
> the user request cannot really control demotion targets and that limits
> the interface should there be any need for a finer grained control in
> the future (see an example in [2]).
> Another problem is that this can limit future reclaim extensions because
> of existing assumptions of the interface [3] - specify only top-tier
> node to force the aging without actually reclaiming any charges and
> (ab)use the interface only for aging on multi-tier system. A change to
> the reclaim to not demote in some cases could break this usecase.
>

I think this is correct. My use case is to ask the kernel to do
demotion without reclaim in the cgroup, and the reason for that is
stated in the commit message:

"Reclaim and demotion incur different latency costs to the jobs in the
cgroup. Demoted memory would still be addressable by the userspace at
a higher latency, but reclaimed memory would need to incur a
pagefault."

For jobs of some latency tiers, we would like to trigger proactive
demotion (which incurs relatively low latency on the job), but not
trigger proactive reclaim (which incurs a pagefault). I initially had
proposed a separate interface for this, but Johannes directed me to
this interface instead in [1]. In the same email Johannes also tells
me that Meta's reclaim stack relies on memory.reclaim triggering
demotion, so it seems that I'm not the first to take a dependency on
this. Additionally in [2] Johannes also says it would be great if in
the long term reclaim policy and demotion policy do not diverge.

[1] https://lore.kernel.org/linux-mm/Y35fw2JSAeAddONg@cmpxchg.org/
[2] https://lore.kernel.org/linux-mm/Y36fIGFCFKiocAd6@cmpxchg.org/

> My counter proposal would be to define the nodemask for memory.reclaim
> as a domain to constrain the charge reclaim. That means both aging and
> reclaim including demotion which is a part of aging. This will allow
> to control where to demote for balancing purposes (e.g. demote to node 2
> rather than 3) which is impossible with the proposed scheme.
>

My understanding is that with this interface, in order to trigger
demotion, I would want to list both the top tier nodes and the bottom
tier nodes in the nodemask; and since the bottom tier nodes are in the
nodemask, the kernel will not just trigger demotion, but will also
trigger reclaim. This is very specifically not our use case and not
the goal of this patch.

I had also suggested adding a demotion= arg to memory.reclaim so the
userspace may customize this behavior, but Johannes rejected this in
[3] to adhere to the aging pipeline.

All in all I like Johannes's model in [3] describing the aging
pipeline and the relationship between demotion and reclaim. The nodes=
arg is just a hint to the kernel that the userspace is looking for
reclaim from a top tier node (which would be done by demotion
according to the aging pipeline) or a bottom tier node (which would be
done by reclaim according to the aging pipeline). I think this
interface is aligned with this model.

[3] https://lore.kernel.org/linux-mm/Y36XchdgTCsMP4jT@cmpxchg.org/

> [1] http://lkml.kernel.org/r/20221206023406.3182800-1-almasrymina@google.com
> [2] http://lkml.kernel.org/r/Y5bnRtJ6sojtjgVD@dhcp22.suse.cz
> [3] http://lkml.kernel.org/r/CAAPL-u8rgW-JACKUT5ChmGSJiTDABcDRjNzW_QxMjCTk9zO4sg@mail.gmail.com
> --
> Michal Hocko
> SUSE Labs
Huang, Ying Dec. 13, 2022, 6:30 a.m. UTC | #5
Mina Almasry <almasrymina@google.com> writes:

> [...]
>
> I think this is correct. My use case is to request from the kernel to
> do demotion without reclaim in the cgroup, and the reason for that is
> stated in the commit message:
>
> "Reclaim and demotion incur different latency costs to the jobs in the
> cgroup. Demoted memory would still be addressable by the userspace at
> a higher latency, but reclaimed memory would need to incur a
> pagefault."
>
> For jobs of some latency tiers, we would like to trigger proactive
> demotion (which incurs relatively low latency on the job), but not
> trigger proactive reclaim (which incurs a pagefault). I initially had
> proposed a separate interface for this, but Johannes directed me to
> this interface instead in [1]. In the same email Johannes also tells
> me that meta's reclaim stack relies on memory.reclaim triggering
> demotion, so it seems that I'm not the first to take a dependency on
> this. Additionally in [2] Johannes also says it would be great if in
> the long term reclaim policy and demotion policy do not diverge.
>
> [1] https://lore.kernel.org/linux-mm/Y35fw2JSAeAddONg@cmpxchg.org/
> [2] https://lore.kernel.org/linux-mm/Y36fIGFCFKiocAd6@cmpxchg.org/

After these discussions, I think the solution may be to use different
interfaces for "proactive demote" and "proactive reclaim".  That is,
reconsider "memory.demote".  In this way, we will always uncharge the
cgroup for "memory.reclaim".  This avoids the possible confusion there.
And, because demotion is considered aging, we don't need to disable
demotion for "memory.reclaim", just don't count it.

Best Regards,
Huang, Ying
Wei Xu Dec. 13, 2022, 7:48 a.m. UTC | #6
On Mon, Dec 12, 2022 at 10:32 PM Huang, Ying <ying.huang@intel.com> wrote:
>
> [...]
>
> After these discussion, I think the solution maybe use different
> interfaces for "proactive demote" and "proactive reclaim".  That is,
> reconsider "memory.demote".  In this way, we will always uncharge the
> cgroup for "memory.reclaim".  This avoid the possible confusion there.
> And, because demotion is considered aging, we don't need to disable
> demotion for "memory.reclaim", just don't count it.

+1 on memory.demote.

> Best Regards,
> Huang, Ying
>
Michal Hocko Dec. 13, 2022, 8:33 a.m. UTC | #7
On Mon 12-12-22 16:54:27, Mina Almasry wrote:
> On Mon, Dec 12, 2022 at 12:55 AM Michal Hocko <mhocko@suse.com> wrote:
[...]
> > Let me summarize my main concerns here as well. The proposed
> > implementation doesn't apply the provided nodemask to the whole reclaim
> > process. This means that demotion can happen outside of the mask so the
> > the user request cannot really control demotion targets and that limits
> > the interface should there be any need for a finer grained control in
> > the future (see an example in [2]).
> > Another problem is that this can limit future reclaim extensions because
> > of existing assumptions of the interface [3] - specify only top-tier
> > node to force the aging without actually reclaiming any charges and
> > (ab)use the interface only for aging on multi-tier system. A change to
> > the reclaim to not demote in some cases could break this usecase.
> >
> 
> I think this is correct. My use case is to request from the kernel to
> do demotion without reclaim in the cgroup, and the reason for that is
> stated in the commit message:
> 
> "Reclaim and demotion incur different latency costs to the jobs in the
> cgroup. Demoted memory would still be addressable by the userspace at
> a higher latency, but reclaimed memory would need to incur a
> pagefault."
> 
> For jobs of some latency tiers, we would like to trigger proactive
> demotion (which incurs relatively low latency on the job), but not
> trigger proactive reclaim (which incurs a pagefault). I initially had
> proposed a separate interface for this, but Johannes directed me to
> this interface instead in [1]. In the same email Johannes also tells
> me that meta's reclaim stack relies on memory.reclaim triggering
> demotion, so it seems that I'm not the first to take a dependency on
> this. Additionally in [2] Johannes also says it would be great if in
> the long term reclaim policy and demotion policy do not diverge.

I do recognize your need to control the demotion but I argue that it is
a bad idea to rely on an implicit behavior of the memory reclaim and an
interface which is _documented_ to primarily _reclaim_ memory.

Really, consider that the current demotion implementation will change
in the future, and that based on a newly added heuristic, memory reclaim
or compression might be preferred over migration to a different tier.
This might completely break your current assumptions and break your use
case, which relies on an implicit demotion behavior.  Do you see that as
a potential problem at all? What shall we do in that case? Special-case
memory.reclaim behavior?

Now to your specific use case. If there is a need to do memory
distribution balancing then fine, but this should be a well-defined
interface. E.g. is there a need to control not only demotion but
promotion as well? I haven't heard anybody requesting that so far,
but I can easily imagine that, like outsourcing memory reclaim to
userspace, someone might want to do the same thing with NUMA
balancing because $REASONS. Should that ever happen, I am pretty sure
hooking into memory.reclaim is not really a great idea.

See where I am coming from?

> [1] https://lore.kernel.org/linux-mm/Y35fw2JSAeAddONg@cmpxchg.org/
> [2] https://lore.kernel.org/linux-mm/Y36fIGFCFKiocAd6@cmpxchg.org/
Michal Hocko Dec. 13, 2022, 8:51 a.m. UTC | #8
On Tue 13-12-22 14:30:57, Huang, Ying wrote:
> Mina Almasry <almasrymina@google.com> writes:
[...]
> After these discussion, I think the solution maybe use different
> interfaces for "proactive demote" and "proactive reclaim".  That is,
> reconsider "memory.demote".  In this way, we will always uncharge the
> cgroup for "memory.reclaim".  This avoid the possible confusion there.
> And, because demotion is considered aging, we don't need to disable
> demotion for "memory.reclaim", just don't count it.

As already pointed out in my previous email, we should really think more
about future requirements. Do we add a memory.promote interface when there
is a request to implement NUMA balancing in userspace? Maybe yes,
but maybe the node balancing should be more generic than bound to memory
tiering and apply to more fine-grained nodemask control.

Fundamentally we already have APIs to age (MADV_COLD, MADV_FREE),
reclaim (MADV_PAGEOUT, MADV_DONTNEED) and MADV_WILLNEED to prioritize
(swap in, or read ahead), which are per mm/file. Their primary usability
issue is that they are process-centric, and that requires a very deep
understanding of the process mm layout, so they are not really usable
for larger-scale orchestration.
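
For reference, this is what the process-centric flavour looks like from
userspace; a minimal sketch (assumes Linux >= 5.4 for MADV_COLD and
MADV_PAGEOUT):

  /* Sketch: age, then reclaim, one mapped region of one process.
   * Per-VMA granularity is exactly the usability problem described
   * above. */
  #include <sys/mman.h>
  #include <stdio.h>

  static void age_then_reclaim(void *addr, size_t len)
  {
  	if (madvise(addr, len, MADV_COLD))	/* deactivate (age) */
  		perror("madvise(MADV_COLD)");
  	if (madvise(addr, len, MADV_PAGEOUT))	/* reclaim now */
  		perror("madvise(MADV_PAGEOUT)");
  }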
The important part of those interfaces is that they do not talk about
demotion because that is an implementation detail. I think we want to
follow that model at least. From a higher-level POV I believe we really
need an interface to age&reclaim and balance memory among nodes. Are
there more high-level use cases?
Johannes Weiner Dec. 13, 2022, 1:30 p.m. UTC | #9
On Tue, Dec 13, 2022 at 02:30:57PM +0800, Huang, Ying wrote:
> [...]
> 
> After these discussion, I think the solution maybe use different
> interfaces for "proactive demote" and "proactive reclaim".  That is,
> reconsider "memory.demote".  In this way, we will always uncharge the
> cgroup for "memory.reclaim".  This avoid the possible confusion there.
> And, because demotion is considered aging, we don't need to disable
> demotion for "memory.reclaim", just don't count it.

Hm, so in summary:

1) memory.reclaim would demote and reclaim like today, but it would
   change to only count reclaimed pages against the goal.

2) memory.demote would only demote.

   a) What if the demotion targets are full? Would it reclaim or fail?

3) Would memory.reclaim and memory.demote still need nodemasks? Would
   they return -EINVAL if a) memory.reclaim gets passed only toptier
   nodes or b) memory.demote gets passed any lasttier nodes?
Huang, Ying Dec. 13, 2022, 1:42 p.m. UTC | #10
Michal Hocko <mhocko@suse.com> writes:

> On Tue 13-12-22 14:30:57, Huang, Ying wrote:
>> Mina Almasry <almasrymina@google.com> writes:
> [...]
>> After these discussion, I think the solution maybe use different
>> interfaces for "proactive demote" and "proactive reclaim".  That is,
>> reconsider "memory.demote".  In this way, we will always uncharge the
>> cgroup for "memory.reclaim".  This avoid the possible confusion there.
>> And, because demotion is considered aging, we don't need to disable
>> demotion for "memory.reclaim", just don't count it.
>
> As already pointed out in my previous email, we should really think more
> about future requirements. Do we add memory.promote interface when there
> is a request to implement numa balancing into the userspace? Maybe yes
> but maybe the node balancing should be more generic than bound to memory
> tiering and apply to a more fine grained nodemask control.
>
> Fundamentally we already have APIs to age (MADV_COLD, MADV_FREE),
> reclaim (MADV_PAGEOUT, MADV_DONTNEED) and MADV_WILLNEED to prioritize
> (swap in, or read ahead) which are per mm/file. Their primary usability
> issue is that they are process centric and that requires a very deep
> understanding of the process mm layout so it is not really usable for a
> larger scale orchestration.
> The important part of those interfaces is that they do not talk about
> demotion because that is an implementation detail. I think we want to
> follow that model at least. From a higher level POV I believe we really
> need an interface to age&reclaim and balance memory among nodes. Are
> there more higher level usecases?

Yes.  If the high-level interfaces can satisfy the requirements, we
should use them or define them.  But I guess Mina and Wei Xu have some
requirements at the level of memory tiers (demotion/promotion)?

Best Regards,
Huang, Ying
Michal Hocko Dec. 13, 2022, 2:03 p.m. UTC | #11
On Tue 13-12-22 14:30:40, Johannes Weiner wrote:
> On Tue, Dec 13, 2022 at 02:30:57PM +0800, Huang, Ying wrote:
[...]
> > After these discussion, I think the solution maybe use different
> > interfaces for "proactive demote" and "proactive reclaim".  That is,
> > reconsider "memory.demote".  In this way, we will always uncharge the
> > cgroup for "memory.reclaim".  This avoid the possible confusion there.
> > And, because demotion is considered aging, we don't need to disable
> > demotion for "memory.reclaim", just don't count it.
> 
> Hm, so in summary:
> 
> 1) memory.reclaim would demote and reclaim like today, but it would
>    change to only count reclaimed pages against the goal.
> 
> 2) memory.demote would only demote.
> 
>    a) What if the demotion targets are full? Would it reclaim or fail?
> 
> 3) Would memory.reclaim and memory.demote still need nodemasks? Would
>    they return -EINVAL if a) memory.reclaim gets passed only toptier
>    nodes or b) memory.demote gets passed any lasttier nodes?

I would also add
4) Do we want to allow control over the demotion path (e.g. which node to
   demote from and to), and how do we achieve that?
5) Is the demotion API restricted to multi-tier systems, or is any NUMA
   configuration allowed as well?
Johannes Weiner Dec. 13, 2022, 3:58 p.m. UTC | #12
On Tue, Dec 13, 2022 at 09:33:24AM +0100, Michal Hocko wrote:
> I do recognize your need to control the demotion but I argue that it is
> a bad idea to rely on an implicit behavior of the memory reclaim and an
> interface which is _documented_ to primarily _reclaim_ memory.

I think memory.reclaim should demote as part of page aging. What I'd
like to avoid is *having* to manually control the aging component in
the interface (e.g. making memory.reclaim *only* reclaim, and
*requiring* a coordinated use of memory.demote to ensure progress.)

> Really, consider that the current demotion implementation will change
> in the future and based on a newly added heuristic memory reclaim or
> compression would be preferred over migration to a different tier.  This
> might completely break your current assumptions and break your usecase
> which relies on an implicit demotion behavior.  Do you see that as a
> potential problem at all? What shall we do in that case? Special case
> memory.reclaim behavior?

Shouldn't that be derived from the distance properties in the tier
configuration?

I.e. if local compression is faster than demoting to a slower node, we
should maybe have a separate tier for that. Ignoring proactive reclaim
or demotion commands for a second: on that node, global memory
pressure should always compress first, while the oldest pages from the
compression cache should demote to the other node(s) - until they
eventually get swapped out.

However fine-grained we make proactive reclaim control over these
stages, it should at least be possible for the user to request the
default behavior that global pressure follows, without jumping through
hoops or requiring the coordinated use of multiple knobs. So IMO there
is an argument for having a singular knob that requests comprehensive
aging and reclaiming across the configured hierarchy.

As far as explicit control over the individual stages goes - no idea
if you would call the compression stage demotion or reclaim. The
distinction still does not make much sense to me, since reclaim is
just another form of demotion. Sure, page faults have a different
access latency than dax to slower memory. But you could also have 3
tiers of memory where the difference between tier 1 and 2 is much
smaller than the difference between 2 and 3, and you might want to
apply different demotion rates between them as well.

The other argument is that demotion does not free cgroup memory,
whereas reclaim does. But with multiple memory tiers of vastly
different performance, isn't there also an argument for granting
cgroups different shares of each memory? So that a higher priority
group has access to a bigger share of the fastest memory, and lower
prio cgroups are relegated to lower tiers. If we split those pools,
then "demotion" will actually free memory in a cgroup.

This is why I liked adding a nodes= argument to memory.reclaim the
best. It doesn't encode a distinction that may not last for long.

The problem comes from how to interpret the input argument and the
return value, right? Could we solve this by requiring the passed
nodes= to all be of the same memory tier? Then there is no confusion
around what is requested and what the return value means.

And if no nodes are passed, it means reclaim (from the lowest memory
tier) X pages and demote as needed, then return the reclaimed pages.
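
A sketch of what that same-tier check could look like (node_to_tier()
is a made-up helper here, standing in for whatever the memory tiering
code exposes to map a node to its tier):

  /* Sketch: reject a nodes= mask that spans more than one memory
   * tier, so the request and the return value stay unambiguous. */
  static bool nodes_in_one_tier(const nodemask_t *mask)
  {
  	int nid, tier = NUMA_NO_NODE;

  	for_each_node_mask(nid, *mask) {
  		if (tier == NUMA_NO_NODE)
  			tier = node_to_tier(nid);	/* hypothetical */
  		else if (node_to_tier(nid) != tier)
  			return false;
  	}
  	return true;
  }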

> Now to your specific usecase. If there is a need to do a memory
> distribution balancing then fine but this should be a well defined
> interface. E.g. is there a need to not only control demotion but
> promotions as well? I haven't heard anybody requesting that so far
> but I can easily imagine that like outsourcing the memory reclaim to
> the userspace someone might want to do the same thing with the numa
> balancing because $REASONS. Should that ever happen, I am pretty sure
> hooking into memory.reclaim is not really a great idea.

Should this ever happen, it would seem fair that that be a separate
knob anyway, no? One knob to move the pipeline in one direction
(aging), one knob to move it the other way.
Mina Almasry Dec. 13, 2022, 7:29 p.m. UTC | #13
On Tue, Dec 13, 2022 at 6:03 AM Michal Hocko <mhocko@suse.com> wrote:
>
> On Tue 13-12-22 14:30:40, Johannes Weiner wrote:
> > On Tue, Dec 13, 2022 at 02:30:57PM +0800, Huang, Ying wrote:
> [...]
> > > After these discussion, I think the solution maybe use different
> > > interfaces for "proactive demote" and "proactive reclaim".  That is,
> > > reconsider "memory.demote".  In this way, we will always uncharge the
> > > cgroup for "memory.reclaim".  This avoid the possible confusion there.
> > > And, because demotion is considered aging, we don't need to disable
> > > demotion for "memory.reclaim", just don't count it.
> >
> > Hm, so in summary:
> >
> > 1) memory.reclaim would demote and reclaim like today, but it would
> >    change to only count reclaimed pages against the goal.
> >
> > 2) memory.demote would only demote.
> >

If the above 2 points are agreeable then yes, this sounds good to me
and does address our use case.

> >    a) What if the demotion targets are full? Would it reclaim or fail?
> >

Wei will chime in if he disagrees, but I think we _require_ that it
fails rather than falling back to reclaim. The interface is asking for
demotion, and is called memory.demote. For such an interface to fall
back to reclaim would be very confusing to userspace and may trigger
reclaim on a high-priority job that we want to shield from proactive
reclaim.

> > 3) Would memory.reclaim and memory.demote still need nodemasks?

memory.demote will need a nodemask, for sure. Today the nodemask would
be useful if there is a specific node in the top tier that is
overloaded and we want to reduce the pressure by demoting. In the
future there will be N tiers and the nodemask says which tier to
demote from.

I don't think memory.reclaim would need a nodemask anymore? At least I
no longer see a use for it on our side.

> >    Would
> >    they return -EINVAL if a) memory.reclaim gets passed only toptier
> >    nodes or b) memory.demote gets passed any lasttier nodes?
>

Honestly it would be great if memory.reclaim could force reclaim from
the top tier nodes. It breaks the aging pipeline, yes, but if the user is
specifically asking for that because they decided in their use case
it's a good idea then the kernel should comply IMO. Not a strict
requirement for us. Wei will chime in if he disagrees.

memory.demote returning -EINVAL for lasttier nodes makes sense to me.

> I would also add
> 4) Do we want to allow to control the demotion path (e.g. which node to
>    demote from and to) and how to achieve that?

We care deeply about specifying which node to demote _from_. That
would be a node that is approaching pressure and from which we are
looking for proactive savings. So far I haven't seen any reason to control
which nodes to demote _to_. The kernel deciding that based on the
aging pipeline and the node distances sounds good to me. Obviously
someone else may find that useful.

> 5) Is the demotion api restricted to multi-tier systems or any numa
>    configuration allowed as well?
>

demotion will of course not work on single-tier systems. The
interface may return a failure on such systems or not be available
at all.

> --
> Michal Hocko
> SUSE Labs
Mina Almasry Dec. 13, 2022, 7:53 p.m. UTC | #14
On Tue, Dec 13, 2022 at 7:58 AM Johannes Weiner <hannes@cmpxchg.org> wrote:
>
> On Tue, Dec 13, 2022 at 09:33:24AM +0100, Michal Hocko wrote:
> > I do recognize your need to control the demotion but I argue that it is
> > a bad idea to rely on an implicit behavior of the memory reclaim and an
> > interface which is _documented_ to primarily _reclaim_ memory.
>
> I think memory.reclaim should demote as part of page aging. What I'd
> like to avoid is *having* to manually control the aging component in
> the interface (e.g. making memory.reclaim *only* reclaim, and
> *requiring* a coordinated use of memory.demote to ensure progress.)
>
> > Really, consider that the current demotion implementation will change
> > in the future and based on a newly added heuristic memory reclaim or
> > compression would be preferred over migration to a different tier.  This
> > might completely break your current assumptions and break your usecase
> > which relies on an implicit demotion behavior.  Do you see that as a
> > potential problem at all? What shall we do in that case? Special case
> > memory.reclaim behavior?
>
> Shouldn't that be derived from the distance propertiers in the tier
> configuration?
>
> I.e. if local compression is faster than demoting to a slower node, we
> should maybe have a separate tier for that. Ignoring proactive reclaim
> or demotion commands for a second: on that node, global memory
> pressure should always compress first, while the oldest pages from the
> compression cache should demote to the other node(s) - until they
> eventually get swapped out.
>
> However fine-grained we make proactive reclaim control over these
> stages, it should at least be possible for the user to request the
> default behavior that global pressure follows, without jumping through
> hoops or requiring the coordinated use of multiple knobs. So IMO there
> is an argument for having a singular knob that requests comprehensive
> aging and reclaiming across the configured hierarchy.
>
> As far as explicit control over the individual stages goes - no idea
> if you would call the compression stage demotion or reclaim. The
> distinction still does not make much of sense to me, since reclaim is
> just another form of demotion. Sure, page faults have a different
> access latency than dax to slower memory. But you could also have 3
> tiers of memory where the difference between tier 1 and 2 is much
> smaller than the difference between 2 and 3, and you might want to
> apply different demotion rates between them as well.
>
> The other argument is that demotion does not free cgroup memory,
> whereas reclaim does. But with multiple memory tiers of vastly
> different performance, isn't there also an argument for granting
> cgroups different shares of each memory? So that a higher priority
> group has access to a bigger share of the fastest memory, and lower
> prio cgroups are relegated to lower tiers. If we split those pools,
> then "demotion" will actually free memory in a cgroup.
>

I would also like to say that I implemented something along those lines in [1].

In this patch, pages demoted from inside the nodemask to outside the
nodemask count as 'reclaimed'. This, in my mind, is a very generic
solution to the 'should demoted pages count as reclaim?' problem, and
will work in all scenarios as long as the nodemask passed to
shrink_folio_list() is set correctly by the call stack.
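
In code terms, the accounting rule of [1] boils down to roughly the
following (a sketch of the idea, not the literal patch):

  /* Sketch: a demotion only counts toward the reclaim goal when it
   * moves a page from inside the requested nodemask to a node outside
   * of it; with no nodemask, everything counts as before. */
  static bool demote_counts_as_reclaim(int src_nid, int dst_nid,
  				     nodemask_t *nodemask)
  {
  	if (!nodemask)
  		return true;
  	return node_isset(src_nid, *nodemask) &&
  	       !node_isset(dst_nid, *nodemask);
  }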

> This is why I liked adding a nodes= argument to memory.reclaim the
> best. It doesn't encode a distinction that may not last for long.
>
> The problem comes from how to interpret the input argument and the
> return value, right? Could we solve this by requiring the passed
> nodes= to all be of the same memory tier? Then there is no confusion
> around what is requested and what the return value means.
>

I feel like I arrived at a better solution in [1], where pages demoted
from inside the nodemask to outside count as reclaimed and the rest
don't. But I think we could also solve this with explicit checks that the
nodes= arg is from a single tier, yes.

> And if no nodes are passed, it means reclaim (from the lowest memory
> tier) X pages and demote as needed, then return the reclaimed pages.
>
> > Now to your specific usecase. If there is a need to do a memory
> > distribution balancing then fine but this should be a well defined
> > interface. E.g. is there a need to not only control demotion but
> > promotions as well? I haven't heard anybody requesting that so far
> > but I can easily imagine that like outsourcing the memory reclaim to
> > the userspace someone might want to do the same thing with the numa
> > balancing because $REASONS. Should that ever happen, I am pretty sure
> > hooking into memory.reclaim is not really a great idea.
>
> Should this ever happen, it would seem fair that that be a separate
> knob anyway, no? One knob to move the pipeline in one direction
> (aging), one knob to move it the other way.

[1] https://lore.kernel.org/linux-mm/20221206023406.3182800-1-almasrymina@google.com/
Huang, Ying Dec. 14, 2022, 7:15 a.m. UTC | #15
Johannes Weiner <hannes@cmpxchg.org> writes:

> On Tue, Dec 13, 2022 at 09:33:24AM +0100, Michal Hocko wrote:
>> I do recognize your need to control the demotion but I argue that it is
>> a bad idea to rely on an implicit behavior of the memory reclaim and an
>> interface which is _documented_ to primarily _reclaim_ memory.
>
> I think memory.reclaim should demote as part of page aging. What I'd
> like to avoid is *having* to manually control the aging component in
> the interface (e.g. making memory.reclaim *only* reclaim, and
> *requiring* a coordinated use of memory.demote to ensure progress.)
>
>> Really, consider that the current demotion implementation will change
>> in the future and based on a newly added heuristic memory reclaim or
>> compression would be preferred over migration to a different tier.  This
>> might completely break your current assumptions and break your usecase
>> which relies on an implicit demotion behavior.  Do you see that as a
>> potential problem at all? What shall we do in that case? Special case
>> memory.reclaim behavior?
>
> Shouldn't that be derived from the distance propertiers in the tier
> configuration?
>
> I.e. if local compression is faster than demoting to a slower node, we
> should maybe have a separate tier for that. Ignoring proactive reclaim
> or demotion commands for a second: on that node, global memory
> pressure should always compress first, while the oldest pages from the
> compression cache should demote to the other node(s) - until they
> eventually get swapped out.
>
> However fine-grained we make proactive reclaim control over these
> stages, it should at least be possible for the user to request the
> default behavior that global pressure follows, without jumping through
> hoops or requiring the coordinated use of multiple knobs. So IMO there
> is an argument for having a singular knob that requests comprehensive
> aging and reclaiming across the configured hierarchy.
>
> As far as explicit control over the individual stages goes - no idea
> if you would call the compression stage demotion or reclaim. The
> distinction still does not make much of sense to me, since reclaim is
> just another form of demotion. Sure, page faults have a different
> access latency than dax to slower memory. But you could also have 3
> tiers of memory where the difference between tier 1 and 2 is much
> smaller than the difference between 2 and 3, and you might want to
> apply different demotion rates between them as well.
>
> The other argument is that demotion does not free cgroup memory,
> whereas reclaim does. But with multiple memory tiers of vastly
> different performance, isn't there also an argument for granting
> cgroups different shares of each memory? So that a higher priority
> group has access to a bigger share of the fastest memory, and lower
> prio cgroups are relegated to lower tiers. If we split those pools,
> then "demotion" will actually free memory in a cgroup.
>
> This is why I liked adding a nodes= argument to memory.reclaim the
> best. It doesn't encode a distinction that may not last for long.
>
> The problem comes from how to interpret the input argument and the
> return value, right? Could we solve this by requiring the passed
> nodes= to all be of the same memory tier? Then there is no confusion
> around what is requested and what the return value means.

Yes.  The definition is clear if nodes= are from the same memory tier.

> And if no nodes are passed, it means reclaim (from the lowest memory
> tier) X pages and demote as needed, then return the reclaimed pages.

It appears that the definition isn't very clear here.  How many pages
should be demoted?  Is the target number the value echoed to
memory.reclaim?  Or requested_number - pages_in_lowest_tier?  Should we
demote in as many tiers as possible or in as few tiers as possible?  One
possibility is to take advantage of top-tier memory as much as
possible.  That is, try to reclaim pages in lower tiers only.

>> Now to your specific usecase. If there is a need to do a memory
>> distribution balancing then fine but this should be a well defined
>> interface. E.g. is there a need to not only control demotion but
>> promotions as well? I haven't heard anybody requesting that so far
>> but I can easily imagine that like outsourcing the memory reclaim to
>> the userspace someone might want to do the same thing with the numa
>> balancing because $REASONS. Should that ever happen, I am pretty sure
>> hooking into memory.reclaim is not really a great idea.
>
> Should this ever happen, it would seem fair that that be a separate
> knob anyway, no? One knob to move the pipeline in one direction
> (aging), one knob to move it the other way.

Agree.

Best Regards,
Huang, Ying
Huang, Ying Dec. 14, 2022, 7:20 a.m. UTC | #16
Mina Almasry <almasrymina@google.com> writes:

> On Tue, Dec 13, 2022 at 7:58 AM Johannes Weiner <hannes@cmpxchg.org> wrote:
>>
>> On Tue, Dec 13, 2022 at 09:33:24AM +0100, Michal Hocko wrote:
>> > I do recognize your need to control the demotion but I argue that it is
>> > a bad idea to rely on an implicit behavior of the memory reclaim and an
>> > interface which is _documented_ to primarily _reclaim_ memory.
>>
>> I think memory.reclaim should demote as part of page aging. What I'd
>> like to avoid is *having* to manually control the aging component in
>> the interface (e.g. making memory.reclaim *only* reclaim, and
>> *requiring* a coordinated use of memory.demote to ensure progress.)
>>
>> > Really, consider that the current demotion implementation will change
>> > in the future and based on a newly added heuristic memory reclaim or
>> > compression would be preferred over migration to a different tier.  This
>> > might completely break your current assumptions and break your usecase
>> > which relies on an implicit demotion behavior.  Do you see that as a
>> > potential problem at all? What shall we do in that case? Special case
>> > memory.reclaim behavior?
>>
>> Shouldn't that be derived from the distance properties in the tier
>> configuration?
>>
>> I.e. if local compression is faster than demoting to a slower node, we
>> should maybe have a separate tier for that. Ignoring proactive reclaim
>> or demotion commands for a second: on that node, global memory
>> pressure should always compress first, while the oldest pages from the
>> compression cache should demote to the other node(s) - until they
>> eventually get swapped out.
>>
>> However fine-grained we make proactive reclaim control over these
>> stages, it should at least be possible for the user to request the
>> default behavior that global pressure follows, without jumping through
>> hoops or requiring the coordinated use of multiple knobs. So IMO there
>> is an argument for having a singular knob that requests comprehensive
>> aging and reclaiming across the configured hierarchy.
>>
>> As far as explicit control over the individual stages goes - no idea
>> if you would call the compression stage demotion or reclaim. The
>> distinction still does not make much sense to me, since reclaim is
>> just another form of demotion. Sure, page faults have a different
>> access latency than dax to slower memory. But you could also have 3
>> tiers of memory where the difference between tier 1 and 2 is much
>> smaller than the difference between 2 and 3, and you might want to
>> apply different demotion rates between them as well.
>>
>> The other argument is that demotion does not free cgroup memory,
>> whereas reclaim does. But with multiple memory tiers of vastly
>> different performance, isn't there also an argument for granting
>> cgroups different shares of each memory? So that a higher priority
>> group has access to a bigger share of the fastest memory, and lower
>> prio cgroups are relegated to lower tiers. If we split those pools,
>> then "demotion" will actually free memory in a cgroup.
>>
>
> I would also like to say I implemented something in line with that in [1].
>
> In this patch, pages demoted from inside the nodemask to outside the
> nodemask count as 'reclaimed'. This, in my mind, is a very generic
> solution to the 'should demoted pages count as reclaim?' problem, and
> will work in all scenarios as long as the nodemask passed to
> shrink_folio_list() is set correctly by the call stack.

It's still not clear how many pages should be demoted among the
nodes inside the nodemask.  One possibility is to keep as many higher
tier pages as possible.

Best Regards,
Huang, Ying
Michal Hocko Dec. 14, 2022, 10:23 a.m. UTC | #17
On Tue 13-12-22 11:29:45, Mina Almasry wrote:
> On Tue, Dec 13, 2022 at 6:03 AM Michal Hocko <mhocko@suse.com> wrote:
> >
> > On Tue 13-12-22 14:30:40, Johannes Weiner wrote:
> > > On Tue, Dec 13, 2022 at 02:30:57PM +0800, Huang, Ying wrote:
> > [...]
> > > > After these discussions, I think the solution may be to use different
> > > > interfaces for "proactive demote" and "proactive reclaim".  That is,
> > > > reconsider "memory.demote".  In this way, we will always uncharge the
> > > > cgroup for "memory.reclaim".  This avoids the possible confusion there.
> > > > And, because demotion is considered aging, we don't need to disable
> > > > demotion for "memory.reclaim", just don't count it.
> > >
> > > Hm, so in summary:
> > >
> > > 1) memory.reclaim would demote and reclaim like today, but it would
> > >    change to only count reclaimed pages against the goal.
> > >
> > > 2) memory.demote would only demote.
> > >
> 
> If the above 2 points are agreeable then yes, this sounds good to me
> and does address our use case.
> 
> > >    a) What if the demotion targets are full? Would it reclaim or fail?
> > >
> 
> Wei will chime in if he disagrees, but I think we _require_ that it
> fails, not falls back to reclaim. The interface is asking for
> demotion, and is called memory.demote. For such an interface to fall
> back to reclaim would be very confusing to userspace and may trigger
> reclaim on a high priority job that we want to shield from proactive
> reclaim.

But what should happen if the immediate demotion target is full but
lower tiers are still usable. Should the first one demote before
allowing to demote from the top tier?
 
> > > 3) Would memory.reclaim and memory.demote still need nodemasks?
> 
> memory.demote will need a nodemask, for sure. Today the nodemask would
> be useful if there is a specific node in the top tier that is
> overloaded and we want to reduce the pressure by demoting. In the
> future there will be N tiers and the nodemask says which tier to
> demote from.

OK, so what is the exact semantic of the node mask. Does it control
where to demote from or to or both?

> I don't think memory.reclaim would need a nodemask anymore? At least I
> no longer see the use for it for us.
> 
> > >    Would
> > >    they return -EINVAL if a) memory.reclaim gets passed only toptier
> > >    nodes or b) memory.demote gets passed any lasttier nodes?
> >
> 
> Honestly it would be great if memory.reclaim can force reclaim from
> top tier nodes. It breaks the aging pipeline, yes, but if the user is
> specifically asking for that because they decided in their usecase
> it's a good idea then the kernel should comply IMO. Not a strict
> requirement for us. Wei will chime in if he disagrees.

That would require a nodemask to say which nodes to reclaim, no? The
default behavior should be in line with what standard memory reclaim
does. If demotion is a part of that process then memory.reclaim should
be a part of it too. If we want finer control then a nodemask is
really a must, and then the nodemask should constrain both aging and
reclaim.

> memory.demote returning -EINVAL for lasttier nodes makes sense to me.
> 
> > I would also add
> > 4) Do we want to allow to control the demotion path (e.g. which node to
> >    demote from and to) and how to achieve that?
> 
> We care deeply about specifying which node to demote _from_. That
> would be some node that is approaching pressure and we're looking for
> proactive saving from. So far I haven't seen any reason to control
> which nodes to demote _to_. The kernel deciding that based on the
> aging pipeline and the node distances sounds good to me. Obviously
> someone else may find that useful.

Please keep in mind that the interface should be really prepared for
future extensions so try to abstract from your immediate usecases.

> > 5) Is the demotion api restricted to multi-tier systems or any numa
> >    configuration allowed as well?
> >
> 
> demotion will of course not work on single tiered systems. The
> interface may return some failure on such systems or not be available
> at all.

Is there any strong reason for that? We do not have any interface to
control NUMA balancing from userspace. Why cannot we use the interface
for that purpose?
Michal Hocko Dec. 14, 2022, 10:43 a.m. UTC | #18
On Tue 13-12-22 16:58:50, Johannes Weiner wrote:
> On Tue, Dec 13, 2022 at 09:33:24AM +0100, Michal Hocko wrote:
> > I do recognize your need to control the demotion but I argue that it is
> > a bad idea to rely on an implicit behavior of the memory reclaim and an
> > interface which is _documented_ to primarily _reclaim_ memory.
> 
> I think memory.reclaim should demote as part of page aging. What I'd
> like to avoid is *having* to manually control the aging component in
> the interface (e.g. making memory.reclaim *only* reclaim, and
> *requiring* a coordinated use of memory.demote to ensure progress.)

Yes, I do agree with that. Demotion is a part of aging. I meant to say
that the result of the operation should be reclaimed charges, but that
doesn't mean that demotion is not a part of that process.

I am mostly concerned about the demote-only behavior that Mina is
targeting and wants to expose through the memory.reclaim interface.

> > Really, consider that the current demotion implementation will change
> > in the future and based on a newly added heuristic memory reclaim or
> > compression would be preferred over migration to a different tier.  This
> > might completely break your current assumptions and break your usecase
> > which relies on an implicit demotion behavior.  Do you see that as a
> > potential problem at all? What shall we do in that case? Special case
> > memory.reclaim behavior?
> 
> Shouldn't that be derived from the distance properties in the tier
> configuration?
> 
> I.e. if local compression is faster than demoting to a slower node, we
> should maybe have a separate tier for that. Ignoring proactive reclaim
> or demotion commands for a second: on that node, global memory
> pressure should always compress first, while the oldest pages from the
> compression cache should demote to the other node(s) - until they
> eventually get swapped out.
> 
> However fine-grained we make proactive reclaim control over these
> stages, it should at least be possible for the user to request the
> default behavior that global pressure follows, without jumping through
> hoops or requiring the coordinated use of multiple knobs. So IMO there
> is an argument for having a singular knob that requests comprehensive
> aging and reclaiming across the configured hierarchy.
> 
> As far as explicit control over the individual stages goes - no idea
> if you would call the compression stage demotion or reclaim. The
> distinction still does not make much sense to me, since reclaim is
> just another form of demotion.

From the external visibility POV, the major difference between the two
is that reclaim decreases the overall charged memory, and there are
proactive reclaim use cases which rely on that. Demotion is mostly
memory placement rebalancing. Sure, it is still visible in per-node
stats and has implications for performance, but that is a different
story.

> Sure, page faults have a different
> access latency than dax to slower memory. But you could also have 3
> tiers of memory where the difference between tier 1 and 2 is much
> smaller than the difference between 2 and 3, and you might want to
> apply different demotion rates between them as well.
> 
> The other argument is that demotion does not free cgroup memory,
> whereas reclaim does. But with multiple memory tiers of vastly
> different performance, isn't there also an argument for granting
> cgroups different shares of each memory?

Yes. We have already had requests for per-node limits in the past. And
I do expect this will show up as a problem here as well, but with
reasonable memory.reclaim and potentially memory.demote interfaces the
balancing and policy making can be outsourced to userspace.

> So that a higher priority
> group has access to a bigger share of the fastest memory, and lower
> prio cgroups are relegated to lower tiers. If we split those pools,
> then "demotion" will actually free memory in a cgroup.
> 
> This is why I liked adding a nodes= argument to memory.reclaim the
> best. It doesn't encode a distinction that may not last for long.
> 
> The problem comes from how to interpret the input argument and the
> return value, right? Could we solve this by requiring the passed
> nodes= to all be of the same memory tier? Then there is no confusion
> around what is requested and what the return value means.

Just to make sure I am on the same page. This means that if a node mask
is specified then it always implies demotion without any control over
how the demotion is done, right?

> And if no nodes are passed, it means reclaim (from the lowest memory
> tier) X pages and demote as needed, then return the reclaimed pages.

IMO this is a rather constrained semantic which will completely rule
out some potentially interesting use cases, e.g. fine-grained control
over the demotion path or enforced reclaim for node balancing. Also, if
we ever want a promote interface then it would fit better with a demote
counterpart.

> > Now to your specific usecase. If there is a need to do a memory
> > distribution balancing then fine but this should be a well defined
> > interface. E.g. is there a need to not only control demotion but
> > promotions as well? I haven't heard anybody requesting that so far
> > but I can easily imagine that like outsourcing the memory reclaim to
> > the userspace someone might want to do the same thing with the numa
> > balancing because $REASONS. Should that ever happen, I am pretty sure
> > hooking into memory.reclaim is not really a great idea.
> 
> Should this ever happen, it would seem fair that that be a separate
> knob anyway, no? One knob to move the pipeline in one direction
> (aging), one knob to move it the other way.

Yes, this is what I am inclined to as well.
Huang, Ying Dec. 15, 2022, 5:50 a.m. UTC | #19
Michal Hocko <mhocko@suse.com> writes:

> On Tue 13-12-22 11:29:45, Mina Almasry wrote:
>> On Tue, Dec 13, 2022 at 6:03 AM Michal Hocko <mhocko@suse.com> wrote:
>> >
>> > On Tue 13-12-22 14:30:40, Johannes Weiner wrote:
>> > > On Tue, Dec 13, 2022 at 02:30:57PM +0800, Huang, Ying wrote:
>> > [...]
>> > > > After these discussions, I think the solution may be to use different
>> > > > interfaces for "proactive demote" and "proactive reclaim".  That is,
>> > > > reconsider "memory.demote".  In this way, we will always uncharge the
>> > > > cgroup for "memory.reclaim".  This avoids the possible confusion there.
>> > > > And, because demotion is considered aging, we don't need to disable
>> > > > demotion for "memory.reclaim", just don't count it.
>> > >
>> > > Hm, so in summary:
>> > >
>> > > 1) memory.reclaim would demote and reclaim like today, but it would
>> > >    change to only count reclaimed pages against the goal.
>> > >
>> > > 2) memory.demote would only demote.
>> > >
>> 
>> If the above 2 points are agreeable then yes, this sounds good to me
>> and does address our use case.
>> 
>> > >    a) What if the demotion targets are full? Would it reclaim or fail?
>> > >
>> 
>> Wei will chime in if he disagrees, but I think we _require_ that it
>> fails, not falls back to reclaim. The interface is asking for
>> demotion, and is called memory.demote. For such an interface to fall
>> back to reclaim would be very confusing to userspace and may trigger
>> reclaim on a high priority job that we want to shield from proactive
>> reclaim.
>
> But what should happen if the immediate demotion target is full but
> lower tiers are still usable. Should the first one demote before
> allowing to demote from the top tier?
>  
>> > > 3) Would memory.reclaim and memory.demote still need nodemasks?
>> 
>> memory.demote will need a nodemask, for sure. Today the nodemask would
>> be useful if there is a specific node in the top tier that is
>> overloaded and we want to reduce the pressure by demoting. In the
>> future there will be N tiers and the nodemask says which tier to
>> demote from.
>
> OK, so what is the exact semantic of the node mask. Does it control
> where to demote from or to or both?
>
>> I don't think memory.reclaim would need a nodemask anymore? At least I
>> no longer see the use for it for us.
>> 
>> > >    Would
>> > >    they return -EINVAL if a) memory.reclaim gets passed only toptier
>> > >    nodes or b) memory.demote gets passed any lasttier nodes?
>> >
>> 
>> Honestly it would be great if memory.reclaim can force reclaim from
>> top tier nodes. It breaks the aging pipeline, yes, but if the user is
>> specifically asking for that because they decided in their usecase
>> it's a good idea then the kernel should comply IMO. Not a strict
>> requirement for us. Wei will chime in if he disagrees.
>
> That would require a nodemask to say which nodes to reclaim, no? The
> default behavior should be in line with what standard memory reclaim
> does. If demotion is a part of that process then memory.reclaim should
> be a part of it too. If we want finer control then a nodemask is
> really a must, and then the nodemask should constrain both aging and
> reclaim.
>
>> memory.demote returning -EINVAL for lasttier nodes makes sense to me.
>> 
>> > I would also add
>> > 4) Do we want to allow to control the demotion path (e.g. which node to
>> >    demote from and to) and how to achieve that?
>> 
>> We care deeply about specifying which node to demote _from_. That
>> would be some node that is approaching pressure and we're looking for
>> proactive saving from. So far I haven't seen any reason to control
>> which nodes to demote _to_. The kernel deciding that based on the
>> aging pipeline and the node distances sounds good to me. Obviously
>> someone else may find that useful.
>
> Please keep in mind that the interface should be really prepared for
> future extensions so try to abstract from your immediate usecases.

I see two requirements here, one is to control the demotion source, that
is, which nodes to free memory from.  The other is to control the demotion
path.  I think that we can use two different parameters for them, for
example, "from=<demotion source nodes>" and "to=<demotion target
nodes>".  In most cases we don't need to control the demotion path.
Because in the current implementation, the nodes in the lower tiers in the
same socket (local nodes) will be preferred.  I think that this is
the desired behavior in most cases.
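
A small user-space sketch of how such "from="/"to=" keys could be
tokenized, in the same spirit as the "nodes=" parsing in the patch
below.  The key names are hypothetical, taken from the proposal above;
no such kernel interface exists.

#include <stdio.h>
#include <string.h>

int main(void)
{
        char buf[] = "1G from=0,1 to=2,3";
        char from[64] = "", to[64] = "";
        char *tok = strtok(buf, " ");   /* "1G": the amount, parsed elsewhere */

        printf("amount: %s\n", tok);

        while ((tok = strtok(NULL, " ")) != NULL) {
                if (sscanf(tok, "from=%63s", from) == 1)
                        continue;
                if (sscanf(tok, "to=%63s", to) == 1)
                        continue;
                fprintf(stderr, "unknown key: %s\n", tok);
                return 1;               /* the kernel would return -EINVAL */
        }
        printf("demote from nodes [%s] to nodes [%s]\n", from, to);
        return 0;
}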

>> > 5) Is the demotion api restricted to multi-tier systems or any numa
>> >    configuration allowed as well?
>> >
>> 
>> demotion will of course not work on single tiered systems. The
>> interface may return some failure on such systems or not be available
>> at all.
>
> Is there any strong reason for that? We do not have any interface to
> control NUMA balancing from userspace. Why cannot we use the interface
> for that purpose? 

Do you mean to demote the cold pages from the specified source nodes to
the specified target nodes in different sockets?  We don't do that to
avoid loops in the demotion path.  If we prevent the target nodes from
demoting cold pages to the source nodes at the same time, it seems
doable.

Best Regards,
Huang, Ying
Michal Hocko Dec. 15, 2022, 9:21 a.m. UTC | #20
On Thu 15-12-22 13:50:14, Huang, Ying wrote:
> Michal Hocko <mhocko@suse.com> writes:
> 
> > On Tue 13-12-22 11:29:45, Mina Almasry wrote:
> >> On Tue, Dec 13, 2022 at 6:03 AM Michal Hocko <mhocko@suse.com> wrote:
> >> >
> >> > On Tue 13-12-22 14:30:40, Johannes Weiner wrote:
> >> > > On Tue, Dec 13, 2022 at 02:30:57PM +0800, Huang, Ying wrote:
> >> > [...]
> >> > > > After these discussions, I think the solution may be to use different
> >> > > > interfaces for "proactive demote" and "proactive reclaim".  That is,
> >> > > > reconsider "memory.demote".  In this way, we will always uncharge the
> >> > > > cgroup for "memory.reclaim".  This avoids the possible confusion there.
> >> > > > And, because demotion is considered aging, we don't need to disable
> >> > > > demotion for "memory.reclaim", just don't count it.
> >> > >
> >> > > Hm, so in summary:
> >> > >
> >> > > 1) memory.reclaim would demote and reclaim like today, but it would
> >> > >    change to only count reclaimed pages against the goal.
> >> > >
> >> > > 2) memory.demote would only demote.
> >> > >
> >> 
> >> If the above 2 points are agreeable then yes, this sounds good to me
> >> and does address our use case.
> >> 
> >> > >    a) What if the demotion targets are full? Would it reclaim or fail?
> >> > >
> >> 
> >> Wei will chime in if he disagrees, but I think we _require_ that it
> >> fails, not falls back to reclaim. The interface is asking for
> >> demotion, and is called memory.demote. For such an interface to fall
> >> back to reclaim would be very confusing to userspace and may trigger
> >> reclaim on a high priority job that we want to shield from proactive
> >> reclaim.
> >
> > But what should happen if the immediate demotion target is full but
> > lower tiers are still usable. Should the first one demote before
> > allowing to demote from the top tier?
> >  
> >> > > 3) Would memory.reclaim and memory.demote still need nodemasks?
> >> 
> >> memory.demote will need a nodemask, for sure. Today the nodemask would
> >> be useful if there is a specific node in the top tier that is
> >> overloaded and we want to reduce the pressure by demoting. In the
> >> future there will be N tiers and the nodemask says which tier to
> >> demote from.
> >
> > OK, so what is the exact semantic of the node mask. Does it control
> > where to demote from or to or both?
> >
> >> I don't think memory.reclaim would need a nodemask anymore? At least I
> >> no longer see the use for it for us.
> >> 
> >> > >    Would
> >> > >    they return -EINVAL if a) memory.reclaim gets passed only toptier
> >> > >    nodes or b) memory.demote gets passed any lasttier nodes?
> >> >
> >> 
> >> Honestly it would be great if memory.reclaim can force reclaim from
> >> top tier nodes. It breaks the aging pipeline, yes, but if the user is
> >> specifically asking for that because they decided in their usecase
> >> it's a good idea then the kernel should comply IMO. Not a strict
> >> requirement for us. Wei will chime in if he disagrees.
> >
> > That would require a nodemask to say which nodes to reclaim, no? The
> > default behavior should be in line with what standard memory reclaim
> > does. If demotion is a part of that process then memory.reclaim should
> > be a part of it too. If we want finer control then a nodemask is
> > really a must, and then the nodemask should constrain both aging and
> > reclaim.
> >
> >> memory.demote returning -EINVAL for lasttier nodes makes sense to me.
> >> 
> >> > I would also add
> >> > 4) Do we want to allow to control the demotion path (e.g. which node to
> >> >    demote from and to) and how to achieve that?
> >> 
> >> We care deeply about specifying which node to demote _from_. That
> >> would be some node that is approaching pressure and we're looking for
> >> proactive saving from. So far I haven't seen any reason to control
> >> which nodes to demote _to_. The kernel deciding that based on the
> >> aging pipeline and the node distances sounds good to me. Obviously
> >> someone else may find that useful.
> >
> > Please keep in mind that the interface should be really prepared for
> > future extensions so try to abstract from your immediate usecases.
> 
> I see two requirements here, one is to control the demotion source, that
> is, which nodes to free memory from.  The other is to control the demotion
> path.  I think that we can use two different parameters for them, for
> example, "from=<demotion source nodes>" and "to=<demotion target
> nodes>".  In most cases we don't need to control the demotion path.
> Because in the current implementation, the nodes in the lower tiers in the
> same socket (local nodes) will be preferred.  I think that this is
> the desired behavior in most cases.

Even if the demotion path is not really required at the moment we should
keep in mind future potential extensions. E.g. when a userspace based
balancing is to be implemented because the default behavior cannot
capture userspace policies (one example would be enforcing a
prioritization of containers when some container's demoted pages would
need to be demoted further to free up space for a different
workload). 
 
> >> > 5) Is the demotion api restricted to multi-tier systems or any numa
> >> >    configuration allowed as well?
> >> >
> >> 
> >> demotion will of course not work on single tiered systems. The
> >> interface may return some failure on such systems or not be available
> >> at all.
> >
> > Is there any strong reason for that? We do not have any interface to
> > control NUMA balancing from userspace. Why cannot we use the interface
> > for that purpose? 
> 
> Do you mean to demote the cold pages from the specified source nodes to
> the specified target nodes in different sockets?  We don't do that to
> avoid loops in the demotion path.  If we prevent the target nodes from
> demoting cold pages to the source nodes at the same time, it seems
> doable.

Loops could be avoided by properly specifying from and to nodes if this
is going to be a fine-grained interface to control demotion.
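
One simple sanity check along those lines, sketched in standalone C
with plain bitmasks standing in for nodemask_t (the kernel would use
nodes_intersects()): reject a single request whose from and to masks
overlap.  This is only a necessary condition within one request, not a
full cycle check across requests.

#include <stdio.h>
#include <stdint.h>

/* Reject overlapping masks; a node on both sides could demote to itself. */
static int validate_demotion_masks(uint64_t from, uint64_t to)
{
        return (from & to) ? -1 : 0;    /* the kernel would return -EINVAL */
}

int main(void)
{
        uint64_t from = (1ULL << 0) | (1ULL << 1);  /* nodes 0,1 */
        uint64_t to   = (1ULL << 2) | (1ULL << 3);  /* nodes 2,3 */

        printf("disjoint: %s\n",
               validate_demotion_masks(from, to) ? "rejected" : "ok");
        printf("overlapping: %s\n",
               validate_demotion_masks(from, from | to) ? "rejected" : "ok");
        return 0;
}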
Wei Xu Dec. 15, 2022, 5:58 p.m. UTC | #21
On Wed, Dec 14, 2022 at 2:23 AM Michal Hocko <mhocko@suse.com> wrote:
>
> On Tue 13-12-22 11:29:45, Mina Almasry wrote:
> > On Tue, Dec 13, 2022 at 6:03 AM Michal Hocko <mhocko@suse.com> wrote:
> > >
> > > On Tue 13-12-22 14:30:40, Johannes Weiner wrote:
> > > > On Tue, Dec 13, 2022 at 02:30:57PM +0800, Huang, Ying wrote:
> > > [...]
> > > > > After these discussions, I think the solution may be to use different
> > > > > interfaces for "proactive demote" and "proactive reclaim".  That is,
> > > > > reconsider "memory.demote".  In this way, we will always uncharge the
> > > > > cgroup for "memory.reclaim".  This avoids the possible confusion there.
> > > > > And, because demotion is considered aging, we don't need to disable
> > > > > demotion for "memory.reclaim", just don't count it.
> > > >
> > > > Hm, so in summary:
> > > >
> > > > 1) memory.reclaim would demote and reclaim like today, but it would
> > > >    change to only count reclaimed pages against the goal.
> > > >
> > > > 2) memory.demote would only demote.
> > > >
> >
> > If the above 2 points are agreeable then yes, this sounds good to me
> > and does address our use case.
> >
> > > >    a) What if the demotion targets are full? Would it reclaim or fail?
> > > >
> >
> > Wei will chime in if he disagrees, but I think we _require_ that it
> > fails, not falls back to reclaim. The interface is asking for
> > demotion, and is called memory.demote. For such an interface to fall
> > back to reclaim would be very confusing to userspace and may trigger
> > reclaim on a high priority job that we want to shield from proactive
> > reclaim.
>
> But what should happen if the immediate demotion target is full but
> lower tiers are still usable. Should the first one demote before
> allowing to demote from the top tier?

In that case, the demotion will fall back to the lower tiers.  See
node_get_allowed_targets() and establish_demotion_targets().
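
As a rough illustration of that fallback only (the real selection in
node_get_allowed_targets()/establish_demotion_targets() is considerably
more involved), a standalone sketch that walks the tiers below the
source and picks the first one with room; the numbers are made up:

#include <stdio.h>

#define NR_TIERS 3

int main(void)
{
        /* made-up free pages per tier; tier 0 is the demotion source */
        unsigned long free_pages[NR_TIERS] = { 0, 0, 4096 };
        int tier;

        for (tier = 1; tier < NR_TIERS; tier++) {
                if (free_pages[tier] > 0) {
                        printf("demote to tier %d\n", tier);
                        return 0;
                }
        }
        printf("no usable demotion target\n");
        return 1;
}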

> > > > 3) Would memory.reclaim and memory.demote still need nodemasks?
> >
> > memory.demote will need a nodemask, for sure. Today the nodemask would
> > be useful if there is a specific node in the top tier that is
> > overloaded and we want to reduce the pressure by demoting. In the
> > future there will be N tiers and the nodemask says which tier to
> > demote from.
>
> OK, so what is the exact semantic of the node mask. Does it control
> where to demote from or to or both?

The nodemask argument proposed here is to control where to demote
from.   We can follow the existing kernel demotion order to select
where to demote to.  If the need to control the demotion destination
arises, another argument can be added.

> > I don't think memory.reclaim would need a nodemask anymore? At least I
> > no longer see the use for it for us.
> >
> > > >    Would
> > > >    they return -EINVAL if a) memory.reclaim gets passed only toptier
> > > >    nodes or b) memory.demote gets passed any lasttier nodes?
> > >
> >
> > Honestly it would be great if memory.reclaim can force reclaim from
> > top tier nodes. It breaks the aging pipeline, yes, but if the user is
> > specifically asking for that because they decided in their usecase
> > it's a good idea then the kernel should comply IMO. Not a strict
> > requirement for us. Wei will chime in if he disagrees.
>
> That would require a nodemask to say which nodes to reclaim, no? The
> default behavior should be in line with what standard memory reclaim
> does. If demotion is a part of that process then memory.reclaim should
> be a part of it too. If we want finer control then a nodemask is
> really a must, and then the nodemask should constrain both aging and
> reclaim.

Given that the original meaning of memory.reclaim is to free up
memory, I agree that when a nodemask is provided, the kernel should be
allowed to do both aging/demotion and reclaim.  Whether to allow
reclaim from top-tier nodes is a kernel implementation choice.  The
userspace should not depend on that.

Also, because the expectation of memory.reclaim is to free up the
specified amount of bytes, I think if a page is demoted, but both its
source and target nodes are still in the given nodemask, such a
demoted page should not be counted towards the requested bytes of
memory.reclaim. In the case that no nodemask is given (i.e. to free up
memory from all nodes), the demoted pages should never be counted in
the return value of try_to_free_mem_cgroup_pages().
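
The counting rule described above can be written as a small predicate.
Bitmasks stand in for nodemask_t; names and types are illustrative
only, not kernel code.

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/* A demoted page counts toward the memory.reclaim request only if it
 * left the requested nodemask; with no nodemask it never counts. */
static bool demotion_counts_as_reclaim(int src_nid, int dst_nid,
                                       const uint64_t *nodemask)
{
        if (!nodemask)
                return false;
        return (*nodemask & (1ULL << src_nid)) &&
               !(*nodemask & (1ULL << dst_nid));
}

int main(void)
{
        uint64_t mask = (1ULL << 0) | (1ULL << 1);      /* nodes=0,1 */

        printf("0 -> 2: %d\n", demotion_counts_as_reclaim(0, 2, &mask)); /* 1: counts */
        printf("0 -> 1: %d\n", demotion_counts_as_reclaim(0, 1, &mask)); /* 0: stays inside */
        printf("0 -> 2, no mask: %d\n",
               demotion_counts_as_reclaim(0, 2, NULL));                  /* 0 */
        return 0;
}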

Meanwhile, I'd argue that even though we want to unify demotion and
reclaim, there are still significant differences between them.
Demotion moves pages between two memory tiers, while reclaim can move
pages to a much slower tier, e.g. disk-based files or swap.  Both the
page movement latencies and the reaccess latencies can be
significantly different for demotion/reclaim.  So it is useful for the
userspace to be able to request demotion without reclaim.  A separate
interface, e.g. memory.demote, seems like a good choice for that.

> > memory.demote returning -EINVAL for lasttier nodes makes sense to me.
> >
> > > I would also add
> > > 4) Do we want to allow to control the demotion path (e.g. which node to
> > >    demote from and to) and how to achieve that?
> >
> > We care deeply about specifying which node to demote _from_. That
> > would be some node that is approaching pressure and we're looking for
> > proactive saving from. So far I haven't seen any reason to control
> > which nodes to demote _to_. The kernel deciding that based on the
> > aging pipeline and the node distances sounds good to me. Obviously
> > someone else may find that useful.
>
> Please keep in mind that the interface should be really prepared for
> future extensions so try to abstract from your immediate usecases.
>
> > > 5) Is the demotion api restricted to multi-tier systems or any numa
> > >    configuration allowed as well?
> > >
> >
> > demotion will of course not work on single tiered systems. The
> > interface may return some failure on such systems or not be available
> > at all.
>
> Is there any strong reason for that? We do not have any interface to
> control NUMA balancing from userspace. Why cannot we use the interface
> for that purpose?

A demotion interface such as memory.demote will trigger the demotion
code path in the kernel, which depends on multiple memory tiers.

I think what you are getting at is a more general page migration
interface for memcg, which will need both source and target nodes as
arguments. I think this can be a great idea.  It should be able to
support our demotion use cases as well.

> --
> Michal Hocko
> SUSE Labs
Huang, Ying Dec. 16, 2022, 3:02 a.m. UTC | #22
Michal Hocko <mhocko@suse.com> writes:

> On Thu 15-12-22 13:50:14, Huang, Ying wrote:
>> Michal Hocko <mhocko@suse.com> writes:
>> 
>> > On Tue 13-12-22 11:29:45, Mina Almasry wrote:
>> >> On Tue, Dec 13, 2022 at 6:03 AM Michal Hocko <mhocko@suse.com> wrote:
>> >> >
>> >> > On Tue 13-12-22 14:30:40, Johannes Weiner wrote:
>> >> > > On Tue, Dec 13, 2022 at 02:30:57PM +0800, Huang, Ying wrote:
>> >> > [...]
>> >> > > > After these discussions, I think the solution may be to use different
>> >> > > > interfaces for "proactive demote" and "proactive reclaim".  That is,
>> >> > > > reconsider "memory.demote".  In this way, we will always uncharge the
>> >> > > > cgroup for "memory.reclaim".  This avoids the possible confusion there.
>> >> > > > And, because demotion is considered aging, we don't need to disable
>> >> > > > demotion for "memory.reclaim", just don't count it.
>> >> > >
>> >> > > Hm, so in summary:
>> >> > >
>> >> > > 1) memory.reclaim would demote and reclaim like today, but it would
>> >> > >    change to only count reclaimed pages against the goal.
>> >> > >
>> >> > > 2) memory.demote would only demote.
>> >> > >
>> >> 
>> >> If the above 2 points are agreeable then yes, this sounds good to me
>> >> and does address our use case.
>> >> 
>> >> > >    a) What if the demotion targets are full? Would it reclaim or fail?
>> >> > >
>> >> 
>> >> Wei will chime in if he disagrees, but I think we _require_ that it
>> >> fails, not falls back to reclaim. The interface is asking for
>> >> demotion, and is called memory.demote. For such an interface to fall
>> >> back to reclaim would be very confusing to userspace and may trigger
>> >> reclaim on a high priority job that we want to shield from proactive
>> >> reclaim.
>> >
>> > But what should happen if the immediate demotion target is full but
>> > lower tiers are still usable. Should the first one demote before
>> > allowing to demote from the top tier?
>> >  
>> >> > > 3) Would memory.reclaim and memory.demote still need nodemasks?
>> >> 
>> >> memory.demote will need a nodemask, for sure. Today the nodemask would
>> >> be useful if there is a specific node in the top tier that is
>> >> overloaded and we want to reduce the pressure by demoting. In the
>> >> future there will be N tiers and the nodemask says which tier to
>> >> demote from.
>> >
>> > OK, so what is the exact semantic of the node mask. Does it control
>> > where to demote from or to or both?
>> >
>> >> I don't think memory.reclaim would need a nodemask anymore? At least I
>> >> no longer see the use for it for us.
>> >> 
>> >> > >    Would
>> >> > >    they return -EINVAL if a) memory.reclaim gets passed only toptier
>> >> > >    nodes or b) memory.demote gets passed any lasttier nodes?
>> >> >
>> >> 
>> >> Honestly it would be great if memory.reclaim can force reclaim from
>> >> top tier nodes. It breaks the aging pipeline, yes, but if the user is
>> >> specifically asking for that because they decided in their usecase
>> >> it's a good idea then the kernel should comply IMO. Not a strict
>> >> requirement for us. Wei will chime in if he disagrees.
>> >
>> > That would require a nodemask to say which nodes to reclaim, no? The
>> > default behavior should be in line with what standard memory reclaim
>> > does. If demotion is a part of that process then memory.reclaim should
>> > be a part of it too. If we want finer control then a nodemask is
>> > really a must, and then the nodemask should constrain both aging and
>> > reclaim.
>> >
>> >> memory.demote returning -EINVAL for lasttier nodes makes sense to me.
>> >> 
>> >> > I would also add
>> >> > 4) Do we want to allow to control the demotion path (e.g. which node to
>> >> >    demote from and to) and how to achieve that?
>> >> 
>> >> We care deeply about specifying which node to demote _from_. That
>> >> would be some node that is approaching pressure and we're looking for
>> >> proactive saving from. So far I haven't seen any reason to control
>> >> which nodes to demote _to_. The kernel deciding that based on the
>> >> aging pipeline and the node distances sounds good to me. Obviously
>> >> someone else may find that useful.
>> >
>> > Please keep in mind that the interface should be really prepared for
>> > future extensions so try to abstract from your immediate usecases.
>> 
>> I see two requirements here, one is to control the demotion source, that
>> is, which nodes to free memory from.  The other is to control the demotion
>> path.  I think that we can use two different parameters for them, for
>> example, "from=<demotion source nodes>" and "to=<demotion target
>> nodes>".  In most cases we don't need to control the demotion path.
>> Because in the current implementation, the nodes in the lower tiers in the
>> same socket (local nodes) will be preferred.  I think that this is
>> the desired behavior in most cases.
>
> Even if the demotion path is not really required at the moment we should
> keep in mind future potential extensions. E.g. when a userspace based
> balancing is to be implemented because the default behavior cannot
> capture userspace policies (one example would be enforcing a
> prioritization of containers when some container's demoted pages would
> need to be demoted further to free up space for a different
> workload). 

Yes.  We should consider the potential requirements.

>> >> > 5) Is the demotion api restricted to multi-tier systems or any numa
>> >> >    configuration allowed as well?
>> >> >
>> >> 
>> >> demotion will of course not work on single tiered systems. The
>> >> interface may return some failure on such systems or not be available
>> >> at all.
>> >
>> > Is there any strong reason for that? We do not have any interface to
>> > control NUMA balancing from userspace. Why cannot we use the interface
>> > for that purpose? 
>> 
>> Do you mean to demote the cold pages from the specified source nodes to
>> the specified target nodes in different sockets?  We don't do that to
>> avoid loops in the demotion path.  If we prevent the target nodes from
>> demoting cold pages to the source nodes at the same time, it seems
>> doable.
>
> Loops could be avoided by properly specifying from and to nodes if
> this is going to be a fine-grained interface to control demotion.

Yes.

Best Regards,
Huang, Ying
Michal Hocko Dec. 16, 2022, 8:40 a.m. UTC | #23
On Thu 15-12-22 09:58:12, Wei Xu wrote:
> On Wed, Dec 14, 2022 at 2:23 AM Michal Hocko <mhocko@suse.com> wrote:
> >
> > On Tue 13-12-22 11:29:45, Mina Almasry wrote:
> > > On Tue, Dec 13, 2022 at 6:03 AM Michal Hocko <mhocko@suse.com> wrote:
> > > >
> > > > On Tue 13-12-22 14:30:40, Johannes Weiner wrote:
> > > > > On Tue, Dec 13, 2022 at 02:30:57PM +0800, Huang, Ying wrote:
> > > > [...]
> > > > > > After these discussions, I think the solution may be to use different
> > > > > > interfaces for "proactive demote" and "proactive reclaim".  That is,
> > > > > > reconsider "memory.demote".  In this way, we will always uncharge the
> > > > > > cgroup for "memory.reclaim".  This avoids the possible confusion there.
> > > > > > And, because demotion is considered aging, we don't need to disable
> > > > > > demotion for "memory.reclaim", just don't count it.
> > > > >
> > > > > Hm, so in summary:
> > > > >
> > > > > 1) memory.reclaim would demote and reclaim like today, but it would
> > > > >    change to only count reclaimed pages against the goal.
> > > > >
> > > > > 2) memory.demote would only demote.
> > > > >
> > >
> > > If the above 2 points are agreeable then yes, this sounds good to me
> > > and does address our use case.
> > >
> > > > >    a) What if the demotion targets are full? Would it reclaim or fail?
> > > > >
> > >
> > > Wei will chime in if he disagrees, but I think we _require_ that it
> > > fails, not falls back to reclaim. The interface is asking for
> > > demotion, and is called memory.demote. For such an interface to fall
> > > back to reclaim would be very confusing to userspace and may trigger
> > > reclaim on a high priority job that we want to shield from proactive
> > > reclaim.
> >
> > But what should happen if the immediate demotion target is full but
> > lower tiers are still usable. Should the first one demote before
> > allowing to demote from the top tier?
> 
> In that case, the demotion will fall back to the lower tiers.  See
> node_get_allowed_targets() and establish_demotion_targets().

I am not talking about an implicit behavior that we do not want to cast
into the interface. If we want to allow fine-grained control over
demotion then the implementation shouldn't rely on the current behavior.

[...]
> > Is there any strong reason for that? We do not have any interface to
> > control NUMA balancing from userspace. Why cannot we use the interface
> > for that purpose?
> 
> A demotion interface such as memory.demote will trigger the demotion
> code path in the kernel, which depends on multiple memory tiers.

Demotion is just a fancy name for directed migration. There is no real
dependency on the HW or the technology.

> I think what you are getting at is a more general page migration
> interface for memcg, which will need both source and target nodes as
> arguments. I think this can be a great idea.  It should be able to
> support our demotion use cases as well.

yes.

Patch

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 74cec76be9f2..c8ae7c897f14 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1245,17 +1245,13 @@  PAGE_SIZE multiple when read back.
 	This is a simple interface to trigger memory reclaim in the
 	target cgroup.

-	This file accepts a single key, the number of bytes to reclaim.
-	No nested keys are currently supported.
+	This file accepts a string which contains the number of bytes to
+	reclaim.

 	Example::

 	  echo "1G" > memory.reclaim

-	The interface can be later extended with nested keys to
-	configure the reclaim behavior. For example, specify the
-	type of memory to reclaim from (anon, file, ..).
-
 	Please note that the kernel can over or under reclaim from
 	the target cgroup. If less bytes are reclaimed than the
 	specified amount, -EAGAIN is returned.
@@ -1267,6 +1263,13 @@  PAGE_SIZE multiple when read back.
 	This means that the networking layer will not adapt based on
 	reclaim induced by memory.reclaim.

+	This file also allows the user to specify the nodes to reclaim from,
+	via the 'nodes=' key, for example::
+
+	  echo "1G nodes=0,1" > memory.reclaim
+
+	The above instructs the kernel to reclaim memory from nodes 0,1.
+
   memory.peak
 	A read-only single value file which exists on non-root
 	cgroups.
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0ceed49516ad..2787b84eaf12 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -418,7 +418,8 @@  extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 						  unsigned long nr_pages,
 						  gfp_t gfp_mask,
-						  unsigned int reclaim_options);
+						  unsigned int reclaim_options,
+						  nodemask_t *nodemask);
 extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
 						gfp_t gfp_mask, bool noswap,
 						pg_data_t *pgdat,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 23750cec0036..0f02f47a87e4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -63,6 +63,7 @@ 
 #include <linux/resume_user_mode.h>
 #include <linux/psi.h>
 #include <linux/seq_buf.h>
+#include <linux/parser.h>
 #include "internal.h"
 #include <net/sock.h>
 #include <net/ip.h>
@@ -2392,7 +2393,8 @@  static unsigned long reclaim_high(struct mem_cgroup *memcg,
 		psi_memstall_enter(&pflags);
 		nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
 							gfp_mask,
-							MEMCG_RECLAIM_MAY_SWAP);
+							MEMCG_RECLAIM_MAY_SWAP,
+							NULL);
 		psi_memstall_leave(&pflags);
 	} while ((memcg = parent_mem_cgroup(memcg)) &&
 		 !mem_cgroup_is_root(memcg));
@@ -2683,7 +2685,8 @@  static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,

 	psi_memstall_enter(&pflags);
 	nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
-						    gfp_mask, reclaim_options);
+						    gfp_mask, reclaim_options,
+						    NULL);
 	psi_memstall_leave(&pflags);

 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
@@ -3503,7 +3506,8 @@  static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
 		}

 		if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
-					memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP)) {
+					memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP,
+					NULL)) {
 			ret = -EBUSY;
 			break;
 		}
@@ -3614,7 +3618,8 @@  static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
 			return -EINTR;

 		if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
-						  MEMCG_RECLAIM_MAY_SWAP))
+						  MEMCG_RECLAIM_MAY_SWAP,
+						  NULL))
 			nr_retries--;
 	}

@@ -6407,7 +6412,8 @@  static ssize_t memory_high_write(struct kernfs_open_file *of,
 		}

 		reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
-					GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP);
+					GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP,
+					NULL);

 		if (!reclaimed && !nr_retries--)
 			break;
@@ -6456,7 +6462,8 @@  static ssize_t memory_max_write(struct kernfs_open_file *of,

 		if (nr_reclaims) {
 			if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
-					GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP))
+					GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP,
+					NULL))
 				nr_reclaims--;
 			continue;
 		}
@@ -6579,21 +6586,54 @@  static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
 	return nbytes;
 }

+enum {
+	MEMORY_RECLAIM_NODES = 0,
+	MEMORY_RECLAIM_NULL,
+};
+
+static const match_table_t if_tokens = {
+	{ MEMORY_RECLAIM_NODES, "nodes=%s" },
+	{ MEMORY_RECLAIM_NULL, NULL },
+};
+
 static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 			      size_t nbytes, loff_t off)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
 	unsigned long nr_to_reclaim, nr_reclaimed = 0;
-	unsigned int reclaim_options;
-	int err;
+	unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP |
+				       MEMCG_RECLAIM_PROACTIVE;
+	char *old_buf, *start;
+	substring_t args[MAX_OPT_ARGS];
+	int token;
+	char value[256];
+	nodemask_t nodemask = NODE_MASK_ALL;

 	buf = strstrip(buf);
-	err = page_counter_memparse(buf, "", &nr_to_reclaim);
-	if (err)
-		return err;

-	reclaim_options	= MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
+	old_buf = buf;
+	nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE;
+	if (buf == old_buf)
+		return -EINVAL;
+
+	buf = strstrip(buf);
+
+	while ((start = strsep(&buf, " ")) != NULL) {
+		if (!strlen(start))
+			continue;
+		token = match_token(start, if_tokens, args);
+		match_strlcpy(value, args, sizeof(value));
+		switch (token) {
+		case MEMORY_RECLAIM_NODES:
+			if (nodelist_parse(value, nodemask) < 0)
+				return -EINVAL;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
 	while (nr_reclaimed < nr_to_reclaim) {
 		unsigned long reclaimed;

@@ -6610,7 +6650,8 @@  static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,

 		reclaimed = try_to_free_mem_cgroup_pages(memcg,
 						nr_to_reclaim - nr_reclaimed,
-						GFP_KERNEL, reclaim_options);
+						GFP_KERNEL, reclaim_options,
+						&nodemask);

 		if (!reclaimed && !nr_retries--)
 			return -EAGAIN;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7b8e8e43806b..62b0c9b46bd2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6735,7 +6735,8 @@  unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
 unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 					   unsigned long nr_pages,
 					   gfp_t gfp_mask,
-					   unsigned int reclaim_options)
+					   unsigned int reclaim_options,
+					   nodemask_t *nodemask)
 {
 	unsigned long nr_reclaimed;
 	unsigned int noreclaim_flag;
@@ -6750,6 +6751,7 @@  unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 		.may_unmap = 1,
 		.may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
 		.proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
+		.nodemask = nodemask,
 	};
 	/*
 	 * Traverse the ZONELIST_FALLBACK zonelist of the current node to put