@@ -409,6 +409,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
#define MEMCG_RECLAIM_MAY_SWAP (1 << 1)
#define MEMCG_RECLAIM_PROACTIVE (1 << 2)
+#define MEMCG_RECLAIM_DIS_UNMAP_FILE (1 << 3)
#define MIN_SWAPPINESS 0
#define MAX_SWAPPINESS 200
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
@@ -4282,11 +4282,13 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
enum {
MEMORY_RECLAIM_SWAPPINESS = 0,
+ MEMORY_RECLAIM_DISABLE_UNMAP_FILE,
MEMORY_RECLAIM_NULL,
};
static const match_table_t tokens = {
{ MEMORY_RECLAIM_SWAPPINESS, "swappiness=%d"},
+ { MEMORY_RECLAIM_DISABLE_UNMAP_FILE, "disable_unmap_file"},
{ MEMORY_RECLAIM_NULL, NULL },
};
@@ -4297,7 +4299,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
unsigned int nr_retries = MAX_RECLAIM_RETRIES;
unsigned long nr_to_reclaim, nr_reclaimed = 0;
int swappiness = -1;
- unsigned int reclaim_options;
+ unsigned int reclaim_options = 0;
char *old_buf, *start;
substring_t args[MAX_OPT_ARGS];
@@ -4320,12 +4322,15 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
if (swappiness < MIN_SWAPPINESS || swappiness > MAX_SWAPPINESS)
return -EINVAL;
break;
+ case MEMORY_RECLAIM_DISABLE_UNMAP_FILE:
+ reclaim_options = MEMCG_RECLAIM_DIS_UNMAP_FILE;
+ break;
default:
return -EINVAL;
}
}
- reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
+ reclaim_options |= MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
while (nr_reclaimed < nr_to_reclaim) {
/* Will converge on zero, but reclaim enforces a minimum */
unsigned long batch_size = (nr_to_reclaim - nr_reclaimed) / 4;
@@ -6609,6 +6609,10 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
.may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
.proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
};
+
+ if (reclaim_options & MEMCG_RECLAIM_DIS_UNMAP_FILE)
+ sc.may_unmap &= ~UNMAP_FILE;
+
/*
* Traverse the ZONELIST_FALLBACK zonelist of the current node to put
* equal pressure on all the nodes. This is based on the assumption that
Allow proactive memory reclaimers to submit an additional disable_unmap_file argument to memory.reclaim. This will skip mapped file pages for that reclaim attempt.

For example:

    echo "2M disable_unmap_file" > /sys/fs/cgroup/test/memory.reclaim

will perform reclaim on the test cgroup without reclaiming any mapped file pages.

The memory.reclaim file is a useful interface: it lets us carry out proactive memory reclaim from user space, which can increase memory utilization. In actual usage scenarios, we found that even when there are sufficient anonymous pages, mapped file pages, which make up a relatively small proportion of memory, would still be reclaimed. This is likely to cause an increase in refaults and in task delay, because mapped file pages usually include important executable code, data, shared libraries, etc. According to our verification, if we can skip this part of the memory, the business delay will be reduced.

Even if there are sufficient anonymous pages and only a small number of page cache and mapped file pages, mapped file pages will still be reclaimed. Here is an example of anonymous pages being sufficient but mapped file pages still being reclaimed:

    cat memory.stat | grep -wE 'anon|file|file_mapped'
    anon 3406462976
    file 332967936
    file_mapped 300302336

    echo "1g swappiness=200" > memory.reclaim
    cat memory.stat | grep -wE 'anon|file|file_mapped'
    anon 2613276672
    file 52523008
    file_mapped 30982144

    echo "1g swappiness=200" > memory.reclaim
    cat memory.stat | grep -wE 'anon|file|file_mapped'
    anon 1552130048
    file 39759872
    file_mapped 20299776

With this patch, the file_mapped pages will be skipped:

    echo "1g swappiness=200 disable_unmap_file" > memory.reclaim
    cat memory.stat | grep -wE 'anon|file|file_mapped'
    anon 480059392
    file 37978112
    file_mapped 20299776

IMO, it is difficult to balance the priorities of various pages in the kernel; there are too many scenarios to consider.
However, for the scenario of proactive memory reclaim in user space, we can make a simple judgment in this case. Signed-off-by: Zhongkun He <hezhongkun.hzk@bytedance.com> --- include/linux/swap.h | 1 + mm/memcontrol.c | 9 +++++++-- mm/vmscan.c | 4 ++++ 3 files changed, 12 insertions(+), 2 deletions(-)