@@ -354,7 +354,8 @@ extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- bool may_swap);
+ bool may_swap,
+ bool force_reclaim);
extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
pg_data_t *pgdat,
@@ -2237,7 +2237,8 @@ static void reclaim_high(struct mem_cgroup *memcg,
if (page_counter_read(&memcg->memory) <= memcg->high)
continue;
memcg_memory_event(memcg, MEMCG_HIGH);
- try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
+ try_to_free_mem_cgroup_pages(memcg, nr_pages,
+ gfp_mask, true, false);
} while ((memcg = parent_mem_cgroup(memcg)));
}
@@ -2330,7 +2331,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
memcg_memory_event(mem_over_limit, MEMCG_MAX);
nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
- gfp_mask, may_swap);
+ gfp_mask, may_swap, false);
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
goto retry;
@@ -2860,7 +2861,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
}
if (!try_to_free_mem_cgroup_pages(memcg, 1,
- GFP_KERNEL, !memsw)) {
+ GFP_KERNEL, !memsw, true)) {
ret = -EBUSY;
break;
}
@@ -2993,7 +2994,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
return -EINTR;
progress = try_to_free_mem_cgroup_pages(memcg, 1,
- GFP_KERNEL, true);
+ GFP_KERNEL, true, true);
if (!progress) {
nr_retries--;
/* maybe some writeback is necessary */
@@ -5549,7 +5550,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
nr_pages = page_counter_read(&memcg->memory);
if (nr_pages > high)
try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
- GFP_KERNEL, true);
+ GFP_KERNEL, true, true);
memcg_wb_domain_size_changed(memcg);
return nbytes;
@@ -5596,7 +5597,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
if (nr_reclaims) {
if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
- GFP_KERNEL, true))
+ GFP_KERNEL, true, true))
nr_reclaims--;
continue;
}
@@ -3212,7 +3212,8 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- bool may_swap)
+ bool may_swap,
+ bool force_reclaim)
{
struct zonelist *zonelist;
unsigned long nr_reclaimed;
@@ -3243,13 +3244,15 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);
- psi_memstall_enter(&pflags);
+ if (!force_reclaim)
+ psi_memstall_enter(&pflags);
noreclaim_flag = memalloc_noreclaim_save();
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
memalloc_noreclaim_restore(noreclaim_flag);
- psi_memstall_leave(&pflags);
+ if (!force_reclaim)
+ psi_memstall_leave(&pflags);
trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
There are several cases, such as resize and force_empty, that do not need to be accounted to psi; accounting them is misleading. We also have a module that reclaims dying memcgs in the background to avoid accumulating too many dead memcgs, which can cause lots of trouble; without this patch, that makes the psi inaccuracy even worse. Signed-off-by: Xunlei Pang <xlpang@linux.alibaba.com> --- include/linux/swap.h | 3 ++- mm/memcontrol.c | 13 +++++++------ mm/vmscan.c | 9 ++++++--- 3 files changed, 15 insertions(+), 10 deletions(-)