[RFC,3/4] use memory.low local node protection for local node reclaim

Message ID: 20240920221202.1734227-4-kaiyang2@cs.cmu.edu
State: New
Series: memory tiering fairness by per-cgroup control of promotion and demotion

Commit Message

Kaiyang Zhao Sept. 20, 2024, 10:11 p.m. UTC
From: Kaiyang Zhao <kaiyang2@cs.cmu.edu>

When reclaim runs on behalf of the root memcg to reduce top-tier node
usage, apply the local memory.low protection, comparing each cgroup's
effective local protection (memory.elocallow) against its usage on the
top-tier node, instead of the global protection that is compared
against total usage.

Signed-off-by: Kaiyang Zhao <kaiyang2@cs.cmu.edu>
---
 include/linux/memcontrol.h | 25 +++++++++++++++----------
 mm/memcontrol.c            |  4 ++--
 mm/vmscan.c                | 19 ++++++++++++++-----
 3 files changed, 31 insertions(+), 17 deletions(-)
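
[Not part of the patch: a condensed, review-only sketch of the control
flow that the mm/vmscan.c hunks below establish, with the pieces shown
together for convenience. get_cgroup_local_usage() and memory.elocallow
come from earlier patches in this series; note the RFC hardcodes node 0
as the top-tier node.]

	/* Reclaim is "local" when the root memcg is reclaiming the
	 * top-tier node (node 0 in this RFC). */
	int is_local = (pgdat->node_id == 0) && root_reclaim(sc);

	/* Propagate the local instead of the global protection down
	 * the cgroup tree. */
	mem_cgroup_calculate_protection(target_memcg, memcg, is_local);

	/* Local reclaim compares the cgroup's top-tier usage against
	 * its effective local low protection (memory.elocallow);
	 * global reclaim keeps comparing total usage against elow. */
	if (mem_cgroup_below_low(target_memcg, memcg, is_local)) {
		/* soft protection: respected unless reclaim pressure
		 * leaves no alternative */
	}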

Patch

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 94aba4498fca..256912b91922 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -586,9 +586,9 @@  static inline bool mem_cgroup_disabled(void)
 static inline void mem_cgroup_protection(struct mem_cgroup *root,
 					 struct mem_cgroup *memcg,
 					 unsigned long *min,
-					 unsigned long *low)
+					 unsigned long *low, unsigned long *locallow)
 {
-	*min = *low = 0;
+	*min = *low = *locallow = 0;
 
 	if (mem_cgroup_disabled())
 		return;
@@ -631,10 +631,11 @@  static inline void mem_cgroup_protection(struct mem_cgroup *root,
 
 	*min = READ_ONCE(memcg->memory.emin);
 	*low = READ_ONCE(memcg->memory.elow);
+	*locallow = READ_ONCE(memcg->memory.elocallow);
 }
 
 void mem_cgroup_calculate_protection(struct mem_cgroup *root,
-				     struct mem_cgroup *memcg);
+				     struct mem_cgroup *memcg, int is_local);
 
 static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
 					  struct mem_cgroup *memcg)
@@ -651,13 +652,17 @@  static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
 unsigned long get_cgroup_local_usage(struct mem_cgroup *memcg, bool flush);
 
 static inline bool mem_cgroup_below_low(struct mem_cgroup *target,
-					struct mem_cgroup *memcg)
+					struct mem_cgroup *memcg, int is_local)
 {
 	if (mem_cgroup_unprotected(target, memcg))
 		return false;
 
-	return READ_ONCE(memcg->memory.elow) >=
-		page_counter_read(&memcg->memory);
+	if (is_local)
+		return READ_ONCE(memcg->memory.elocallow) >=
+			get_cgroup_local_usage(memcg, true);
+	else
+		return READ_ONCE(memcg->memory.elow) >=
+			page_counter_read(&memcg->memory);
 }
 
 static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
@@ -1159,13 +1164,13 @@  static inline void memcg_memory_event_mm(struct mm_struct *mm,
 static inline void mem_cgroup_protection(struct mem_cgroup *root,
 					 struct mem_cgroup *memcg,
 					 unsigned long *min,
-					 unsigned long *low)
+					 unsigned long *low, unsigned long *locallow)
 {
-	*min = *low = 0;
+	*min = *low = *locallow = 0;
 }
 
 static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
-						   struct mem_cgroup *memcg)
+						   struct mem_cgroup *memcg, int is_local)
 {
 }
 
@@ -1175,7 +1180,7 @@  static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
 	return true;
 }
 static inline bool mem_cgroup_below_low(struct mem_cgroup *target,
-					struct mem_cgroup *memcg)
+					struct mem_cgroup *memcg, int is_local)
 {
 	return false;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d7c5fff12105..61718ba998fe 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4495,7 +4495,7 @@  struct cgroup_subsys memory_cgrp_subsys = {
  *          of a top-down tree iteration, not for isolated queries.
  */
 void mem_cgroup_calculate_protection(struct mem_cgroup *root,
-				     struct mem_cgroup *memcg)
+				     struct mem_cgroup *memcg, int is_local)
 {
 	bool recursive_protection =
 		cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT;
@@ -4507,7 +4507,7 @@  void mem_cgroup_calculate_protection(struct mem_cgroup *root,
 		root = root_mem_cgroup;
 
 	page_counter_calculate_protection(&root->memory, &memcg->memory,
-					recursive_protection, false);
+					recursive_protection, is_local);
 }
 
 static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ce471d686a88..a2681d52fc5f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2377,6 +2377,7 @@  static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 	enum scan_balance scan_balance;
 	unsigned long ap, fp;
 	enum lru_list lru;
+	int is_local = (pgdat->node_id == 0) && root_reclaim(sc);
 
 	/* If we have no swap space, do not bother scanning anon folios. */
 	if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) {
@@ -2457,12 +2458,14 @@  static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 	for_each_evictable_lru(lru) {
 		bool file = is_file_lru(lru);
 		unsigned long lruvec_size;
-		unsigned long low, min;
+		unsigned long low, min, locallow;
 		unsigned long scan;
 
 		lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
 		mem_cgroup_protection(sc->target_mem_cgroup, memcg,
-				      &min, &low);
+				      &min, &low, &locallow);
+		if (is_local)
+			low = locallow;
 
 		if (min || low) {
 			/*
@@ -2494,7 +2497,12 @@  static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 			 * again by how much of the total memory used is under
 			 * hard protection.
 			 */
-			unsigned long cgroup_size = mem_cgroup_size(memcg);
+			unsigned long cgroup_size;
 			unsigned long protection;
 
+			if (is_local)
+				cgroup_size = get_cgroup_local_usage(memcg, true);
+			else
+				cgroup_size = mem_cgroup_size(memcg);
+
 			/* memory.low scaling, make sure we retry before OOM */
@@ -5869,6 +5877,7 @@  static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 	};
 	struct mem_cgroup_reclaim_cookie *partial = &reclaim;
 	struct mem_cgroup *memcg;
+	int is_local = (pgdat->node_id == 0) && root_reclaim(sc);
 
 	/*
 	 * In most cases, direct reclaimers can do partial walks
@@ -5896,7 +5905,7 @@  static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 		 */
 		cond_resched();
 
-		mem_cgroup_calculate_protection(target_memcg, memcg);
+		mem_cgroup_calculate_protection(target_memcg, memcg, is_local);
 
 		if (mem_cgroup_below_min(target_memcg, memcg)) {
 			/*
@@ -5904,7 +5913,7 @@  static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 			 * If there is no reclaimable memory, OOM.
 			 */
 			continue;
-		} else if (mem_cgroup_below_low(target_memcg, memcg)) {
+		} else if (mem_cgroup_below_low(target_memcg, memcg, is_local)) {
 			/*
 			 * Soft protection.
 			 * Respect the protection only as long as