
[RFC,3/4] use memory.low local node protection for local node reclaim

Message ID 20240920221202.1734227-4-kaiyang2@cs.cmu.edu (mailing list archive)
State New
Series memory tiering fairness by per-cgroup control of promotion and demotion

Commit Message

Kaiyang Zhao Sept. 20, 2024, 10:11 p.m. UTC
From: Kaiyang Zhao <kaiyang2@cs.cmu.edu>

When reclaim targets the top-tier node usage by the root memcg,
apply local memory.low protection instead of global protection.

Signed-off-by: Kaiyang Zhao <kaiyang2@cs.cmu.edu>
---
 include/linux/memcontrol.h | 23 ++++++++++++++---------
 mm/memcontrol.c            |  4 ++--
 mm/vmscan.c                | 19 ++++++++++++++-----
 3 files changed, 30 insertions(+), 16 deletions(-)

Comments

Gregory Price Oct. 15, 2024, 9:52 p.m. UTC | #1
On Fri, Sep 20, 2024 at 10:11:50PM +0000, kaiyang2@cs.cmu.edu wrote:
> From: Kaiyang Zhao <kaiyang2@cs.cmu.edu>
> 
> When reclaim targets the top-tier node usage by the root memcg,
> apply local memory.low protection instead of global protection.
> 

The changelog probably needs a little more context about the intended
effect of this change.  What exactly is the implication of this change
compared to applying the protection against elow?
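
For example, if I'm reading the patch right, on the top-tier node the
low check becomes roughly (pieces lifted from the patch, untested):

	/* local reclaim: local protection vs. local usage */
	READ_ONCE(memcg->memory.elocallow) >=
		get_cgroup_local_usage(memcg, true)

instead of the global

	/* global reclaim: global protection vs. total usage */
	READ_ONCE(memcg->memory.elow) >=
		page_counter_read(&memcg->memory)

so spelling out the intended behavioral difference in the changelog
would make this easier to review.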

> Signed-off-by: Kaiyang Zhao <kaiyang2@cs.cmu.edu>
> ---
>  include/linux/memcontrol.h | 23 ++++++++++++++---------
>  mm/memcontrol.c            |  4 ++--
>  mm/vmscan.c                | 19 ++++++++++++++-----
>  3 files changed, 30 insertions(+), 16 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 94aba4498fca..256912b91922 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -586,9 +586,9 @@ static inline bool mem_cgroup_disabled(void)
>  static inline void mem_cgroup_protection(struct mem_cgroup *root,
>  					 struct mem_cgroup *memcg,
>  					 unsigned long *min,
> -					 unsigned long *low)
> +					 unsigned long *low, unsigned long *locallow)
>  {
> -	*min = *low = 0;
> +	*min = *low = *locallow = 0;
>  

"locallow" can be read as "loc allow" or "local low", probably you
want to change all the references to local_low.
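
e.g. just the parameter rename (untested sketch):

	static inline void mem_cgroup_protection(struct mem_cgroup *root,
						 struct mem_cgroup *memcg,
						 unsigned long *min,
						 unsigned long *low,
						 unsigned long *local_low)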

Sorry for not saying this on earlier feedback.


>  	if (mem_cgroup_disabled())
>  		return;
> @@ -631,10 +631,11 @@ static inline void mem_cgroup_protection(struct mem_cgroup *root,
>  
>  	*min = READ_ONCE(memcg->memory.emin);
>  	*low = READ_ONCE(memcg->memory.elow);
> +	*locallow = READ_ONCE(memcg->memory.elocallow);
>  }
>  
>  void mem_cgroup_calculate_protection(struct mem_cgroup *root,
> -				     struct mem_cgroup *memcg);
> +				     struct mem_cgroup *memcg, int is_local);
>  
>  static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
>  					  struct mem_cgroup *memcg)
> @@ -651,13 +652,17 @@ static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
>  unsigned long get_cgroup_local_usage(struct mem_cgroup *memcg, bool flush);
>  
>  static inline bool mem_cgroup_below_low(struct mem_cgroup *target,
> -					struct mem_cgroup *memcg)
> +					struct mem_cgroup *memcg, int is_local)
>  {
>  	if (mem_cgroup_unprotected(target, memcg))
>  		return false;
>  
> -	return READ_ONCE(memcg->memory.elow) >=
> -		page_counter_read(&memcg->memory);
> +	if (is_local)
> +		return READ_ONCE(memcg->memory.elocallow) >=
> +			get_cgroup_local_usage(memcg, true);
> +	else
> +		return READ_ONCE(memcg->memory.elow) >=
> +			page_counter_read(&memcg->memory);

Don't need the else case here since the if block returns.
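
i.e. something like (untested):

	if (is_local)
		return READ_ONCE(memcg->memory.elocallow) >=
			get_cgroup_local_usage(memcg, true);

	return READ_ONCE(memcg->memory.elow) >=
		page_counter_read(&memcg->memory);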

>  }
>  
>  static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
> @@ -1159,13 +1164,13 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
>  static inline void mem_cgroup_protection(struct mem_cgroup *root,
>  					 struct mem_cgroup *memcg,
>  					 unsigned long *min,
> -					 unsigned long *low)
> +					 unsigned long *low, unsigned long *locallow)
>  {
>  	*min = *low = 0;
>  }
>  
>  static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
> -						   struct mem_cgroup *memcg)
> +						   struct mem_cgroup *memcg, int is_local)
>  {
>  }
>  
> @@ -1175,7 +1180,7 @@ static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
>  	return true;
>  }
>  static inline bool mem_cgroup_below_low(struct mem_cgroup *target,
> -					struct mem_cgroup *memcg)
> +					struct mem_cgroup *memcg, int is_local)
>  {
>  	return false;
>  }
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index d7c5fff12105..61718ba998fe 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4495,7 +4495,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
>   *          of a top-down tree iteration, not for isolated queries.
>   */
>  void mem_cgroup_calculate_protection(struct mem_cgroup *root,
> -				     struct mem_cgroup *memcg)
> +				     struct mem_cgroup *memcg, int is_local)
>  {
>  	bool recursive_protection =
>  		cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT;
> @@ -4507,7 +4507,7 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root,
>  		root = root_mem_cgroup;
>  
>  	page_counter_calculate_protection(&root->memory, &memcg->memory,
> -					recursive_protection, false);
> +					recursive_protection, is_local);
>  }
>  
>  static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index ce471d686a88..a2681d52fc5f 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2377,6 +2377,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
>  	enum scan_balance scan_balance;
>  	unsigned long ap, fp;
>  	enum lru_list lru;
> +	int is_local = (pgdat->node_id == 0) && root_reclaim(sc);

int should be bool to be more explicit as to what the valid values are.

Should be addressed across the patch set.
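
e.g. (same line, just typed as bool):

	bool is_local = (pgdat->node_id == 0) && root_reclaim(sc);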

>  
>  	/* If we have no swap space, do not bother scanning anon folios. */
>  	if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) {
> @@ -2457,12 +2458,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
>  	for_each_evictable_lru(lru) {
>  		bool file = is_file_lru(lru);
>  		unsigned long lruvec_size;
> -		unsigned long low, min;
> +		unsigned long low, min, locallow;
>  		unsigned long scan;
>  
>  		lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
>  		mem_cgroup_protection(sc->target_mem_cgroup, memcg,
> -				      &min, &low);
> +				      &min, &low, &locallow);
> +		if (is_local)
> +			low = locallow;
>  
>  		if (min || low) {
>  			/*
> @@ -2494,7 +2497,12 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
>  			 * again by how much of the total memory used is under
>  			 * hard protection.
>  			 */
> -			unsigned long cgroup_size = mem_cgroup_size(memcg);
> +			unsigned long cgroup_size;
> +
> +			if (is_local)
> +				cgroup_size = get_cgroup_local_usage(memcg, true);
> +			else
> +				cgroup_size = mem_cgroup_size(memcg);
>  			unsigned long protection;
>  
>  			/* memory.low scaling, make sure we retry before OOM */
> @@ -5869,6 +5877,7 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
>  	};
>  	struct mem_cgroup_reclaim_cookie *partial = &reclaim;
>  	struct mem_cgroup *memcg;
> +	int is_local = (pgdat->node_id == 0) && root_reclaim(sc);
>  
>  	/*
>  	 * In most cases, direct reclaimers can do partial walks
> @@ -5896,7 +5905,7 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
>  		 */
>  		cond_resched();
>  
> -		mem_cgroup_calculate_protection(target_memcg, memcg);
> +		mem_cgroup_calculate_protection(target_memcg, memcg, is_local);
>  
>  		if (mem_cgroup_below_min(target_memcg, memcg)) {
>  			/*
> @@ -5904,7 +5913,7 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
>  			 * If there is no reclaimable memory, OOM.
>  			 */
>  			continue;
> -		} else if (mem_cgroup_below_low(target_memcg, memcg)) {
> +		} else if (mem_cgroup_below_low(target_memcg, memcg, is_local)) {
>  			/*
>  			 * Soft protection.
>  			 * Respect the protection only as long as
> -- 
> 2.43.0
>

Patch

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 94aba4498fca..256912b91922 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -586,9 +586,9 @@  static inline bool mem_cgroup_disabled(void)
 static inline void mem_cgroup_protection(struct mem_cgroup *root,
 					 struct mem_cgroup *memcg,
 					 unsigned long *min,
-					 unsigned long *low)
+					 unsigned long *low, unsigned long *locallow)
 {
-	*min = *low = 0;
+	*min = *low = *locallow = 0;
 
 	if (mem_cgroup_disabled())
 		return;
@@ -631,10 +631,11 @@  static inline void mem_cgroup_protection(struct mem_cgroup *root,
 
 	*min = READ_ONCE(memcg->memory.emin);
 	*low = READ_ONCE(memcg->memory.elow);
+	*locallow = READ_ONCE(memcg->memory.elocallow);
 }
 
 void mem_cgroup_calculate_protection(struct mem_cgroup *root,
-				     struct mem_cgroup *memcg);
+				     struct mem_cgroup *memcg, int is_local);
 
 static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
 					  struct mem_cgroup *memcg)
@@ -651,13 +652,17 @@  static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
 unsigned long get_cgroup_local_usage(struct mem_cgroup *memcg, bool flush);
 
 static inline bool mem_cgroup_below_low(struct mem_cgroup *target,
-					struct mem_cgroup *memcg)
+					struct mem_cgroup *memcg, int is_local)
 {
 	if (mem_cgroup_unprotected(target, memcg))
 		return false;
 
-	return READ_ONCE(memcg->memory.elow) >=
-		page_counter_read(&memcg->memory);
+	if (is_local)
+		return READ_ONCE(memcg->memory.elocallow) >=
+			get_cgroup_local_usage(memcg, true);
+	else
+		return READ_ONCE(memcg->memory.elow) >=
+			page_counter_read(&memcg->memory);
 }
 
 static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
@@ -1159,13 +1164,13 @@  static inline void memcg_memory_event_mm(struct mm_struct *mm,
 static inline void mem_cgroup_protection(struct mem_cgroup *root,
 					 struct mem_cgroup *memcg,
 					 unsigned long *min,
-					 unsigned long *low)
+					 unsigned long *low, unsigned long *locallow)
 {
 	*min = *low = 0;
 }
 
 static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
-						   struct mem_cgroup *memcg)
+						   struct mem_cgroup *memcg, int is_local)
 {
 }
 
@@ -1175,7 +1180,7 @@  static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
 	return true;
 }
 static inline bool mem_cgroup_below_low(struct mem_cgroup *target,
-					struct mem_cgroup *memcg)
+					struct mem_cgroup *memcg, int is_local)
 {
 	return false;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d7c5fff12105..61718ba998fe 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4495,7 +4495,7 @@  struct cgroup_subsys memory_cgrp_subsys = {
  *          of a top-down tree iteration, not for isolated queries.
  */
 void mem_cgroup_calculate_protection(struct mem_cgroup *root,
-				     struct mem_cgroup *memcg)
+				     struct mem_cgroup *memcg, int is_local)
 {
 	bool recursive_protection =
 		cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT;
@@ -4507,7 +4507,7 @@  void mem_cgroup_calculate_protection(struct mem_cgroup *root,
 		root = root_mem_cgroup;
 
 	page_counter_calculate_protection(&root->memory, &memcg->memory,
-					recursive_protection, false);
+					recursive_protection, is_local);
 }
 
 static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ce471d686a88..a2681d52fc5f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2377,6 +2377,7 @@  static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 	enum scan_balance scan_balance;
 	unsigned long ap, fp;
 	enum lru_list lru;
+	int is_local = (pgdat->node_id == 0) && root_reclaim(sc);
 
 	/* If we have no swap space, do not bother scanning anon folios. */
 	if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) {
@@ -2457,12 +2458,14 @@  static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 	for_each_evictable_lru(lru) {
 		bool file = is_file_lru(lru);
 		unsigned long lruvec_size;
-		unsigned long low, min;
+		unsigned long low, min, locallow;
 		unsigned long scan;
 
 		lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
 		mem_cgroup_protection(sc->target_mem_cgroup, memcg,
-				      &min, &low);
+				      &min, &low, &locallow);
+		if (is_local)
+			low = locallow;
 
 		if (min || low) {
 			/*
@@ -2494,7 +2497,12 @@  static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 			 * again by how much of the total memory used is under
 			 * hard protection.
 			 */
-			unsigned long cgroup_size = mem_cgroup_size(memcg);
+			unsigned long cgroup_size;
+
+			if (is_local)
+				cgroup_size = get_cgroup_local_usage(memcg, true);
+			else
+				cgroup_size = mem_cgroup_size(memcg);
 			unsigned long protection;
 
 			/* memory.low scaling, make sure we retry before OOM */
@@ -5869,6 +5877,7 @@  static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 	};
 	struct mem_cgroup_reclaim_cookie *partial = &reclaim;
 	struct mem_cgroup *memcg;
+	int is_local = (pgdat->node_id == 0) && root_reclaim(sc);
 
 	/*
 	 * In most cases, direct reclaimers can do partial walks
@@ -5896,7 +5905,7 @@  static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 		 */
 		cond_resched();
 
-		mem_cgroup_calculate_protection(target_memcg, memcg);
+		mem_cgroup_calculate_protection(target_memcg, memcg, is_local);
 
 		if (mem_cgroup_below_min(target_memcg, memcg)) {
 			/*
@@ -5904,7 +5913,7 @@  static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 			 * If there is no reclaimable memory, OOM.
 			 */
 			continue;
-		} else if (mem_cgroup_below_low(target_memcg, memcg)) {
+		} else if (mem_cgroup_below_low(target_memcg, memcg, is_local)) {
 			/*
 			 * Soft protection.
 			 * Respect the protection only as long as