diff mbox series

[09/10] mm/vmscan: never demote for memcg reclaim

Message ID 20210401183233.64A91C97@viggo.jf.intel.com (mailing list archive)
State New, archived
Headers show
Series Migrate Pages in lieu of discard | expand

Commit Message

Dave Hansen April 1, 2021, 6:32 p.m. UTC
From: Dave Hansen <dave.hansen@linux.intel.com>

Global reclaim aims to reduce the amount of memory used on
a given node or set of nodes.  Migrating pages to another
node serves this purpose.

memcg reclaim is different.  Its goal is to reduce the
total memory consumption of the entire memcg, across all
nodes.  Migration does not assist memcg reclaim because
it just moves page contents between nodes rather than
actually reducing memory consumption.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Suggested-by: Yang Shi <yang.shi@linux.alibaba.com>
Reviewed-by: Yang Shi <shy828301@gmail.com>
Cc: Wei Xu <weixugc@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: osalvador <osalvador@suse.de>
---

 b/mm/vmscan.c |   18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

Comments

Wei Xu April 2, 2021, 12:18 a.m. UTC | #1
On Thu, Apr 1, 2021 at 11:35 AM Dave Hansen <dave.hansen@linux.intel.com> wrote:
>
>
> From: Dave Hansen <dave.hansen@linux.intel.com>
>
> Global reclaim aims to reduce the amount of memory used on
> a given node or set of nodes.  Migrating pages to another
> node serves this purpose.
>
> memcg reclaim is different.  Its goal is to reduce the
> total memory consumption of the entire memcg, across all
> nodes.  Migration does not assist memcg reclaim because
> it just moves page contents between nodes rather than
> actually reducing memory consumption.
>
> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
> Suggested-by: Yang Shi <yang.shi@linux.alibaba.com>
> Reviewed-by: Yang Shi <shy828301@gmail.com>
> Cc: Wei Xu <weixugc@google.com>
> Cc: David Rientjes <rientjes@google.com>
> Cc: Huang Ying <ying.huang@intel.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: David Hildenbrand <david@redhat.com>
> Cc: osalvador <osalvador@suse.de>
> ---
>
>  b/mm/vmscan.c |   18 ++++++++++++------
>  1 file changed, 12 insertions(+), 6 deletions(-)
>
> diff -puN mm/vmscan.c~never-demote-for-memcg-reclaim mm/vmscan.c
> --- a/mm/vmscan.c~never-demote-for-memcg-reclaim        2021-03-31 15:17:20.476000239 -0700
> +++ b/mm/vmscan.c       2021-03-31 15:17:20.487000239 -0700
> @@ -288,7 +288,8 @@ static bool writeback_throttling_sane(st
>  #endif
>
>  static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg,
> -                                         int node_id)
> +                                         int node_id,
> +                                         struct scan_control *sc)

I don't see "sc" gets used in can_reclaim_anon_pages().  Remove it?

>  {
>         if (memcg == NULL) {
>                 /*
> @@ -326,7 +327,7 @@ unsigned long zone_reclaimable_pages(str
>
>         nr = zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_FILE) +
>                 zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_FILE);
> -       if (can_reclaim_anon_pages(NULL, zone_to_nid(zone)))
> +       if (can_reclaim_anon_pages(NULL, zone_to_nid(zone), NULL))
>                 nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) +
>                         zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON);
>
> @@ -1064,7 +1065,8 @@ static enum page_references page_check_r
>         return PAGEREF_RECLAIM;
>  }
>
> -static bool migrate_demote_page_ok(struct page *page)
> +static bool migrate_demote_page_ok(struct page *page,
> +                                  struct scan_control *sc)
>  {
>         int next_nid = next_demotion_node(page_to_nid(page));
>
> @@ -1072,6 +1074,10 @@ static bool migrate_demote_page_ok(struc
>         VM_BUG_ON_PAGE(PageHuge(page), page);
>         VM_BUG_ON_PAGE(PageLRU(page), page);
>
> +       /* It is pointless to do demotion in memcg reclaim */
> +       if (cgroup_reclaim(sc))
> +               return false;
> +
>         if (next_nid == NUMA_NO_NODE)
>                 return false;
>         if (PageTransHuge(page) && !thp_migration_supported())
> @@ -1328,7 +1334,7 @@ retry:
>                  * Before reclaiming the page, try to relocate
>                  * its contents to another node.
>                  */
> -               if (do_demote_pass && migrate_demote_page_ok(page)) {
> +               if (do_demote_pass && migrate_demote_page_ok(page, sc)) {
>                         list_add(&page->lru, &demote_pages);
>                         unlock_page(page);
>                         continue;
> @@ -2362,7 +2368,7 @@ static void get_scan_count(struct lruvec
>         enum lru_list lru;
>
>         /* If we have no swap space, do not bother scanning anon pages. */
> -       if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id)) {
> +       if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) {
>                 scan_balance = SCAN_FILE;
>                 goto out;
>         }
> @@ -2737,7 +2743,7 @@ static inline bool should_continue_recla
>          */
>         pages_for_compaction = compact_gap(sc->order);
>         inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
> -       if (can_reclaim_anon_pages(NULL, pgdat->node_id))
> +       if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc))
>                 inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
>
>         return inactive_lru_pages > pages_for_compaction;
> _
diff mbox series

Patch

diff -puN mm/vmscan.c~never-demote-for-memcg-reclaim mm/vmscan.c
--- a/mm/vmscan.c~never-demote-for-memcg-reclaim	2021-03-31 15:17:20.476000239 -0700
+++ b/mm/vmscan.c	2021-03-31 15:17:20.487000239 -0700
@@ -288,7 +288,8 @@  static bool writeback_throttling_sane(st
 #endif
 
 static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg,
-					  int node_id)
+					  int node_id,
+					  struct scan_control *sc)
 {
 	if (memcg == NULL) {
 		/*
@@ -326,7 +327,7 @@  unsigned long zone_reclaimable_pages(str
 
 	nr = zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_FILE) +
 		zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_FILE);
-	if (can_reclaim_anon_pages(NULL, zone_to_nid(zone)))
+	if (can_reclaim_anon_pages(NULL, zone_to_nid(zone), NULL))
 		nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) +
 			zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON);
 
@@ -1064,7 +1065,8 @@  static enum page_references page_check_r
 	return PAGEREF_RECLAIM;
 }
 
-static bool migrate_demote_page_ok(struct page *page)
+static bool migrate_demote_page_ok(struct page *page,
+				   struct scan_control *sc)
 {
 	int next_nid = next_demotion_node(page_to_nid(page));
 
@@ -1072,6 +1074,10 @@  static bool migrate_demote_page_ok(struc
 	VM_BUG_ON_PAGE(PageHuge(page), page);
 	VM_BUG_ON_PAGE(PageLRU(page), page);
 
+	/* It is pointless to do demotion in memcg reclaim */
+	if (cgroup_reclaim(sc))
+		return false;
+
 	if (next_nid == NUMA_NO_NODE)
 		return false;
 	if (PageTransHuge(page) && !thp_migration_supported())
@@ -1328,7 +1334,7 @@  retry:
 		 * Before reclaiming the page, try to relocate
 		 * its contents to another node.
 		 */
-		if (do_demote_pass && migrate_demote_page_ok(page)) {
+		if (do_demote_pass && migrate_demote_page_ok(page, sc)) {
 			list_add(&page->lru, &demote_pages);
 			unlock_page(page);
 			continue;
@@ -2362,7 +2368,7 @@  static void get_scan_count(struct lruvec
 	enum lru_list lru;
 
 	/* If we have no swap space, do not bother scanning anon pages. */
-	if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id)) {
+	if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) {
 		scan_balance = SCAN_FILE;
 		goto out;
 	}
@@ -2737,7 +2743,7 @@  static inline bool should_continue_recla
 	 */
 	pages_for_compaction = compact_gap(sc->order);
 	inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
-	if (can_reclaim_anon_pages(NULL, pgdat->node_id))
+	if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc))
 		inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
 
 	return inactive_lru_pages > pages_for_compaction;