diff mbox series

[-V8,2/6] memory tiering: add page promotion counter

Message ID 20210914013701.344956-3-ying.huang@intel.com (mailing list archive)
State New
Headers show
Series NUMA balancing: optimize memory placement for memory tiering system | expand

Commit Message

Huang, Ying Sept. 14, 2021, 1:36 a.m. UTC
Add a counter to distinguish the number of pages promoted by memory
tiering from the number of pages migrated by the original inter-socket
NUMA balancing.  The counter is per-node (counted in the target node),
so it can be used to identify promotion imbalance among the NUMA nodes.

Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Wei Xu <weixugc@google.com>
Cc: osalvador <osalvador@suse.de>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org
---
 include/linux/mmzone.h |  3 +++
 include/linux/node.h   |  5 +++++
 mm/migrate.c           | 11 +++++++++--
 mm/vmstat.c            |  3 +++
 4 files changed, 20 insertions(+), 2 deletions(-)

Comments

Yang Shi Sept. 14, 2021, 10:41 p.m. UTC | #1
On Mon, Sep 13, 2021 at 6:37 PM Huang Ying <ying.huang@intel.com> wrote:
>
> To distinguish the number of the memory tiering promoted pages from
> that of the originally inter-socket NUMA balancing migrated pages.
> The counter is per-node (count in the target node).  So this can be
> used to identify promotion imbalance among the NUMA nodes.

I'd like this patch to be the very first one in the series, since we
need such counters regardless of all the optimizations. Actually, I
think this patch could go with the merged "migration in lieu of
discard" patchset.

>
> Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Rik van Riel <riel@surriel.com>
> Cc: Mel Gorman <mgorman@suse.de>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Dave Hansen <dave.hansen@linux.intel.com>
> Cc: Yang Shi <shy828301@gmail.com>
> Cc: Zi Yan <ziy@nvidia.com>
> Cc: Wei Xu <weixugc@google.com>
> Cc: osalvador <osalvador@suse.de>
> Cc: Shakeel Butt <shakeelb@google.com>
> Cc: linux-kernel@vger.kernel.org
> Cc: linux-mm@kvack.org
> ---
>  include/linux/mmzone.h |  3 +++
>  include/linux/node.h   |  5 +++++
>  mm/migrate.c           | 11 +++++++++--
>  mm/vmstat.c            |  3 +++
>  4 files changed, 20 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 6a1d79d84675..37ccd6158765 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -209,6 +209,9 @@ enum node_stat_item {
>         NR_PAGETABLE,           /* used for pagetables */
>  #ifdef CONFIG_SWAP
>         NR_SWAPCACHE,
> +#endif
> +#ifdef CONFIG_NUMA_BALANCING
> +       PGPROMOTE_SUCCESS,      /* promote successfully */
>  #endif
>         NR_VM_NODE_STAT_ITEMS
>  };
> diff --git a/include/linux/node.h b/include/linux/node.h
> index 8e5a29897936..26e96fcc66af 100644
> --- a/include/linux/node.h
> +++ b/include/linux/node.h
> @@ -181,4 +181,9 @@ static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
>
>  #define to_node(device) container_of(device, struct node, dev)
>
> +static inline bool node_is_toptier(int node)
> +{
> +       return node_state(node, N_CPU);
> +}
> +
>  #endif /* _LINUX_NODE_H_ */
> diff --git a/mm/migrate.c b/mm/migrate.c
> index a159a36dd412..6f7a6e2ef41f 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -2163,6 +2163,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
>         pg_data_t *pgdat = NODE_DATA(node);
>         int isolated;
>         int nr_remaining;
> +       int nr_succeeded;
>         LIST_HEAD(migratepages);
>         new_page_t *new;
>         bool compound;
> @@ -2201,7 +2202,8 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
>
>         list_add(&page->lru, &migratepages);
>         nr_remaining = migrate_pages(&migratepages, *new, NULL, node,
> -                                    MIGRATE_ASYNC, MR_NUMA_MISPLACED, NULL);
> +                                    MIGRATE_ASYNC, MR_NUMA_MISPLACED,
> +                                    &nr_succeeded);
>         if (nr_remaining) {
>                 if (!list_empty(&migratepages)) {
>                         list_del(&page->lru);
> @@ -2210,8 +2212,13 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
>                         putback_lru_page(page);
>                 }
>                 isolated = 0;
> -       } else
> +       } else {
>                 count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_pages);
> +               if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING &&
> +                   !node_is_toptier(page_to_nid(page)) && node_is_toptier(node))
> +                       mod_node_page_state(NODE_DATA(node), PGPROMOTE_SUCCESS,
> +                                           nr_succeeded);
> +       }
>         BUG_ON(!list_empty(&migratepages));
>         return isolated;
>
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 8ce2620344b2..fff0ec94d795 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1236,6 +1236,9 @@ const char * const vmstat_text[] = {
>  #ifdef CONFIG_SWAP
>         "nr_swapcached",
>  #endif
> +#ifdef CONFIG_NUMA_BALANCING
> +       "pgpromote_success",
> +#endif
>
>         /* enum writeback_stat_item counters */
>         "nr_dirty_threshold",
> --
> 2.30.2
>
Huang, Ying Sept. 15, 2021, 1:53 a.m. UTC | #2
Yang Shi <shy828301@gmail.com> writes:

> On Mon, Sep 13, 2021 at 6:37 PM Huang Ying <ying.huang@intel.com> wrote:
>>
>> To distinguish the number of the memory tiering promoted pages from
>> that of the originally inter-socket NUMA balancing migrated pages.
>> The counter is per-node (count in the target node).  So this can be
>> used to identify promotion imbalance among the NUMA nodes.
>
> I'd like this patch be the very first one in the series. Since we need
> such counters regardless of all the optimizations. And actually I
> think this patch could go with the merged "migration in lieu of
> discard" patchset.

Yes.  This sounds reasonable.  I will change this in the next version.

Best Regards,
Huang, Ying
diff mbox series

Patch

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6a1d79d84675..37ccd6158765 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -209,6 +209,9 @@  enum node_stat_item {
 	NR_PAGETABLE,		/* used for pagetables */
 #ifdef CONFIG_SWAP
 	NR_SWAPCACHE,
+#endif
+#ifdef CONFIG_NUMA_BALANCING
+	PGPROMOTE_SUCCESS,	/* promote successfully */
 #endif
 	NR_VM_NODE_STAT_ITEMS
 };
diff --git a/include/linux/node.h b/include/linux/node.h
index 8e5a29897936..26e96fcc66af 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -181,4 +181,9 @@  static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
 
 #define to_node(device) container_of(device, struct node, dev)
 
+static inline bool node_is_toptier(int node)
+{
+	return node_state(node, N_CPU);
+}
+
 #endif /* _LINUX_NODE_H_ */
diff --git a/mm/migrate.c b/mm/migrate.c
index a159a36dd412..6f7a6e2ef41f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2163,6 +2163,7 @@  int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
 	pg_data_t *pgdat = NODE_DATA(node);
 	int isolated;
 	int nr_remaining;
+	int nr_succeeded;
 	LIST_HEAD(migratepages);
 	new_page_t *new;
 	bool compound;
@@ -2201,7 +2202,8 @@  int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
 
 	list_add(&page->lru, &migratepages);
 	nr_remaining = migrate_pages(&migratepages, *new, NULL, node,
-				     MIGRATE_ASYNC, MR_NUMA_MISPLACED, NULL);
+				     MIGRATE_ASYNC, MR_NUMA_MISPLACED,
+				     &nr_succeeded);
 	if (nr_remaining) {
 		if (!list_empty(&migratepages)) {
 			list_del(&page->lru);
@@ -2210,8 +2212,13 @@  int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
 			putback_lru_page(page);
 		}
 		isolated = 0;
-	} else
+	} else {
 		count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_pages);
+		if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING &&
+		    !node_is_toptier(page_to_nid(page)) && node_is_toptier(node))
+			mod_node_page_state(NODE_DATA(node), PGPROMOTE_SUCCESS,
+					    nr_succeeded);
+	}
 	BUG_ON(!list_empty(&migratepages));
 	return isolated;
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 8ce2620344b2..fff0ec94d795 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1236,6 +1236,9 @@  const char * const vmstat_text[] = {
 #ifdef CONFIG_SWAP
 	"nr_swapcached",
 #endif
+#ifdef CONFIG_NUMA_BALANCING
+	"pgpromote_success",
+#endif
 
 	/* enum writeback_stat_item counters */
 	"nr_dirty_threshold",