diff mbox series

[5/6] mm/migration: export demotion_path of a node via sysfs

Message ID 20220416053902.68517-6-dave@stgolabs.net (mailing list archive)
State New
Headers show
Series mm: proactive reclaim and memory tiering topics | expand

Commit Message

Davidlohr Bueso April 16, 2022, 5:39 a.m. UTC
Add a /sys/devices/system/node/nodeX/demotion_path file
to export the possible target(s) in node_demotion[node].

Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
---
 Documentation/ABI/stable/sysfs-devices-node |  6 ++++
 drivers/base/node.c                         | 39 +++++++++++++++++++++
 include/linux/migrate.h                     | 15 ++++++++
 mm/migrate.c                                | 15 +-------
 4 files changed, 61 insertions(+), 14 deletions(-)

Comments

Yang Shi April 22, 2022, 5:31 p.m. UTC | #1
On Fri, Apr 15, 2022 at 10:39 PM Davidlohr Bueso <dave@stgolabs.net> wrote:
>
> Add a /sys/devices/system/node/nodeX/demotion_path file
> to export the possible target(s) in node_demotion[node].

I'm not sure if you noticed that Jagdish Gediya is working on a
similar patch; please see
https://lore.kernel.org/linux-mm/20220413092206.73974-1-jvgediya@linux.ibm.com/

It would be better to combine the two to avoid duplicate effort.

>
> Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
> ---
>  Documentation/ABI/stable/sysfs-devices-node |  6 ++++
>  drivers/base/node.c                         | 39 +++++++++++++++++++++
>  include/linux/migrate.h                     | 15 ++++++++
>  mm/migrate.c                                | 15 +-------
>  4 files changed, 61 insertions(+), 14 deletions(-)
>
> diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
> index 3c935e1334f7..f620c6ae013c 100644
> --- a/Documentation/ABI/stable/sysfs-devices-node
> +++ b/Documentation/ABI/stable/sysfs-devices-node
> @@ -192,3 +192,9 @@ Description:
>                 When it completes successfully, the specified amount or more memory
>                 will have been reclaimed, and -EAGAIN if less bytes are reclaimed
>                 than the specified amount.
> +
> +What:          /sys/devices/system/node/nodeX/demotion_path
> +Date:          April 2022
> +Contact:       Davidlohr Bueso <dave@stgolabs.net>
> +Description:
> +               Shows nodes within the next tier of slower memory below this node.
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index d80c478e2a6e..ab4bae777535 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -17,6 +17,7 @@
>  #include <linux/nodemask.h>
>  #include <linux/cpu.h>
>  #include <linux/device.h>
> +#include <linux/migrate.h>
>  #include <linux/pm_runtime.h>
>  #include <linux/swap.h>
>  #include <linux/slab.h>
> @@ -560,11 +561,49 @@ static ssize_t node_read_distance(struct device *dev,
>  }
>  static DEVICE_ATTR(distance, 0444, node_read_distance, NULL);
>
> +static ssize_t node_read_demotion_path(struct device *dev,
> +                                      struct device_attribute *attr, char *buf)
> +{
> +       int nid = dev->id;
> +       int len = 0;
> +       int i;
> +       struct demotion_nodes *nd;
> +
> +       /*
> +        * buf is currently PAGE_SIZE in length and each node needs 4 chars
> +        * at the most (target + space or newline).
> +        */
> +       BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE);
> +
> +       if (!node_demotion) {
> +               len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE);
> +               goto done;
> +       }
> +
> +       nd = &node_demotion[nid];
> +
> +       rcu_read_lock();
> +       if (nd->nr == 0)
> +               len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE);
> +       else {
> +               for (i = 0; i < nd->nr; i++) {
> +                       len += sysfs_emit_at(buf, len, "%s%d",
> +                                            i ? " " : "", nd->nodes[i]);
> +               }
> +       }
> +       rcu_read_unlock();
> +done:
> +       len += sysfs_emit_at(buf, len, "\n");
> +       return len;
> +}
> +static DEVICE_ATTR(demotion_path, 0444, node_read_demotion_path, NULL);
> +
>  static struct attribute *node_dev_attrs[] = {
>         &dev_attr_meminfo.attr,
>         &dev_attr_numastat.attr,
>         &dev_attr_distance.attr,
>         &dev_attr_vmstat.attr,
> +       &dev_attr_demotion_path.attr,
>         NULL
>  };
>
> diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> index 90e75d5a54d6..b0ac6a717e44 100644
> --- a/include/linux/migrate.h
> +++ b/include/linux/migrate.h
> @@ -111,6 +111,21 @@ static inline int migrate_misplaced_page(struct page *page,
>  }
>  #endif /* CONFIG_NUMA_BALANCING */
>
> +#define DEFAULT_DEMOTION_TARGET_NODES 15
> +
> +#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES
> +#define DEMOTION_TARGET_NODES  (MAX_NUMNODES - 1)
> +#else
> +#define DEMOTION_TARGET_NODES  DEFAULT_DEMOTION_TARGET_NODES
> +#endif
> +
> +struct demotion_nodes {
> +       unsigned short nr;
> +       short nodes[DEMOTION_TARGET_NODES];
> +};
> +
> +extern struct demotion_nodes *node_demotion __read_mostly;
> +
>  #ifdef CONFIG_MIGRATION
>
>  /*
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 6c31ee1e1c9b..e47ea25fcfe8 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -2172,20 +2172,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
>   * must be held over all reads to ensure that no cycles are
>   * observed.
>   */
> -#define DEFAULT_DEMOTION_TARGET_NODES 15
> -
> -#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES
> -#define DEMOTION_TARGET_NODES  (MAX_NUMNODES - 1)
> -#else
> -#define DEMOTION_TARGET_NODES  DEFAULT_DEMOTION_TARGET_NODES
> -#endif
> -
> -struct demotion_nodes {
> -       unsigned short nr;
> -       short nodes[DEMOTION_TARGET_NODES];
> -};
> -
> -static struct demotion_nodes *node_demotion __read_mostly;
> +struct demotion_nodes *node_demotion __read_mostly;
>
>  /**
>   * next_demotion_node() - Get the next node in the demotion path
> --
> 2.26.2
>
>
Yang Shi April 22, 2022, 5:33 p.m. UTC | #2
On Fri, Apr 22, 2022 at 10:31 AM Yang Shi <shy828301@gmail.com> wrote:
>
> On Fri, Apr 15, 2022 at 10:39 PM Davidlohr Bueso <dave@stgolabs.net> wrote:
> >
> > Add a /sys/devices/system/node/nodeX/demotion_path file
> > to export the possible target(s) in node_demotion[node].
>
> I'm not sure if you noticed that Jagdish Gediya is working on the
> similar patch, please see
> https://lore.kernel.org/linux-mm/20220413092206.73974-1-jvgediya@linux.ibm.com/

Loop in Jagdish Gediya, Ying Huang and Wei Xu.

>
> It would be better to combine the two to avoid duplicate effort.
>
> >
> > Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
> > ---
> >  Documentation/ABI/stable/sysfs-devices-node |  6 ++++
> >  drivers/base/node.c                         | 39 +++++++++++++++++++++
> >  include/linux/migrate.h                     | 15 ++++++++
> >  mm/migrate.c                                | 15 +-------
> >  4 files changed, 61 insertions(+), 14 deletions(-)
> >
> > diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
> > index 3c935e1334f7..f620c6ae013c 100644
> > --- a/Documentation/ABI/stable/sysfs-devices-node
> > +++ b/Documentation/ABI/stable/sysfs-devices-node
> > @@ -192,3 +192,9 @@ Description:
> >                 When it completes successfully, the specified amount or more memory
> >                 will have been reclaimed, and -EAGAIN if less bytes are reclaimed
> >                 than the specified amount.
> > +
> > +What:          /sys/devices/system/node/nodeX/demotion_path
> > +Date:          April 2022
> > +Contact:       Davidlohr Bueso <dave@stgolabs.net>
> > +Description:
> > +               Shows nodes within the next tier of slower memory below this node.
> > diff --git a/drivers/base/node.c b/drivers/base/node.c
> > index d80c478e2a6e..ab4bae777535 100644
> > --- a/drivers/base/node.c
> > +++ b/drivers/base/node.c
> > @@ -17,6 +17,7 @@
> >  #include <linux/nodemask.h>
> >  #include <linux/cpu.h>
> >  #include <linux/device.h>
> > +#include <linux/migrate.h>
> >  #include <linux/pm_runtime.h>
> >  #include <linux/swap.h>
> >  #include <linux/slab.h>
> > @@ -560,11 +561,49 @@ static ssize_t node_read_distance(struct device *dev,
> >  }
> >  static DEVICE_ATTR(distance, 0444, node_read_distance, NULL);
> >
> > +static ssize_t node_read_demotion_path(struct device *dev,
> > +                                      struct device_attribute *attr, char *buf)
> > +{
> > +       int nid = dev->id;
> > +       int len = 0;
> > +       int i;
> > +       struct demotion_nodes *nd;
> > +
> > +       /*
> > +        * buf is currently PAGE_SIZE in length and each node needs 4 chars
> > +        * at the most (target + space or newline).
> > +        */
> > +       BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE);
> > +
> > +       if (!node_demotion) {
> > +               len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE);
> > +               goto done;
> > +       }
> > +
> > +       nd = &node_demotion[nid];
> > +
> > +       rcu_read_lock();
> > +       if (nd->nr == 0)
> > +               len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE);
> > +       else {
> > +               for (i = 0; i < nd->nr; i++) {
> > +                       len += sysfs_emit_at(buf, len, "%s%d",
> > +                                            i ? " " : "", nd->nodes[i]);
> > +               }
> > +       }
> > +       rcu_read_unlock();
> > +done:
> > +       len += sysfs_emit_at(buf, len, "\n");
> > +       return len;
> > +}
> > +static DEVICE_ATTR(demotion_path, 0444, node_read_demotion_path, NULL);
> > +
> >  static struct attribute *node_dev_attrs[] = {
> >         &dev_attr_meminfo.attr,
> >         &dev_attr_numastat.attr,
> >         &dev_attr_distance.attr,
> >         &dev_attr_vmstat.attr,
> > +       &dev_attr_demotion_path.attr,
> >         NULL
> >  };
> >
> > diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> > index 90e75d5a54d6..b0ac6a717e44 100644
> > --- a/include/linux/migrate.h
> > +++ b/include/linux/migrate.h
> > @@ -111,6 +111,21 @@ static inline int migrate_misplaced_page(struct page *page,
> >  }
> >  #endif /* CONFIG_NUMA_BALANCING */
> >
> > +#define DEFAULT_DEMOTION_TARGET_NODES 15
> > +
> > +#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES
> > +#define DEMOTION_TARGET_NODES  (MAX_NUMNODES - 1)
> > +#else
> > +#define DEMOTION_TARGET_NODES  DEFAULT_DEMOTION_TARGET_NODES
> > +#endif
> > +
> > +struct demotion_nodes {
> > +       unsigned short nr;
> > +       short nodes[DEMOTION_TARGET_NODES];
> > +};
> > +
> > +extern struct demotion_nodes *node_demotion __read_mostly;
> > +
> >  #ifdef CONFIG_MIGRATION
> >
> >  /*
> > diff --git a/mm/migrate.c b/mm/migrate.c
> > index 6c31ee1e1c9b..e47ea25fcfe8 100644
> > --- a/mm/migrate.c
> > +++ b/mm/migrate.c
> > @@ -2172,20 +2172,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
> >   * must be held over all reads to ensure that no cycles are
> >   * observed.
> >   */
> > -#define DEFAULT_DEMOTION_TARGET_NODES 15
> > -
> > -#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES
> > -#define DEMOTION_TARGET_NODES  (MAX_NUMNODES - 1)
> > -#else
> > -#define DEMOTION_TARGET_NODES  DEFAULT_DEMOTION_TARGET_NODES
> > -#endif
> > -
> > -struct demotion_nodes {
> > -       unsigned short nr;
> > -       short nodes[DEMOTION_TARGET_NODES];
> > -};
> > -
> > -static struct demotion_nodes *node_demotion __read_mostly;
> > +struct demotion_nodes *node_demotion __read_mostly;
> >
> >  /**
> >   * next_demotion_node() - Get the next node in the demotion path
> > --
> > 2.26.2
> >
> >
Davidlohr Bueso April 22, 2022, 5:50 p.m. UTC | #3
On Fri, 22 Apr 2022, Yang Shi wrote:

>On Fri, Apr 22, 2022 at 10:31 AM Yang Shi <shy828301@gmail.com> wrote:
>>
>> On Fri, Apr 15, 2022 at 10:39 PM Davidlohr Bueso <dave@stgolabs.net> wrote:
>> >
>> > Add a /sys/devices/system/node/nodeX/demotion_path file
>> > to export the possible target(s) in node_demotion[node].
>>
>> I'm not sure if you noticed that Jagdish Gediya is working on the
>> similar patch, please see
>> https://lore.kernel.org/linux-mm/20220413092206.73974-1-jvgediya@linux.ibm.com/
>
>Loop in Jagdish Gediya, Ying Huang and Wei Xu.
>

Hmm, I had missed this thread; I'll go have a look.

>>
>> It would be better to combine the two to avoid duplicate effort.

Indeed - and all the more reason for the LSF/MM discussions to define
the future ABI for tiering.

Thanks,
Davidlohr
diff mbox series

Patch

diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
index 3c935e1334f7..f620c6ae013c 100644
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -192,3 +192,9 @@  Description:
 		When it completes successfully, the specified amount or more memory
 		will have been reclaimed, and -EAGAIN if less bytes are reclaimed
 		than the specified amount.
+
+What:		/sys/devices/system/node/nodeX/demotion_path
+Date:		April 2022
+Contact:	Davidlohr Bueso <dave@stgolabs.net>
+Description:
+		Shows nodes within the next tier of slower memory below this node.
diff --git a/drivers/base/node.c b/drivers/base/node.c
index d80c478e2a6e..ab4bae777535 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -17,6 +17,7 @@ 
 #include <linux/nodemask.h>
 #include <linux/cpu.h>
 #include <linux/device.h>
+#include <linux/migrate.h>
 #include <linux/pm_runtime.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
@@ -560,11 +561,49 @@  static ssize_t node_read_distance(struct device *dev,
 }
 static DEVICE_ATTR(distance, 0444, node_read_distance, NULL);
 
+static ssize_t node_read_demotion_path(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	int nid = dev->id;
+	int len = 0;
+	int i;
+	struct demotion_nodes *nd;
+
+	/*
+	 * buf is currently PAGE_SIZE in length and each node needs 4 chars
+	 * at the most (target + space or newline).
+	 */
+	BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE);
+
+	if (!node_demotion) {
+		len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE);
+		goto done;
+	}
+
+	nd = &node_demotion[nid];
+
+	rcu_read_lock();
+	if (nd->nr == 0)
+		len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE);
+	else {
+		for (i = 0; i < nd->nr; i++) {
+			len += sysfs_emit_at(buf, len, "%s%d",
+					     i ? " " : "", nd->nodes[i]);
+		}
+	}
+	rcu_read_unlock();
+done:
+	len += sysfs_emit_at(buf, len, "\n");
+	return len;
+}
+static DEVICE_ATTR(demotion_path, 0444, node_read_demotion_path, NULL);
+
 static struct attribute *node_dev_attrs[] = {
 	&dev_attr_meminfo.attr,
 	&dev_attr_numastat.attr,
 	&dev_attr_distance.attr,
 	&dev_attr_vmstat.attr,
+	&dev_attr_demotion_path.attr,
 	NULL
 };
 
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 90e75d5a54d6..b0ac6a717e44 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -111,6 +111,21 @@  static inline int migrate_misplaced_page(struct page *page,
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
+#define DEFAULT_DEMOTION_TARGET_NODES 15
+
+#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES
+#define DEMOTION_TARGET_NODES	(MAX_NUMNODES - 1)
+#else
+#define DEMOTION_TARGET_NODES	DEFAULT_DEMOTION_TARGET_NODES
+#endif
+
+struct demotion_nodes {
+	unsigned short nr;
+	short nodes[DEMOTION_TARGET_NODES];
+};
+
+extern struct demotion_nodes *node_demotion __read_mostly;
+
 #ifdef CONFIG_MIGRATION
 
 /*
diff --git a/mm/migrate.c b/mm/migrate.c
index 6c31ee1e1c9b..e47ea25fcfe8 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2172,20 +2172,7 @@  int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
  * must be held over all reads to ensure that no cycles are
  * observed.
  */
-#define DEFAULT_DEMOTION_TARGET_NODES 15
-
-#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES
-#define DEMOTION_TARGET_NODES	(MAX_NUMNODES - 1)
-#else
-#define DEMOTION_TARGET_NODES	DEFAULT_DEMOTION_TARGET_NODES
-#endif
-
-struct demotion_nodes {
-	unsigned short nr;
-	short nodes[DEMOTION_TARGET_NODES];
-};
-
-static struct demotion_nodes *node_demotion __read_mostly;
+struct demotion_nodes *node_demotion __read_mostly;
 
 /**
  * next_demotion_node() - Get the next node in the demotion path