Message ID | 20220416053902.68517-6-dave@stgolabs.net (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm: proactive reclaim and memory tiering topics | expand |
On Fri, Apr 15, 2022 at 10:39 PM Davidlohr Bueso <dave@stgolabs.net> wrote: > > Add a /sys/devices/system/node/nodeX/demotion_path file > to export the possible target(s) in node_demotion[node]. I'm not sure if you noticed that Jagdish Gediya is working on the similar patch, please see https://lore.kernel.org/linux-mm/20220413092206.73974-1-jvgediya@linux.ibm.com/ It would be better to combine the two to avoid duplicate effort. > > Signed-off-by: Davidlohr Bueso <dave@stgolabs.net> > --- > Documentation/ABI/stable/sysfs-devices-node | 6 ++++ > drivers/base/node.c | 39 +++++++++++++++++++++ > include/linux/migrate.h | 15 ++++++++ > mm/migrate.c | 15 +------- > 4 files changed, 61 insertions(+), 14 deletions(-) > > diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node > index 3c935e1334f7..f620c6ae013c 100644 > --- a/Documentation/ABI/stable/sysfs-devices-node > +++ b/Documentation/ABI/stable/sysfs-devices-node > @@ -192,3 +192,9 @@ Description: > When it completes successfully, the specified amount or more memory > will have been reclaimed, and -EAGAIN if less bytes are reclaimed > than the specified amount. > + > +What: /sys/devices/system/node/nodeX/demotion_path > +Date: April 2022 > +Contact: Davidlohr Bueso <dave@stgolabs.net> > +Description: > + Shows nodes within the next tier of slower memory below this node. 
> diff --git a/drivers/base/node.c b/drivers/base/node.c > index d80c478e2a6e..ab4bae777535 100644 > --- a/drivers/base/node.c > +++ b/drivers/base/node.c > @@ -17,6 +17,7 @@ > #include <linux/nodemask.h> > #include <linux/cpu.h> > #include <linux/device.h> > +#include <linux/migrate.h> > #include <linux/pm_runtime.h> > #include <linux/swap.h> > #include <linux/slab.h> > @@ -560,11 +561,49 @@ static ssize_t node_read_distance(struct device *dev, > } > static DEVICE_ATTR(distance, 0444, node_read_distance, NULL); > > +static ssize_t node_read_demotion_path(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + int nid = dev->id; > + int len = 0; > + int i; > + struct demotion_nodes *nd; > + > + /* > + * buf is currently PAGE_SIZE in length and each node needs 4 chars > + * at the most (target + space or newline). > + */ > + BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE); > + > + if (!node_demotion) { > + len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE); > + goto done; > + } > + > + nd = &node_demotion[nid]; > + > + rcu_read_lock(); > + if (nd->nr == 0) > + len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE); > + else { > + for (i = 0; i < nd->nr; i++) { > + len += sysfs_emit_at(buf, len, "%s%d", > + i ? 
" " : "", nd->nodes[i]); > + } > + } > + rcu_read_unlock(); > +done: > + len += sysfs_emit_at(buf, len, "\n"); > + return len; > +} > +static DEVICE_ATTR(demotion_path, 0444, node_read_demotion_path, NULL); > + > static struct attribute *node_dev_attrs[] = { > &dev_attr_meminfo.attr, > &dev_attr_numastat.attr, > &dev_attr_distance.attr, > &dev_attr_vmstat.attr, > + &dev_attr_demotion_path.attr, > NULL > }; > > diff --git a/include/linux/migrate.h b/include/linux/migrate.h > index 90e75d5a54d6..b0ac6a717e44 100644 > --- a/include/linux/migrate.h > +++ b/include/linux/migrate.h > @@ -111,6 +111,21 @@ static inline int migrate_misplaced_page(struct page *page, > } > #endif /* CONFIG_NUMA_BALANCING */ > > +#define DEFAULT_DEMOTION_TARGET_NODES 15 > + > +#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES > +#define DEMOTION_TARGET_NODES (MAX_NUMNODES - 1) > +#else > +#define DEMOTION_TARGET_NODES DEFAULT_DEMOTION_TARGET_NODES > +#endif > + > +struct demotion_nodes { > + unsigned short nr; > + short nodes[DEMOTION_TARGET_NODES]; > +}; > + > +extern struct demotion_nodes *node_demotion __read_mostly; > + > #ifdef CONFIG_MIGRATION > > /* > diff --git a/mm/migrate.c b/mm/migrate.c > index 6c31ee1e1c9b..e47ea25fcfe8 100644 > --- a/mm/migrate.c > +++ b/mm/migrate.c > @@ -2172,20 +2172,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, > * must be held over all reads to ensure that no cycles are > * observed. > */ > -#define DEFAULT_DEMOTION_TARGET_NODES 15 > - > -#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES > -#define DEMOTION_TARGET_NODES (MAX_NUMNODES - 1) > -#else > -#define DEMOTION_TARGET_NODES DEFAULT_DEMOTION_TARGET_NODES > -#endif > - > -struct demotion_nodes { > - unsigned short nr; > - short nodes[DEMOTION_TARGET_NODES]; > -}; > - > -static struct demotion_nodes *node_demotion __read_mostly; > +struct demotion_nodes *node_demotion __read_mostly; > > /** > * next_demotion_node() - Get the next node in the demotion path > -- > 2.26.2 > >
On Fri, Apr 22, 2022 at 10:31 AM Yang Shi <shy828301@gmail.com> wrote: > > On Fri, Apr 15, 2022 at 10:39 PM Davidlohr Bueso <dave@stgolabs.net> wrote: > > > > Add a /sys/devices/system/node/nodeX/demotion_path file > > to export the possible target(s) in node_demotion[node]. > > I'm not sure if you noticed that Jagdish Gediya is working on the > similar patch, please see > https://lore.kernel.org/linux-mm/20220413092206.73974-1-jvgediya@linux.ibm.com/ Loop in Jagdish Gediya, Ying Huang and Wei Xu. > > It would be better to combine the two to avoid duplicate effort. > > > > > Signed-off-by: Davidlohr Bueso <dave@stgolabs.net> > > --- > > Documentation/ABI/stable/sysfs-devices-node | 6 ++++ > > drivers/base/node.c | 39 +++++++++++++++++++++ > > include/linux/migrate.h | 15 ++++++++ > > mm/migrate.c | 15 +------- > > 4 files changed, 61 insertions(+), 14 deletions(-) > > > > diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node > > index 3c935e1334f7..f620c6ae013c 100644 > > --- a/Documentation/ABI/stable/sysfs-devices-node > > +++ b/Documentation/ABI/stable/sysfs-devices-node > > @@ -192,3 +192,9 @@ Description: > > When it completes successfully, the specified amount or more memory > > will have been reclaimed, and -EAGAIN if less bytes are reclaimed > > than the specified amount. > > + > > +What: /sys/devices/system/node/nodeX/demotion_path > > +Date: April 2022 > > +Contact: Davidlohr Bueso <dave@stgolabs.net> > > +Description: > > + Shows nodes within the next tier of slower memory below this node. 
> > diff --git a/drivers/base/node.c b/drivers/base/node.c > > index d80c478e2a6e..ab4bae777535 100644 > > --- a/drivers/base/node.c > > +++ b/drivers/base/node.c > > @@ -17,6 +17,7 @@ > > #include <linux/nodemask.h> > > #include <linux/cpu.h> > > #include <linux/device.h> > > +#include <linux/migrate.h> > > #include <linux/pm_runtime.h> > > #include <linux/swap.h> > > #include <linux/slab.h> > > @@ -560,11 +561,49 @@ static ssize_t node_read_distance(struct device *dev, > > } > > static DEVICE_ATTR(distance, 0444, node_read_distance, NULL); > > > > +static ssize_t node_read_demotion_path(struct device *dev, > > + struct device_attribute *attr, char *buf) > > +{ > > + int nid = dev->id; > > + int len = 0; > > + int i; > > + struct demotion_nodes *nd; > > + > > + /* > > + * buf is currently PAGE_SIZE in length and each node needs 4 chars > > + * at the most (target + space or newline). > > + */ > > + BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE); > > + > > + if (!node_demotion) { > > + len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE); > > + goto done; > > + } > > + > > + nd = &node_demotion[nid]; > > + > > + rcu_read_lock(); > > + if (nd->nr == 0) > > + len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE); > > + else { > > + for (i = 0; i < nd->nr; i++) { > > + len += sysfs_emit_at(buf, len, "%s%d", > > + i ? 
" " : "", nd->nodes[i]); > > + } > > + } > > + rcu_read_unlock(); > > +done: > > + len += sysfs_emit_at(buf, len, "\n"); > > + return len; > > +} > > +static DEVICE_ATTR(demotion_path, 0444, node_read_demotion_path, NULL); > > + > > static struct attribute *node_dev_attrs[] = { > > &dev_attr_meminfo.attr, > > &dev_attr_numastat.attr, > > &dev_attr_distance.attr, > > &dev_attr_vmstat.attr, > > + &dev_attr_demotion_path.attr, > > NULL > > }; > > > > diff --git a/include/linux/migrate.h b/include/linux/migrate.h > > index 90e75d5a54d6..b0ac6a717e44 100644 > > --- a/include/linux/migrate.h > > +++ b/include/linux/migrate.h > > @@ -111,6 +111,21 @@ static inline int migrate_misplaced_page(struct page *page, > > } > > #endif /* CONFIG_NUMA_BALANCING */ > > > > +#define DEFAULT_DEMOTION_TARGET_NODES 15 > > + > > +#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES > > +#define DEMOTION_TARGET_NODES (MAX_NUMNODES - 1) > > +#else > > +#define DEMOTION_TARGET_NODES DEFAULT_DEMOTION_TARGET_NODES > > +#endif > > + > > +struct demotion_nodes { > > + unsigned short nr; > > + short nodes[DEMOTION_TARGET_NODES]; > > +}; > > + > > +extern struct demotion_nodes *node_demotion __read_mostly; > > + > > #ifdef CONFIG_MIGRATION > > > > /* > > diff --git a/mm/migrate.c b/mm/migrate.c > > index 6c31ee1e1c9b..e47ea25fcfe8 100644 > > --- a/mm/migrate.c > > +++ b/mm/migrate.c > > @@ -2172,20 +2172,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, > > * must be held over all reads to ensure that no cycles are > > * observed. 
> > */ > > -#define DEFAULT_DEMOTION_TARGET_NODES 15 > > - > > -#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES > > -#define DEMOTION_TARGET_NODES (MAX_NUMNODES - 1) > > -#else > > -#define DEMOTION_TARGET_NODES DEFAULT_DEMOTION_TARGET_NODES > > -#endif > > - > > -struct demotion_nodes { > > - unsigned short nr; > > - short nodes[DEMOTION_TARGET_NODES]; > > -}; > > - > > -static struct demotion_nodes *node_demotion __read_mostly; > > +struct demotion_nodes *node_demotion __read_mostly; > > > > /** > > * next_demotion_node() - Get the next node in the demotion path > > -- > > 2.26.2 > > > >
On Fri, 22 Apr 2022, Yang Shi wrote:

>On Fri, Apr 22, 2022 at 10:31 AM Yang Shi <shy828301@gmail.com> wrote:
>>
>> On Fri, Apr 15, 2022 at 10:39 PM Davidlohr Bueso <dave@stgolabs.net> wrote:
>> >
>> > Add a /sys/devices/system/node/nodeX/demotion_path file
>> > to export the possible target(s) in node_demotion[node].
>>
>> I'm not sure if you noticed that Jagdish Gediya is working on the
>> similar patch, please see
>> https://lore.kernel.org/linux-mm/20220413092206.73974-1-jvgediya@linux.ibm.com/
>
>Loop in Jagdish Gediya, Ying Huang and Wei Xu.
>

Hmm I had missed this thread, I'll go have a look.

>>
>> It would be better to combine the two to avoid duplicate effort.

Indeed - and even more reason for lsfmm discussions defining the future
ABI for tiering.

Thanks,
Davidlohr
diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node index 3c935e1334f7..f620c6ae013c 100644 --- a/Documentation/ABI/stable/sysfs-devices-node +++ b/Documentation/ABI/stable/sysfs-devices-node @@ -192,3 +192,9 @@ Description: When it completes successfully, the specified amount or more memory will have been reclaimed, and -EAGAIN if less bytes are reclaimed than the specified amount. + +What: /sys/devices/system/node/nodeX/demotion_path +Date: April 2022 +Contact: Davidlohr Bueso <dave@stgolabs.net> +Description: + Shows nodes within the next tier of slower memory below this node. diff --git a/drivers/base/node.c b/drivers/base/node.c index d80c478e2a6e..ab4bae777535 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -17,6 +17,7 @@ #include <linux/nodemask.h> #include <linux/cpu.h> #include <linux/device.h> +#include <linux/migrate.h> #include <linux/pm_runtime.h> #include <linux/swap.h> #include <linux/slab.h> @@ -560,11 +561,49 @@ static ssize_t node_read_distance(struct device *dev, } static DEVICE_ATTR(distance, 0444, node_read_distance, NULL); +static ssize_t node_read_demotion_path(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int nid = dev->id; + int len = 0; + int i; + struct demotion_nodes *nd; + + /* + * buf is currently PAGE_SIZE in length and each node needs 4 chars + * at the most (target + space or newline). + */ + BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE); + + if (!node_demotion) { + len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE); + goto done; + } + + nd = &node_demotion[nid]; + + rcu_read_lock(); + if (nd->nr == 0) + len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE); + else { + for (i = 0; i < nd->nr; i++) { + len += sysfs_emit_at(buf, len, "%s%d", + i ? 
" " : "", nd->nodes[i]); + } + } + rcu_read_unlock(); +done: + len += sysfs_emit_at(buf, len, "\n"); + return len; +} +static DEVICE_ATTR(demotion_path, 0444, node_read_demotion_path, NULL); + static struct attribute *node_dev_attrs[] = { &dev_attr_meminfo.attr, &dev_attr_numastat.attr, &dev_attr_distance.attr, &dev_attr_vmstat.attr, + &dev_attr_demotion_path.attr, NULL }; diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 90e75d5a54d6..b0ac6a717e44 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -111,6 +111,21 @@ static inline int migrate_misplaced_page(struct page *page, } #endif /* CONFIG_NUMA_BALANCING */ +#define DEFAULT_DEMOTION_TARGET_NODES 15 + +#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES +#define DEMOTION_TARGET_NODES (MAX_NUMNODES - 1) +#else +#define DEMOTION_TARGET_NODES DEFAULT_DEMOTION_TARGET_NODES +#endif + +struct demotion_nodes { + unsigned short nr; + short nodes[DEMOTION_TARGET_NODES]; +}; + +extern struct demotion_nodes *node_demotion __read_mostly; + #ifdef CONFIG_MIGRATION /* diff --git a/mm/migrate.c b/mm/migrate.c index 6c31ee1e1c9b..e47ea25fcfe8 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2172,20 +2172,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, * must be held over all reads to ensure that no cycles are * observed. */ -#define DEFAULT_DEMOTION_TARGET_NODES 15 - -#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES -#define DEMOTION_TARGET_NODES (MAX_NUMNODES - 1) -#else -#define DEMOTION_TARGET_NODES DEFAULT_DEMOTION_TARGET_NODES -#endif - -struct demotion_nodes { - unsigned short nr; - short nodes[DEMOTION_TARGET_NODES]; -}; - -static struct demotion_nodes *node_demotion __read_mostly; +struct demotion_nodes *node_demotion __read_mostly; /** * next_demotion_node() - Get the next node in the demotion path
Add a /sys/devices/system/node/nodeX/demotion_path file
to export the possible target(s) in node_demotion[node].

Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
---
 Documentation/ABI/stable/sysfs-devices-node |  6 ++++
 drivers/base/node.c                         | 39 +++++++++++++++++++++
 include/linux/migrate.h                     | 15 ++++++++
 mm/migrate.c                                | 15 +-------
 4 files changed, 61 insertions(+), 14 deletions(-)