new file mode 100644
@@ -0,0 +1,24 @@
+What: /sys/kernel/mm/numa/
+Date: June 2021
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Interface for NUMA
+
+What: /sys/kernel/mm/numa/demotion_enabled
+Date: June 2021
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Enable/disable demoting pages during reclaim
+
+ Page migration during reclaim is intended for systems
+ with tiered memory configurations. These systems have
+ multiple types of memory with varied performance
+ characteristics instead of plain NUMA systems where
+ the same kind of memory is found at varied distances.
+ Allowing page migration during reclaim enables these
+ systems to migrate pages from fast tiers to slow tiers
+ when the fast tier is under pressure. This migration
+ is performed before swap. It may move data to a NUMA
+ node that does not fall into the cpuset of the
+ allocating process which might be construed to violate
+ the guarantees of cpusets. This should not be enabled
+ on systems which need strict cpuset location
+ guarantees.
@@ -184,6 +184,8 @@ extern bool vma_migratable(struct vm_area_struct *vma);
extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
extern void mpol_put_task_policy(struct task_struct *);
+extern bool numa_demotion_enabled;
+
#else
struct mempolicy {};
@@ -292,5 +294,7 @@ static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
{
return NULL;
}
+
+#define numa_demotion_enabled false
#endif /* CONFIG_NUMA */
#endif
@@ -3062,3 +3062,64 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
p += scnprintf(p, buffer + maxlen - p, ":%*pbl",
nodemask_pr_args(&nodes));
}
+
+bool numa_demotion_enabled = false;
+
+#ifdef CONFIG_SYSFS
+static ssize_t numa_demotion_enabled_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%s\n",
+ numa_demotion_enabled? "true" : "false");
+}
+
+static ssize_t numa_demotion_enabled_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
+ numa_demotion_enabled = true;
+ else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
+ numa_demotion_enabled = false;
+ else
+ return -EINVAL;
+
+ return count;
+}
+
+static struct kobj_attribute numa_demotion_enabled_attr =
+ __ATTR(demotion_enabled, 0644, numa_demotion_enabled_show,
+ numa_demotion_enabled_store);
+
+static struct attribute *numa_attrs[] = {
+ &numa_demotion_enabled_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group numa_attr_group = {
+ .attrs = numa_attrs,
+};
+
+static int __init numa_init_sysfs(void)
+{
+ int err;
+ struct kobject *numa_kobj;
+
+ numa_kobj = kobject_create_and_add("numa", mm_kobj);
+ if (!numa_kobj) {
+ pr_err("failed to create numa kobject\n");
+ return -ENOMEM;
+ }
+ err = sysfs_create_group(numa_kobj, &numa_attr_group);
+ if (err) {
+ pr_err("failed to register numa group\n");
+ goto delete_obj;
+ }
+ return 0;
+
+delete_obj:
+ kobject_put(numa_kobj);
+ return err;
+}
+subsys_initcall(numa_init_sysfs);
+#endif
@@ -521,6 +521,8 @@ static long add_nr_deferred(long nr, struct shrinker *shrinker,
static bool can_demote(int nid, struct scan_control *sc)
{
+ if (!numa_demotion_enabled)
+ return false;
if (sc) {
if (sc->no_demotion)
return false;
@@ -531,8 +533,7 @@ static bool can_demote(int nid, struct scan_control *sc)
if (next_demotion_node(nid) == NUMA_NO_NODE)
return false;
- // FIXME: actually enable this later in the series
- return false;
+ return true;
}
static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg,