new file mode 100644
@@ -0,0 +1,33 @@
+What: /sys/kernel/mm/mempolicy/
+Date: December 2023
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Interface for Mempolicy
+
+What: /sys/kernel/mm/mempolicy/cpu_nodes
+Date: December 2023
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: The numa nodes from which accesses can be generated
+
+ A cpu numa node is one which has at least 1 CPU. These nodes
+ are capable of generating accesses to memory numa nodes, and
+ will have an interleave weight table.
+
+ Example output:
+
+ ===== =================================================
+ "0,1" nodes 0 and 1 have CPUs which may generate access
+ ===== =================================================
+
+What: /sys/kernel/mm/mempolicy/possible_nodes
+Date: December 2023
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: The numa nodes which may come online
+
+ A possible numa node is one which has been reserved by the
+ system at boot, but may or may not be online at runtime.
+
+ Example output:
+
+ ========= ========================================
+ "0,1,2,3" nodes 0-3 are possibly online or offline
+ ========= ========================================
new file mode 100644
@@ -0,0 +1,35 @@
+What: /sys/kernel/mm/mempolicy/weighted_interleave/
+Date: December 2023
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Configuration Interface for the Weighted Interleave policy
+
+What: /sys/kernel/mm/mempolicy/weighted_interleave/nodeN/
+Date: December 2023
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Configuration interface for accesses initiated from nodeN
+
+ The directory to configure access initiator weights for nodeN.
+
+ Possible numa nodes which were not marked as CPU nodes at boot
+ will not have a nodeN directory created for them.
+ Hotplug for CPU nodes is not supported.
+
+What: /sys/kernel/mm/mempolicy/weighted_interleave/nodeN/nodeM
+ /sys/kernel/mm/mempolicy/weighted_interleave/nodeN/nodeM/weight
+Date: December 2023
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Configuration interface for target nodes accessed from nodeN
+
+ The interleave weight for a memory node (M) from initiating
+ node (N). These weights are utilized by processes which have set
+ the mempolicy to MPOL_WEIGHTED_INTERLEAVE and have opted into
+ global weights by omitting a task-local weight array.
+
+ These weights only affect new allocations, and changes at runtime
+ will not cause migrations on already allocated pages.
+
+ If a weight of 0 is desired, the appropriate way to achieve this
+ is to remove the node from the weighted interleave nodemask.
+
+ Minimum weight: 1
+ Maximum weight: 255
@@ -131,6 +131,11 @@ static struct mempolicy default_policy = {
static struct mempolicy preferred_node_policy[MAX_NUMNODES];
+/*
+ * One row of interleave weights: weights[dst] is the weight applied to
+ * destination node dst. One byte per possible node id.
+ */
+struct interleave_weight_table {
+ unsigned char weights[MAX_NUMNODES];
+};
+/*
+ * Array of weight rows, indexed by source node id and then by destination
+ * node id. Allocated in mempolicy_sysfs_init().
+ */
+static struct interleave_weight_table *iw_table;
+
/**
* numa_nearest_node - Find nearest node by state
* @node: Node id to start the search
@@ -3067,3 +3072,224 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
p += scnprintf(p, buffer + maxlen - p, ":%*pbl",
nodemask_pr_args(&nodes));
}
+
+/*
+ * sysfs object for one (source node, destination node) pair. Its "weight"
+ * attribute reads and writes iw_table[src].weights[dst].
+ */
+struct iw_node_info {
+ struct kobject kobj; /* embedded kobject; owner recovered via container_of() */
+ int src; /* source node id (first index into iw_table) */
+ int dst; /* destination node id (index into weights[]) */
+};
+
+/*
+ * sysfs show handler for the per-destination "weight" attribute.
+ *
+ * Looks up the (src, dst) pair stored alongside @kobj and prints the
+ * corresponding iw_table entry as a decimal number followed by a newline.
+ *
+ * Return: the value returned by sysfs_emit().
+ */
+static ssize_t node_weight_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	struct iw_node_info *info;
+	unsigned char cur_weight;
+
+	info = container_of(kobj, struct iw_node_info, kobj);
+	cur_weight = iw_table[info->src].weights[info->dst];
+
+	return sysfs_emit(buf, "%d\n", cur_weight);
+}
+
+/*
+ * sysfs store handler for the per-destination "weight" attribute.
+ *
+ * Parses @buf as an 8-bit unsigned integer (base auto-detected) and stores
+ * it in the iw_table entry for the (src, dst) pair attached to @kobj.
+ * A value that fails to parse, or a value of 0, is rejected.
+ *
+ * Return: @count on success, -EINVAL on invalid input.
+ */
+static ssize_t node_weight_store(struct kobject *kobj,
+				 struct kobj_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct iw_node_info *info = container_of(kobj, struct iw_node_info,
+						 kobj);
+	unsigned char new_weight = 0;
+
+	if (kstrtou8(buf, 0, &new_weight))
+		return -EINVAL;
+
+	/* zero is not a valid weight */
+	if (new_weight == 0)
+		return -EINVAL;
+
+	iw_table[info->src].weights[info->dst] = new_weight;
+
+	return count;
+}
+
+/* "weight" file (mode 0664) served by node_weight_show()/node_weight_store(). */
+static struct kobj_attribute node_weight =
+ __ATTR(weight, 0664, node_weight_show, node_weight_store);
+
+/* NULL-terminated list of attributes exposed by each destination node. */
+static struct attribute *dst_node_attrs[] = {
+ &node_weight.attr,
+ NULL,
+};
+
+/* Unnamed group: the attributes appear directly in the kobject's directory. */
+static struct attribute_group dst_node_attr_group = {
+ .attrs = dst_node_attrs,
+};
+
+/* NULL-terminated group list used as dst_node_kobj_ktype.default_groups. */
+static const struct attribute_group *dst_node_attr_groups[] = {
+ &dst_node_attr_group,
+ NULL,
+};
+
+/*
+ * Release callback for destination-node kobjects: frees the iw_node_info
+ * that embeds @kobj once its last reference is dropped. Without this the
+ * kzalloc()ed iw_node_info would never be freed by kobject_put().
+ */
+static void dst_node_kobj_release(struct kobject *kobj)
+{
+	kfree(container_of(kobj, struct iw_node_info, kobj));
+}
+
+/* kobject type for destination nodes; supplies the "weight" attribute. */
+static const struct kobj_type dst_node_kobj_ktype = {
+	.release = dst_node_kobj_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = dst_node_attr_groups,
+};
+
+/*
+ * add_dst_node - create the sysfs object for one (src, dst) node pair
+ * @src: source node id recorded in the new object
+ * @dst: destination node id; also used for the directory name "node<dst>"
+ * @src_kobj: parent kobject the new object is added under
+ *
+ * Return: 0 on success, -ENOMEM if allocation fails, or the error returned
+ * by kobject_add().
+ */
+static int add_dst_node(int src, int dst, struct kobject *src_kobj)
+{
+	struct iw_node_info *node_info;
+	int ret;
+
+	node_info = kzalloc(sizeof(*node_info), GFP_KERNEL);
+	if (!node_info)
+		return -ENOMEM;
+	node_info->src = src;
+	node_info->dst = dst;
+
+	kobject_init(&node_info->kobj, &dst_node_kobj_ktype);
+	ret = kobject_add(&node_info->kobj, src_kobj, "node%d", dst);
+	if (ret) {
+		pr_err("kobject_add error [%d-node%d]: %d\n", src, dst, ret);
+		/*
+		 * After kobject_init() the object must be disposed of with
+		 * kobject_put() rather than kfree(); freeing node_info is
+		 * left to the ktype's release callback.
+		 */
+		kobject_put(&node_info->kobj);
+	}
+	return ret;
+}
+
+/*
+ * add_src_node - create the "node<src>" directory and its destination nodes
+ * @src: source node id
+ * @root_kobj: parent kobject the source directory is created under
+ *
+ * Creates a kobject named "node<src>" and then one child per node in the
+ * N_POSSIBLE state via add_dst_node(), stopping at the first failure.
+ *
+ * Return: 0 on success, -ENOMEM if the source kobject cannot be created,
+ * or the first error returned by add_dst_node().
+ */
+static int add_src_node(int src, struct kobject *root_kobj)
+{
+	struct kobject *src_kobj;
+	char name[24];
+	int err = 0;	/* stays 0 if the loop below never executes */
+	int dst;
+
+	snprintf(name, sizeof(name), "node%d", src);
+	src_kobj = kobject_create_and_add(name, root_kobj);
+	if (!src_kobj) {
+		pr_err("failed to create source node kobject\n");
+		return -ENOMEM;
+	}
+
+	for_each_node_state(dst, N_POSSIBLE) {
+		err = add_dst_node(src, dst, src_kobj);
+		if (err)
+			break;
+	}
+
+	/*
+	 * NOTE(review): destination kobjects added before the failing one
+	 * are not individually removed here; only the reference on the
+	 * source kobject is dropped.
+	 */
+	if (err)
+		kobject_put(src_kobj);
+	return err;
+}
+
+/*
+ * add_weighted_interleave_group - build the weighted_interleave sysfs tree
+ * @root_kobj: parent kobject for the "weighted_interleave" directory
+ *
+ * Creates the "weighted_interleave" kobject and one source-node directory
+ * per node in the N_CPU state via add_src_node(), stopping at the first
+ * failure.
+ *
+ * Return: 0 on success, -ENOMEM if the group kobject cannot be created, or
+ * the first error returned by add_src_node().
+ */
+static int add_weighted_interleave_group(struct kobject *root_kobj)
+{
+	struct kobject *wi_kobj;
+	int err = 0;	/* stays 0 if there are no N_CPU nodes */
+	int nid;
+
+	wi_kobj = kobject_create_and_add("weighted_interleave", root_kobj);
+	if (!wi_kobj) {
+		pr_err("failed to create node kobject\n");
+		return -ENOMEM;
+	}
+
+	for_each_node_state(nid, N_CPU) {
+		err = add_src_node(nid, wi_kobj);
+		if (err) {
+			pr_err("failed to add sysfs [node%d]\n", nid);
+			break;
+		}
+	}
+	if (err)
+		kobject_put(wi_kobj);
+
+	/* Propagate the failure instead of unconditionally reporting success. */
+	return err;
+}
+
+/*
+ * Emit the ids of every node in @state into @buf as a comma-separated
+ * decimal list terminated by a newline (e.g. "0,1,2,3\n").
+ *
+ * Return: number of bytes written, as accumulated from sysfs_emit_at().
+ */
+static ssize_t node_state_list_show(enum node_states state, char *buf)
+{
+	int nid, next_nid;
+	int len = 0;
+
+	for_each_node_state(nid, state) {
+		len += sysfs_emit_at(buf, len, "%d", nid);
+		/* only add a separator when another node follows */
+		next_nid = next_node(nid, node_states[state]);
+		if (next_nid < MAX_NUMNODES)
+			len += sysfs_emit_at(buf, len, ",");
+	}
+	len += sysfs_emit_at(buf, len, "\n");
+
+	return len;
+}
+
+/* sysfs show handler for "cpu_nodes": lists the nodes in the N_CPU state. */
+static ssize_t cpu_nodes_show(struct kobject *kobj,
+			      struct kobj_attribute *attr, char *buf)
+{
+	return node_state_list_show(N_CPU, buf);
+}
+
+/*
+ * sysfs show handler for "possible_nodes": lists the nodes in the
+ * N_POSSIBLE state.
+ */
+static ssize_t possible_nodes_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	return node_state_list_show(N_POSSIBLE, buf);
+}
+
+/* Read-only attributes backed by cpu_nodes_show() and possible_nodes_show(). */
+static struct kobj_attribute cpu_nodes_attr = __ATTR_RO(cpu_nodes);
+static struct kobj_attribute possible_nodes_attr = __ATTR_RO(possible_nodes);
+
+/* NULL-terminated list of attributes placed in the mempolicy directory. */
+static struct attribute *mempolicy_attrs[] = {
+	&cpu_nodes_attr.attr,
+	&possible_nodes_attr.attr,
+	NULL,
+};
+
+/*
+ * Unnamed group registered on the mempolicy root kobject. Only .attrs is
+ * set; the remaining members are implicitly zero-initialized.
+ */
+static const struct attribute_group mempolicy_attr_group = {
+	.attrs = mempolicy_attrs,
+};
+
+/*
+ * Release callback for the mempolicy root kobject: frees the kobject
+ * itself and the global interleave weight table.
+ */
+static void mempolicy_kobj_release(struct kobject *kobj)
+{
+	kfree(kobj);
+	kfree(iw_table);
+	/* do not leave the file-scope pointer dangling after the free */
+	iw_table = NULL;
+}
+
+/*
+ * kobject type for the mempolicy root directory; mempolicy_kobj_release()
+ * is its release callback.
+ */
+static const struct kobj_type mempolicy_kobj_ktype = {
+ .release = mempolicy_kobj_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+/*
+ * mempolicy_sysfs_init - create the mempolicy sysfs directory
+ *
+ * Allocates the global interleave weight table with every weight set to 1,
+ * then registers the "mempolicy" kobject under mm_kobj, its attribute
+ * group, and the weighted_interleave subtree.
+ *
+ * Return: 0 on success, or a negative errno on failure.
+ */
+static int __init mempolicy_sysfs_init(void)
+{
+	struct kobject *root_kobj;
+	int nr_src = 0;
+	int err, nid;
+
+	/*
+	 * iw_table is indexed by source node id, not by the ordinal of the
+	 * node within N_CPU, so it must cover the highest CPU node id even
+	 * when CPU node ids are sparse.
+	 */
+	for_each_node_state(nid, N_CPU) {
+		if (nid >= nr_src)
+			nr_src = nid + 1;
+	}
+
+	iw_table = kmalloc_array(nr_src, sizeof(*iw_table), GFP_KERNEL);
+	if (!iw_table) {
+		pr_err("failed to create interleave weight table\n");
+		return -ENOMEM;
+	}
+	/* weights are single bytes, so a byte-wise fill sets each one to 1 */
+	memset(iw_table, 1, nr_src * sizeof(*iw_table));
+
+	root_kobj = kzalloc(sizeof(*root_kobj), GFP_KERNEL);
+	if (!root_kobj) {
+		err = -ENOMEM;
+		goto fail_table;
+	}
+
+	kobject_init(root_kobj, &mempolicy_kobj_ktype);
+	err = kobject_add(root_kobj, mm_kobj, "mempolicy");
+	if (err) {
+		pr_err("failed to add kobject to the system\n");
+		goto fail_obj;
+	}
+
+	err = sysfs_create_group(root_kobj, &mempolicy_attr_group);
+	if (err) {
+		pr_err("failed to register mempolicy group\n");
+		goto fail_obj;
+	}
+
+	err = add_weighted_interleave_group(root_kobj);
+	if (err)
+		goto fail_obj;
+
+	return 0;
+
+fail_obj:
+	/* the ktype release callback frees both root_kobj and iw_table */
+	kobject_put(root_kobj);
+	return err;
+
+fail_table:
+	/* no kobject exists yet, so the table must be freed by hand */
+	kfree(iw_table);
+	iw_table = NULL;
+	return err;
+}
+late_initcall(mempolicy_sysfs_init);