@@ -19,6 +19,8 @@
*/
#define MEMTIER_ADISTANCE_DRAM ((4 * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1))
+#define MAX_TIER_INTERLEAVE_WEIGHT 100
+
struct memory_tier;
struct memory_dev_type {
/* list of memory types that are part of same tier as this type */
@@ -36,6 +38,9 @@ struct memory_dev_type *alloc_memory_type(int adistance);
void put_memory_type(struct memory_dev_type *memtype);
void init_node_memory_type(int node, struct memory_dev_type *default_type);
void clear_node_memory_type(int node, struct memory_dev_type *memtype);
+unsigned char memtier_get_node_weight(int from_node, int target_node,
+ nodemask_t *pol_nodes);
+unsigned int memtier_get_total_weight(int from_node, nodemask_t *pol_nodes);
#ifdef CONFIG_MIGRATION
int next_demotion_node(int node);
void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
@@ -97,5 +102,16 @@ static inline bool node_is_toptier(int node)
{
return true;
}
+
+static inline unsigned char memtier_get_node_weight(int from_node,
+						    int target_node,
+						    nodemask_t *pol_nodes)
+{
+	return 1;
+}
+
+static inline unsigned int memtier_get_total_weight(int from_node,
+						    nodemask_t *pol_nodes)
+{
+	return nodes_weight(*pol_nodes);
+}
#endif /* CONFIG_NUMA */
#endif /* _LINUX_MEMORY_TIERS_H */
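
For context, here is a minimal sketch of how a consumer (e.g. a weighted
interleave mempolicy) might use the two helpers above. This is illustrative
only, is not part of this patch, and next_weighted_node() is a hypothetical
name:

static int next_weighted_node(int from_node, nodemask_t *pol_nodes,
			      unsigned long count)
{
	unsigned int total = memtier_get_total_weight(from_node, pol_nodes);
	unsigned int offset = count % total;
	int node;

	/* Walk the policy nodes, consuming each node's weight in turn. */
	for_each_node_mask(node, *pol_nodes) {
		unsigned char w = memtier_get_node_weight(from_node, node,
							  pol_nodes);
		if (offset < w)
			return node;
		offset -= w;
	}
	/* Weights changed mid-walk; fall back to the first policy node. */
	return first_node(*pol_nodes);
}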
@@ -14,6 +14,11 @@ struct memory_tier {
struct list_head list;
/* list of all memory types part of this tier */
struct list_head memory_types;
+	/*
+	 * Per-source-node interleave weights for this tier, indexed by
+	 * the allocating node. All weights default to 1, which is
+	 * equivalent to standard round-robin interleave.
+	 */
+ unsigned char interleave_weight[MAX_NUMNODES];
/*
* start value of abstract distance. memory tier maps
* an abstract distance range,
@@ -146,8 +151,72 @@ static ssize_t nodelist_show(struct device *dev,
}
static DEVICE_ATTR_RO(nodelist);
+static ssize_t interleave_weight_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int ret = 0;
+ struct memory_tier *tier = to_memory_tier(dev);
+ int node;
+ int count = 0;
+
+ down_read(&memory_tier_sem);
+ for_each_online_node(node) {
+ if (count > 0)
+ ret += sysfs_emit_at(buf, ret, ",");
+ ret += sysfs_emit_at(buf, ret, "%d:%d", node, tier->interleave_weight[node]);
+ count++;
+ }
+ up_read(&memory_tier_sem);
+	ret += sysfs_emit_at(buf, ret, "\n");
+
+ return ret;
+}
+
+static ssize_t interleave_weight_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t size)
+{
+	unsigned char weight;
+	unsigned int from_node;
+	struct memory_tier *tier;
+
+	/* Parse "<source node>:<weight>" without modifying the const buffer. */
+	if (sscanf(buf, "%u:%hhu", &from_node, &weight) != 2)
+		return -EINVAL;
+
+	if (from_node >= MAX_NUMNODES || !node_online(from_node))
+		return -EINVAL;
+
+	/* Zero is reserved as "unset"; valid weights are 1..MAX_TIER_INTERLEAVE_WEIGHT. */
+	if (!weight || weight > MAX_TIER_INTERLEAVE_WEIGHT)
+		return -EINVAL;
+
+	down_write(&memory_tier_sem);
+	tier = to_memory_tier(dev);
+	if (!tier) {
+		up_write(&memory_tier_sem);
+		return -ENODEV;
+	}
+	tier->interleave_weight[from_node] = weight;
+	up_write(&memory_tier_sem);
+
+	return size;
+}
+static DEVICE_ATTR_RW(interleave_weight);
+
static struct attribute *memtier_dev_attrs[] = {
&dev_attr_nodelist.attr,
+ &dev_attr_interleave_weight.attr,
NULL
};
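
Usage note (illustrative): reading the attribute yields a comma-separated
"<node>:<weight>" list such as "0:1,1:1", and writing takes a single
"<node>:<weight>" pair, e.g. "2:80" echoed into the tier's
interleave_weight file under /sys/devices/virtual/memory_tiering/.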
@@ -239,6 +308,72 @@ static struct memory_tier *__node_get_memory_tier(int node)
lockdep_is_held(&memory_tier_sem));
}
+unsigned char memtier_get_node_weight(int from_node, int target_node,
+ nodemask_t *pol_nodes)
+{
+ struct memory_tier *tier;
+ unsigned char tier_weight, node_weight = 1;
+ int tier_nodes;
+ nodemask_t tier_nmask, tier_and_pol;
+
+	/*
+	 * If the semaphore is already held, fall back to a weight of 1
+	 * instead of blocking; this degrades weighted interleave to basic
+	 * interleave, and only happens while weights are being updated or
+	 * during init.
+	 */
+ if (!down_read_trylock(&memory_tier_sem))
+ return 1;
+
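+	/*
+	 * A tier's weight is split evenly across the tier's nodes that
+	 * fall within the policy, rounding up: e.g. a weight of 80 over
+	 * two policy nodes yields ceil(80/2) = 40 for each node.
+	 */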
+ tier = __node_get_memory_tier(target_node);
+ if (tier) {
+ tier_nmask = get_memtier_nodemask(tier);
+ nodes_and(tier_and_pol, tier_nmask, *pol_nodes);
+ tier_nodes = nodes_weight(tier_and_pol);
+		tier_weight = tier->interleave_weight[from_node];
+		/* Split the tier's weight across its policy nodes, rounding up. */
+		if (tier_nodes)
+			node_weight = DIV_ROUND_UP(tier_weight, tier_nodes);
+ }
+ up_read(&memory_tier_sem);
+ return node_weight;
+}
+
+unsigned int memtier_get_total_weight(int from_node, nodemask_t *pol_nodes)
+{
+ unsigned int weight = 0;
+ struct memory_tier *tier;
+	unsigned int min_weight = nodes_weight(*pol_nodes);
+ int node;
+ nodemask_t tier_nmask, tier_and_pol;
+ int tier_nodes;
+ unsigned int tier_weight;
+
+	/*
+	 * If the semaphore is already held, fall back to basic interleave
+	 * (a weight of 1 per policy node) instead of blocking; this only
+	 * happens while weights are being updated or during init.
+	 */
+ if (!down_read_trylock(&memory_tier_sem))
+ return nodes_weight(*pol_nodes);
+
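+	/*
+	 * Sum the per-node weights across the policy. e.g. from node 0,
+	 * a two-node DRAM tier with weight 80 contributes 40 per node and
+	 * a one-node CXL tier with weight 20 contributes 20, so the total
+	 * weight is 100 and ~4 of every 5 pages target the DRAM nodes.
+	 */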
+ for_each_node_mask(node, *pol_nodes) {
+ tier = __node_get_memory_tier(node);
+ if (!tier) {
+ weight += 1;
+ continue;
+ }
+ tier_nmask = get_memtier_nodemask(tier);
+ nodes_and(tier_and_pol, tier_nmask, *pol_nodes);
+ tier_nodes = nodes_weight(tier_and_pol);
+		/* Split the tier's weight across its policy nodes, rounding up. */
+		tier_weight = tier->interleave_weight[from_node];
+		weight += DIV_ROUND_UP(tier_weight, tier_nodes);
+ }
+ up_read(&memory_tier_sem);
+
+	return max(weight, min_weight);
+}
+
#ifdef CONFIG_MIGRATION
bool node_is_toptier(int node)
{
@@ -490,8 +625,11 @@ static struct memory_tier *set_node_memory_tier(int node)
memtype = node_memory_types[node].memtype;
node_set(node, memtype->nodes);
memtier = find_create_memory_tier(memtype);
- if (!IS_ERR(memtier))
+ if (!IS_ERR(memtier)) {
rcu_assign_pointer(pgdat->memtier, memtier);
+		/*
+		 * Initialize any weights still unset (zero) to the default
+		 * of 1, without clobbering weights an administrator may
+		 * have already configured on a pre-existing tier.
+		 */
+		for (int i = 0; i < MAX_NUMNODES; i++) {
+			if (!memtier->interleave_weight[i])
+				memtier->interleave_weight[i] = 1;
+		}
+	}
return memtier;
}