@@ -558,6 +558,7 @@ struct hstate {
int next_nid_to_alloc;
int next_nid_to_free;
unsigned int order;
+ unsigned int demote_order;
unsigned long mask;
unsigned long max_huge_pages;
unsigned long nr_huge_pages;
@@ -2492,7 +2492,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
static void __init hugetlb_init_hstates(void)
{
- struct hstate *h;
+ struct hstate *h, *h2;
for_each_hstate(h) {
if (minimum_order > huge_page_order(h))
@@ -2501,6 +2501,17 @@ static void __init hugetlb_init_hstates(void)
/* oversize hugepages were init'ed in early boot */
if (!hstate_is_gigantic(h))
hugetlb_hstate_alloc_pages(h);
+
+ /*
+ * Set demote order for each hstate. Note that
+ * h->demote_order is initially 0.
+ */
+ for_each_hstate(h2) {
+ if (h2 == h)
+ continue;
+ if (h2->order < h->order && h2->order > h->demote_order)
+ h->demote_order = h2->order;
+ }
}
VM_BUG_ON(minimum_order == UINT_MAX);
}
@@ -2710,6 +2721,20 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
return 0;
}
+/*
+ * Demote one free huge page of hstate @h, taken from @nodes_allowed.
+ *
+ * Only the "no demote order" case is implemented so far: the request is
+ * handed to free_pool_huge_page() (free to buddy) and that function's
+ * return value is passed back unchanged.  For a non-zero h->demote_order
+ * nothing is done and 0 is returned.
+ *
+ * demote_store_action() calls this with hugetlb_lock held and stops
+ * demoting as soon as 0 is returned.
+ */
+static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
+{
+	/*
+	 * TODO - demote functionality will be added in a subsequent patch.
+	 * Until then a non-zero demote order is a no-op.
+	 */
+	if (h->demote_order)
+		return 0;
+
+	/* No demote order: free to buddy through the existing path */
+	return free_pool_huge_page(h, nodes_allowed, 0);
+}
+
#define HSTATE_ATTR_RO(_name) \
static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
@@ -2908,12 +2933,100 @@ static ssize_t surplus_hugepages_show(struct kobject *kobj,
}
HSTATE_ATTR_RO(surplus_hugepages);
+/*
+ * sysfs read handler for the "demote" file.  No hstate state is consulted;
+ * the emitted text is always "0\n".  Returns whatever sysfs_emit() returns.
+ */
+static ssize_t demote_show(struct kobject *kobj,
+			   struct kobj_attribute *attr, char *buf)
+{
+	ssize_t nr_written;
+
+	nr_written = sysfs_emit(buf, "0\n");
+	return nr_written;
+}
+
+/*
+ * Number of free huge pages of @h that are not needed to back reservations,
+ * either on node @nid or, for NUMA_NO_NODE, across the whole pool.
+ *
+ * The counters are unsigned, so the subtraction is guarded: if reservations
+ * meet or exceed the free count the result is 0 rather than a wrapped-around
+ * huge value.
+ *
+ * NOTE(review): h->resv_huge_pages is not indexed by node here, so for a
+ * per-node request the full reservation count is subtracted from that node's
+ * free count - confirm this conservative behaviour is intended.
+ *
+ * Reads hstate counters; the caller in this file holds hugetlb_lock.
+ */
+static unsigned long demote_nr_available(struct hstate *h, int nid)
+{
+	unsigned long nr_free;
+
+	if (nid != NUMA_NO_NODE)
+		nr_free = h->free_huge_pages_node[nid];
+	else
+		nr_free = h->free_huge_pages;
+
+	if (nr_free <= h->resv_huge_pages)
+		return 0;
+
+	return nr_free - h->resv_huge_pages;
+}
+
+/*
+ * Parse a page count from @buf and try to demote that many free huge pages
+ * of the hstate (and, for a per-node kobject, the node) identified by @kobj.
+ *
+ * The request is capped at the number of free, unreserved pages, and the cap
+ * is re-evaluated after every page because the lock may have been dropped.
+ *
+ * Returns @len once the input has been parsed, even if fewer pages than
+ * requested were demoted; returns the kstrtoul() error code if @buf is not a
+ * valid base-10 unsigned long.
+ */
+static ssize_t demote_store_action(struct kobject *kobj, const char *buf,
+				   size_t len)
+{
+	unsigned long nr_demote;
+	nodemask_t nodes_allowed, *n_mask;
+	struct hstate *h;
+	int err;
+	int nid;
+
+	err = kstrtoul(buf, 10, &nr_demote);
+	if (err)
+		return err;
+	h = kobj_to_hstate(kobj, &nid);
+
+	/* Restrict to the single node for per-node files, else any memory node */
+	if (nid != NUMA_NO_NODE) {
+		init_nodemask_of_node(&nodes_allowed, nid);
+		n_mask = &nodes_allowed;
+	} else {
+		n_mask = &node_states[N_MEMORY];
+	}
+
+	spin_lock(&hugetlb_lock);
+
+	/* Never demote more than what is free and unreserved right now */
+	nr_demote = min(nr_demote, demote_nr_available(h, nid));
+
+	while (nr_demote) {
+		if (!demote_pool_huge_page(h, n_mask))
+			break;
+
+		cond_resched_lock(&hugetlb_lock);
+		/*
+		 * We may have dropped the lock above or in the routines to
+		 * demote/free a page.  Recompute nr_demote as counts could
+		 * have changed and we want to make sure we do not demote
+		 * a reserved huge page.
+		 */
+		nr_demote--;
+		nr_demote = min(nr_demote, demote_nr_available(h, nid));
+	}
+
+	spin_unlock(&hugetlb_lock);
+
+	return len;
+}
+
+/*
+ * sysfs write handler for the "demote" file.  @attr is unused; parsing and
+ * the demote loop live in demote_store_action(), whose result is returned
+ * as-is.
+ */
+static ssize_t demote_store(struct kobject *kobj,
+			    struct kobj_attribute *attr, const char *buf, size_t len)
+{
+	ssize_t ret = demote_store_action(kobj, buf, len);
+
+	return ret;
+}
+/* Presumably defines demote_attr, which the attribute arrays below list */
+HSTATE_ATTR(demote);
+
+/*
+ * sysfs read handler for the "demote_size" file: report, in kB, the page
+ * size corresponding to h->demote_order, i.e. PAGE_SIZE << demote_order.
+ * With a demote order of 0 this is the base page size.
+ */
+static ssize_t demote_size_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h;
+	unsigned long demote_size;
+	int nid;
+
+	/* nid receives the node id from kobj_to_hstate() but is not needed here */
+	h = kobj_to_hstate(kobj, &nid);
+	demote_size = (unsigned long)(PAGE_SIZE << h->demote_order) / SZ_1K;
+
+	return sysfs_emit(buf, "%lukB\n", demote_size);
+}
+HSTATE_ATTR_RO(demote_size);
+
static struct attribute *hstate_attrs[] = {
&nr_hugepages_attr.attr,
&nr_overcommit_hugepages_attr.attr,
&free_hugepages_attr.attr,
&resv_hugepages_attr.attr,
&surplus_hugepages_attr.attr,
+ &demote_size_attr.attr,
+ &demote_attr.attr,
#ifdef CONFIG_NUMA
&nr_hugepages_mempolicy_attr.attr,
#endif
@@ -2983,6 +3096,8 @@ static struct attribute *per_node_hstate_attrs[] = {
&nr_hugepages_attr.attr,
&free_hugepages_attr.attr,
&surplus_hugepages_attr.attr,
+ &demote_size_attr.attr,
+ &demote_attr.attr,
NULL,
};
Two new sysfs files are added to demote hugetlb pages. These files are both per-hugetlb page size and per node. Files are: demote_size - The size in kB that pages are demoted to. demote - The number of huge pages to demote. Writing a value to demote will result in an attempt to demote that number of hugetlb pages to an appropriate number of demote_size pages. This patch does not provide full demote functionality. It only provides the sysfs interfaces and uses existing code to free pages to the buddy allocator if demote_size == PAGE_SIZE. Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> --- include/linux/hugetlb.h | 1 + mm/hugetlb.c | 117 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 117 insertions(+), 1 deletion(-)