diff mbox series

[RFC,3/4] mm: add sysfs configuration for page reporting

Message ID 20200412090919.GA19580@open-light-1.localdomain (mailing list archive)
State New, archived
Headers show
Series mm: Add PG_zero support | expand

Commit Message

Liang Li April 12, 2020, 9:09 a.m. UTC
This patch adds 'delay_millisecs', 'mini_order', and 'batch_size'
to '/sys/kernel/mm/page_report/'. Usage:

"delay_millisecs":
Delay, in milliseconds, between a page being freed and the reporting work starting to run.

"mini_order":
Only pages with an order equal to or greater than mini_order will be
reported.

"batch_size":
Wake up the worker only when the total size of freed pages (in bytes)
is greater than 'batch_size'.

Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Hildenbrand <david@redhat.com>  
Cc: Michal Hocko <mhocko@kernel.org> 
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: liliangleo <liliangleo@didiglobal.com>
---
 mm/page_reporting.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 mm/page_reporting.h |   4 +-
 2 files changed, 141 insertions(+), 7 deletions(-)

Comments

Alexander Duyck April 13, 2020, 3:02 p.m. UTC | #1
On 4/12/2020 2:09 AM, liliangleo wrote:
> This patch add 'delay_millisecs', 'mini_order', 'batch_size',
> in '/sys/kernel/mm/page_report/'. Usage:
> 
> "delay_millisecs":
> Time delay interval between page free and work start to run.
> 
> "mini_order":
> Only pages with order equal or greater than mini_order will be
> reported.
> 
> "batch_size"
> Wake up the worker only when free pages total size are greater
> than 'batch_size'.
> 
> Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> Cc: Mel Gorman <mgorman@techsingularity.net>
> Cc: Andrea Arcangeli <aarcange@redhat.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Dave Hansen <dave.hansen@intel.com>
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Alex Williamson <alex.williamson@redhat.com>
> Signed-off-by: liliangleo <liliangleo@didiglobal.com>

I am not really a fan of making these configurable globally. Especially 
since the existing virtio-balloon is relying on some of this being 
configured the way it is.

It would make much more sense to push these configuration options out to 
the registration interface so that the thing that is registering for 
page reporting can configure them when it is registered.
Liang Li April 16, 2020, 1:30 a.m. UTC | #2
On Mon, Apr 13, 2020 at 11:02 PM Alexander Duyck
<alexander.h.duyck@linux.intel.com> wrote:
>
> On 4/12/2020 2:09 AM, liliangleo wrote:
> > This patch add 'delay_millisecs', 'mini_order', 'batch_size',
> > in '/sys/kernel/mm/page_report/'. Usage:
> >
> > "delay_millisecs":
> > Time delay interval between page free and work start to run.
> >
> > "mini_order":
> > Only pages with order equal or greater than mini_order will be
> > reported.
> >
> > "batch_size"
> > Wake up the worker only when free pages total size are greater
> > than 'batch_size'.
> >
> > Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> > Cc: Mel Gorman <mgorman@techsingularity.net>
> > Cc: Andrea Arcangeli <aarcange@redhat.com>
> > Cc: Dan Williams <dan.j.williams@intel.com>
> > Cc: Dave Hansen <dave.hansen@intel.com>
> > Cc: David Hildenbrand <david@redhat.com>
> > Cc: Michal Hocko <mhocko@kernel.org>
> > Cc: Andrew Morton <akpm@linux-foundation.org>
> > Cc: Alex Williamson <alex.williamson@redhat.com>
> > Signed-off-by: liliangleo <liliangleo@didiglobal.com>
>
> I am not really a fan of making these configurable globally. Especially
> since the existing virtio-balloon is relying on some of this being
> configured the way it is.
>
> It would make much more sense to push these configuration options out to
> the registration interface so that the thing that is registering for
> page reporting can configure them when it is registered.
>

Agree, that's better.  Thanks!
diff mbox series

Patch

diff --git a/mm/page_reporting.c b/mm/page_reporting.c
index dc7a22a4b752..cc6a42596560 100644
--- a/mm/page_reporting.c
+++ b/mm/page_reporting.c
@@ -7,15 +7,19 @@ 
 #include <linux/delay.h>
 #include <linux/scatterlist.h>
 #include <linux/sched.h>
+#include <linux/kobject.h>
 
 #include "page_reporting.h"
 #include "internal.h"
 
-#define PAGE_REPORTING_DELAY	(2 * HZ)
 #define MAX_SCAN_NUM 1024
 
 unsigned long page_report_batch_size  __read_mostly = 4 * 1024 * 1024UL;
 
+static unsigned long page_report_delay_millisecs __read_mostly = 2000;
+
+unsigned int page_report_mini_order __read_mostly = 8;
+
 static struct page_reporting_dev_info __rcu *pr_dev_info __read_mostly;
 
 enum {
@@ -48,7 +52,8 @@  __page_reporting_request(struct page_reporting_dev_info *prdev)
 	 * now we are limiting this to running no more than once every
 	 * couple of seconds.
 	 */
-	schedule_delayed_work(&prdev->work, PAGE_REPORTING_DELAY);
+	schedule_delayed_work(&prdev->work,
+			      msecs_to_jiffies(page_report_delay_millisecs));
 }
 
 /* notify prdev of free page reporting request */
@@ -260,7 +265,7 @@  page_reporting_process_zone(struct page_reporting_dev_info *prdev,
 
 	/* Generate minimum watermark to be able to guarantee progress */
 	watermark = low_wmark_pages(zone) +
-		    (PAGE_REPORTING_CAPACITY << PAGE_REPORTING_MIN_ORDER);
+		    (PAGE_REPORTING_CAPACITY << page_report_mini_order);
 
 	/*
 	 * Cancel request if insufficient free memory or if we failed
@@ -270,7 +275,7 @@  page_reporting_process_zone(struct page_reporting_dev_info *prdev,
 		return err;
 
 	/* Process each free list starting from lowest order/mt */
-	for (order = PAGE_REPORTING_MIN_ORDER; order < MAX_ORDER; order++) {
+	for (order = page_report_mini_order; order < MAX_ORDER; order++) {
 		for (mt = 0; mt < MIGRATE_TYPES; mt++) {
 			/* We do not pull pages from the isolate free list */
 			if (is_migrate_isolate(mt))
@@ -337,7 +342,8 @@  static void page_reporting_process(struct work_struct *work)
 	 */
 	state = atomic_cmpxchg(&prdev->state, state, PAGE_REPORTING_IDLE);
 	if (state == PAGE_REPORTING_REQUESTED)
-		schedule_delayed_work(&prdev->work, PAGE_REPORTING_DELAY);
+		schedule_delayed_work(&prdev->work,
+				msecs_to_jiffies(page_report_delay_millisecs));
 }
 
 static DEFINE_MUTEX(page_reporting_mutex);
@@ -393,3 +399,131 @@  void page_reporting_unregister(struct page_reporting_dev_info *prdev)
 	mutex_unlock(&page_reporting_mutex);
 }
 EXPORT_SYMBOL_GPL(page_reporting_unregister);
+
+static ssize_t batch_size_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", page_report_batch_size);
+}
+
+static ssize_t batch_size_store(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				const char *buf, size_t count)
+{
+	unsigned long size;
+	int err;
+
+	err = kstrtoul(buf, 10, &size);
+	if (err || size >= UINT_MAX)
+		return -EINVAL;
+
+	page_report_batch_size = size;
+
+	return count;
+}
+
+static struct kobj_attribute batch_size_attr =
+	__ATTR(batch_size, 0644, batch_size_show, batch_size_store);
+
+static ssize_t delay_millisecs_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", page_report_delay_millisecs);
+}
+
+static ssize_t delay_millisecs_store(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				const char *buf, size_t count)
+{
+	unsigned long msecs;
+	int err;
+
+	err = kstrtoul(buf, 10, &msecs);
+	if (err || msecs >= UINT_MAX)
+		return -EINVAL;
+
+	page_report_delay_millisecs = msecs;
+
+	return count;
+}
+
+static struct kobj_attribute wake_delay_millisecs_attr =
+	__ATTR(delay_millisecs, 0644, delay_millisecs_show,
+		delay_millisecs_store);
+
+static ssize_t mini_order_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", page_report_mini_order);
+}
+
+static ssize_t mini_order_store(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				const char *buf, size_t count)
+{
+	unsigned int order;
+	int err;
+
+	err = kstrtouint(buf, 10, &order);
+	if (err || order >= MAX_ORDER)
+		return -EINVAL;
+
+	if (page_report_mini_order != order) {
+		mutex_lock(&page_reporting_mutex);
+		page_report_mini_order = order;
+		mutex_unlock(&page_reporting_mutex);
+	}
+
+	return count;
+}
+
+static struct kobj_attribute mini_order_attr =
+	__ATTR(mini_order, 0644, mini_order_show, mini_order_store);
+
+static struct attribute *page_report_attr[] = {
+	&mini_order_attr.attr,
+	&wake_delay_millisecs_attr.attr,
+	&batch_size_attr.attr,
+	NULL,
+};
+
+static struct attribute_group page_report_attr_group = {
+	.attrs = page_report_attr,
+};
+
+static int __init page_report_init_sysfs(struct kobject **page_report_kobj)
+{
+	int err;
+
+	*page_report_kobj = kobject_create_and_add("page_report", mm_kobj);
+	if (unlikely(!*page_report_kobj)) {
+		pr_err("page_report: failed to create page_report kobject\n");
+		return -ENOMEM;
+	}
+
+	err = sysfs_create_group(*page_report_kobj, &page_report_attr_group);
+	if (err) {
+		pr_err("page_report: failed to register page_report group\n");
+		goto delete_obj;
+	}
+
+	return 0;
+
+delete_obj:
+	kobject_put(*page_report_kobj);
+	return err;
+}
+
+static int __init page_report_init(void)
+{
+	int err;
+	struct kobject *page_report_kobj;
+
+	msecs_to_jiffies(page_report_delay_millisecs);
+	err = page_report_init_sysfs(&page_report_kobj);
+	if (err)
+		return err;
+
+	return 0;
+}
+subsys_initcall(page_report_init);
diff --git a/mm/page_reporting.h b/mm/page_reporting.h
index f18c85ecdfe0..5e52777c934d 100644
--- a/mm/page_reporting.h
+++ b/mm/page_reporting.h
@@ -10,7 +10,7 @@ 
 #include <asm/pgtable.h>
 #include <linux/scatterlist.h>
 
-#define PAGE_REPORTING_MIN_ORDER	pageblock_order
+extern unsigned int page_report_mini_order;
 
 extern unsigned long page_report_batch_size;
 
@@ -42,7 +42,7 @@  static inline void page_reporting_notify_free(unsigned int order)
 		return;
 
 	/* Determine if we have crossed reporting threshold */
-	if (order < PAGE_REPORTING_MIN_ORDER)
+	if (order < page_report_mini_order)
 		return;
 
 	batch_size += (1 << order) << PAGE_SHIFT;