@@ -118,7 +118,8 @@ compaction_proactiveness
This tunable takes a value in the range [0, 100] with a default value of
20. This tunable determines how aggressively compaction is done in the
-background. Setting it to 0 disables proactive compaction.
+background. Write of a non zero value to this tunable will immediately
+trigger the proactive compaction. Setting it to 0 disables proactive compaction.
Note that compaction has a non-trivial system-wide impact as pages
belonging to different processes are moved around, which could also lead
@@ -84,6 +84,8 @@ static inline unsigned long compact_gap(
extern unsigned int sysctl_compaction_proactiveness;
extern int sysctl_compaction_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos);
+extern int compaction_proactiveness_sysctl_handler(struct ctl_table *table,
+ int write, void *buffer, size_t *length, loff_t *ppos);
extern int sysctl_extfrag_threshold;
extern int sysctl_compact_unevictable_allowed;
@@ -846,6 +846,7 @@ typedef struct pglist_data {
enum zone_type kcompactd_highest_zoneidx;
wait_queue_head_t kcompactd_wait;
struct task_struct *kcompactd;
+ bool proactive_compact_trigger;
#endif
/*
* This is a per-node reserve of pages that are not available
@@ -2871,7 +2871,7 @@ static struct ctl_table vm_table[] = {
.data = &sysctl_compaction_proactiveness,
.maxlen = sizeof(sysctl_compaction_proactiveness),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = compaction_proactiveness_sysctl_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = &one_hundred,
},
@@ -2706,6 +2706,30 @@ static void compact_nodes(void)
*/
unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
+int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *length, loff_t *ppos)
+{
+ int rc, nid;
+
+ rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
+ if (rc)
+ return rc;
+
+ if (write && sysctl_compaction_proactiveness) {
+ for_each_online_node(nid) {
+ pg_data_t *pgdat = NODE_DATA(nid);
+
+ if (pgdat->proactive_compact_trigger)
+ continue;
+
+ pgdat->proactive_compact_trigger = true;
+ wake_up_interruptible(&pgdat->kcompactd_wait);
+ }
+ }
+
+ return 0;
+}
+
/*
* This is the entry point for compacting all nodes via
* /proc/sys/vm/compact_memory
@@ -2750,7 +2774,8 @@ void compaction_unregister_node(struct n
static inline bool kcompactd_work_requested(pg_data_t *pgdat)
{
- return pgdat->kcompactd_max_order > 0 || kthread_should_stop();
+ return pgdat->kcompactd_max_order > 0 || kthread_should_stop() ||
+ pgdat->proactive_compact_trigger;
}
static bool kcompactd_node_suitable(pg_data_t *pgdat)
@@ -2901,9 +2926,16 @@ static int kcompactd(void *p)
while (!kthread_should_stop()) {
unsigned long pflags;
+ /*
+ * Avoid the unnecessary wakeup for proactive compaction
+ * when it is disabled.
+ */
+ if (!sysctl_compaction_proactiveness)
+ timeout = MAX_SCHEDULE_TIMEOUT;
trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
if (wait_event_freezable_timeout(pgdat->kcompactd_wait,
- kcompactd_work_requested(pgdat), timeout)) {
+ kcompactd_work_requested(pgdat), timeout) &&
+ !pgdat->proactive_compact_trigger) {
psi_memstall_enter(&pflags);
kcompactd_do_work(pgdat);
@@ -2938,6 +2970,8 @@ static int kcompactd(void *p)
timeout =
default_timeout << COMPACT_MAX_DEFER_SHIFT;
}
+ if (unlikely(pgdat->proactive_compact_trigger))
+ pgdat->proactive_compact_trigger = false;
}
return 0;