@@ -43,6 +43,8 @@
#include <linux/compat.h>
#include <linux/rculist.h>
#include <net/busy_poll.h>
+#include <linux/memcontrol.h>
+#include <linux/oom.h>
/*
* LOCKING:
@@ -1761,6 +1763,10 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
u64 slack = 0;
wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
+ DEFINE_WAIT_FUNC(oom_target_wait, oom_target_callback);
+
+ if (current->oom_target)
+ add_wait_queue(oom_target_get_wait(), &oom_target_wait);
if (timeout > 0) {
struct timespec64 end_time = ep_set_mstimeout(timeout);
@@ -1850,6 +1856,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
!(res = ep_send_events(ep, events, maxevents)) && !timed_out)
goto fetch_events;
+ if (current->oom_target)
+ remove_wait_queue(oom_target_get_wait(), &oom_target_wait);
+
return res;
}
@@ -350,6 +350,12 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
seq_putc(m, '\n');
}
+/* Emit the "Idle:" line of /proc/<pid>/status: 1 if the task has
+ * opted in via prctl(PR_SET_IDLE, PR_IDLE_MODE_KILLME), else 0.
+ */
+static inline void task_idle(struct seq_file *m, struct task_struct *p)
+{
+	seq_put_decimal_ull(m, "Idle:\t", p->oom_target);
+	seq_putc(m, '\n');
+}
+
static inline void task_context_switch_counts(struct seq_file *m,
struct task_struct *p)
{
@@ -381,6 +387,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
task_sig(m, task);
task_cap(m, task);
task_seccomp(m, task);
+ task_idle(m, task);
task_cpus_allowed(m, task);
cpuset_task_status_allowed(m, task);
task_context_switch_counts(m, task);
@@ -30,6 +30,7 @@
#include <linux/vmstat.h>
#include <linux/writeback.h>
#include <linux/page-flags.h>
+#include <linux/wait.h>
struct mem_cgroup;
struct page;
@@ -102,6 +102,10 @@ extern void oom_killer_enable(void);
extern struct task_struct *find_lock_task_mm(struct task_struct *p);
+extern void exit_oom_target(void);
+struct wait_queue_head *oom_target_get_wait(void);
+int oom_target_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key);
+
/* sysctls */
extern int sysctl_oom_dump_tasks;
extern int sysctl_oom_kill_allocating_task;
@@ -652,6 +652,7 @@ struct task_struct {
/* disallow userland-initiated cgroup migration */
unsigned no_cgroup_migration:1;
#endif
+ unsigned oom_target:1;
unsigned long atomic_flags; /* Flags requiring atomic access. */
@@ -198,4 +198,8 @@ struct prctl_mm_map {
# define PR_CAP_AMBIENT_LOWER 3
# define PR_CAP_AMBIENT_CLEAR_ALL 4
+#define PR_SET_IDLE 48
+#define PR_GET_IDLE 49
+# define PR_IDLE_MODE_KILLME 1
+
#endif /* _LINUX_PRCTL_H */
@@ -62,6 +62,7 @@
#include <linux/random.h>
#include <linux/rcuwait.h>
#include <linux/compat.h>
+#include <linux/eventpoll.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -2386,6 +2386,15 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_GET_FP_MODE:
error = GET_FP_MODE(me);
break;
+ case PR_SET_IDLE:
+ if (!((arg2 == 0) || (arg2 == PR_IDLE_MODE_KILLME)))
+ return -EINVAL;
+ me->oom_target = arg2;
+ error = 0;
+ break;
+ case PR_GET_IDLE:
+ error = me->oom_target;
+ break;
default:
error = -EINVAL;
break;
@@ -41,6 +41,8 @@
#include <linux/kthread.h>
#include <linux/init.h>
#include <linux/mmu_notifier.h>
+#include <linux/eventpoll.h>
+#include <linux/wait.h>
#include <asm/tlb.h>
#include "internal.h"
@@ -54,6 +56,23 @@ int sysctl_oom_dump_tasks = 1;
DEFINE_MUTEX(oom_lock);
+static DECLARE_WAIT_QUEUE_HEAD(oom_target);
+
+/* Clean up after an exiting prctl(PR_SET_IDLE) process.
+ * Called by kernel/exit.c.
+ *
+ * The wait queue entry that parks the task on the death-row queue
+ * lives on the task's own ep_poll() stack frame and is removed there
+ * before ep_poll() returns, so there is nothing to dequeue here.
+ * (Declaring a brand-new local entry and handing it to
+ * remove_wait_queue(), as the previous version did, runs list_del()
+ * on a node that was never linked — a NULL/garbage dereference.)
+ * All that remains is to drop the opt-in flag so the OOM path stops
+ * treating this task as a kill target.
+ */
+void exit_oom_target(void)
+{
+	current->oom_target = 0;
+}
+
+/* Accessor for the PR_SET_IDLE death-row wait queue head.
+ *
+ * Not "inline": linux/oom.h declares this extern and fs/eventpoll.c
+ * calls it from another translation unit; with the kernel's gnu89
+ * inline semantics a plain-inline definition may emit no out-of-line
+ * symbol and break the link.  Also spell the empty parameter list as
+ * (void) so the prototype matches the header declaration.
+ */
+struct wait_queue_head *oom_target_get_wait(void)
+{
+	return &oom_target;
+}
+
#ifdef CONFIG_NUMA
/**
* has_intersects_mems_allowed() - check task eligiblity for kill
@@ -994,6 +1013,18 @@ int unregister_oom_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(unregister_oom_notifier);
+/* Wake-function for entries queued on the PR_SET_IDLE death-row
+ * wait queue (see DEFINE_WAIT_FUNC in ep_poll()).
+ *
+ * Invoked from wake_up() in out_of_memory(); wait->private is the
+ * task that parked itself on the queue.  Returning 0 tells
+ * __wake_up() to keep walking the list, so a single wakeup kills
+ * every opted-in waiter -- NOTE(review): confirm killing all waiters,
+ * rather than just one, is the intended OOM-relief policy.
+ */
+int oom_target_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
+{
+	struct task_struct *ts = wait->private;
+
+	/* We use SIGKILL instead of the oom killer
+	 * so as to cleanly interrupt ep_poll()
+	 */
+	pr_debug("Killing pid %u from prctl(PR_SET_IDLE) death row.\n", ts->pid);
+	send_sig(SIGKILL, ts, 1);
+	return 0;
+}
+
/**
* out_of_memory - kill the "best" process when we run out of memory
* @oc: pointer to struct oom_control
@@ -1007,6 +1038,7 @@ bool out_of_memory(struct oom_control *oc)
{
unsigned long freed = 0;
enum oom_constraint constraint = CONSTRAINT_NONE;
+ wait_queue_head_t *w;
if (oom_killer_disabled)
return false;
@@ -1056,6 +1088,17 @@ bool out_of_memory(struct oom_control *oc)
return true;
}
+ /*
+ * Check death row for current memcg or global.
+ */
+ if (!is_memcg_oom(oc)) {
+ w = oom_target_get_wait();
+ if (waitqueue_active(w)) {
+ wake_up(w);
+ return true;
+ }
+ }
+
select_bad_process(oc);
/* Found nothing?!?! Either we hang forever, or we panic. */
if (!oc->chosen && !is_sysrq_oom(oc) && !is_memcg_oom(oc)) {