Message ID | 20171121044947.18479-1-slandden@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Nov 20, 2017 at 8:49 PM, Shawn Landden <slandden@gmail.com> wrote: > See my systemd patch: https://github.com/shawnl/systemd/tree/prctl > > Android uses this memory model for all programs, and having it in the > kernel will enable integration with the page cache (not in this > series). > > v2 > switch to prctl, memcg support > > v3 > use <linux/wait.h> > put OOM after constraint checking > --- > fs/eventpoll.c | 27 ++++++++++++++++++++ > fs/proc/array.c | 7 ++++++ > include/linux/memcontrol.h | 3 +++ > include/linux/oom.h | 4 +++ > include/linux/sched.h | 1 + > include/uapi/linux/prctl.h | 4 +++ > kernel/cgroup/cgroup.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++ > kernel/exit.c | 1 + > kernel/sys.c | 9 +++++++ > mm/memcontrol.c | 2 ++ > mm/oom_kill.c | 47 +++++++++++++++++++++++++++++++++++ > 11 files changed, 166 insertions(+) > > diff --git a/fs/eventpoll.c b/fs/eventpoll.c > index 2fabd19cdeea..745662f9a7e1 100644 > --- a/fs/eventpoll.c > +++ b/fs/eventpoll.c > @@ -43,6 +43,8 @@ > #include <linux/compat.h> > #include <linux/rculist.h> > #include <net/busy_poll.h> > +#include <linux/memcontrol.h> > +#include <linux/oom.h> > > /* > * LOCKING: > @@ -1761,6 +1763,19 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, > u64 slack = 0; > wait_queue_entry_t wait; > ktime_t expires, *to = NULL; > + DEFINE_WAIT_FUNC(oom_target_wait, oom_target_callback); > + DEFINE_WAIT_FUNC(oom_target_wait_mcg, oom_target_callback); > + > + if (current->oom_target) { > +#ifdef CONFIG_MEMCG > + struct mem_cgroup *mcg; > + > + mcg = mem_cgroup_from_task(current); > + if (mcg) > + add_wait_queue(&mcg->oom_target, &oom_target_wait_mcg); > +#endif > + add_wait_queue(oom_target_get_wait(), &oom_target_wait); > + } > > if (timeout > 0) { > struct timespec64 end_time = ep_set_mstimeout(timeout); > @@ -1850,6 +1865,18 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, > !(res = ep_send_events(ep, events, maxevents)) && 
!timed_out) > goto fetch_events; > > + if (current->oom_target) { > +#ifdef CONFIG_MEMCG > + struct mem_cgroup *mcg; > + > + mcg = mem_cgroup_from_task(current); > + if (mcg) > + remove_wait_queue(&mcg->oom_target, > + &oom_target_wait_mcg); > +#endif > + remove_wait_queue(oom_target_get_wait(), &oom_target_wait); > + } > + > return res; > } > > diff --git a/fs/proc/array.c b/fs/proc/array.c > index 9390032a11e1..1954ae87cb88 100644 > --- a/fs/proc/array.c > +++ b/fs/proc/array.c > @@ -350,6 +350,12 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) > seq_putc(m, '\n'); > } > > +static inline void task_idle(struct seq_file *m, struct task_struct *p) > +{ > + seq_put_decimal_ull(m, "Idle:\t", p->oom_target); > + seq_putc(m, '\n'); > +} > + > static inline void task_context_switch_counts(struct seq_file *m, > struct task_struct *p) > { > @@ -381,6 +387,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, > task_sig(m, task); > task_cap(m, task); > task_seccomp(m, task); > + task_idle(m, task); > task_cpus_allowed(m, task); > cpuset_task_status_allowed(m, task); > task_context_switch_counts(m, task); > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h > index 69966c461d1c..02eb92e7eff5 100644 > --- a/include/linux/memcontrol.h > +++ b/include/linux/memcontrol.h > @@ -30,6 +30,7 @@ > #include <linux/vmstat.h> > #include <linux/writeback.h> > #include <linux/page-flags.h> > +#include <linux/wait.h> > > struct mem_cgroup; > struct page; > @@ -261,6 +262,8 @@ struct mem_cgroup { > struct list_head event_list; > spinlock_t event_list_lock; > > + wait_queue_head_t oom_target; > + > struct mem_cgroup_per_node *nodeinfo[0]; > /* WARNING: nodeinfo must be the last member here */ > }; > diff --git a/include/linux/oom.h b/include/linux/oom.h > index 01c91d874a57..88acea9e0a59 100644 > --- a/include/linux/oom.h > +++ b/include/linux/oom.h > @@ -102,6 +102,10 @@ extern void oom_killer_enable(void); > > extern struct 
task_struct *find_lock_task_mm(struct task_struct *p); > > +extern void exit_oom_target(void); > +struct wait_queue_head *oom_target_get_wait(void); > +int oom_target_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key); > + > /* sysctls */ > extern int sysctl_oom_dump_tasks; > extern int sysctl_oom_kill_allocating_task; > diff --git a/include/linux/sched.h b/include/linux/sched.h > index fdf74f27acf1..51b0e5987e8c 100644 > --- a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -652,6 +652,7 @@ struct task_struct { > /* disallow userland-initiated cgroup migration */ > unsigned no_cgroup_migration:1; > #endif > + unsigned oom_target:1; > > unsigned long atomic_flags; /* Flags requiring atomic access. */ > > diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h > index b640071421f7..94868317c6f2 100644 > --- a/include/uapi/linux/prctl.h > +++ b/include/uapi/linux/prctl.h > @@ -198,4 +198,8 @@ struct prctl_mm_map { > # define PR_CAP_AMBIENT_LOWER 3 > # define PR_CAP_AMBIENT_CLEAR_ALL 4 > > +#define PR_SET_IDLE 48 > +#define PR_GET_IDLE 49 > +# define PR_IDLE_MODE_KILLME 1 > + > #endif /* _LINUX_PRCTL_H */ > diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c > index 44857278eb8a..081bcd84a8d0 100644 > --- a/kernel/cgroup/cgroup.c > +++ b/kernel/cgroup/cgroup.c > @@ -55,6 +55,8 @@ > #include <linux/nsproxy.h> > #include <linux/file.h> > #include <net/sock.h> > +#include <linux/oom.h> > +#include <linux/memcontrol.h> > > #define CREATE_TRACE_POINTS > #include <trace/events/cgroup.h> > @@ -756,6 +758,9 @@ static void css_set_move_task(struct task_struct *task, > struct css_set *from_cset, struct css_set *to_cset, > bool use_mg_tasks) > { > +#ifdef CONFIG_MEMCG > + struct mem_cgroup *mcg; > +#endif > lockdep_assert_held(&css_set_lock); > > if (to_cset && !css_set_populated(to_cset)) > @@ -779,6 +784,35 @@ static void css_set_move_task(struct task_struct *task, > css_task_iter_advance(it); > > 
list_del_init(&task->cg_list); > +#ifdef CONFIG_MEMCG > + /* dequeue from memcg->oom_target

Ahh this is all shitty here. Sorry for the noise of this shit.

> + * TODO: this is O(n), add rb-tree to make it O(logn) > + */ > + mcg = mem_cgroup_from_task(task); > + if (mcg) { > + struct wait_queue_entry *wait; > + > + spin_lock(&mcg->oom_target.lock); > + if (!waitqueue_active(&mcg->oom_target)) > + goto empty_from; > + wait = list_first_entry(&mcg->oom_target.head, > + wait_queue_entry_t, entry); > + do { > + struct list_head *list; > + > + if (wait->private == task) > + __remove_wait_queue(&mcg->oom_target, > + wait); > + list = wait->entry.next; > + if (list_is_last(list, &mcg->oom_target.head)) > + break; > + wait = list_entry(list, > + struct wait_queue_entry, entry); > + } while (1); > +empty_from: > + spin_unlock(&mcg->oom_target.lock); > + } > +#endif > if (!css_set_populated(from_cset)) > css_set_update_populated(from_cset, false); > } else { > @@ -797,6 +831,33 @@ static void css_set_move_task(struct task_struct *task, > rcu_assign_pointer(task->cgroups, to_cset); > list_add_tail(&task->cg_list, use_mg_tasks ?
&to_cset->mg_tasks : > &to_cset->tasks); > +#ifdef CONFIG_MEMCG > + /* dequeue from memcg->oom_target */ > + mcg = mem_cgroup_from_task(task); > + if (mcg) { > + struct wait_queue_entry *wait; > + > + spin_lock(&mcg->oom_target.lock); > + if (!waitqueue_active(&mcg->oom_target)) > + goto empty_to; > + wait = list_first_entry(&mcg->oom_target.head, > + wait_queue_entry_t, entry); > + do { > + struct list_head *list; > + > + if (wait->private == task) > + __add_wait_queue(&mcg->oom_target, > + wait); > + list = wait->entry.next; > + if (list_is_last(list, &mcg->oom_target.head)) > + break; > + wait = list_entry(list, > + struct wait_queue_entry, entry); > + } while (1); > +empty_to: > + spin_unlock(&mcg->oom_target.lock); > + } > +#endif > } > } > > diff --git a/kernel/exit.c b/kernel/exit.c > index f6cad39f35df..2788fbdae267 100644 > --- a/kernel/exit.c > +++ b/kernel/exit.c > @@ -62,6 +62,7 @@ > #include <linux/random.h> > #include <linux/rcuwait.h> > #include <linux/compat.h> > +#include <linux/eventpoll.h> > > #include <linux/uaccess.h> > #include <asm/unistd.h> > diff --git a/kernel/sys.c b/kernel/sys.c > index 524a4cb9bbe2..e1eb049a85e6 100644 > --- a/kernel/sys.c > +++ b/kernel/sys.c > @@ -2386,6 +2386,15 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, > case PR_GET_FP_MODE: > error = GET_FP_MODE(me); > break; > + case PR_SET_IDLE: > + if (!((arg2 == 0) || (arg2 == PR_IDLE_MODE_KILLME))) > + return -EINVAL; > + me->oom_target = arg2; > + error = 0; > + break; > + case PR_GET_IDLE: > + error = me->oom_target; > + break; > default: > error = -EINVAL; > break; > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index 661f046ad318..a4e3b93aeccd 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -4300,6 +4300,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) > memory_cgrp_subsys.broken_hierarchy = true; > } > > + init_waitqueue_head(&memcg->oom_target); > + > /* The following stuff does not apply to the root 
*/ > if (!parent) { > root_mem_cgroup = memcg; > diff --git a/mm/oom_kill.c b/mm/oom_kill.c > index dee0f75c3013..c5d8f5a716bc 100644 > --- a/mm/oom_kill.c > +++ b/mm/oom_kill.c > @@ -41,6 +41,9 @@ > #include <linux/kthread.h> > #include <linux/init.h> > #include <linux/mmu_notifier.h> > +#include <linux/eventpoll.h> > +#include <linux/wait.h> > +#include <linux/memcontrol.h> > > #include <asm/tlb.h> > #include "internal.h" > @@ -54,6 +57,23 @@ int sysctl_oom_dump_tasks = 1; > > DEFINE_MUTEX(oom_lock); > > +static DECLARE_WAIT_QUEUE_HEAD(oom_target); > + > +/* Clean up after a EPOLL_KILLME process quits. > + * Called by kernel/exit.c. > + */ > +void exit_oom_target(void) > +{ > + DECLARE_WAITQUEUE(wait, current); > + > + remove_wait_queue(&oom_target, &wait); > +} > + > +inline struct wait_queue_head *oom_target_get_wait() > +{ > + return &oom_target; > +} > + > #ifdef CONFIG_NUMA > /** > * has_intersects_mems_allowed() - check task eligiblity for kill > @@ -994,6 +1014,18 @@ int unregister_oom_notifier(struct notifier_block *nb) > } > EXPORT_SYMBOL_GPL(unregister_oom_notifier); > > +int oom_target_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) > +{ > + struct task_struct *ts = wait->private; > + > + /* We use SIGKILL instead of the oom killer > + * so as to cleanly interrupt ep_poll() > + */ > + pr_info("Killing pid %u from prctl(PR_SET_IDLE) death row.\n", ts->pid); > + send_sig(SIGKILL, ts, 1); > + return 0; > +} > + > /** > * out_of_memory - kill the "best" process when we run out of memory > * @oc: pointer to struct oom_control > @@ -1007,6 +1039,7 @@ bool out_of_memory(struct oom_control *oc) > { > unsigned long freed = 0; > enum oom_constraint constraint = CONSTRAINT_NONE; > + wait_queue_head_t *w; > > if (oom_killer_disabled) > return false; > @@ -1056,6 +1089,20 @@ bool out_of_memory(struct oom_control *oc) > return true; > } > > + /* > + * Check death row for current memcg or global. 
> + */ > +#ifdef CONFIG_MEMCG > + if (is_memcg_oom(oc)) > + w = &oc->memcg->oom_target; > + else > +#endif > + w = oom_target_get_wait(); > + if (waitqueue_active(w)) { > + wake_up(w); > + return true; > + } > + > select_bad_process(oc); > /* Found nothing?!?! Either we hang forever, or we panic. */ > if (!oc->chosen && !is_sysrq_oom(oc) && !is_memcg_oom(oc)) { > -- > 2.14.1 >
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 2fabd19cdeea..745662f9a7e1 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -43,6 +43,8 @@ #include <linux/compat.h> #include <linux/rculist.h> #include <net/busy_poll.h> +#include <linux/memcontrol.h> +#include <linux/oom.h> /* * LOCKING: @@ -1761,6 +1763,19 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, u64 slack = 0; wait_queue_entry_t wait; ktime_t expires, *to = NULL; + DEFINE_WAIT_FUNC(oom_target_wait, oom_target_callback); + DEFINE_WAIT_FUNC(oom_target_wait_mcg, oom_target_callback); + + if (current->oom_target) { +#ifdef CONFIG_MEMCG + struct mem_cgroup *mcg; + + mcg = mem_cgroup_from_task(current); + if (mcg) + add_wait_queue(&mcg->oom_target, &oom_target_wait_mcg); +#endif + add_wait_queue(oom_target_get_wait(), &oom_target_wait); + } if (timeout > 0) { struct timespec64 end_time = ep_set_mstimeout(timeout); @@ -1850,6 +1865,18 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, !(res = ep_send_events(ep, events, maxevents)) && !timed_out) goto fetch_events; + if (current->oom_target) { +#ifdef CONFIG_MEMCG + struct mem_cgroup *mcg; + + mcg = mem_cgroup_from_task(current); + if (mcg) + remove_wait_queue(&mcg->oom_target, + &oom_target_wait_mcg); +#endif + remove_wait_queue(oom_target_get_wait(), &oom_target_wait); + } + return res; } diff --git a/fs/proc/array.c b/fs/proc/array.c index 9390032a11e1..1954ae87cb88 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -350,6 +350,12 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) seq_putc(m, '\n'); } +static inline void task_idle(struct seq_file *m, struct task_struct *p) +{ + seq_put_decimal_ull(m, "Idle:\t", p->oom_target); + seq_putc(m, '\n'); +} + static inline void task_context_switch_counts(struct seq_file *m, struct task_struct *p) { @@ -381,6 +387,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, task_sig(m, task); task_cap(m, task); 
task_seccomp(m, task); + task_idle(m, task); task_cpus_allowed(m, task); cpuset_task_status_allowed(m, task); task_context_switch_counts(m, task); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 69966c461d1c..02eb92e7eff5 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -30,6 +30,7 @@ #include <linux/vmstat.h> #include <linux/writeback.h> #include <linux/page-flags.h> +#include <linux/wait.h> struct mem_cgroup; struct page; @@ -261,6 +262,8 @@ struct mem_cgroup { struct list_head event_list; spinlock_t event_list_lock; + wait_queue_head_t oom_target; + struct mem_cgroup_per_node *nodeinfo[0]; /* WARNING: nodeinfo must be the last member here */ }; diff --git a/include/linux/oom.h b/include/linux/oom.h index 01c91d874a57..88acea9e0a59 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -102,6 +102,10 @@ extern void oom_killer_enable(void); extern struct task_struct *find_lock_task_mm(struct task_struct *p); +extern void exit_oom_target(void); +struct wait_queue_head *oom_target_get_wait(void); +int oom_target_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key); + /* sysctls */ extern int sysctl_oom_dump_tasks; extern int sysctl_oom_kill_allocating_task; diff --git a/include/linux/sched.h b/include/linux/sched.h index fdf74f27acf1..51b0e5987e8c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -652,6 +652,7 @@ struct task_struct { /* disallow userland-initiated cgroup migration */ unsigned no_cgroup_migration:1; #endif + unsigned oom_target:1; unsigned long atomic_flags; /* Flags requiring atomic access. 
*/ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index b640071421f7..94868317c6f2 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -198,4 +198,8 @@ struct prctl_mm_map { # define PR_CAP_AMBIENT_LOWER 3 # define PR_CAP_AMBIENT_CLEAR_ALL 4 +#define PR_SET_IDLE 48 +#define PR_GET_IDLE 49 +# define PR_IDLE_MODE_KILLME 1 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 44857278eb8a..081bcd84a8d0 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -55,6 +55,8 @@ #include <linux/nsproxy.h> #include <linux/file.h> #include <net/sock.h> +#include <linux/oom.h> +#include <linux/memcontrol.h> #define CREATE_TRACE_POINTS #include <trace/events/cgroup.h> @@ -756,6 +758,9 @@ static void css_set_move_task(struct task_struct *task, struct css_set *from_cset, struct css_set *to_cset, bool use_mg_tasks) { +#ifdef CONFIG_MEMCG + struct mem_cgroup *mcg; +#endif lockdep_assert_held(&css_set_lock); if (to_cset && !css_set_populated(to_cset)) @@ -779,6 +784,35 @@ static void css_set_move_task(struct task_struct *task, css_task_iter_advance(it); list_del_init(&task->cg_list); +#ifdef CONFIG_MEMCG + /* dequeue from memcg->oom_target + * TODO: this is O(n), add rb-tree to make it O(logn) + */ + mcg = mem_cgroup_from_task(task); + if (mcg) { + struct wait_queue_entry *wait; + + spin_lock(&mcg->oom_target.lock); + if (!waitqueue_active(&mcg->oom_target)) + goto empty_from; + wait = list_first_entry(&mcg->oom_target.head, + wait_queue_entry_t, entry); + do { + struct list_head *list; + + if (wait->private == task) + __remove_wait_queue(&mcg->oom_target, + wait); + list = wait->entry.next; + if (list_is_last(list, &mcg->oom_target.head)) + break; + wait = list_entry(list, + struct wait_queue_entry, entry); + } while (1); +empty_from: + spin_unlock(&mcg->oom_target.lock); + } +#endif if (!css_set_populated(from_cset)) css_set_update_populated(from_cset, false); } else { @@ -797,6 
+831,33 @@ static void css_set_move_task(struct task_struct *task, rcu_assign_pointer(task->cgroups, to_cset); list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks : &to_cset->tasks); +#ifdef CONFIG_MEMCG + /* dequeue from memcg->oom_target */ + mcg = mem_cgroup_from_task(task); + if (mcg) { + struct wait_queue_entry *wait; + + spin_lock(&mcg->oom_target.lock); + if (!waitqueue_active(&mcg->oom_target)) + goto empty_to; + wait = list_first_entry(&mcg->oom_target.head, + wait_queue_entry_t, entry); + do { + struct list_head *list; + + if (wait->private == task) + __add_wait_queue(&mcg->oom_target, + wait); + list = wait->entry.next; + if (list_is_last(list, &mcg->oom_target.head)) + break; + wait = list_entry(list, + struct wait_queue_entry, entry); + } while (1); +empty_to: + spin_unlock(&mcg->oom_target.lock); + } +#endif } } diff --git a/kernel/exit.c b/kernel/exit.c index f6cad39f35df..2788fbdae267 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -62,6 +62,7 @@ #include <linux/random.h> #include <linux/rcuwait.h> #include <linux/compat.h> +#include <linux/eventpoll.h> #include <linux/uaccess.h> #include <asm/unistd.h> diff --git a/kernel/sys.c b/kernel/sys.c index 524a4cb9bbe2..e1eb049a85e6 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2386,6 +2386,15 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_GET_FP_MODE: error = GET_FP_MODE(me); break; + case PR_SET_IDLE: + if (!((arg2 == 0) || (arg2 == PR_IDLE_MODE_KILLME))) + return -EINVAL; + me->oom_target = arg2; + error = 0; + break; + case PR_GET_IDLE: + error = me->oom_target; + break; default: error = -EINVAL; break; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 661f046ad318..a4e3b93aeccd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4300,6 +4300,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) memory_cgrp_subsys.broken_hierarchy = true; } + init_waitqueue_head(&memcg->oom_target); + /* The following stuff does not apply to 
the root */ if (!parent) { root_mem_cgroup = memcg; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index dee0f75c3013..c5d8f5a716bc 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -41,6 +41,9 @@ #include <linux/kthread.h> #include <linux/init.h> #include <linux/mmu_notifier.h> +#include <linux/eventpoll.h> +#include <linux/wait.h> +#include <linux/memcontrol.h> #include <asm/tlb.h> #include "internal.h" @@ -54,6 +57,23 @@ int sysctl_oom_dump_tasks = 1; DEFINE_MUTEX(oom_lock); +static DECLARE_WAIT_QUEUE_HEAD(oom_target); + +/* Clean up after a EPOLL_KILLME process quits. + * Called by kernel/exit.c. + */ +void exit_oom_target(void) +{ + DECLARE_WAITQUEUE(wait, current); + + remove_wait_queue(&oom_target, &wait); +} + +inline struct wait_queue_head *oom_target_get_wait() +{ + return &oom_target; +} + #ifdef CONFIG_NUMA /** * has_intersects_mems_allowed() - check task eligiblity for kill @@ -994,6 +1014,18 @@ int unregister_oom_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_oom_notifier); +int oom_target_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) +{ + struct task_struct *ts = wait->private; + + /* We use SIGKILL instead of the oom killer + * so as to cleanly interrupt ep_poll() + */ + pr_info("Killing pid %u from prctl(PR_SET_IDLE) death row.\n", ts->pid); + send_sig(SIGKILL, ts, 1); + return 0; +} + /** * out_of_memory - kill the "best" process when we run out of memory * @oc: pointer to struct oom_control @@ -1007,6 +1039,7 @@ bool out_of_memory(struct oom_control *oc) { unsigned long freed = 0; enum oom_constraint constraint = CONSTRAINT_NONE; + wait_queue_head_t *w; if (oom_killer_disabled) return false; @@ -1056,6 +1089,20 @@ bool out_of_memory(struct oom_control *oc) return true; } + /* + * Check death row for current memcg or global. 
+ */ +#ifdef CONFIG_MEMCG + if (is_memcg_oom(oc)) + w = &oc->memcg->oom_target; + else +#endif + w = oom_target_get_wait(); + if (waitqueue_active(w)) { + wake_up(w); + return true; + } + select_bad_process(oc); /* Found nothing?!?! Either we hang forever, or we panic. */ if (!oc->chosen && !is_sysrq_oom(oc) && !is_memcg_oom(oc)) {