
mm, oom: simplify task's refcount handling

Message ID 1563940476-6162-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp (mailing list archive)
State New, archived
Series mm, oom: simplify task's refcount handling

Commit Message

Tetsuo Handa July 24, 2019, 3:54 a.m. UTC
Currently out_of_memory() is full of get_task_struct()/put_task_struct()
calls. Since "mm, oom: avoid printk() iteration under RCU" introduced
a list for holding a snapshot of all OOM victim candidates, let's share
that list for select_bad_process() and oom_kill_process() in order to
simplify task's refcount handling.

As a result of this patch, the get_task_struct()/put_task_struct() calls
in out_of_memory() are each reduced to only two.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: David Rientjes <rientjes@google.com>
---
 include/linux/sched.h |   2 +-
 mm/oom_kill.c         | 122 ++++++++++++++++++++++++--------------------------
 2 files changed, 60 insertions(+), 64 deletions(-)
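
In outline, the patch replaces the scattered per-use pinning with a
pin-once/release-once pattern around the whole OOM event. A simplified
sketch of the resulting shape of out_of_memory() (sketch only; the sysrq
and oom_kill_allocating_task paths are omitted):

	link_oom_candidates(oc);	/* one get_task_struct() per candidate */
	select_bad_process(oc);		/* walks the pinned list; no get/put */
	if (oc->chosen && oc->chosen != (void *)-1UL)
		oom_kill_process(oc, "Out of memory");	/* victim already pinned */
	unlink_oom_candidates();	/* one put_task_struct() per candidate */
	return !!oc->chosen;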

Comments

Michal Hocko July 24, 2019, 6:41 a.m. UTC | #1
On Wed 24-07-19 12:54:36, Tetsuo Handa wrote:
> Currently out_of_memory() is full of get_task_struct()/put_task_struct()
> calls. Since "mm, oom: avoid printk() iteration under RCU" introduced
> a list for holding a snapshot of all OOM victim candidates, let's share
> that list for select_bad_process() and oom_kill_process() in order to
> simplify task's refcount handling.
> 
> As a result of this patch, the get_task_struct()/put_task_struct() calls
> in out_of_memory() are each reduced to only two.

This is probably a matter of taste but the diffstat suggests to me that the
simplification is not all that great. On the other hand this makes the
oom handling even more tricky and harder for potential further
development - e.g. if we ever need to break the global lock down in the
future this would be another obstacle on the way. While the potential
development might be too theoretical, the benefit of the patch is not
really clear to me. The task_struct reference counting is not really
an unusual operation and there is nothing really scary that we do with
it here. We already have to go the extra mile wrt. task_lock, so careful
reference counting doesn't really jump out.

That being said, I do not think this patch gives any improvement.

Tetsuo Handa July 24, 2019, 7:37 a.m. UTC | #2
On 2019/07/24 15:41, Michal Hocko wrote:
> On Wed 24-07-19 12:54:36, Tetsuo Handa wrote:
>> Currently out_of_memory() is full of get_task_struct()/put_task_struct()
>> calls. Since "mm, oom: avoid printk() iteration under RCU" introduced
>> a list for holding a snapshot of all OOM victim candidates, let's share
>> that list for select_bad_process() and oom_kill_process() in order to
>> simplify task's refcount handling.
>>
>> As a result of this patch, the get_task_struct()/put_task_struct() calls
>> in out_of_memory() are each reduced to only two.
> 
> This is probably a matter of taste but the diffstat suggests to me that the
> simplification is not all that great. On the other hand this makes the
> oom handling even more tricky and harder for potential further
> development - e.g. if we ever need to break the global lock down in the
> future this would be another obstacle on the way.

If we want to remove the oom_lock serialization, we can implement it by
doing INIT_LIST_HEAD(&p->oom_candidate) when a thread is created and
checking list_empty(&p->oom_candidate) with p->task_lock (or something
similar) held when adding to a local on-stack "oom_candidate_list" stored
in "oc".
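
For illustration only, a minimal sketch of that idea (the
oc->oom_candidate_list member is hypothetical and not part of the posted
patch):

	static int add_candidate_task(struct task_struct *p, void *arg)
	{
		struct oom_control *oc = arg;

		task_lock(p);
		/* An empty node means no concurrent OOM event owns this task. */
		if (!oom_unkillable_task(p) && list_empty(&p->oom_candidate)) {
			get_task_struct(p);
			list_add_tail(&p->oom_candidate, &oc->oom_candidate_list);
		}
		task_unlock(p);
		return 0;
	}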

But we do not want to jumble concurrent OOM killer messages. Since it is
dump_header() which takes the majority of the time, synchronous printk()
will be the real obstacle on the way. I have tried removing the oom_lock
serialization, and that work resulted in commit cbae05d32ff68233 ("printk:
Pass caller information to log_store()."). The OOM killer calls printk()
in a manner that will jumble concurrent OOM killer messages...

> While the potential
> development might be too theoretical, the benefit of the patch is not
> really clear to me. The task_struct reference counting is not really
> an unusual operation and there is nothing really scary that we do with
> it here. We already have to go the extra mile wrt. task_lock, so careful
> reference counting doesn't really jump out.
> 
> That being said, I do not think this patch gives any improvement.
> 

This patch avoids RCU during select_bad_process(). That opens up the
possibility of doing reschedulable things there; e.g. directly reaping
only a portion of the OOM victim's memory rather than wasting CPU
resources by spinning until MMF_OOM_SKIP is set by the OOM reaper.
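
A rough sketch of the kind of sleepable work this would permit once the
walk is no longer under rcu_read_lock() (reap_some_memory() is a purely
hypothetical helper, not an existing function):

	static void select_bad_process(struct oom_control *oc)
	{
		struct task_struct *p;

		list_for_each_entry(p, &oom_candidate_list, oom_candidate) {
			cond_resched();	/* safe: no RCU read-side section held */
			if (tsk_is_oom_victim(p))
				reap_some_memory(p->mm);	/* hypothetical, may sleep */
			if (oom_evaluate_task(p, oc))
				break;
		}
	}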
Michal Hocko July 24, 2019, 8:07 a.m. UTC | #3
On Wed 24-07-19 16:37:35, Tetsuo Handa wrote:
> On 2019/07/24 15:41, Michal Hocko wrote:
[...]
> > That being said, I do not think this patch gives any improvement.
> > 
> 
> This patch avoids RCU during select_bad_process().

It just shifts where the RCU read lock is taken. Do you have any numbers
to show that this is an improvement? Basically the only potentially
expensive thing down the oom_evaluate_task() path that I can see is the
task_lock, but I am not aware of a single report where this was a
contributor to RCU stalls. I can be proven wrong, but...

> This patch allows
> possibility of doing reschedulable things there; e.g. directly reaping
> only a portion of OOM victim's memory rather than wasting CPU resource
> by spinning until MMF_OOM_SKIP is set by the OOM reaper.

We have been through direct oom reaping before and I haven't changed my
position there. It is just too tricky to be worth it.
Tetsuo Handa July 26, 2019, 3:17 a.m. UTC | #4
On 2019/07/24 17:07, Michal Hocko wrote:
> On Wed 24-07-19 16:37:35, Tetsuo Handa wrote:
>> On 2019/07/24 15:41, Michal Hocko wrote:
> [...]
>>> That being said, I do not think this patch gives any improvement.
>>>
>>
>> This patch avoids RCU during select_bad_process().
> 
> It just shifts where the RCU read lock is taken. Do you have any numbers
> to show that this is an improvement? Basically the only potentially
> expensive thing down the oom_evaluate_task() path that I can see is the
> task_lock, but I am not aware of a single report where this was a
> contributor to RCU stalls. I can be proven wrong, but...
> 

I don't have numbers (nor any intent to present numbers). What I said is
"we can do reschedulable things from select_bad_process() if future
development finds it worthwhile, since oom_evaluate_task() is called
without RCU". For now just a cond_resched() would be added to the
select_bad_process() iteration.

>> That opens up the possibility of doing reschedulable things there;
>> e.g. directly reaping only a portion of the OOM victim's memory rather
>> than wasting CPU resources by spinning until MMF_OOM_SKIP is set by
>> the OOM reaper.
> 
> We have been through direct oom reaping before and I haven't changed my
> position there. It is just too tricky to be worth it.
> 

This is not limited to direct OOM reaping; it applies to anything that
future development might find useful.

Anyway, traversing the tasks only once (as this patch does) allows showing
a consistent snapshot of the OOM victim candidates. In other words, this
patch makes sure that the OOM victim candidates shown by dump_tasks() are
exactly what select_bad_process() has evaluated, since you said that the
main purpose of the listing is to double-check the list in order to
understand the OOM victim selection. This patch removes the race window
where OOM victim candidates could be added or removed between the two
passes.

Patch

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 48c1a4c..4062999 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1247,7 +1247,7 @@  struct task_struct {
 #ifdef CONFIG_MMU
 	struct task_struct		*oom_reaper_list;
 #endif
-	struct list_head		oom_victim_list;
+	struct list_head		oom_candidate;
 #ifdef CONFIG_VMAP_STACK
 	struct vm_struct		*stack_vm_area;
 #endif
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 110f948..311e0e9 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -63,6 +63,7 @@ 
  * and mark_oom_victim
  */
 DEFINE_MUTEX(oom_lock);
+static LIST_HEAD(oom_candidate_list);
 
 static inline bool is_memcg_oom(struct oom_control *oc)
 {
@@ -167,6 +168,41 @@  static bool oom_unkillable_task(struct task_struct *p)
 	return false;
 }
 
+static int add_candidate_task(struct task_struct *p, void *unused)
+{
+	if (!oom_unkillable_task(p)) {
+		get_task_struct(p);
+		list_add_tail(&p->oom_candidate, &oom_candidate_list);
+	}
+	return 0;
+}
+
+static void link_oom_candidates(struct oom_control *oc)
+{
+	struct task_struct *p;
+
+	if (is_memcg_oom(oc))
+		mem_cgroup_scan_tasks(oc->memcg, add_candidate_task, NULL);
+	else {
+		rcu_read_lock();
+		for_each_process(p)
+			add_candidate_task(p, NULL);
+		rcu_read_unlock();
+	}
+
+}
+
+static void unlink_oom_candidates(void)
+{
+	struct task_struct *p;
+	struct task_struct *t;
+
+	list_for_each_entry_safe(p, t, &oom_candidate_list, oom_candidate) {
+		list_del(&p->oom_candidate);
+		put_task_struct(p);
+	}
+}
+
 /*
  * Print out unreclaimble slabs info when unreclaimable slabs amount is greater
  * than all user memory (LRU pages)
@@ -344,16 +380,11 @@  static int oom_evaluate_task(struct task_struct *task, void *arg)
 		goto next;
 
 select:
-	if (oc->chosen)
-		put_task_struct(oc->chosen);
-	get_task_struct(task);
 	oc->chosen = task;
 	oc->chosen_points = points;
 next:
 	return 0;
 abort:
-	if (oc->chosen)
-		put_task_struct(oc->chosen);
 	oc->chosen = (void *)-1UL;
 	return 1;
 }
@@ -364,27 +395,13 @@  static int oom_evaluate_task(struct task_struct *task, void *arg)
  */
 static void select_bad_process(struct oom_control *oc)
 {
-	if (is_memcg_oom(oc))
-		mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
-	else {
-		struct task_struct *p;
-
-		rcu_read_lock();
-		for_each_process(p)
-			if (oom_evaluate_task(p, oc))
-				break;
-		rcu_read_unlock();
-	}
-}
-
+	struct task_struct *p;
 
-static int add_candidate_task(struct task_struct *p, void *arg)
-{
-	if (!oom_unkillable_task(p)) {
-		get_task_struct(p);
-		list_add_tail(&p->oom_victim_list, (struct list_head *) arg);
+	list_for_each_entry(p, &oom_candidate_list, oom_candidate) {
+		cond_resched();
+		if (oom_evaluate_task(p, oc))
+			break;
 	}
-	return 0;
 }
 
 /**
@@ -399,21 +416,12 @@  static int add_candidate_task(struct task_struct *p, void *arg)
  */
 static void dump_tasks(struct oom_control *oc)
 {
-	static LIST_HEAD(list);
 	struct task_struct *p;
 	struct task_struct *t;
 
-	if (is_memcg_oom(oc))
-		mem_cgroup_scan_tasks(oc->memcg, add_candidate_task, &list);
-	else {
-		rcu_read_lock();
-		for_each_process(p)
-			add_candidate_task(p, &list);
-		rcu_read_unlock();
-	}
 	pr_info("Tasks state (memory values in pages):\n");
 	pr_info("[  pid  ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");
-	list_for_each_entry(p, &list, oom_victim_list) {
+	list_for_each_entry(p, &oom_candidate_list, oom_candidate) {
 		cond_resched();
 		/* p may not have freeable memory in nodemask */
 		if (!is_memcg_oom(oc) && !oom_cpuset_eligible(p, oc))
@@ -430,10 +438,6 @@  static void dump_tasks(struct oom_control *oc)
 			t->signal->oom_score_adj, t->comm);
 		task_unlock(t);
 	}
-	list_for_each_entry_safe(p, t, &list, oom_victim_list) {
-		list_del(&p->oom_victim_list);
-		put_task_struct(p);
-	}
 }
 
 static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
@@ -859,17 +863,11 @@  static void __oom_kill_process(struct task_struct *victim, const char *message)
 	bool can_oom_reap = true;
 
 	p = find_lock_task_mm(victim);
-	if (!p) {
-		put_task_struct(victim);
+	if (!p)
 		return;
-	} else if (victim != p) {
-		get_task_struct(p);
-		put_task_struct(victim);
-		victim = p;
-	}
 
-	/* Get a reference to safely compare mm after task_unlock(victim) */
-	mm = victim->mm;
+	/* Get a reference to safely compare mm after task_unlock(p) */
+	mm = p->mm;
 	mmgrab(mm);
 
 	/* Raise event before sending signal: task reaper must see this */
@@ -881,16 +879,15 @@  static void __oom_kill_process(struct task_struct *victim, const char *message)
 	 * in order to prevent the OOM victim from depleting the memory
 	 * reserves from the user space under its control.
 	 */
-	do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID);
-	mark_oom_victim(victim);
+	do_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_TGID);
+	mark_oom_victim(p);
 	pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB, UID:%u\n",
-		message, task_pid_nr(victim), victim->comm,
-		K(victim->mm->total_vm),
-		K(get_mm_counter(victim->mm, MM_ANONPAGES)),
-		K(get_mm_counter(victim->mm, MM_FILEPAGES)),
-		K(get_mm_counter(victim->mm, MM_SHMEMPAGES)),
-		from_kuid(&init_user_ns, task_uid(victim)));
-	task_unlock(victim);
+	       message, task_pid_nr(p), p->comm, K(mm->total_vm),
+	       K(get_mm_counter(mm, MM_ANONPAGES)),
+	       K(get_mm_counter(mm, MM_FILEPAGES)),
+	       K(get_mm_counter(mm, MM_SHMEMPAGES)),
+	       from_kuid(&init_user_ns, task_uid(p)));
+	task_unlock(p);
 
 	/*
 	 * Kill all user processes sharing victim->mm in other thread groups, if
@@ -929,7 +926,6 @@  static void __oom_kill_process(struct task_struct *victim, const char *message)
 		wake_oom_reaper(victim);
 
 	mmdrop(mm);
-	put_task_struct(victim);
 }
 #undef K
 
@@ -940,10 +936,8 @@  static void __oom_kill_process(struct task_struct *victim, const char *message)
 static int oom_kill_memcg_member(struct task_struct *task, void *message)
 {
 	if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN &&
-	    !is_global_init(task)) {
-		get_task_struct(task);
+	    !is_global_init(task))
 		__oom_kill_process(task, message);
-	}
 	return 0;
 }
 
@@ -964,7 +958,6 @@  static void oom_kill_process(struct oom_control *oc, const char *message)
 		mark_oom_victim(victim);
 		wake_oom_reaper(victim);
 		task_unlock(victim);
-		put_task_struct(victim);
 		return;
 	}
 	task_unlock(victim);
@@ -1073,6 +1066,8 @@  bool out_of_memory(struct oom_control *oc)
 	if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS))
 		return true;
 
+	link_oom_candidates(oc);
+
 	/*
 	 * Check if there were limitations on the allocation (only relevant for
 	 * NUMA and memcg) that may require different handling.
@@ -1086,10 +1081,9 @@  bool out_of_memory(struct oom_control *oc)
 	    current->mm && !oom_unkillable_task(current) &&
 	    oom_cpuset_eligible(current, oc) &&
 	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
-		get_task_struct(current);
 		oc->chosen = current;
 		oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
-		return true;
+		goto done;
 	}
 
 	select_bad_process(oc);
@@ -1108,6 +1102,8 @@  bool out_of_memory(struct oom_control *oc)
 	if (oc->chosen && oc->chosen != (void *)-1UL)
 		oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
 				 "Memory cgroup out of memory");
+ done:
+	unlink_oom_candidates();
 	return !!oc->chosen;
 }