diff mbox

[v2] mm: fix oom_kill event handling

Message ID 20180508124637.29984-1-guro@fb.com (mailing list archive)
State New, archived
Headers show

Commit Message

Roman Gushchin May 8, 2018, 12:46 p.m. UTC
Commit e27be240df53 ("mm: memcg: make sure memory.events is
uptodate when waking pollers") converted most of memcg event
counters to per-memcg atomics, which made them less confusing
for a user. The "oom_kill" counter remained untouched, so now
it behaves differently than other counters (including "oom").
This adds nothing but confusion.

Let's fix this by adding the MEMCG_OOM_KILL event, and follow
the MEMCG_OOM approach. This also removes a hack from
count_memcg_event_mm(), introduced earlier specially for the
OOM_KILL counter.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: linux-kernel@vger.kernel.org
Cc: cgroups@vger.kernel.org
Cc: linux-mm@kvack.org
---
 include/linux/memcontrol.h | 26 ++++++++++++++++++++++----
 mm/memcontrol.c            |  6 ++++--
 mm/oom_kill.c              |  2 +-
 3 files changed, 27 insertions(+), 7 deletions(-)

Comments

Konstantin Khlebnikov May 8, 2018, 1:26 p.m. UTC | #1
On 08.05.2018 15:46, Roman Gushchin wrote:
> Commit e27be240df53 ("mm: memcg: make sure memory.events is
> uptodate when waking pollers") converted most of memcg event
> counters to per-memcg atomics, which made them less confusing
> for a user. The "oom_kill" counter remained untouched, so now
> it behaves differently than other counters (including "oom").
> This adds nothing but confusion.
> 
> Let's fix this by adding the MEMCG_OOM_KILL event, and follow
> the MEMCG_OOM approach. This also removes a hack from
> count_memcg_event_mm(), introduced earlier specially for the
> OOM_KILL counter.
> 
> Signed-off-by: Roman Gushchin <guro@fb.com>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> Cc: linux-kernel@vger.kernel.org
> Cc: cgroups@vger.kernel.org
> Cc: linux-mm@kvack.org

Acked-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>

> ---
>   include/linux/memcontrol.h | 26 ++++++++++++++++++++++----
>   mm/memcontrol.c            |  6 ++++--
>   mm/oom_kill.c              |  2 +-
>   3 files changed, 27 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 6cbea2f25a87..794475db7368 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -54,6 +54,7 @@ enum memcg_memory_event {
>   	MEMCG_HIGH,
>   	MEMCG_MAX,
>   	MEMCG_OOM,
> +	MEMCG_OOM_KILL,
>   	MEMCG_SWAP_MAX,
>   	MEMCG_SWAP_FAIL,
>   	MEMCG_NR_MEMORY_EVENTS,
> @@ -721,11 +722,8 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
>   
>   	rcu_read_lock();
>   	memcg = rcu_dereference(mm->memcg);
> -	if (likely(memcg)) {
> +	if (likely(memcg))
>   		count_memcg_events(memcg, idx, 1);
> -		if (idx == OOM_KILL)
> -			cgroup_file_notify(&memcg->events_file);
> -	}
>   	rcu_read_unlock();
>   }
>   
> @@ -736,6 +734,21 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg,
>   	cgroup_file_notify(&memcg->events_file);
>   }
>   
> +static inline void memcg_memory_event_mm(struct mm_struct *mm,
> +					 enum memcg_memory_event event)
> +{
> +	struct mem_cgroup *memcg;
> +
> +	if (mem_cgroup_disabled())
> +		return;
> +
> +	rcu_read_lock();
> +	memcg = rcu_dereference(mm->memcg);
> +	if (likely(memcg))
> +		memcg_memory_event(memcg, event);
> +	rcu_read_unlock();
> +}
> +
>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>   void mem_cgroup_split_huge_fixup(struct page *head);
>   #endif
> @@ -757,6 +770,11 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg,
>   {
>   }
>   
> +static inline void memcg_memory_event_mm(struct mm_struct *mm,
> +					 enum memcg_memory_event event)
> +{
> +}
> +
>   static inline bool mem_cgroup_low(struct mem_cgroup *root,
>   				  struct mem_cgroup *memcg)
>   {
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 10973671e562..38717630305d 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3772,7 +3772,8 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
>   
>   	seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
>   	seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
> -	seq_printf(sf, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
> +	seq_printf(sf, "oom_kill %lu\n",
> +		   atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
>   	return 0;
>   }
>   
> @@ -5529,7 +5530,8 @@ static int memory_events_show(struct seq_file *m, void *v)
>   		   atomic_long_read(&memcg->memory_events[MEMCG_MAX]));
>   	seq_printf(m, "oom %lu\n",
>   		   atomic_long_read(&memcg->memory_events[MEMCG_OOM]));
> -	seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
> +	seq_printf(m, "oom_kill %lu\n",
> +		   atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
>   
>   	return 0;
>   }
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 8f7d8dd99e5d..6b74142a1259 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -868,7 +868,7 @@ static void __oom_kill_process(struct task_struct *victim)
>   
>   	/* Raise event before sending signal: task reaper must see this */
>   	count_vm_event(OOM_KILL);
> -	count_memcg_event_mm(mm, OOM_KILL);
> +	memcg_memory_event_mm(mm, MEMCG_OOM_KILL);
>   
>   	/*
>   	 * We should send SIGKILL before granting access to memory reserves
>
Johannes Weiner May 8, 2018, 5:16 p.m. UTC | #2
On Tue, May 08, 2018 at 01:46:37PM +0100, Roman Gushchin wrote:
> Commit e27be240df53 ("mm: memcg: make sure memory.events is
> uptodate when waking pollers") converted most of memcg event
> counters to per-memcg atomics, which made them less confusing
> for a user. The "oom_kill" counter remained untouched, so now
> it behaves differently than other counters (including "oom").
> This adds nothing but confusion.
> 
> Let's fix this by adding the MEMCG_OOM_KILL event, and follow
> the MEMCG_OOM approach. This also removes a hack from
> count_memcg_event_mm(), introduced earlier specially for the
> OOM_KILL counter.
> 
> Signed-off-by: Roman Gushchin <guro@fb.com>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> Cc: linux-kernel@vger.kernel.org
> Cc: cgroups@vger.kernel.org
> Cc: linux-mm@kvack.org

Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Michal Hocko May 10, 2018, 11:41 a.m. UTC | #3
On Tue 08-05-18 13:46:37, Roman Gushchin wrote:
> Commit e27be240df53 ("mm: memcg: make sure memory.events is
> uptodate when waking pollers") converted most of memcg event
> counters to per-memcg atomics, which made them less confusing
> for a user. The "oom_kill" counter remained untouched, so now
> it behaves differently than other counters (including "oom").
> This adds nothing but confusion.
> 
> Let's fix this by adding the MEMCG_OOM_KILL event, and follow
> the MEMCG_OOM approach. This also removes a hack from
> count_memcg_event_mm(), introduced earlier specially for the
> OOM_KILL counter.

I agree that the current OOM_KILL is confusing. But do we really need
another memcg_memory_event_mm helper used for only one counter rather
than reusing memcg_memory_event? __oom_kill_process doesn't have the memcg
but nothing should really prevent us from adding the context
(oom_control) there, no?

[...]
Roman Gushchin May 10, 2018, 12:12 p.m. UTC | #4
On Thu, May 10, 2018 at 01:41:47PM +0200, Michal Hocko wrote:
> On Tue 08-05-18 13:46:37, Roman Gushchin wrote:
> > Commit e27be240df53 ("mm: memcg: make sure memory.events is
> > uptodate when waking pollers") converted most of memcg event
> > counters to per-memcg atomics, which made them less confusing
> > for a user. The "oom_kill" counter remained untouched, so now
> > it behaves differently than other counters (including "oom").
> > This adds nothing but confusion.
> > 
> > Let's fix this by adding the MEMCG_OOM_KILL event, and follow
> > the MEMCG_OOM approach. This also removes a hack from
> > count_memcg_event_mm(), introduced earlier specially for the
> > OOM_KILL counter.
> 
> I agree that the current OOM_KILL is confusing. But do we really need
> another memcg_memory_event_mm helper used for only one counter rather
> than reusing memcg_memory_event? __oom_kill_process doesn't have the memcg
> but nothing should really prevent us from adding the context
> (oom_control) there, no?

Not sure that I follow. oom_control has a memcg pointer,
but it's a pointer to the cgroup where the OOM happened.
In particular, it's NULL for a system-wide OOM.

And we do send the OOM_KILL event to the cgroup
which actually contains the process.
Michal Hocko May 10, 2018, 1:07 p.m. UTC | #5
On Thu 10-05-18 13:12:56, Roman Gushchin wrote:
> On Thu, May 10, 2018 at 01:41:47PM +0200, Michal Hocko wrote:
> > On Tue 08-05-18 13:46:37, Roman Gushchin wrote:
> > > Commit e27be240df53 ("mm: memcg: make sure memory.events is
> > > uptodate when waking pollers") converted most of memcg event
> > > counters to per-memcg atomics, which made them less confusing
> > > for a user. The "oom_kill" counter remained untouched, so now
> > > it behaves differently than other counters (including "oom").
> > > This adds nothing but confusion.
> > > 
> > > Let's fix this by adding the MEMCG_OOM_KILL event, and follow
> > > the MEMCG_OOM approach. This also removes a hack from
> > > count_memcg_event_mm(), introduced earlier specially for the
> > > OOM_KILL counter.
> > 
> > I agree that the current OOM_KILL is confusing. But do we really need
> > another memcg_memory_event_mm helper used for only one counter rather
> > than reusing memcg_memory_event? __oom_kill_process doesn't have the memcg
> > but nothing should really prevent us from adding the context
> > (oom_control) there, no?
> 
> Not sure that I follow. oom_control has a memcg pointer,
> but it's a pointer to the cgroup where the OOM happened.
> In particular, it's NULL for a system-wide OOM.
> 
> And we do send the OOM_KILL event to the cgroup
> which actually contains the process.

You are right! For some reason I thought we counted events on the
hierarchy which is under OOM. I was wrong.

Acked-by: Michal Hocko <mhocko@suse.com>
diff mbox

Patch

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6cbea2f25a87..794475db7368 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -54,6 +54,7 @@  enum memcg_memory_event {
 	MEMCG_HIGH,
 	MEMCG_MAX,
 	MEMCG_OOM,
+	MEMCG_OOM_KILL,
 	MEMCG_SWAP_MAX,
 	MEMCG_SWAP_FAIL,
 	MEMCG_NR_MEMORY_EVENTS,
@@ -721,11 +722,8 @@  static inline void count_memcg_event_mm(struct mm_struct *mm,
 
 	rcu_read_lock();
 	memcg = rcu_dereference(mm->memcg);
-	if (likely(memcg)) {
+	if (likely(memcg))
 		count_memcg_events(memcg, idx, 1);
-		if (idx == OOM_KILL)
-			cgroup_file_notify(&memcg->events_file);
-	}
 	rcu_read_unlock();
 }
 
@@ -736,6 +734,21 @@  static inline void memcg_memory_event(struct mem_cgroup *memcg,
 	cgroup_file_notify(&memcg->events_file);
 }
 
+static inline void memcg_memory_event_mm(struct mm_struct *mm,
+					 enum memcg_memory_event event)
+{
+	struct mem_cgroup *memcg;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	rcu_read_lock();
+	memcg = rcu_dereference(mm->memcg);
+	if (likely(memcg))
+		memcg_memory_event(memcg, event);
+	rcu_read_unlock();
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void mem_cgroup_split_huge_fixup(struct page *head);
 #endif
@@ -757,6 +770,11 @@  static inline void memcg_memory_event(struct mem_cgroup *memcg,
 {
 }
 
+static inline void memcg_memory_event_mm(struct mm_struct *mm,
+					 enum memcg_memory_event event)
+{
+}
+
 static inline bool mem_cgroup_low(struct mem_cgroup *root,
 				  struct mem_cgroup *memcg)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 10973671e562..38717630305d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3772,7 +3772,8 @@  static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
 
 	seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
 	seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
-	seq_printf(sf, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
+	seq_printf(sf, "oom_kill %lu\n",
+		   atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
 	return 0;
 }
 
@@ -5529,7 +5530,8 @@  static int memory_events_show(struct seq_file *m, void *v)
 		   atomic_long_read(&memcg->memory_events[MEMCG_MAX]));
 	seq_printf(m, "oom %lu\n",
 		   atomic_long_read(&memcg->memory_events[MEMCG_OOM]));
-	seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
+	seq_printf(m, "oom_kill %lu\n",
+		   atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
 
 	return 0;
 }
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 8f7d8dd99e5d..6b74142a1259 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -868,7 +868,7 @@  static void __oom_kill_process(struct task_struct *victim)
 
 	/* Raise event before sending signal: task reaper must see this */
 	count_vm_event(OOM_KILL);
-	count_memcg_event_mm(mm, OOM_KILL);
+	memcg_memory_event_mm(mm, MEMCG_OOM_KILL);
 
 	/*
 	 * We should send SIGKILL before granting access to memory reserves