diff mbox series

[v3] mm: memcontrol: fix potential oom_lock recursion deadlock

Message ID 86afb39f-8c65-bec2-6cfc-c5e3cd600c0b@I-love.SAKURA.ne.jp (mailing list archive)
State New
Headers show
Series [v3] mm: memcontrol: fix potential oom_lock recursion deadlock | expand

Commit Message

Tetsuo Handa July 22, 2022, 10:45 a.m. UTC
syzbot is reporting a GFP_KERNEL allocation with oom_lock held when reporting
a memcg OOM [1]. Such an allocation request might deadlock the system, for
__alloc_pages_may_oom() cannot invoke the global OOM killer due to oom_lock
being already held by the caller.

Fix this problem by removing the allocation from memory_stat_format()
completely, and passing a static buffer when calling from the memcg OOM path.

Link: https://syzkaller.appspot.com/bug?extid=2d2aeadc6ce1e1f11d45 [1]
Reported-by: syzbot <syzbot+2d2aeadc6ce1e1f11d45@syzkaller.appspotmail.com>
Suggested-by: Michal Hocko <mhocko@suse.com>
Fixes: c8713d0b23123759 ("mm: memcontrol: dump memory.stat during cgroup OOM")
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Michal Hocko <mhocko@suse.com>
---
Changes in v3:
  Update patch description.

Changes in v2:
  Use static buffer for OOM reporting, suggested by Michal Hocko <mhocko@suse.com>.

 mm/memcontrol.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

Comments

Michal Hocko July 22, 2022, 11:04 a.m. UTC | #1
On Fri 22-07-22 19:45:39, Tetsuo Handa wrote:
> syzbot is reporting GFP_KERNEL allocation with oom_lock held when reporting
> memcg OOM [1]. Such allocation request might deadlock the system, for
> __alloc_pages_may_oom() cannot invoke global OOM killer due to oom_lock
> being already held by the caller.

OK, I misunderstood your previous wording and have now realized
that there are 2 issues here. One of them is a (less likely) deadlock on
the oom_lock not making forward progress (that would require a global OOM
racing with a memcg OOM), and the other is the GFP_NOFS->GFP_KERNEL dependency,
which can deadlock even without the above global race.
Correct?

Sorry, I could have realized that sooner.

> Fix this problem by removing the allocation from memory_stat_format()
> completely, and pass static buffer when calling from memcg OOM path.
> 
> Link: https://syzkaller.appspot.com/bug?extid=2d2aeadc6ce1e1f11d45 [1]
> Reported-by: syzbot <syzbot+2d2aeadc6ce1e1f11d45@syzkaller.appspotmail.com>
> Suggested-by: Michal Hocko <mhocko@suse.com>
> Fixes: c8713d0b23123759 ("mm: memcontrol: dump memory.stat during cgroup OOM")
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Acked-by: Michal Hocko <mhocko@suse.com>
> ---
> Changes in v3:
>   Update patch description.
> 
> Changes in v2:
>   Use static buffer for OOM reporting, suggested by Michal Hocko <mhocko@suse.com>.
> 
>  mm/memcontrol.c | 22 +++++++++-------------
>  1 file changed, 9 insertions(+), 13 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 618c366a2f07..8092be2fbb7c 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1460,14 +1460,12 @@ static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg,
>  	return memcg_page_state(memcg, item) * memcg_page_state_unit(item);
>  }
>  
> -static char *memory_stat_format(struct mem_cgroup *memcg)
> +static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize)
>  {
>  	struct seq_buf s;
>  	int i;
>  
> -	seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
> -	if (!s.buffer)
> -		return NULL;
> +	seq_buf_init(&s, buf, bufsize);
>  
>  	/*
>  	 * Provide statistics on the state of the memory subsystem as
> @@ -1533,8 +1531,6 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
>  
>  	/* The above should easily fit into one page */
>  	WARN_ON_ONCE(seq_buf_has_overflowed(&s));
> -
> -	return s.buffer;
>  }
>  
>  #define K(x) ((x) << (PAGE_SHIFT-10))
> @@ -1570,7 +1566,10 @@ void mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct *
>   */
>  void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
>  {
> -	char *buf;
> +	/* Use static buffer, for the caller is holding oom_lock. */
> +	static char buf[PAGE_SIZE];
> +
> +	lockdep_assert_held(&oom_lock);
>  
>  	pr_info("memory: usage %llukB, limit %llukB, failcnt %lu\n",
>  		K((u64)page_counter_read(&memcg->memory)),
> @@ -1591,11 +1590,8 @@ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
>  	pr_info("Memory cgroup stats for ");
>  	pr_cont_cgroup_path(memcg->css.cgroup);
>  	pr_cont(":");
> -	buf = memory_stat_format(memcg);
> -	if (!buf)
> -		return;
> +	memory_stat_format(memcg, buf, sizeof(buf));
>  	pr_info("%s", buf);
> -	kfree(buf);
>  }
>  
>  /*
> @@ -6335,11 +6331,11 @@ static int memory_events_local_show(struct seq_file *m, void *v)
>  static int memory_stat_show(struct seq_file *m, void *v)
>  {
>  	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
> -	char *buf;
> +	char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
>  
> -	buf = memory_stat_format(memcg);
>  	if (!buf)
>  		return -ENOMEM;
> +	memory_stat_format(memcg, buf, PAGE_SIZE);
>  	seq_puts(m, buf);
>  	kfree(buf);
>  	return 0;
> -- 
> 2.18.4
Tetsuo Handa July 22, 2022, 11:12 a.m. UTC | #2
On 2022/07/22 20:04, Michal Hocko wrote:
> On Fri 22-07-22 19:45:39, Tetsuo Handa wrote:
>> syzbot is reporting GFP_KERNEL allocation with oom_lock held when reporting
>> memcg OOM [1]. Such allocation request might deadlock the system, for
>> __alloc_pages_may_oom() cannot invoke global OOM killer due to oom_lock
>> being already held by the caller.
> 
> OK, I misunderstood your previous wording and have now realized
> that there are 2 issues here. One of them is a (less likely) deadlock on
> the oom_lock not making forward progress (that would require a global OOM
> racing with a memcg OOM), and the other is the GFP_NOFS->GFP_KERNEL dependency,
> which can deadlock even without the above global race.
> Correct?

Correct.

> 
> Sorry I could have realized that sooner.
> 

No problem. Thanks for the suggestion.
diff mbox series

Patch

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 618c366a2f07..8092be2fbb7c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1460,14 +1460,12 @@  static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg,
 	return memcg_page_state(memcg, item) * memcg_page_state_unit(item);
 }
 
-static char *memory_stat_format(struct mem_cgroup *memcg)
+static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize)
 {
 	struct seq_buf s;
 	int i;
 
-	seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
-	if (!s.buffer)
-		return NULL;
+	seq_buf_init(&s, buf, bufsize);
 
 	/*
 	 * Provide statistics on the state of the memory subsystem as
@@ -1533,8 +1531,6 @@  static char *memory_stat_format(struct mem_cgroup *memcg)
 
 	/* The above should easily fit into one page */
 	WARN_ON_ONCE(seq_buf_has_overflowed(&s));
-
-	return s.buffer;
 }
 
 #define K(x) ((x) << (PAGE_SHIFT-10))
@@ -1570,7 +1566,10 @@  void mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct *
  */
 void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
 {
-	char *buf;
+	/* Use static buffer, for the caller is holding oom_lock. */
+	static char buf[PAGE_SIZE];
+
+	lockdep_assert_held(&oom_lock);
 
 	pr_info("memory: usage %llukB, limit %llukB, failcnt %lu\n",
 		K((u64)page_counter_read(&memcg->memory)),
@@ -1591,11 +1590,8 @@  void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
 	pr_info("Memory cgroup stats for ");
 	pr_cont_cgroup_path(memcg->css.cgroup);
 	pr_cont(":");
-	buf = memory_stat_format(memcg);
-	if (!buf)
-		return;
+	memory_stat_format(memcg, buf, sizeof(buf));
 	pr_info("%s", buf);
-	kfree(buf);
 }
 
 /*
@@ -6335,11 +6331,11 @@  static int memory_events_local_show(struct seq_file *m, void *v)
 static int memory_stat_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
-	char *buf;
+	char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
 
-	buf = memory_stat_format(memcg);
 	if (!buf)
 		return -ENOMEM;
+	memory_stat_format(memcg, buf, PAGE_SIZE);
 	seq_puts(m, buf);
 	kfree(buf);
 	return 0;