diff mbox

INFO: task hung in __sb_start_write

Message ID f91e1c82-9693-cca3-4ab7-ecd9d9881fb4@i-love.sakura.ne.jp (mailing list archive)
State New, archived
Headers show

Commit Message

Tetsuo Handa June 19, 2018, 11:10 a.m. UTC
On 2018/06/16 4:40, Tetsuo Handa wrote:
> Hmm, there might be other locations calling percpu_rwsem_release() ?

There are other locations calling percpu_rwsem_release(), but only a few.

include/linux/fs.h:1494:#define __sb_writers_release(sb, lev)   \
include/linux/fs.h-1495-        percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)

fs/btrfs/transaction.c:1821:            __sb_writers_release(fs_info->sb, SB_FREEZE_FS);
fs/aio.c:1566:                  __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
fs/xfs/xfs_aops.c:211:  __sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);



I'd like to check what atomic_long_read(&sem->rw_sem.count) says
when hung task is reported.

Dmitry, if you succeed to reproduce khungtaskd messages, can you try this patch?

---
 include/linux/sched.h         |  1 +
 kernel/hung_task.c            | 25 +++++++++++++++++++++++++
 kernel/locking/percpu-rwsem.c |  2 ++
 3 files changed, 28 insertions(+)

Comments

Dmitry Vyukov June 19, 2018, 11:47 a.m. UTC | #1
On Tue, Jun 19, 2018 at 1:10 PM, Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp> wrote:
> On 2018/06/16 4:40, Tetsuo Handa wrote:
>> Hmm, there might be other locations calling percpu_rwsem_release() ?
>
> There are other locations calling percpu_rwsem_release(), but only a few.
>
> include/linux/fs.h:1494:#define __sb_writers_release(sb, lev)   \
> include/linux/fs.h-1495-        percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
>
> fs/btrfs/transaction.c:1821:            __sb_writers_release(fs_info->sb, SB_FREEZE_FS);
> fs/aio.c:1566:                  __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
> fs/xfs/xfs_aops.c:211:  __sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);
>
>
>
> I'd like to check what atomic_long_read(&sem->rw_sem.count) says
> when hung task is reported.
>
> Dmitry, if you succeed to reproduce khungtaskd messages, can you try this patch?
>
> ---
>  include/linux/sched.h         |  1 +
>  kernel/hung_task.c            | 25 +++++++++++++++++++++++++
>  kernel/locking/percpu-rwsem.c |  2 ++
>  3 files changed, 28 insertions(+)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 87bf02d..af95251 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1179,6 +1179,7 @@ struct task_struct {
>         /* Used by LSM modules for access restriction: */
>         void                            *security;
>  #endif
> +       struct percpu_rw_semaphore      *pcpu_rwsem_read_waiting;
>
>         /*
>          * New fields for task_struct should be added above here, so that
> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> index 32b4794..409cc82 100644
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -18,6 +18,7 @@
>  #include <linux/utsname.h>
>  #include <linux/sched/signal.h>
>  #include <linux/sched/debug.h>
> +#include <linux/percpu-rwsem.h>
>
>  #include <trace/events/sched.h>
>
> @@ -77,6 +78,29 @@ static int __init hung_task_panic_setup(char *str)
>         .notifier_call = hung_task_panic,
>  };
>
> +static void dump_percpu_rwsem_state(struct percpu_rw_semaphore *sem)
> +{
> +       unsigned int sum = 0;
> +       int cpu;
> +
> +       if (!sem)
> +               return;
> +       pr_info("percpu_rw_semaphore(%p)\n", sem);
> +       pr_info("  ->rw_sem.count=0x%lx\n",
> +               atomic_long_read(&sem->rw_sem.count));
> +       pr_info("  ->rss.gp_state=%d\n", sem->rss.gp_state);
> +       pr_info("  ->rss.gp_count=%d\n", sem->rss.gp_count);
> +       pr_info("  ->rss.cb_state=%d\n", sem->rss.cb_state);
> +       pr_info("  ->rss.gp_type=%d\n", sem->rss.gp_type);
> +       pr_info("  ->readers_block=%d\n", sem->readers_block);
> +       for_each_possible_cpu(cpu)
> +               sum += per_cpu(*sem->read_count, cpu);
> +       pr_info("  ->read_count=%d\n", sum);
> +       pr_info("  ->list_empty(rw_sem.wait_list)=%d\n",
> +              list_empty(&sem->rw_sem.wait_list));
> +       pr_info("  ->writer.task=%p\n", sem->writer.task);
> +}
> +
>  static void check_hung_task(struct task_struct *t, unsigned long timeout)
>  {
>         unsigned long switch_count = t->nvcsw + t->nivcsw;
> @@ -122,6 +146,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
>                 pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
>                         " disables this message.\n");
>                 sched_show_task(t);
> +               dump_percpu_rwsem_state(t->pcpu_rwsem_read_waiting);
>                 hung_task_show_lock = true;
>         }
>
> diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
> index 883cf1b..b3654ab 100644
> --- a/kernel/locking/percpu-rwsem.c
> +++ b/kernel/locking/percpu-rwsem.c
> @@ -82,7 +82,9 @@ int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
>         /*
>          * Avoid lockdep for the down/up_read() we already have them.
>          */
> +       current->pcpu_rwsem_read_waiting = sem;
>         __down_read(&sem->rw_sem);
> +       current->pcpu_rwsem_read_waiting = NULL;
>         this_cpu_inc(*sem->read_count);
>         __up_read(&sem->rw_sem);
>
> --
> 1.8.3.1



I wasn't able to reproduce this locally yet.

> If no, we would want a git tree for testing under syzbot.

Thinking of this, we could set up a sandbox instance that won't report
anything over email at all, but crashes will be available on the web.
We could point this instance to a custom git tree, where additional
patches can be applied as necessary.  The main question is: who will
manage this tree, and how? The tree needs to be rebased periodically,
patches applied, old patches taken out.
Tetsuo Handa June 19, 2018, 1 p.m. UTC | #2
On 2018/06/19 20:47, Dmitry Vyukov wrote:
>> If no, we would want a git tree for testing under syzbot.
> 
> Thinking of this, we could set up a sandbox instance that won't report
> anything over email at all, but crashes will be available on the web.
> We could point this instance to a custom git tree, where additional
> patches can be applied as necessary.  The main question is: who will
> manage this tree, and how? The tree needs to be rebased periodically,
> patches applied, old patches taken out.
> 

The simplest approach (if Linus can tolerate it) would be to merge any
debug printk() patches to linux.git with single kernel config option
(e.g. CONFIG_DEBUG_AID_FOR_SYZBOT=y) for turning them on/off.
Assuming that we will eventually be able to move to saving and analyzing
vmcore files, some bit of debug printk() code would be tolerable for now?

Another possible approach (if Andrew can tolerate it) would be to use
linux-next.git and carry debug printk() patches via mmotm tree.
diff mbox

Patch

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 87bf02d..af95251 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1179,6 +1179,7 @@  struct task_struct {
 	/* Used by LSM modules for access restriction: */
 	void				*security;
 #endif
+	struct percpu_rw_semaphore	*pcpu_rwsem_read_waiting;
 
 	/*
 	 * New fields for task_struct should be added above here, so that
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 32b4794..409cc82 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -18,6 +18,7 @@ 
 #include <linux/utsname.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
+#include <linux/percpu-rwsem.h>
 
 #include <trace/events/sched.h>
 
@@ -77,6 +78,29 @@  static int __init hung_task_panic_setup(char *str)
 	.notifier_call = hung_task_panic,
 };
 
+static void dump_percpu_rwsem_state(struct percpu_rw_semaphore *sem)
+{
+	unsigned int sum = 0;
+	int cpu;
+
+	if (!sem)
+		return;
+	pr_info("percpu_rw_semaphore(%p)\n", sem);
+	pr_info("  ->rw_sem.count=0x%lx\n",
+		atomic_long_read(&sem->rw_sem.count));
+	pr_info("  ->rss.gp_state=%d\n", sem->rss.gp_state);
+	pr_info("  ->rss.gp_count=%d\n", sem->rss.gp_count);
+	pr_info("  ->rss.cb_state=%d\n", sem->rss.cb_state);
+	pr_info("  ->rss.gp_type=%d\n", sem->rss.gp_type);
+	pr_info("  ->readers_block=%d\n", sem->readers_block);
+	for_each_possible_cpu(cpu)
+		sum += per_cpu(*sem->read_count, cpu);
+	pr_info("  ->read_count=%d\n", sum);
+	pr_info("  ->list_empty(rw_sem.wait_list)=%d\n",
+	       list_empty(&sem->rw_sem.wait_list));
+	pr_info("  ->writer.task=%p\n", sem->writer.task);
+}
+
 static void check_hung_task(struct task_struct *t, unsigned long timeout)
 {
 	unsigned long switch_count = t->nvcsw + t->nivcsw;
@@ -122,6 +146,7 @@  static void check_hung_task(struct task_struct *t, unsigned long timeout)
 		pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
 			" disables this message.\n");
 		sched_show_task(t);
+		dump_percpu_rwsem_state(t->pcpu_rwsem_read_waiting);
 		hung_task_show_lock = true;
 	}
 
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 883cf1b..b3654ab 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -82,7 +82,9 @@  int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
 	/*
 	 * Avoid lockdep for the down/up_read() we already have them.
 	 */
+	current->pcpu_rwsem_read_waiting = sem;
 	__down_read(&sem->rw_sem);
+	current->pcpu_rwsem_read_waiting = NULL;
 	this_cpu_inc(*sem->read_count);
 	__up_read(&sem->rw_sem);