diff mbox series

[RFC] kernel/hung_task.c: disable on suspend

Message ID 20180912161119.2692-1-vkuznets@redhat.com (mailing list archive)
State Superseded, archived
Headers show
Series [RFC] kernel/hung_task.c: disable on suspend | expand

Commit Message

Vitaly Kuznetsov Sept. 12, 2018, 4:11 p.m. UTC
It is possible to observe hung_task complaints when system goes to
suspend-to-idle state:

 PM: Syncing filesystems ... done.
 Freezing user space processes ... (elapsed 0.001 seconds) done.
 OOM killer disabled.
 Freezing remaining freezable tasks ... (elapsed 0.002 seconds) done.
 sd 0:0:0:0: [sda] Synchronizing SCSI cache
 INFO: task bash:1569 blocked for more than 120 seconds.
       Not tainted 4.19.0-rc3_+ #687
 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
 bash            D    0  1569    604 0x00000000
 Call Trace:
  ? __schedule+0x1fe/0x7e0
  schedule+0x28/0x80
  suspend_devices_and_enter+0x4ac/0x750
  pm_suspend+0x2c0/0x310

Register a PM notifier to disable the detector on suspend and re-enable
back on wakeup.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
RFC: It really makes me wonder why nobody reported this before, makes
 me think I'm missing something.
---
 kernel/hung_task.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

Comments

Rafael J. Wysocki Sept. 13, 2018, 7:06 a.m. UTC | #1
On Wed, Sep 12, 2018 at 6:11 PM Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>
> It is possible to observe hung_task complaints when system goes to
> suspend-to-idle state:
>
>  PM: Syncing filesystems ... done.
>  Freezing user space processes ... (elapsed 0.001 seconds) done.
>  OOM killer disabled.
>  Freezing remaining freezable tasks ... (elapsed 0.002 seconds) done.
>  sd 0:0:0:0: [sda] Synchronizing SCSI cache
>  INFO: task bash:1569 blocked for more than 120 seconds.
>        Not tainted 4.19.0-rc3_+ #687
>  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>  bash            D    0  1569    604 0x00000000
>  Call Trace:
>   ? __schedule+0x1fe/0x7e0
>   schedule+0x28/0x80
>   suspend_devices_and_enter+0x4ac/0x750
>   pm_suspend+0x2c0/0x310

This actually is a good catch, but the problem is related to what
happens to the monotonic clock during suspend to idle.

The clock issue needs to be addressed anyway IMO and then this problem
will go away automatically.

> Register a PM notifier to disable the detector on suspend and re-enable
> back on wakeup.
>
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
> RFC: It really makes me wonder why nobody reported this before, makes
>  me think I'm missing something.
> ---
>  kernel/hung_task.c | 26 +++++++++++++++++++++++++-
>  1 file changed, 25 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> index b9132d1269ef..d75f288c016f 100644
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -15,6 +15,7 @@
>  #include <linux/lockdep.h>
>  #include <linux/export.h>
>  #include <linux/sysctl.h>
> +#include <linux/suspend.h>
>  #include <linux/utsname.h>
>  #include <linux/sched/signal.h>
>  #include <linux/sched/debug.h>
> @@ -242,6 +243,24 @@ void reset_hung_task_detector(void)
>  }
>  EXPORT_SYMBOL_GPL(reset_hung_task_detector);
>
> +static bool hung_detector_suspended;
> +
> +static int hungtask_pm_notify(struct notifier_block *self,
> +                             unsigned long action, void *hcpu)
> +{
> +       switch (action) {
> +       case PM_SUSPEND_PREPARE:

You'd want PM_HIBERNATION_PREPARE here too I think.

> +               hung_detector_suspended = true;
> +               break;
> +       case PM_POST_SUSPEND:

And PM_POST_HIBERNATION here for consistency.

> +               hung_detector_suspended = false;
> +               break;
> +       default:
> +               break;
> +       }
> +       return NOTIFY_OK;
> +}
> +
>  /*
>   * kthread which checks for tasks stuck in D state
>   */
> @@ -261,7 +280,8 @@ static int watchdog(void *dummy)
>                 interval = min_t(unsigned long, interval, timeout);
>                 t = hung_timeout_jiffies(hung_last_checked, interval);
>                 if (t <= 0) {
> -                       if (!atomic_xchg(&reset_hung_task, 0))
> +                       if (!atomic_xchg(&reset_hung_task, 0) &&
> +                           !hung_detector_suspended)
>                                 check_hung_uninterruptible_tasks(timeout);
>                         hung_last_checked = jiffies;
>                         continue;
> @@ -275,6 +295,10 @@ static int watchdog(void *dummy)
>  static int __init hung_task_init(void)
>  {
>         atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
> +
> +       /* Disable hung task detector on suspend */
> +       pm_notifier(hungtask_pm_notify, 0);
> +
>         watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
>
>         return 0;
> --
> 2.14.4
>
Vitaly Kuznetsov Sept. 13, 2018, 8:47 a.m. UTC | #2
"Rafael J. Wysocki" <rafael@kernel.org> writes:

> On Wed, Sep 12, 2018 at 6:11 PM Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>>
>> It is possible to observe hung_task complaints when system goes to
>> suspend-to-idle state:
>>
>>  PM: Syncing filesystems ... done.
>>  Freezing user space processes ... (elapsed 0.001 seconds) done.
>>  OOM killer disabled.
>>  Freezing remaining freezable tasks ... (elapsed 0.002 seconds) done.
>>  sd 0:0:0:0: [sda] Synchronizing SCSI cache
>>  INFO: task bash:1569 blocked for more than 120 seconds.
>>        Not tainted 4.19.0-rc3_+ #687
>>  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>>  bash            D    0  1569    604 0x00000000
>>  Call Trace:
>>   ? __schedule+0x1fe/0x7e0
>>   schedule+0x28/0x80
>>   suspend_devices_and_enter+0x4ac/0x750
>>   pm_suspend+0x2c0/0x310
>
> This actually is a good catch, but the problem is related to what
> happens to the monotonic clock during suspend to idle.
>
> The clock issue needs to be addressed anyway IMO and then this problem
> will go away automatically.

Do I understand it correctly that the suggestion is to fully suspend
monothonic clock in s2idle (and don't advance it after resume)?

>
>> Register a PM notifier to disable the detector on suspend and re-enable
>> back on wakeup.
>>
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
>> ---
>> RFC: It really makes me wonder why nobody reported this before, makes
>>  me think I'm missing something.
>> ---
>>  kernel/hung_task.c | 26 +++++++++++++++++++++++++-
>>  1 file changed, 25 insertions(+), 1 deletion(-)
>>
>> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
>> index b9132d1269ef..d75f288c016f 100644
>> --- a/kernel/hung_task.c
>> +++ b/kernel/hung_task.c
>> @@ -15,6 +15,7 @@
>>  #include <linux/lockdep.h>
>>  #include <linux/export.h>
>>  #include <linux/sysctl.h>
>> +#include <linux/suspend.h>
>>  #include <linux/utsname.h>
>>  #include <linux/sched/signal.h>
>>  #include <linux/sched/debug.h>
>> @@ -242,6 +243,24 @@ void reset_hung_task_detector(void)
>>  }
>>  EXPORT_SYMBOL_GPL(reset_hung_task_detector);
>>
>> +static bool hung_detector_suspended;
>> +
>> +static int hungtask_pm_notify(struct notifier_block *self,
>> +                             unsigned long action, void *hcpu)
>> +{
>> +       switch (action) {
>> +       case PM_SUSPEND_PREPARE:
>
> You'd want PM_HIBERNATION_PREPARE here too I think.
>
>> +               hung_detector_suspended = true;
>> +               break;
>> +       case PM_POST_SUSPEND:
>
> And PM_POST_HIBERNATION here for consistency.
>

Sure, will do in v1.

>> +               hung_detector_suspended = false;
>> +               break;
>> +       default:
>> +               break;
>> +       }
>> +       return NOTIFY_OK;
>> +}
>> +
>>  /*
>>   * kthread which checks for tasks stuck in D state
>>   */
>> @@ -261,7 +280,8 @@ static int watchdog(void *dummy)
>>                 interval = min_t(unsigned long, interval, timeout);
>>                 t = hung_timeout_jiffies(hung_last_checked, interval);
>>                 if (t <= 0) {
>> -                       if (!atomic_xchg(&reset_hung_task, 0))
>> +                       if (!atomic_xchg(&reset_hung_task, 0) &&
>> +                           !hung_detector_suspended)
>>                                 check_hung_uninterruptible_tasks(timeout);
>>                         hung_last_checked = jiffies;
>>                         continue;
>> @@ -275,6 +295,10 @@ static int watchdog(void *dummy)
>>  static int __init hung_task_init(void)
>>  {
>>         atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
>> +
>> +       /* Disable hung task detector on suspend */
>> +       pm_notifier(hungtask_pm_notify, 0);
>> +
>>         watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
>>
>>         return 0;
>> --
>> 2.14.4
>>
Rafael J. Wysocki Sept. 13, 2018, 9:04 a.m. UTC | #3
On Thu, Sep 13, 2018 at 10:47 AM Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>
> "Rafael J. Wysocki" <rafael@kernel.org> writes:
>
> > On Wed, Sep 12, 2018 at 6:11 PM Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
> >>
> >> It is possible to observe hung_task complaints when system goes to
> >> suspend-to-idle state:
> >>
> >>  PM: Syncing filesystems ... done.
> >>  Freezing user space processes ... (elapsed 0.001 seconds) done.
> >>  OOM killer disabled.
> >>  Freezing remaining freezable tasks ... (elapsed 0.002 seconds) done.
> >>  sd 0:0:0:0: [sda] Synchronizing SCSI cache
> >>  INFO: task bash:1569 blocked for more than 120 seconds.
> >>        Not tainted 4.19.0-rc3_+ #687
> >>  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> >>  bash            D    0  1569    604 0x00000000
> >>  Call Trace:
> >>   ? __schedule+0x1fe/0x7e0
> >>   schedule+0x28/0x80
> >>   suspend_devices_and_enter+0x4ac/0x750
> >>   pm_suspend+0x2c0/0x310
> >
> > This actually is a good catch, but the problem is related to what
> > happens to the monotonic clock during suspend to idle.
> >
> > The clock issue needs to be addressed anyway IMO and then this problem
> > will go away automatically.
>
> Do I understand it correctly that the suggestion is to fully suspend
> monothonic clock in s2idle (and don't advance it after resume)?

Something like that.

We already do that to some extent, but kind of with the assumption
that it will not grow too much in s2idle_loop(), but it may actually
grow too much in there, so we need to compensate for that to make the
s2idle behavior reflect the suspend-to-RAM one.

I have a patch for that, will post it shortly.

Thanks,
Rafael
Oleg Nesterov Sept. 13, 2018, 3:23 p.m. UTC | #4
On 09/13, Rafael J. Wysocki wrote:
>
> On Wed, Sep 12, 2018 at 6:11 PM Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
> >
> > It is possible to observe hung_task complaints when system goes to
> > suspend-to-idle state:
> >
> >  PM: Syncing filesystems ... done.
> >  Freezing user space processes ... (elapsed 0.001 seconds) done.
> >  OOM killer disabled.
> >  Freezing remaining freezable tasks ... (elapsed 0.002 seconds) done.
> >  sd 0:0:0:0: [sda] Synchronizing SCSI cache
> >  INFO: task bash:1569 blocked for more than 120 seconds.
> >        Not tainted 4.19.0-rc3_+ #687
> >  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> >  bash            D    0  1569    604 0x00000000
> >  Call Trace:
> >   ? __schedule+0x1fe/0x7e0
> >   schedule+0x28/0x80
> >   suspend_devices_and_enter+0x4ac/0x750
> >   pm_suspend+0x2c0/0x310
>
> This actually is a good catch, but the problem is related to what
> happens to the monotonic clock during suspend to idle.
>
> The clock issue needs to be addressed anyway IMO and then this problem
> will go away automatically.

I don't understand your discussion with Vitaly, but shouldn't we make
khungtaskd thread freezable anyway?

Oleg.

--- x/kernel/hung_task.c
+++ x/kernel/hung_task.c
@@ -185,7 +185,7 @@ static void check_hung_uninterruptible_t
 	hung_task_show_lock = false;
 	rcu_read_lock();
 	for_each_process_thread(g, t) {
-		if (!max_count--)
+		if (!max_count-- || freezing(current))
 			goto unlock;
 		if (!--batch_count) {
 			batch_count = HUNG_TASK_BATCHING;
@@ -249,6 +249,7 @@ static int watchdog(void *dummy)
 {
 	unsigned long hung_last_checked = jiffies;
 
+	set_freezable();
 	set_user_nice(current, 0);
 
 	for ( ; ; ) {
@@ -266,7 +267,7 @@ static int watchdog(void *dummy)
 			hung_last_checked = jiffies;
 			continue;
 		}
-		schedule_timeout_interruptible(t);
+		freezable_schedule_timeout_interruptible(t);
 	}
 
 	return 0;
Rafael J. Wysocki Sept. 13, 2018, 3:27 p.m. UTC | #5
On Thu, Sep 13, 2018 at 5:23 PM Oleg Nesterov <oleg@redhat.com> wrote:
>
> On 09/13, Rafael J. Wysocki wrote:
> >
> > On Wed, Sep 12, 2018 at 6:11 PM Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
> > >
> > > It is possible to observe hung_task complaints when system goes to
> > > suspend-to-idle state:
> > >
> > >  PM: Syncing filesystems ... done.
> > >  Freezing user space processes ... (elapsed 0.001 seconds) done.
> > >  OOM killer disabled.
> > >  Freezing remaining freezable tasks ... (elapsed 0.002 seconds) done.
> > >  sd 0:0:0:0: [sda] Synchronizing SCSI cache
> > >  INFO: task bash:1569 blocked for more than 120 seconds.
> > >        Not tainted 4.19.0-rc3_+ #687
> > >  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> > >  bash            D    0  1569    604 0x00000000
> > >  Call Trace:
> > >   ? __schedule+0x1fe/0x7e0
> > >   schedule+0x28/0x80
> > >   suspend_devices_and_enter+0x4ac/0x750
> > >   pm_suspend+0x2c0/0x310
> >
> > This actually is a good catch, but the problem is related to what
> > happens to the monotonic clock during suspend to idle.
> >
> > The clock issue needs to be addressed anyway IMO and then this problem
> > will go away automatically.
>
> I don't understand your discussion with Vitaly, but shouldn't we make
> khungtaskd thread freezable anyway?

Well, if the Vitaly's patch is applied, that shouldn't be necessary
and I wouldn't like to add more freezable kernel threads if that's
avoidable TBH.

Cheers,
Rafael
diff mbox series

Patch

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index b9132d1269ef..d75f288c016f 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -15,6 +15,7 @@ 
 #include <linux/lockdep.h>
 #include <linux/export.h>
 #include <linux/sysctl.h>
+#include <linux/suspend.h>
 #include <linux/utsname.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
@@ -242,6 +243,24 @@  void reset_hung_task_detector(void)
 }
 EXPORT_SYMBOL_GPL(reset_hung_task_detector);
 
+static bool hung_detector_suspended;
+
+static int hungtask_pm_notify(struct notifier_block *self,
+			      unsigned long action, void *hcpu)
+{
+	switch (action) {
+	case PM_SUSPEND_PREPARE:
+		hung_detector_suspended = true;
+		break;
+	case PM_POST_SUSPEND:
+		hung_detector_suspended = false;
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
 /*
  * kthread which checks for tasks stuck in D state
  */
@@ -261,7 +280,8 @@  static int watchdog(void *dummy)
 		interval = min_t(unsigned long, interval, timeout);
 		t = hung_timeout_jiffies(hung_last_checked, interval);
 		if (t <= 0) {
-			if (!atomic_xchg(&reset_hung_task, 0))
+			if (!atomic_xchg(&reset_hung_task, 0) &&
+			    !hung_detector_suspended)
 				check_hung_uninterruptible_tasks(timeout);
 			hung_last_checked = jiffies;
 			continue;
@@ -275,6 +295,10 @@  static int watchdog(void *dummy)
 static int __init hung_task_init(void)
 {
 	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
+
+	/* Disable hung task detector on suspend */
+	pm_notifier(hungtask_pm_notify, 0);
+
 	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
 
 	return 0;