diff mbox series

PM / core: Allow configuring the DPM watchdog to warn earlier than panic

Message ID 20241218095613.1.I4554f931b8da97948f308ecc651b124338ee9603@changeid (mailing list archive)
State New
Headers show
Series PM / core: Allow configuring the DPM watchdog to warn earlier than panic | expand

Commit Message

Douglas Anderson Dec. 18, 2024, 5:56 p.m. UTC
Allow configuring the DPM watchdog to warn about slow suspend/resume
functions without causing a system panic(). This allows you to set the
DPM_WATCHDOG_WARNING_TIMEOUT to something like 5 or 10 seconds to get
warnings about slow suspend/resume functions that eventually succeed.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
---

 drivers/base/power/main.c | 22 ++++++++++++++++++----
 kernel/power/Kconfig      |  8 +++++++-
 2 files changed, 25 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 4a67e83300e1..239bcf93f821 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -496,6 +496,7 @@  struct dpm_watchdog {
 	struct device		*dev;
 	struct task_struct	*tsk;
 	struct timer_list	timer;
+	bool			fatal;
 };
 
 #define DECLARE_DPM_WATCHDOG_ON_STACK(wd) \
@@ -512,11 +513,23 @@  struct dpm_watchdog {
 static void dpm_watchdog_handler(struct timer_list *t)
 {
 	struct dpm_watchdog *wd = from_timer(wd, t, timer);
+	struct timer_list *timer = &wd->timer;
+	unsigned int time_left;
+
+	if (wd->fatal) {
+		dev_emerg(wd->dev, "**** DPM device timeout ****\n");
+		show_stack(wd->tsk, NULL, KERN_EMERG);
+		panic("%s %s: unrecoverable failure\n",
+			dev_driver_string(wd->dev), dev_name(wd->dev));
+	}
 
-	dev_emerg(wd->dev, "**** DPM device timeout ****\n");
+	time_left = CONFIG_DPM_WATCHDOG_TIMEOUT - CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT;
+	dev_emerg(wd->dev, "**** DPM device timeout after %u seconds; %u seconds until panic ****\n",
+		  CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT, time_left);
 	show_stack(wd->tsk, NULL, KERN_EMERG);
-	panic("%s %s: unrecoverable failure\n",
-		dev_driver_string(wd->dev), dev_name(wd->dev));
+
+	wd->fatal = true;
+	mod_timer(timer, jiffies + HZ * time_left);
 }
 
 /**
@@ -530,10 +543,11 @@  static void dpm_watchdog_set(struct dpm_watchdog *wd, struct device *dev)
 
 	wd->dev = dev;
 	wd->tsk = current;
+	wd->fatal = CONFIG_DPM_WATCHDOG_TIMEOUT == CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT;
 
 	timer_setup_on_stack(timer, dpm_watchdog_handler, 0);
 	/* use same timeout value for both suspend and resume */
-	timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_TIMEOUT;
+	timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT;
 	add_timer(timer);
 }
 
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index afce8130d8b9..3387b94e76c1 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -257,11 +257,17 @@  config DPM_WATCHDOG
 	  boot session.
 
 config DPM_WATCHDOG_TIMEOUT
-	int "Watchdog timeout in seconds"
+	int "Watchdog timeout to panic in seconds"
 	range 1 120
 	default 120
 	depends on DPM_WATCHDOG
 
+config DPM_WATCHDOG_WARNING_TIMEOUT
+	int "Watchdog timeout to warn in seconds"
+	range 1 DPM_WATCHDOG_TIMEOUT
+	default DPM_WATCHDOG_TIMEOUT
+	depends on DPM_WATCHDOG
+
 config PM_TRACE
 	bool
 	help