diff mbox series

[v3] Extend watchdog timeout during kernel panic.

Message ID 20210128011425.8933-1-jp.ertola@hpe.com (mailing list archive)
State Changes Requested
Headers show
Series [v3] Extend watchdog timeout during kernel panic. | expand

Commit Message

JP Ertola Jan. 28, 2021, 1:14 a.m. UTC
If the watchdog timeout is set such that the crash kernel does not
have time to collect a coredump and the crash kernel is not equipped to
ping the watchdog timer itself, then a kernel coredump will not be collected
before the watchdog fires. This change registers a panic notifier and
callback so the watchdog timeout can be extended if a kernel panic occurs.
This timeout extension would give the crash kernel enough time to collect
a coredump before the CPU resets. The watchdog timeout is extended if and only
if a crash kernel image is loaded in memory and the watchdog is active at the
time of the panic.

Drivers have the option of implementing their own platform-dependent (PD)
callback at the same time they register their watchdog device (wdd) with the
Linux kernel. By registering their own PD callback, the watchdog device
can extend the watchdog timeout and perform other tasks in a panic context.

This may be desireable when uncommon hardware platforms are used. For
example, a power management subsystem controlled by an FPGA attached to
the CPU.

A Kconfig option has been added to configure the timeout duration at
compile-time. Default is zero seconds.

Signed-off-by: JP Ertola <jp.ertola@hpe.com>
---
 drivers/watchdog/Kconfig        | 13 +++++
 drivers/watchdog/watchdog_dev.c | 84 ++++++++++++++++++++++++++++++++-
 include/linux/watchdog.h        |  3 ++
 3 files changed, 99 insertions(+), 1 deletion(-)

Comments

Guenter Roeck Jan. 28, 2021, 1:54 a.m. UTC | #1
On 1/27/21 5:14 PM, JP Ertola wrote:
> If the watchdog timeout is set such that the crash kernel does not
> have time to collect a coredump and the crash kernel is not equipped to
> ping the watchdog timer itself, then a kernel coredump will not be collected
> before the watchdog fires. This change registers a panic notifier and
> callback so the watchdog timeout can be extended if a kernel panic occurs.
> This timeout extension would give the crash kernel enough time to collect
> a coredump before the CPU resets. The watchdog timeout is extended if and only
> if a crash kernel image is loaded in memory and the watchdog is active at the
> time of the panic.
> 
> Drivers have the option of implementing their own platform-dependent (PD)
> callback at the same time they register their watchdog device (wdd) with the
> Linux kernel. By registering their own PD callback, the watchdog device
> can extend the watchdog timeout and perform other tasks in a panic context.
> 
> This may be desireable when uncommon hardware platforms are used. For
> example, a power management subsystem controlled by an FPGA attached to
> the CPU.

This is only acceptable if there is an actual user for this callback.
I won't accept adding such a callback without a user. If you have one
in mind, please submit the patch adding it as part of a series.
That will let us evaluate if the callback is really needed and
what it is used for. Otherwise, please drop the callback.

> 
> A Kconfig option has been added to configure the timeout duration at
> compile-time. Default is zero seconds.
> 
> Signed-off-by: JP Ertola <jp.ertola@hpe.com>
> ---

Change log goes here, and I am not going back to v2 and v1 to try to figure
out what changed.

Guenter

>  drivers/watchdog/Kconfig        | 13 +++++
>  drivers/watchdog/watchdog_dev.c | 84 ++++++++++++++++++++++++++++++++-
>  include/linux/watchdog.h        |  3 ++
>  3 files changed, 99 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
> index fd7968635e6d..f1055985e100 100644
> --- a/drivers/watchdog/Kconfig
> +++ b/drivers/watchdog/Kconfig
> @@ -141,6 +141,19 @@ comment "Watchdog Device Drivers"
>  
>  # Architecture Independent
>  
> +config DEFAULT_WATCHDOG_CRASH_KERNEL_TIMEOUT
> +	int "Default timeout for watchdog timer before crash kernel starts (seconds)"
> +	default 0
> +	help
> +	  This option allows an extended timeout to be used for the watchdog when
> +	  the kernel panics and a crash kernel is about to start. This is helpful
> +	  when the existing WDT timeout value is less than the time required for
> +	  crash kernel to run and the crash kernel is unable to handle the
> +	  the watchdog itself. The timeout extension happens last in chain of
> +	  kernel panic handler callbacks just in case another panic handler
> +	  hangs unexpectedly. When this value is set to 0, the watchdog timeout
> +	  will not be changed.
> +
>  config SOFT_WATCHDOG
>  	tristate "Software watchdog"
>  	select WATCHDOG_CORE
> diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
> index 2946f3a63110..e2d056c70ca7 100644
> --- a/drivers/watchdog/watchdog_dev.c
> +++ b/drivers/watchdog/watchdog_dev.c
> @@ -34,6 +34,7 @@
>  #include <linux/init.h>		/* For __init/__exit/... */
>  #include <linux/hrtimer.h>	/* For hrtimers */
>  #include <linux/kernel.h>	/* For printk/panic/... */
> +#include <linux/kexec.h>	/* For checking if crash kernel is loaded */
>  #include <linux/kthread.h>	/* For kthread_work */
>  #include <linux/miscdevice.h>	/* For handling misc devices */
>  #include <linux/module.h>	/* For module stuff/... */
> @@ -82,6 +83,8 @@ static bool handle_boot_enabled =
>  
>  static unsigned open_timeout = CONFIG_WATCHDOG_OPEN_TIMEOUT;
>  
> +static unsigned int wdt_panic_timeout = CONFIG_DEFAULT_WATCHDOG_CRASH_KERNEL_TIMEOUT;
> +
>  static bool watchdog_past_open_deadline(struct watchdog_core_data *data)
>  {
>  	return ktime_after(ktime_get(), data->open_deadline);
> @@ -658,6 +661,61 @@ static int watchdog_ioctl_op(struct watchdog_device *wdd, unsigned int cmd,
>   *	off the watchdog (if 'nowayout' is not set).
>   */
>  
> +static int watchdog_panic_notifier(struct notifier_block *nb,
> +	unsigned long code, void *data)
> +{
> +	struct watchdog_device *wdd;
> +	int ret = 0;
> +	unsigned int time_out = 0; // seconds
> +
> +	if (wdt_panic_timeout == 0)
> +		return NOTIFY_DONE;
> +
> +	if (watchdog_timeout_invalid(wdt_panic_timeout)) {
> +		time_out = min(wdt_panic_timeout, wdd->max_timeout);
> +		pr_err("watchdog%d: timeout extension value "
> +			" invalid. Falling back to %d seconds\n", time_out);
> +	} else {
> +		time_out = wdt_panic_timeout;
> +	}
> +
> +	wdd = container_of(nb, struct watchdog_device, panic_nb);
> +
> +	if (kexec_crash_image && watchdog_active(wdd)) {
> +		if (wdd->ops->panic_cb) {
> +			ret = wdd->ops->panic_cb(wdd, time_out);
> +		} else {
> +			int ping_ret;
> +
> +			pr_info("watchdog%d: Extending watchdog timeout to "
> +				"%d seconds\n", wdd->id, time_out);
> +
> +			ret = watchdog_set_timeout(wdd, time_out);
> +
> +			/* Many watchdog implementations will reset the timer
> +			 * when the timeout is changed, but ping again to be
> +			 * safe.
> +			 */
> +			if (wdd->ops->ping) {
> +				ping_ret = wdd->ops->ping(wdd);
> +				if (ping_ret) {
> +					pr_warn("watchdog%d: Unable to ping "
> +						"watchdog before starting "
> +						"crash kernel (%d)\n",
> +						wdd->id, ping_ret);
> +				}
> +			}
> +		}
> +		if (ret) {
> +			pr_err("watchdog%d: Unable to extend watchdog timeout "
> +				"before starting crash kernel (%d)",
> +				wdd->id, ret);
> +		}
> +	}
> +
> +	return notifier_from_errno(ret);
> +}
> +
>  static ssize_t watchdog_write(struct file *file, const char __user *data,
>  						size_t len, loff_t *ppos)
>  {
> @@ -1118,8 +1176,27 @@ int watchdog_dev_register(struct watchdog_device *wdd)
>  		return ret;
>  
>  	ret = watchdog_register_pretimeout(wdd);
> -	if (ret)
> +	if (ret) {
>  		watchdog_cdev_unregister(wdd);
> +		return ret;
> +	}
> +
> +	/*
> +	 * Setting panic_nb priority to minimum ensures the watchdog device
> +	 * panic callback runs last in the chain of kernel panic callbacks.
> +	 * This way, the watchdog device will fire in the event another
> +	 * panic callback hangs.
> +	 */
> +	if (wdd->ops->panic_cb) {
> +		wdd->panic_nb.priority = INT_MIN;
> +		wdd->panic_nb.notifier_call = watchdog_panic_notifier;
> +
> +		ret = atomic_notifier_chain_register(&panic_notifier_list,
> +						&wdd->panic_nb);
> +		if (ret)
> +			pr_err("watchdog%d: Cannot register panic notifier (%d)\n",
> +				wdd->id, ret);
> +	}
>  
>  	return ret;
>  }
> @@ -1228,3 +1305,8 @@ module_param(open_timeout, uint, 0644);
>  MODULE_PARM_DESC(open_timeout,
>  	"Maximum time (in seconds, 0 means infinity) for userspace to take over a running watchdog (default="
>  	__MODULE_STRING(CONFIG_WATCHDOG_OPEN_TIMEOUT) ")");
> +
> +module_param(wdt_panic_timeout, uint, 0444);
> +MODULE_PARM_DESC(wdt_panic_timeout,
> +	"Watchdog timeout extension duration upon kernel panic. (default="
> +	__MODULE_STRING(CONFIG_DEFAULT_WATCHDOG_CRASH_KERNEL_TIMEOUT) " seconds)");
> diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
> index 9b19e6bb68b5..f79f41cca156 100644
> --- a/include/linux/watchdog.h
> +++ b/include/linux/watchdog.h
> @@ -34,6 +34,7 @@ struct watchdog_governor;
>   * @get_timeleft:The routine that gets the time left before a reset (in seconds).
>   * @restart:	The routine for restarting the machine.
>   * @ioctl:	The routines that handles extra ioctl calls.
> + * @panic_cb:	The routine that extends the watchdog timeout before the crash kernel starts.
>   *
>   * The watchdog_ops structure contains a list of low-level operations
>   * that control a watchdog device. It also contains the module that owns
> @@ -53,6 +54,7 @@ struct watchdog_ops {
>  	unsigned int (*get_timeleft)(struct watchdog_device *);
>  	int (*restart)(struct watchdog_device *, unsigned long, void *);
>  	long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long);
> +	int (*panic_cb)(struct watchdog_device *, unsigned int);
>  };
>  
>  /** struct watchdog_device - The structure that defines a watchdog device
> @@ -107,6 +109,7 @@ struct watchdog_device {
>  	unsigned int max_hw_heartbeat_ms;
>  	struct notifier_block reboot_nb;
>  	struct notifier_block restart_nb;
> +	struct notifier_block panic_nb;
>  	void *driver_data;
>  	struct watchdog_core_data *wd_data;
>  	unsigned long status;
>
diff mbox series

Patch

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index fd7968635e6d..f1055985e100 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -141,6 +141,19 @@  comment "Watchdog Device Drivers"
 
 # Architecture Independent
 
+config DEFAULT_WATCHDOG_CRASH_KERNEL_TIMEOUT
+	int "Default timeout for watchdog timer before crash kernel starts (seconds)"
+	default 0
+	help
+	  This option allows an extended timeout to be used for the watchdog when
+	  the kernel panics and a crash kernel is about to start. This is helpful
+	  when the existing WDT timeout value is less than the time required for
+	  crash kernel to run and the crash kernel is unable to handle the
+	  the watchdog itself. The timeout extension happens last in chain of
+	  kernel panic handler callbacks just in case another panic handler
+	  hangs unexpectedly. When this value is set to 0, the watchdog timeout
+	  will not be changed.
+
 config SOFT_WATCHDOG
 	tristate "Software watchdog"
 	select WATCHDOG_CORE
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index 2946f3a63110..e2d056c70ca7 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -34,6 +34,7 @@ 
 #include <linux/init.h>		/* For __init/__exit/... */
 #include <linux/hrtimer.h>	/* For hrtimers */
 #include <linux/kernel.h>	/* For printk/panic/... */
+#include <linux/kexec.h>	/* For checking if crash kernel is loaded */
 #include <linux/kthread.h>	/* For kthread_work */
 #include <linux/miscdevice.h>	/* For handling misc devices */
 #include <linux/module.h>	/* For module stuff/... */
@@ -82,6 +83,8 @@  static bool handle_boot_enabled =
 
 static unsigned open_timeout = CONFIG_WATCHDOG_OPEN_TIMEOUT;
 
+static unsigned int wdt_panic_timeout = CONFIG_DEFAULT_WATCHDOG_CRASH_KERNEL_TIMEOUT;
+
 static bool watchdog_past_open_deadline(struct watchdog_core_data *data)
 {
 	return ktime_after(ktime_get(), data->open_deadline);
@@ -658,6 +661,61 @@  static int watchdog_ioctl_op(struct watchdog_device *wdd, unsigned int cmd,
  *	off the watchdog (if 'nowayout' is not set).
  */
 
+static int watchdog_panic_notifier(struct notifier_block *nb,
+	unsigned long code, void *data)
+{
+	struct watchdog_device *wdd;
+	int ret = 0;
+	unsigned int time_out = 0; // seconds
+
+	if (wdt_panic_timeout == 0)
+		return NOTIFY_DONE;
+
+	if (watchdog_timeout_invalid(wdt_panic_timeout)) {
+		time_out = min(wdt_panic_timeout, wdd->max_timeout);
+		pr_err("watchdog%d: timeout extension value "
+			" invalid. Falling back to %d seconds\n", time_out);
+	} else {
+		time_out = wdt_panic_timeout;
+	}
+
+	wdd = container_of(nb, struct watchdog_device, panic_nb);
+
+	if (kexec_crash_image && watchdog_active(wdd)) {
+		if (wdd->ops->panic_cb) {
+			ret = wdd->ops->panic_cb(wdd, time_out);
+		} else {
+			int ping_ret;
+
+			pr_info("watchdog%d: Extending watchdog timeout to "
+				"%d seconds\n", wdd->id, time_out);
+
+			ret = watchdog_set_timeout(wdd, time_out);
+
+			/* Many watchdog implementations will reset the timer
+			 * when the timeout is changed, but ping again to be
+			 * safe.
+			 */
+			if (wdd->ops->ping) {
+				ping_ret = wdd->ops->ping(wdd);
+				if (ping_ret) {
+					pr_warn("watchdog%d: Unable to ping "
+						"watchdog before starting "
+						"crash kernel (%d)\n",
+						wdd->id, ping_ret);
+				}
+			}
+		}
+		if (ret) {
+			pr_err("watchdog%d: Unable to extend watchdog timeout "
+				"before starting crash kernel (%d)",
+				wdd->id, ret);
+		}
+	}
+
+	return notifier_from_errno(ret);
+}
+
 static ssize_t watchdog_write(struct file *file, const char __user *data,
 						size_t len, loff_t *ppos)
 {
@@ -1118,8 +1176,27 @@  int watchdog_dev_register(struct watchdog_device *wdd)
 		return ret;
 
 	ret = watchdog_register_pretimeout(wdd);
-	if (ret)
+	if (ret) {
 		watchdog_cdev_unregister(wdd);
+		return ret;
+	}
+
+	/*
+	 * Setting panic_nb priority to minimum ensures the watchdog device
+	 * panic callback runs last in the chain of kernel panic callbacks.
+	 * This way, the watchdog device will fire in the event another
+	 * panic callback hangs.
+	 */
+	if (wdd->ops->panic_cb) {
+		wdd->panic_nb.priority = INT_MIN;
+		wdd->panic_nb.notifier_call = watchdog_panic_notifier;
+
+		ret = atomic_notifier_chain_register(&panic_notifier_list,
+						&wdd->panic_nb);
+		if (ret)
+			pr_err("watchdog%d: Cannot register panic notifier (%d)\n",
+				wdd->id, ret);
+	}
 
 	return ret;
 }
@@ -1228,3 +1305,8 @@  module_param(open_timeout, uint, 0644);
 MODULE_PARM_DESC(open_timeout,
 	"Maximum time (in seconds, 0 means infinity) for userspace to take over a running watchdog (default="
 	__MODULE_STRING(CONFIG_WATCHDOG_OPEN_TIMEOUT) ")");
+
+module_param(wdt_panic_timeout, uint, 0444);
+MODULE_PARM_DESC(wdt_panic_timeout,
+	"Watchdog timeout extension duration upon kernel panic. (default="
+	__MODULE_STRING(CONFIG_DEFAULT_WATCHDOG_CRASH_KERNEL_TIMEOUT) " seconds)");
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 9b19e6bb68b5..f79f41cca156 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -34,6 +34,7 @@  struct watchdog_governor;
  * @get_timeleft:The routine that gets the time left before a reset (in seconds).
  * @restart:	The routine for restarting the machine.
  * @ioctl:	The routines that handles extra ioctl calls.
+ * @panic_cb:	The routine that extends the watchdog timeout before the crash kernel starts.
  *
  * The watchdog_ops structure contains a list of low-level operations
  * that control a watchdog device. It also contains the module that owns
@@ -53,6 +54,7 @@  struct watchdog_ops {
 	unsigned int (*get_timeleft)(struct watchdog_device *);
 	int (*restart)(struct watchdog_device *, unsigned long, void *);
 	long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long);
+	int (*panic_cb)(struct watchdog_device *, unsigned int);
 };
 
 /** struct watchdog_device - The structure that defines a watchdog device
@@ -107,6 +109,7 @@  struct watchdog_device {
 	unsigned int max_hw_heartbeat_ms;
 	struct notifier_block reboot_nb;
 	struct notifier_block restart_nb;
+	struct notifier_block panic_nb;
 	void *driver_data;
 	struct watchdog_core_data *wd_data;
 	unsigned long status;