diff mbox series

[v3] softdog: Add options 'soft_reboot_cmd' and 'soft_active_on_boot'

Message ID 20200701110339.2833449-1-woodylin@google.com (mailing list archive)
State Changes Requested
Headers show
Series [v3] softdog: Add options 'soft_reboot_cmd' and 'soft_active_on_boot' | expand

Commit Message

Woody Lin July 1, 2020, 11:03 a.m. UTC
Add module parameters 'soft_reboot_cmd' and 'soft_active_on_boot' for
customizing softdog configuration; config reboot command by assigning
soft_reboot_cmd, and set soft_active_on_boot to start up softdog
timer at module initialization stage.

Signed-off-by: Woody Lin <woodylin@google.com>
---
 drivers/watchdog/softdog.c | 56 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

Comments

Guenter Roeck July 1, 2020, 2:10 p.m. UTC | #1
On 7/1/20 4:03 AM, Woody Lin wrote:
> Add module parameters 'soft_reboot_cmd' and 'soft_active_on_boot' for
> customizing softdog configuration; config reboot command by assigning
> soft_reboot_cmd, and set soft_active_on_boot to start up softdog
> timer at module initialization stage.
> 
> Signed-off-by: Woody Lin <woodylin@google.com>

Sigh. Now I'll have to look up old versions and compare to figure out
changes from v2. Why do people always believe that kernel maintainers
have endless amounts of time available ?

[ Sorry, yes, I know, we are supposed to remain friendly all the time.
  It is just _so_ frustrating to have to deal with this all the time. ]

Guenter

> ---
>  drivers/watchdog/softdog.c | 56 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 56 insertions(+)
> 
> diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c
> index 3e4885c1545e..8c8d214b6aa7 100644
> --- a/drivers/watchdog/softdog.c
> +++ b/drivers/watchdog/softdog.c
> @@ -20,11 +20,13 @@
>  #include <linux/hrtimer.h>
>  #include <linux/init.h>
>  #include <linux/kernel.h>
> +#include <linux/kthread.h>
>  #include <linux/module.h>
>  #include <linux/moduleparam.h>
>  #include <linux/reboot.h>
>  #include <linux/types.h>
>  #include <linux/watchdog.h>
> +#include <linux/workqueue.h>
>  
>  #define TIMER_MARGIN	60		/* Default is 60 seconds */
>  static unsigned int soft_margin = TIMER_MARGIN;	/* in seconds */
> @@ -49,11 +51,33 @@ module_param(soft_panic, int, 0);
>  MODULE_PARM_DESC(soft_panic,
>  	"Softdog action, set to 1 to panic, 0 to reboot (default=0)");
>  
> +static char *soft_reboot_cmd;
> +module_param(soft_reboot_cmd, charp, 0000);
> +MODULE_PARM_DESC(soft_reboot_cmd,
> +	"Set reboot command. Emergency reboot takes place if unset");
> +
> +static bool soft_active_on_boot;
> +module_param(soft_active_on_boot, bool, 0000);
> +MODULE_PARM_DESC(soft_active_on_boot,
> +	"Set to true to active Softdog on boot (default=false)");
> +
>  static struct hrtimer softdog_ticktock;
>  static struct hrtimer softdog_preticktock;
>  
> +static int reboot_kthread_fn(void *data)
> +{
> +	kernel_restart(soft_reboot_cmd);
> +	return -EPERM; /* Should not reach here */
> +}
> +
> +static void reboot_work_fn(struct work_struct *unused)
> +{
> +	kthread_run(reboot_kthread_fn, NULL, "softdog_reboot");
> +}
> +
>  static enum hrtimer_restart softdog_fire(struct hrtimer *timer)
>  {
> +	static bool soft_reboot_fired;
>  	module_put(THIS_MODULE);
>  	if (soft_noboot) {
>  		pr_crit("Triggered - Reboot ignored\n");
> @@ -62,6 +86,33 @@ static enum hrtimer_restart softdog_fire(struct hrtimer *timer)
>  		panic("Software Watchdog Timer expired");
>  	} else {
>  		pr_crit("Initiating system reboot\n");
> +		if (!soft_reboot_fired && soft_reboot_cmd != NULL) {
> +			static DECLARE_WORK(reboot_work, reboot_work_fn);
> +			/*
> +			 * The 'kernel_restart' is a 'might-sleep' operation.
> +			 * Also, executing it in system-wide workqueues blocks
> +			 * any driver from using the same workqueue in its
> +			 * shutdown callback function. Thus, we should execute
> +			 * the 'kernel_restart' in a standalone kernel thread.
> +			 * But since starting a kernel thread is also a
> +			 * 'might-sleep' operation, so the 'reboot_work' is
> +			 * required as a launcher of the kernel thread.
> +			 *
> +			 * After request the reboot, restart the timer to
> +			 * schedule an 'emergency_restart' reboot after
> +			 * 'TIMER_MARGIN' seconds. It's because if the softdog
> +			 * hangs, it might be because of scheduling issues. And
> +			 * if that is the case, both 'schedule_work' and
> +			 * 'kernel_restart' may possibly be malfunctional at the
> +			 * same time.
> +			 */
> +			soft_reboot_fired = true;
> +			schedule_work(&reboot_work);
> +			hrtimer_add_expires_ns(timer,
> +					(u64)TIMER_MARGIN * NSEC_PER_SEC);
> +
> +			return HRTIMER_RESTART;
> +		}
>  		emergency_restart();
>  		pr_crit("Reboot didn't ?????\n");
>  	}
> @@ -145,12 +196,17 @@ static int __init softdog_init(void)
>  		softdog_preticktock.function = softdog_pretimeout;
>  	}
>  
> +	if (soft_active_on_boot)
> +		softdog_ping(&softdog_dev);
> +
>  	ret = watchdog_register_device(&softdog_dev);
>  	if (ret)
>  		return ret;
>  
>  	pr_info("initialized. soft_noboot=%d soft_margin=%d sec soft_panic=%d (nowayout=%d)\n",
>  		soft_noboot, softdog_dev.timeout, soft_panic, nowayout);
> +	pr_info("             soft_reboot_cmd=%s soft_active_on_boot=%d\n",
> +		soft_reboot_cmd, soft_active_on_boot);
>  
>  	return 0;
>  }
>
Woody Lin July 1, 2020, 2:32 p.m. UTC | #2
Attach diff from v2 to v3 here for you to refer (Or any advice about
what next time I can do when uploading the v#N patch for reviewers to
figure out the diff between patch sets more easily? I'd love to follow
if it improves the review process)

Woody

---- 8< ----

diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c
index d267af37e066..8c8d214b6aa7 100644
--- a/drivers/watchdog/softdog.c
+++ b/drivers/watchdog/softdog.c
@@ -51,10 +51,10 @@ module_param(soft_panic, int, 0);
 MODULE_PARM_DESC(soft_panic,
  "Softdog action, set to 1 to panic, 0 to reboot (default=0)");

-static char *soft_reboot_target;
-module_param(soft_reboot_target, charp, 0000);
-MODULE_PARM_DESC(soft_reboot_target,
- "Softdog action, set reboot target (default=emergency)");
+static char *soft_reboot_cmd;
+module_param(soft_reboot_cmd, charp, 0000);
+MODULE_PARM_DESC(soft_reboot_cmd,
+ "Set reboot command. Emergency reboot takes place if unset");

 static bool soft_active_on_boot;
 module_param(soft_active_on_boot, bool, 0000);
@@ -66,7 +66,7 @@ static struct hrtimer softdog_preticktock;

 static int reboot_kthread_fn(void *data)
 {
- kernel_restart(soft_reboot_target);
+ kernel_restart(soft_reboot_cmd);
  return -EPERM; /* Should not reach here */
 }

@@ -86,9 +86,26 @@ static enum hrtimer_restart softdog_fire(struct
hrtimer *timer)
  panic("Software Watchdog Timer expired");
  } else {
  pr_crit("Initiating system reboot\n");
- if (!soft_reboot_fired && soft_reboot_target != NULL) {
+ if (!soft_reboot_fired && soft_reboot_cmd != NULL) {
  static DECLARE_WORK(reboot_work, reboot_work_fn);
-
+ /*
+ * The 'kernel_restart' is a 'might-sleep' operation.
+ * Also, executing it in system-wide workqueues blocks
+ * any driver from using the same workqueue in its
+ * shutdown callback function. Thus, we should execute
+ * the 'kernel_restart' in a standalone kernel thread.
+ * But since starting a kernel thread is also a
+ * 'might-sleep' operation, so the 'reboot_work' is
+ * required as a launcher of the kernel thread.
+ *
+ * After request the reboot, restart the timer to
+ * schedule an 'emergency_restart' reboot after
+ * 'TIMER_MARGIN' seconds. It's because if the softdog
+ * hangs, it might be because of scheduling issues. And
+ * if that is the case, both 'schedule_work' and
+ * 'kernel_restart' may possibly be malfunctional at the
+ * same time.
+ */
  soft_reboot_fired = true;
  schedule_work(&reboot_work);
  hrtimer_add_expires_ns(timer,
@@ -179,10 +196,8 @@ static int __init softdog_init(void)
  softdog_preticktock.function = softdog_pretimeout;
  }

- if (soft_active_on_boot) {
- set_bit(WDOG_HW_RUNNING, &softdog_dev.status);
- set_bit(WDOG_ACTIVE, &softdog_dev.status);
- }
+ if (soft_active_on_boot)
+ softdog_ping(&softdog_dev);

  ret = watchdog_register_device(&softdog_dev);
  if (ret)
@@ -190,8 +205,8 @@ static int __init softdog_init(void)

  pr_info("initialized. soft_noboot=%d soft_margin=%d sec
soft_panic=%d (nowayout=%d)\n",
  soft_noboot, softdog_dev.timeout, soft_panic, nowayout);
- pr_info("             soft_reboot_target=%s soft_active_on_boot=%d\n",
- soft_reboot_target, soft_active_on_boot);
+ pr_info("             soft_reboot_cmd=%s soft_active_on_boot=%d\n",
+ soft_reboot_cmd, soft_active_on_boot);

  return 0;
 }
---- >8 ----

On Wed, Jul 1, 2020 at 10:10 PM Guenter Roeck <linux@roeck-us.net> wrote:
>
> On 7/1/20 4:03 AM, Woody Lin wrote:
> > Add module parameters 'soft_reboot_cmd' and 'soft_active_on_boot' for
> > customizing softdog configuration; config reboot command by assigning
> > soft_reboot_cmd, and set soft_active_on_boot to start up softdog
> > timer at module initialization stage.
> >
> > Signed-off-by: Woody Lin <woodylin@google.com>
>
> Sigh. Now I'll have to look up old versions and compare to figure out
> changes from v2. Why do people always believe that kernel maintainers
> have endless amounts of time available ?
>
> [ Sorry, yes, I know, we are supposed to remain friendly all the time.
>   It is just _so_ frustrating to have to deal with this all the time. ]
>
> Guenter
>
> > ---
> >  drivers/watchdog/softdog.c | 56 ++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 56 insertions(+)
> >
> > diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c
> > index 3e4885c1545e..8c8d214b6aa7 100644
> > --- a/drivers/watchdog/softdog.c
> > +++ b/drivers/watchdog/softdog.c
> > @@ -20,11 +20,13 @@
> >  #include <linux/hrtimer.h>
> >  #include <linux/init.h>
> >  #include <linux/kernel.h>
> > +#include <linux/kthread.h>
> >  #include <linux/module.h>
> >  #include <linux/moduleparam.h>
> >  #include <linux/reboot.h>
> >  #include <linux/types.h>
> >  #include <linux/watchdog.h>
> > +#include <linux/workqueue.h>
> >
> >  #define TIMER_MARGIN 60              /* Default is 60 seconds */
> >  static unsigned int soft_margin = TIMER_MARGIN;      /* in seconds */
> > @@ -49,11 +51,33 @@ module_param(soft_panic, int, 0);
> >  MODULE_PARM_DESC(soft_panic,
> >       "Softdog action, set to 1 to panic, 0 to reboot (default=0)");
> >
> > +static char *soft_reboot_cmd;
> > +module_param(soft_reboot_cmd, charp, 0000);
> > +MODULE_PARM_DESC(soft_reboot_cmd,
> > +     "Set reboot command. Emergency reboot takes place if unset");
> > +
> > +static bool soft_active_on_boot;
> > +module_param(soft_active_on_boot, bool, 0000);
> > +MODULE_PARM_DESC(soft_active_on_boot,
> > +     "Set to true to active Softdog on boot (default=false)");
> > +
> >  static struct hrtimer softdog_ticktock;
> >  static struct hrtimer softdog_preticktock;
> >
> > +static int reboot_kthread_fn(void *data)
> > +{
> > +     kernel_restart(soft_reboot_cmd);
> > +     return -EPERM; /* Should not reach here */
> > +}
> > +
> > +static void reboot_work_fn(struct work_struct *unused)
> > +{
> > +     kthread_run(reboot_kthread_fn, NULL, "softdog_reboot");
> > +}
> > +
> >  static enum hrtimer_restart softdog_fire(struct hrtimer *timer)
> >  {
> > +     static bool soft_reboot_fired;
> >       module_put(THIS_MODULE);
> >       if (soft_noboot) {
> >               pr_crit("Triggered - Reboot ignored\n");
> > @@ -62,6 +86,33 @@ static enum hrtimer_restart softdog_fire(struct hrtimer *timer)
> >               panic("Software Watchdog Timer expired");
> >       } else {
> >               pr_crit("Initiating system reboot\n");
> > +             if (!soft_reboot_fired && soft_reboot_cmd != NULL) {
> > +                     static DECLARE_WORK(reboot_work, reboot_work_fn);
> > +                     /*
> > +                      * The 'kernel_restart' is a 'might-sleep' operation.
> > +                      * Also, executing it in system-wide workqueues blocks
> > +                      * any driver from using the same workqueue in its
> > +                      * shutdown callback function. Thus, we should execute
> > +                      * the 'kernel_restart' in a standalone kernel thread.
> > +                      * But since starting a kernel thread is also a
> > +                      * 'might-sleep' operation, so the 'reboot_work' is
> > +                      * required as a launcher of the kernel thread.
> > +                      *
> > +                      * After request the reboot, restart the timer to
> > +                      * schedule an 'emergency_restart' reboot after
> > +                      * 'TIMER_MARGIN' seconds. It's because if the softdog
> > +                      * hangs, it might be because of scheduling issues. And
> > +                      * if that is the case, both 'schedule_work' and
> > +                      * 'kernel_restart' may possibly be malfunctional at the
> > +                      * same time.
> > +                      */
> > +                     soft_reboot_fired = true;
> > +                     schedule_work(&reboot_work);
> > +                     hrtimer_add_expires_ns(timer,
> > +                                     (u64)TIMER_MARGIN * NSEC_PER_SEC);
> > +
> > +                     return HRTIMER_RESTART;
> > +             }
> >               emergency_restart();
> >               pr_crit("Reboot didn't ?????\n");
> >       }
> > @@ -145,12 +196,17 @@ static int __init softdog_init(void)
> >               softdog_preticktock.function = softdog_pretimeout;
> >       }
> >
> > +     if (soft_active_on_boot)
> > +             softdog_ping(&softdog_dev);
> > +
> >       ret = watchdog_register_device(&softdog_dev);
> >       if (ret)
> >               return ret;
> >
> >       pr_info("initialized. soft_noboot=%d soft_margin=%d sec soft_panic=%d (nowayout=%d)\n",
> >               soft_noboot, softdog_dev.timeout, soft_panic, nowayout);
> > +     pr_info("             soft_reboot_cmd=%s soft_active_on_boot=%d\n",
> > +             soft_reboot_cmd, soft_active_on_boot);
> >
> >       return 0;
> >  }
> >
>
Guenter Roeck July 2, 2020, 3:07 p.m. UTC | #3
On Wed, Jul 01, 2020 at 10:32:16PM +0800, Woody Lin wrote:
> Attach diff from v2 to v3 here for you to refer (Or any advice about
> what next time I can do when uploading the v#N patch for reviewers to
> figure out the diff between patch sets more easily? I'd love to follow
> if it improves the review process)
> 

I would have expected something like

---
v3: Renamed soft_reboot_target to soft_reboot_cmd
    Explain reason for introducing a worker
    <other changes made in v3>

v2: <changes made in v2>

Guenter

> Woody
> 
> ---- 8< ----
> 
> diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c
> index d267af37e066..8c8d214b6aa7 100644
> --- a/drivers/watchdog/softdog.c
> +++ b/drivers/watchdog/softdog.c
> @@ -51,10 +51,10 @@ module_param(soft_panic, int, 0);
>  MODULE_PARM_DESC(soft_panic,
>   "Softdog action, set to 1 to panic, 0 to reboot (default=0)");
> 
> -static char *soft_reboot_target;
> -module_param(soft_reboot_target, charp, 0000);
> -MODULE_PARM_DESC(soft_reboot_target,
> - "Softdog action, set reboot target (default=emergency)");
> +static char *soft_reboot_cmd;
> +module_param(soft_reboot_cmd, charp, 0000);
> +MODULE_PARM_DESC(soft_reboot_cmd,
> + "Set reboot command. Emergency reboot takes place if unset");
> 
>  static bool soft_active_on_boot;
>  module_param(soft_active_on_boot, bool, 0000);
> @@ -66,7 +66,7 @@ static struct hrtimer softdog_preticktock;
> 
>  static int reboot_kthread_fn(void *data)
>  {
> - kernel_restart(soft_reboot_target);
> + kernel_restart(soft_reboot_cmd);
>   return -EPERM; /* Should not reach here */
>  }
> 
> @@ -86,9 +86,26 @@ static enum hrtimer_restart softdog_fire(struct
> hrtimer *timer)
>   panic("Software Watchdog Timer expired");
>   } else {
>   pr_crit("Initiating system reboot\n");
> - if (!soft_reboot_fired && soft_reboot_target != NULL) {
> + if (!soft_reboot_fired && soft_reboot_cmd != NULL) {
>   static DECLARE_WORK(reboot_work, reboot_work_fn);
> -
> + /*
> + * The 'kernel_restart' is a 'might-sleep' operation.
> + * Also, executing it in system-wide workqueues blocks
> + * any driver from using the same workqueue in its
> + * shutdown callback function. Thus, we should execute
> + * the 'kernel_restart' in a standalone kernel thread.
> + * But since starting a kernel thread is also a
> + * 'might-sleep' operation, so the 'reboot_work' is
> + * required as a launcher of the kernel thread.
> + *
> + * After request the reboot, restart the timer to
> + * schedule an 'emergency_restart' reboot after
> + * 'TIMER_MARGIN' seconds. It's because if the softdog
> + * hangs, it might be because of scheduling issues. And
> + * if that is the case, both 'schedule_work' and
> + * 'kernel_restart' may possibly be malfunctional at the
> + * same time.
> + */
>   soft_reboot_fired = true;
>   schedule_work(&reboot_work);
>   hrtimer_add_expires_ns(timer,
> @@ -179,10 +196,8 @@ static int __init softdog_init(void)
>   softdog_preticktock.function = softdog_pretimeout;
>   }
> 
> - if (soft_active_on_boot) {
> - set_bit(WDOG_HW_RUNNING, &softdog_dev.status);
> - set_bit(WDOG_ACTIVE, &softdog_dev.status);
> - }
> + if (soft_active_on_boot)
> + softdog_ping(&softdog_dev);
> 
>   ret = watchdog_register_device(&softdog_dev);
>   if (ret)
> @@ -190,8 +205,8 @@ static int __init softdog_init(void)
> 
>   pr_info("initialized. soft_noboot=%d soft_margin=%d sec
> soft_panic=%d (nowayout=%d)\n",
>   soft_noboot, softdog_dev.timeout, soft_panic, nowayout);
> - pr_info("             soft_reboot_target=%s soft_active_on_boot=%d\n",
> - soft_reboot_target, soft_active_on_boot);
> + pr_info("             soft_reboot_cmd=%s soft_active_on_boot=%d\n",
> + soft_reboot_cmd, soft_active_on_boot);
> 
>   return 0;
>  }
> ---- >8 ----
> 
> On Wed, Jul 1, 2020 at 10:10 PM Guenter Roeck <linux@roeck-us.net> wrote:
> >
> > On 7/1/20 4:03 AM, Woody Lin wrote:
> > > Add module parameters 'soft_reboot_cmd' and 'soft_active_on_boot' for
> > > customizing softdog configuration; config reboot command by assigning
> > > soft_reboot_cmd, and set soft_active_on_boot to start up softdog
> > > timer at module initialization stage.
> > >
> > > Signed-off-by: Woody Lin <woodylin@google.com>
> >
> > Sigh. Now I'll have to look up old versions and compare to figure out
> > changes from v2. Why do people always believe that kernel maintainers
> > have endless amounts of time available ?
> >
> > [ Sorry, yes, I know, we are supposed to remain friendly all the time.
> >   It is just _so_ frustrating to have to deal with this all the time. ]
> >
> > Guenter
> >
> > > ---
> > >  drivers/watchdog/softdog.c | 56 ++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 56 insertions(+)
> > >
> > > diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c
> > > index 3e4885c1545e..8c8d214b6aa7 100644
> > > --- a/drivers/watchdog/softdog.c
> > > +++ b/drivers/watchdog/softdog.c
> > > @@ -20,11 +20,13 @@
> > >  #include <linux/hrtimer.h>
> > >  #include <linux/init.h>
> > >  #include <linux/kernel.h>
> > > +#include <linux/kthread.h>
> > >  #include <linux/module.h>
> > >  #include <linux/moduleparam.h>
> > >  #include <linux/reboot.h>
> > >  #include <linux/types.h>
> > >  #include <linux/watchdog.h>
> > > +#include <linux/workqueue.h>
> > >
> > >  #define TIMER_MARGIN 60              /* Default is 60 seconds */
> > >  static unsigned int soft_margin = TIMER_MARGIN;      /* in seconds */
> > > @@ -49,11 +51,33 @@ module_param(soft_panic, int, 0);
> > >  MODULE_PARM_DESC(soft_panic,
> > >       "Softdog action, set to 1 to panic, 0 to reboot (default=0)");
> > >
> > > +static char *soft_reboot_cmd;
> > > +module_param(soft_reboot_cmd, charp, 0000);
> > > +MODULE_PARM_DESC(soft_reboot_cmd,
> > > +     "Set reboot command. Emergency reboot takes place if unset");
> > > +
> > > +static bool soft_active_on_boot;
> > > +module_param(soft_active_on_boot, bool, 0000);
> > > +MODULE_PARM_DESC(soft_active_on_boot,
> > > +     "Set to true to active Softdog on boot (default=false)");
> > > +
> > >  static struct hrtimer softdog_ticktock;
> > >  static struct hrtimer softdog_preticktock;
> > >
> > > +static int reboot_kthread_fn(void *data)
> > > +{
> > > +     kernel_restart(soft_reboot_cmd);
> > > +     return -EPERM; /* Should not reach here */
> > > +}
> > > +
> > > +static void reboot_work_fn(struct work_struct *unused)
> > > +{
> > > +     kthread_run(reboot_kthread_fn, NULL, "softdog_reboot");
> > > +}
> > > +
> > >  static enum hrtimer_restart softdog_fire(struct hrtimer *timer)
> > >  {
> > > +     static bool soft_reboot_fired;
> > >       module_put(THIS_MODULE);
> > >       if (soft_noboot) {
> > >               pr_crit("Triggered - Reboot ignored\n");
> > > @@ -62,6 +86,33 @@ static enum hrtimer_restart softdog_fire(struct hrtimer *timer)
> > >               panic("Software Watchdog Timer expired");
> > >       } else {
> > >               pr_crit("Initiating system reboot\n");
> > > +             if (!soft_reboot_fired && soft_reboot_cmd != NULL) {
> > > +                     static DECLARE_WORK(reboot_work, reboot_work_fn);
> > > +                     /*
> > > +                      * The 'kernel_restart' is a 'might-sleep' operation.
> > > +                      * Also, executing it in system-wide workqueues blocks
> > > +                      * any driver from using the same workqueue in its
> > > +                      * shutdown callback function. Thus, we should execute
> > > +                      * the 'kernel_restart' in a standalone kernel thread.
> > > +                      * But since starting a kernel thread is also a
> > > +                      * 'might-sleep' operation, so the 'reboot_work' is
> > > +                      * required as a launcher of the kernel thread.
> > > +                      *
> > > +                      * After request the reboot, restart the timer to
> > > +                      * schedule an 'emergency_restart' reboot after
> > > +                      * 'TIMER_MARGIN' seconds. It's because if the softdog
> > > +                      * hangs, it might be because of scheduling issues. And
> > > +                      * if that is the case, both 'schedule_work' and
> > > +                      * 'kernel_restart' may possibly be malfunctional at the
> > > +                      * same time.
> > > +                      */
> > > +                     soft_reboot_fired = true;
> > > +                     schedule_work(&reboot_work);
> > > +                     hrtimer_add_expires_ns(timer,
> > > +                                     (u64)TIMER_MARGIN * NSEC_PER_SEC);
> > > +
> > > +                     return HRTIMER_RESTART;
> > > +             }
> > >               emergency_restart();
> > >               pr_crit("Reboot didn't ?????\n");
> > >       }
> > > @@ -145,12 +196,17 @@ static int __init softdog_init(void)
> > >               softdog_preticktock.function = softdog_pretimeout;
> > >       }
> > >
> > > +     if (soft_active_on_boot)
> > > +             softdog_ping(&softdog_dev);
> > > +
> > >       ret = watchdog_register_device(&softdog_dev);
> > >       if (ret)
> > >               return ret;
> > >
> > >       pr_info("initialized. soft_noboot=%d soft_margin=%d sec soft_panic=%d (nowayout=%d)\n",
> > >               soft_noboot, softdog_dev.timeout, soft_panic, nowayout);
> > > +     pr_info("             soft_reboot_cmd=%s soft_active_on_boot=%d\n",
> > > +             soft_reboot_cmd, soft_active_on_boot);
> > >
> > >       return 0;
> > >  }
> > >
> >
Guenter Roeck July 7, 2020, 4:03 a.m. UTC | #4
On Wed, Jul 01, 2020 at 07:03:40PM +0800, Woody Lin wrote:
> Add module parameters 'soft_reboot_cmd' and 'soft_active_on_boot' for
> customizing softdog configuration; config reboot command by assigning
> soft_reboot_cmd, and set soft_active_on_boot to start up softdog
> timer at module initialization stage.
> 
> Signed-off-by: Woody Lin <woodylin@google.com>
> ---
>  drivers/watchdog/softdog.c | 56 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 56 insertions(+)
> 
> diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c
> index 3e4885c1545e..8c8d214b6aa7 100644
> --- a/drivers/watchdog/softdog.c
> +++ b/drivers/watchdog/softdog.c
> @@ -20,11 +20,13 @@
>  #include <linux/hrtimer.h>
>  #include <linux/init.h>
>  #include <linux/kernel.h>
> +#include <linux/kthread.h>
>  #include <linux/module.h>
>  #include <linux/moduleparam.h>
>  #include <linux/reboot.h>
>  #include <linux/types.h>
>  #include <linux/watchdog.h>
> +#include <linux/workqueue.h>
>  
>  #define TIMER_MARGIN	60		/* Default is 60 seconds */
>  static unsigned int soft_margin = TIMER_MARGIN;	/* in seconds */
> @@ -49,11 +51,33 @@ module_param(soft_panic, int, 0);
>  MODULE_PARM_DESC(soft_panic,
>  	"Softdog action, set to 1 to panic, 0 to reboot (default=0)");
>  
> +static char *soft_reboot_cmd;
> +module_param(soft_reboot_cmd, charp, 0000);
> +MODULE_PARM_DESC(soft_reboot_cmd,
> +	"Set reboot command. Emergency reboot takes place if unset");
> +
> +static bool soft_active_on_boot;
> +module_param(soft_active_on_boot, bool, 0000);
> +MODULE_PARM_DESC(soft_active_on_boot,
> +	"Set to true to active Softdog on boot (default=false)");
> +
>  static struct hrtimer softdog_ticktock;
>  static struct hrtimer softdog_preticktock;
>  
> +static int reboot_kthread_fn(void *data)
> +{
> +	kernel_restart(soft_reboot_cmd);
> +	return -EPERM; /* Should not reach here */
> +}
> +
> +static void reboot_work_fn(struct work_struct *unused)
> +{
> +	kthread_run(reboot_kthread_fn, NULL, "softdog_reboot");
> +}
> +
>  static enum hrtimer_restart softdog_fire(struct hrtimer *timer)
>  {
> +	static bool soft_reboot_fired;

Per coding style there should be an empty line here.

>  	module_put(THIS_MODULE);
>  	if (soft_noboot) {
>  		pr_crit("Triggered - Reboot ignored\n");
> @@ -62,6 +86,33 @@ static enum hrtimer_restart softdog_fire(struct hrtimer *timer)
>  		panic("Software Watchdog Timer expired");
>  	} else {
>  		pr_crit("Initiating system reboot\n");
> +		if (!soft_reboot_fired && soft_reboot_cmd != NULL) {
> +			static DECLARE_WORK(reboot_work, reboot_work_fn);
> +			/*
> +			 * The 'kernel_restart' is a 'might-sleep' operation.
> +			 * Also, executing it in system-wide workqueues blocks
> +			 * any driver from using the same workqueue in its
> +			 * shutdown callback function. Thus, we should execute
> +			 * the 'kernel_restart' in a standalone kernel thread.
> +			 * But since starting a kernel thread is also a
> +			 * 'might-sleep' operation, so the 'reboot_work' is
> +			 * required as a launcher of the kernel thread.
> +			 *
> +			 * After request the reboot, restart the timer to
> +			 * schedule an 'emergency_restart' reboot after
> +			 * 'TIMER_MARGIN' seconds. It's because if the softdog
> +			 * hangs, it might be because of scheduling issues. And
> +			 * if that is the case, both 'schedule_work' and
> +			 * 'kernel_restart' may possibly be malfunctional at the
> +			 * same time.
> +			 */
> +			soft_reboot_fired = true;
> +			schedule_work(&reboot_work);
> +			hrtimer_add_expires_ns(timer,
> +					(u64)TIMER_MARGIN * NSEC_PER_SEC);
> +
> +			return HRTIMER_RESTART;
> +		}
>  		emergency_restart();
>  		pr_crit("Reboot didn't ?????\n");
>  	}
> @@ -145,12 +196,17 @@ static int __init softdog_init(void)
>  		softdog_preticktock.function = softdog_pretimeout;
>  	}
>  
> +	if (soft_active_on_boot)
> +		softdog_ping(&softdog_dev);
> +
>  	ret = watchdog_register_device(&softdog_dev);
>  	if (ret)
>  		return ret;
>  
>  	pr_info("initialized. soft_noboot=%d soft_margin=%d sec soft_panic=%d (nowayout=%d)\n",
>  		soft_noboot, softdog_dev.timeout, soft_panic, nowayout);
> +	pr_info("             soft_reboot_cmd=%s soft_active_on_boot=%d\n",
> +		soft_reboot_cmd, soft_active_on_boot);

soft_reboot_cmd can be NULL, which makes the output a bit awkward.

>  
>  	return 0;
>  }
Woody Lin July 7, 2020, 8:46 a.m. UTC | #5
On Tue, Jul 7, 2020 at 12:04 PM Guenter Roeck <linux@roeck-us.net> wrote:
>
> On Wed, Jul 01, 2020 at 07:03:40PM +0800, Woody Lin wrote:
> > Add module parameters 'soft_reboot_cmd' and 'soft_active_on_boot' for
> > customizing softdog configuration; config reboot command by assigning
> > soft_reboot_cmd, and set soft_active_on_boot to start up softdog
> > timer at module initialization stage.
> >
> > Signed-off-by: Woody Lin <woodylin@google.com>
> > ---
> >  drivers/watchdog/softdog.c | 56 ++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 56 insertions(+)
> >
> > diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c
> > index 3e4885c1545e..8c8d214b6aa7 100644
> > --- a/drivers/watchdog/softdog.c
> > +++ b/drivers/watchdog/softdog.c
> > @@ -20,11 +20,13 @@
> >  #include <linux/hrtimer.h>
> >  #include <linux/init.h>
> >  #include <linux/kernel.h>
> > +#include <linux/kthread.h>
> >  #include <linux/module.h>
> >  #include <linux/moduleparam.h>
> >  #include <linux/reboot.h>
> >  #include <linux/types.h>
> >  #include <linux/watchdog.h>
> > +#include <linux/workqueue.h>
> >
> >  #define TIMER_MARGIN 60              /* Default is 60 seconds */
> >  static unsigned int soft_margin = TIMER_MARGIN;      /* in seconds */
> > @@ -49,11 +51,33 @@ module_param(soft_panic, int, 0);
> >  MODULE_PARM_DESC(soft_panic,
> >       "Softdog action, set to 1 to panic, 0 to reboot (default=0)");
> >
> > +static char *soft_reboot_cmd;
> > +module_param(soft_reboot_cmd, charp, 0000);
> > +MODULE_PARM_DESC(soft_reboot_cmd,
> > +     "Set reboot command. Emergency reboot takes place if unset");
> > +
> > +static bool soft_active_on_boot;
> > +module_param(soft_active_on_boot, bool, 0000);
> > +MODULE_PARM_DESC(soft_active_on_boot,
> > +     "Set to true to active Softdog on boot (default=false)");
> > +
> >  static struct hrtimer softdog_ticktock;
> >  static struct hrtimer softdog_preticktock;
> >
> > +static int reboot_kthread_fn(void *data)
> > +{
> > +     kernel_restart(soft_reboot_cmd);
> > +     return -EPERM; /* Should not reach here */
> > +}
> > +
> > +static void reboot_work_fn(struct work_struct *unused)
> > +{
> > +     kthread_run(reboot_kthread_fn, NULL, "softdog_reboot");
> > +}
> > +
> >  static enum hrtimer_restart softdog_fire(struct hrtimer *timer)
> >  {
> > +     static bool soft_reboot_fired;
>
> Per coding style there should be an empty line here.

Ack.

>
> >       module_put(THIS_MODULE);
> >       if (soft_noboot) {
> >               pr_crit("Triggered - Reboot ignored\n");
> > @@ -62,6 +86,33 @@ static enum hrtimer_restart softdog_fire(struct hrtimer *timer)
> >               panic("Software Watchdog Timer expired");
> >       } else {
> >               pr_crit("Initiating system reboot\n");
> > +             if (!soft_reboot_fired && soft_reboot_cmd != NULL) {
> > +                     static DECLARE_WORK(reboot_work, reboot_work_fn);
> > +                     /*
> > +                      * The 'kernel_restart' is a 'might-sleep' operation.
> > +                      * Also, executing it in system-wide workqueues blocks
> > +                      * any driver from using the same workqueue in its
> > +                      * shutdown callback function. Thus, we should execute
> > +                      * the 'kernel_restart' in a standalone kernel thread.
> > +                      * But since starting a kernel thread is also a
> > +                      * 'might-sleep' operation, so the 'reboot_work' is
> > +                      * required as a launcher of the kernel thread.
> > +                      *
> > +                      * After request the reboot, restart the timer to
> > +                      * schedule an 'emergency_restart' reboot after
> > +                      * 'TIMER_MARGIN' seconds. It's because if the softdog
> > +                      * hangs, it might be because of scheduling issues. And
> > +                      * if that is the case, both 'schedule_work' and
> > +                      * 'kernel_restart' may possibly be malfunctional at the
> > +                      * same time.
> > +                      */
> > +                     soft_reboot_fired = true;
> > +                     schedule_work(&reboot_work);
> > +                     hrtimer_add_expires_ns(timer,
> > +                                     (u64)TIMER_MARGIN * NSEC_PER_SEC);
> > +
> > +                     return HRTIMER_RESTART;
> > +             }
> >               emergency_restart();
> >               pr_crit("Reboot didn't ?????\n");
> >       }
> > @@ -145,12 +196,17 @@ static int __init softdog_init(void)
> >               softdog_preticktock.function = softdog_pretimeout;
> >       }
> >
> > +     if (soft_active_on_boot)
> > +             softdog_ping(&softdog_dev);
> > +
> >       ret = watchdog_register_device(&softdog_dev);
> >       if (ret)
> >               return ret;
> >
> >       pr_info("initialized. soft_noboot=%d soft_margin=%d sec soft_panic=%d (nowayout=%d)\n",
> >               soft_noboot, softdog_dev.timeout, soft_panic, nowayout);
> > +     pr_info("             soft_reboot_cmd=%s soft_active_on_boot=%d\n",
> > +             soft_reboot_cmd, soft_active_on_boot);
>
> soft_reboot_cmd can be NULL, which makes the output a bit awkward.

Then how about change it to something like this:
"soft_reboot_cmd=%s", soft_reboot_cmd ?: "<null> (emergency reboot)"
Then we will see "soft_reboot_cmd=<null> (emergency reboot)" when it's NULL.

>
> >
> >       return 0;
> >  }


Woody
Guenter Roeck July 7, 2020, 11:47 a.m. UTC | #6
On 7/7/20 1:46 AM, Woody Lin wrote:
> On Tue, Jul 7, 2020 at 12:04 PM Guenter Roeck <linux@roeck-us.net> wrote:
>>
>> On Wed, Jul 01, 2020 at 07:03:40PM +0800, Woody Lin wrote:
>>> Add module parameters 'soft_reboot_cmd' and 'soft_active_on_boot' for
>>> customizing softdog configuration; config reboot command by assigning
>>> soft_reboot_cmd, and set soft_active_on_boot to start up softdog
>>> timer at module initialization stage.
>>>
>>> Signed-off-by: Woody Lin <woodylin@google.com>
>>> ---
>>>  drivers/watchdog/softdog.c | 56 ++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 56 insertions(+)
>>>
>>> diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c
>>> index 3e4885c1545e..8c8d214b6aa7 100644
>>> --- a/drivers/watchdog/softdog.c
>>> +++ b/drivers/watchdog/softdog.c
>>> @@ -20,11 +20,13 @@
>>>  #include <linux/hrtimer.h>
>>>  #include <linux/init.h>
>>>  #include <linux/kernel.h>
>>> +#include <linux/kthread.h>
>>>  #include <linux/module.h>
>>>  #include <linux/moduleparam.h>
>>>  #include <linux/reboot.h>
>>>  #include <linux/types.h>
>>>  #include <linux/watchdog.h>
>>> +#include <linux/workqueue.h>
>>>
>>>  #define TIMER_MARGIN 60              /* Default is 60 seconds */
>>>  static unsigned int soft_margin = TIMER_MARGIN;      /* in seconds */
>>> @@ -49,11 +51,33 @@ module_param(soft_panic, int, 0);
>>>  MODULE_PARM_DESC(soft_panic,
>>>       "Softdog action, set to 1 to panic, 0 to reboot (default=0)");
>>>
>>> +static char *soft_reboot_cmd;
>>> +module_param(soft_reboot_cmd, charp, 0000);
>>> +MODULE_PARM_DESC(soft_reboot_cmd,
>>> +     "Set reboot command. Emergency reboot takes place if unset");
>>> +
>>> +static bool soft_active_on_boot;
>>> +module_param(soft_active_on_boot, bool, 0000);
>>> +MODULE_PARM_DESC(soft_active_on_boot,
>>> +     "Set to true to active Softdog on boot (default=false)");
>>> +
>>>  static struct hrtimer softdog_ticktock;
>>>  static struct hrtimer softdog_preticktock;
>>>
>>> +static int reboot_kthread_fn(void *data)
>>> +{
>>> +     kernel_restart(soft_reboot_cmd);
>>> +     return -EPERM; /* Should not reach here */
>>> +}
>>> +
>>> +static void reboot_work_fn(struct work_struct *unused)
>>> +{
>>> +     kthread_run(reboot_kthread_fn, NULL, "softdog_reboot");
>>> +}
>>> +
>>>  static enum hrtimer_restart softdog_fire(struct hrtimer *timer)
>>>  {
>>> +     static bool soft_reboot_fired;
>>
>> Per coding style there should be an empty line here.
> 
> Ack.
> 
>>
>>>       module_put(THIS_MODULE);
>>>       if (soft_noboot) {
>>>               pr_crit("Triggered - Reboot ignored\n");
>>> @@ -62,6 +86,33 @@ static enum hrtimer_restart softdog_fire(struct hrtimer *timer)
>>>               panic("Software Watchdog Timer expired");
>>>       } else {
>>>               pr_crit("Initiating system reboot\n");
>>> +             if (!soft_reboot_fired && soft_reboot_cmd != NULL) {
>>> +                     static DECLARE_WORK(reboot_work, reboot_work_fn);
>>> +                     /*
>>> +                      * The 'kernel_restart' is a 'might-sleep' operation.
>>> +                      * Also, executing it in system-wide workqueues blocks
>>> +                      * any driver from using the same workqueue in its
>>> +                      * shutdown callback function. Thus, we should execute
>>> +                      * the 'kernel_restart' in a standalone kernel thread.
>>> +                      * But since starting a kernel thread is also a
>>> +                      * 'might-sleep' operation, so the 'reboot_work' is
>>> +                      * required as a launcher of the kernel thread.
>>> +                      *
>>> +                      * After request the reboot, restart the timer to
>>> +                      * schedule an 'emergency_restart' reboot after
>>> +                      * 'TIMER_MARGIN' seconds. It's because if the softdog
>>> +                      * hangs, it might be because of scheduling issues. And
>>> +                      * if that is the case, both 'schedule_work' and
>>> +                      * 'kernel_restart' may possibly be malfunctional at the
>>> +                      * same time.
>>> +                      */
>>> +                     soft_reboot_fired = true;
>>> +                     schedule_work(&reboot_work);
>>> +                     hrtimer_add_expires_ns(timer,
>>> +                                     (u64)TIMER_MARGIN * NSEC_PER_SEC);
>>> +
>>> +                     return HRTIMER_RESTART;
>>> +             }
>>>               emergency_restart();
>>>               pr_crit("Reboot didn't ?????\n");
>>>       }
>>> @@ -145,12 +196,17 @@ static int __init softdog_init(void)
>>>               softdog_preticktock.function = softdog_pretimeout;
>>>       }
>>>
>>> +     if (soft_active_on_boot)
>>> +             softdog_ping(&softdog_dev);
>>> +
>>>       ret = watchdog_register_device(&softdog_dev);
>>>       if (ret)
>>>               return ret;
>>>
>>>       pr_info("initialized. soft_noboot=%d soft_margin=%d sec soft_panic=%d (nowayout=%d)\n",
>>>               soft_noboot, softdog_dev.timeout, soft_panic, nowayout);
>>> +     pr_info("             soft_reboot_cmd=%s soft_active_on_boot=%d\n",
>>> +             soft_reboot_cmd, soft_active_on_boot);
>>
>> soft_reboot_cmd can be NULL, which makes the output a bit awkward.
> 
> Then how about change it to something like this:
> "soft_reboot_cmd=%s", soft_reboot_cmd ?: "<null> (emergency reboot)"
> Then we will see "soft_reboot_cmd=<null> (emergency reboot)" when it's NULL.

I'd rather see something like "<not set>". "<null>" looks like an error.
Also, it isn't correct to assume emergency reboot; that is only correct
if neither soft_noboot nor soft_panic is set.

Thanks,
Guenter
Woody Lin July 8, 2020, 7:17 a.m. UTC | #7
On Tue, Jul 7, 2020 at 7:47 PM Guenter Roeck <linux@roeck-us.net> wrote:
> I'd rather see something like "<not set>". "<null>" looks like an error.
> Also, it isn't correct to assume emergency reboot; that is only correct
> if neither soft_noboot nor soft_panic is set.

"<not set>" sounds good to me, and thanks for correcting my usage
description on "(emergency reboot)".
I'm uploading the next version.

Woody
diff mbox series

Patch

diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c
index 3e4885c1545e..8c8d214b6aa7 100644
--- a/drivers/watchdog/softdog.c
+++ b/drivers/watchdog/softdog.c
@@ -20,11 +20,13 @@ 
 #include <linux/hrtimer.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/reboot.h>
 #include <linux/types.h>
 #include <linux/watchdog.h>
+#include <linux/workqueue.h>
 
 #define TIMER_MARGIN	60		/* Default is 60 seconds */
 static unsigned int soft_margin = TIMER_MARGIN;	/* in seconds */
@@ -49,11 +51,33 @@  module_param(soft_panic, int, 0);
 MODULE_PARM_DESC(soft_panic,
 	"Softdog action, set to 1 to panic, 0 to reboot (default=0)");
 
+static char *soft_reboot_cmd;
+module_param(soft_reboot_cmd, charp, 0000);
+MODULE_PARM_DESC(soft_reboot_cmd,
+	"Set reboot command. Emergency reboot takes place if unset");
+
+static bool soft_active_on_boot;
+module_param(soft_active_on_boot, bool, 0000);
+MODULE_PARM_DESC(soft_active_on_boot,
+	"Set to true to active Softdog on boot (default=false)");
+
 static struct hrtimer softdog_ticktock;
 static struct hrtimer softdog_preticktock;
 
+static int reboot_kthread_fn(void *data)
+{
+	kernel_restart(soft_reboot_cmd);
+	return -EPERM; /* Should not reach here */
+}
+
+static void reboot_work_fn(struct work_struct *unused)
+{
+	kthread_run(reboot_kthread_fn, NULL, "softdog_reboot");
+}
+
 static enum hrtimer_restart softdog_fire(struct hrtimer *timer)
 {
+	static bool soft_reboot_fired;
 	module_put(THIS_MODULE);
 	if (soft_noboot) {
 		pr_crit("Triggered - Reboot ignored\n");
@@ -62,6 +86,33 @@  static enum hrtimer_restart softdog_fire(struct hrtimer *timer)
 		panic("Software Watchdog Timer expired");
 	} else {
 		pr_crit("Initiating system reboot\n");
+		if (!soft_reboot_fired && soft_reboot_cmd != NULL) {
+			static DECLARE_WORK(reboot_work, reboot_work_fn);
+			/*
+			 * The 'kernel_restart' is a 'might-sleep' operation.
+			 * Also, executing it in system-wide workqueues blocks
+			 * any driver from using the same workqueue in its
+			 * shutdown callback function. Thus, we should execute
+			 * the 'kernel_restart' in a standalone kernel thread.
+			 * But since starting a kernel thread is also a
+			 * 'might-sleep' operation, so the 'reboot_work' is
+			 * required as a launcher of the kernel thread.
+			 *
+			 * After request the reboot, restart the timer to
+			 * schedule an 'emergency_restart' reboot after
+			 * 'TIMER_MARGIN' seconds. It's because if the softdog
+			 * hangs, it might be because of scheduling issues. And
+			 * if that is the case, both 'schedule_work' and
+			 * 'kernel_restart' may possibly be malfunctional at the
+			 * same time.
+			 */
+			soft_reboot_fired = true;
+			schedule_work(&reboot_work);
+			hrtimer_add_expires_ns(timer,
+					(u64)TIMER_MARGIN * NSEC_PER_SEC);
+
+			return HRTIMER_RESTART;
+		}
 		emergency_restart();
 		pr_crit("Reboot didn't ?????\n");
 	}
@@ -145,12 +196,17 @@  static int __init softdog_init(void)
 		softdog_preticktock.function = softdog_pretimeout;
 	}
 
+	if (soft_active_on_boot)
+		softdog_ping(&softdog_dev);
+
 	ret = watchdog_register_device(&softdog_dev);
 	if (ret)
 		return ret;
 
 	pr_info("initialized. soft_noboot=%d soft_margin=%d sec soft_panic=%d (nowayout=%d)\n",
 		soft_noboot, softdog_dev.timeout, soft_panic, nowayout);
+	pr_info("             soft_reboot_cmd=%s soft_active_on_boot=%d\n",
+		soft_reboot_cmd, soft_active_on_boot);
 
 	return 0;
 }