Message ID | 20190819203711.32599-3-minyard@acm.org (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | [01/12] watchdog: NULL the default governor if it is unregistered | expand |
On Mon, Aug 19, 2019 at 03:37:01PM -0500, minyard@acm.org wrote: > From: Corey Minyard <cminyard@mvista.com> > > This is for the read data pretimeout governor. > I am missing an explanation how this is useful and necessary, and what problem it solves that can not be solved differently. For my part I don't immediately see the benefits. Guenter > Signed-off-by: Corey Minyard <cminyard@mvista.com> > --- > drivers/watchdog/watchdog_core.c | 3 + > drivers/watchdog/watchdog_dev.c | 113 +++++++++++++++++++++++++++++++ > include/linux/watchdog.h | 5 ++ > 3 files changed, 121 insertions(+) > > diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c > index 21e8085b848b..80149ac229fc 100644 > --- a/drivers/watchdog/watchdog_core.c > +++ b/drivers/watchdog/watchdog_core.c > @@ -216,6 +216,9 @@ static int __watchdog_register_device(struct watchdog_device *wdd) > return id; > wdd->id = id; > > + spin_lock_init(&wdd->readlock); > + init_waitqueue_head(&wdd->read_q); > + > ret = watchdog_dev_register(wdd); > if (ret) { > ida_simple_remove(&watchdog_ida, id); > diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c > index dbd2ad4c9294..8e8304607a8c 100644 > --- a/drivers/watchdog/watchdog_dev.c > +++ b/drivers/watchdog/watchdog_dev.c > @@ -44,6 +44,8 @@ > #include <linux/types.h> /* For standard types (like size_t) */ > #include <linux/watchdog.h> /* For watchdog specific items */ > #include <linux/uaccess.h> /* For copy_to_user/put_user/... */ > +#include <linux/poll.h> /* For poll_table/... */ > +#include <linux/sched/signal.h> /* For signal_pending */ > > #include <uapi/linux/sched/types.h> /* For struct sched_param */ > > @@ -929,12 +931,120 @@ static int watchdog_release(struct inode *inode, struct file *file) > return 0; > } > > +static ssize_t watchdog_read(struct file *file, > + char __user *buf, > + size_t count, > + loff_t *ppos) > +{ > + struct watchdog_core_data *wd_data = file->private_data; > + struct watchdog_device *wdd; > + int err = 0; > + wait_queue_entry_t wait; > + char dummy = 1; > + > + if (count <= 0) > + return 0; > + > + mutex_lock(&wd_data->lock); > + > + wdd = wd_data->wdd; > + if (!wdd) > + goto done; > + > + /* > + * Reading returns if the pretimeout has gone off, and it only does > + * it once per pretimeout. > + */ > + spin_lock_irq(&wdd->readlock); > + while (!wdd->data_to_read) { > + if (file->f_flags & O_NONBLOCK) { > + err = -EAGAIN; > + goto out; > + } > + > + init_waitqueue_entry(&wait, current); > + add_wait_queue(&wdd->read_q, &wait); > + set_current_state(TASK_INTERRUPTIBLE); > + spin_unlock_irq(&wdd->readlock); > + schedule(); > + spin_lock_irq(&wdd->readlock); > + remove_wait_queue(&wdd->read_q, &wait); > + > + if (signal_pending(current)) { > + err = -ERESTARTSYS; > + goto out; > + } > + } > + dummy = wdd->data_to_read; > + wdd->data_to_read = 0; > + > + out: > + spin_unlock_irq(&wdd->readlock); > + > + if (err == 0) { > + if (copy_to_user(buf, &dummy, 1)) > + err = -EFAULT; > + else > + err = 1; > + } > + > + done: > + mutex_unlock(&wd_data->lock); > + > + return err; > +} > + > +static __poll_t watchdog_poll(struct file *file, poll_table *wait) > +{ > + struct watchdog_core_data *wd_data = file->private_data; > + struct watchdog_device *wdd; > + __poll_t mask = 0; > + > + mutex_lock(&wd_data->lock); > + > + wdd = wd_data->wdd; > + if (!wdd) > + goto done; > + > + poll_wait(file, &wdd->read_q, wait); > + > + spin_lock_irq(&wdd->readlock); > + if (wdd->data_to_read) > + mask |= (EPOLLIN | EPOLLRDNORM); > + spin_unlock_irq(&wdd->readlock); > + > +done: > + mutex_unlock(&wd_data->lock); > + return mask; > +} > + > +static int watchdog_fasync(int fd, struct file *file, int on) > +{ > + struct watchdog_core_data *wd_data = file->private_data; > + struct watchdog_device *wdd; > + int err = -ENODEV; > + > + mutex_lock(&wd_data->lock); > + > + wdd = wd_data->wdd; > + if (!wdd) > + goto done; > + > + err = fasync_helper(fd, file, on, &wdd->fasync_q); > +done: > + mutex_unlock(&wd_data->lock); > + return err; > +} > + > static const struct file_operations watchdog_fops = { > .owner = THIS_MODULE, > .write = watchdog_write, > .unlocked_ioctl = watchdog_ioctl, > .open = watchdog_open, > .release = watchdog_release, > + .read = watchdog_read, > + .poll = watchdog_poll, > + .fasync = watchdog_fasync, > }; > > static struct miscdevice watchdog_miscdev = { > @@ -970,6 +1080,9 @@ static int watchdog_cdev_register(struct watchdog_device *wdd, dev_t devno) > if (IS_ERR_OR_NULL(watchdog_kworker)) > return -ENODEV; > > + spin_lock_init(&wdd->readlock); > + init_waitqueue_head(&wdd->read_q); > + > kthread_init_work(&wd_data->work, watchdog_ping_work); > hrtimer_init(&wd_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); > wd_data->timer.function = watchdog_timer_expired; > diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h > index 417d9f37077a..e34501a822f0 100644 > --- a/include/linux/watchdog.h > +++ b/include/linux/watchdog.h > @@ -117,6 +117,11 @@ struct watchdog_device { > #define WDOG_HW_RUNNING 3 /* True if HW watchdog running */ > #define WDOG_STOP_ON_UNREGISTER 4 /* Should be stopped on unregister */ > struct list_head deferred; > + > + spinlock_t readlock; > + bool data_to_read; > + struct wait_queue_head read_q; > + struct fasync_struct *fasync_q; > }; > > #define WATCHDOG_NOWAYOUT IS_BUILTIN(CONFIG_WATCHDOG_NOWAYOUT) > -- > 2.17.1 >
On Mon, Aug 19, 2019 at 03:37:01PM -0500, minyard@acm.org wrote: > From: Corey Minyard <cminyard@mvista.com> > > This is for the read data pretimeout governor. > > Signed-off-by: Corey Minyard <cminyard@mvista.com> On further thought, I think it would be a bad idea to add this functionality: It changes the userspace ABI for accessing the watchdog device. Today, when a watchdog device is opened, it does not provide read data, it does not hang, and returns immediately. A "cat" from it is an easy and quick means to test if a watchdog works. Guenter > --- > drivers/watchdog/watchdog_core.c | 3 + > drivers/watchdog/watchdog_dev.c | 113 +++++++++++++++++++++++++++++++ > include/linux/watchdog.h | 5 ++ > 3 files changed, 121 insertions(+) > > diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c > index 21e8085b848b..80149ac229fc 100644 > --- a/drivers/watchdog/watchdog_core.c > +++ b/drivers/watchdog/watchdog_core.c > @@ -216,6 +216,9 @@ static int __watchdog_register_device(struct watchdog_device *wdd) > return id; > wdd->id = id; > > + spin_lock_init(&wdd->readlock); > + init_waitqueue_head(&wdd->read_q); > + > ret = watchdog_dev_register(wdd); > if (ret) { > ida_simple_remove(&watchdog_ida, id); > diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c > index dbd2ad4c9294..8e8304607a8c 100644 > --- a/drivers/watchdog/watchdog_dev.c > +++ b/drivers/watchdog/watchdog_dev.c > @@ -44,6 +44,8 @@ > #include <linux/types.h> /* For standard types (like size_t) */ > #include <linux/watchdog.h> /* For watchdog specific items */ > #include <linux/uaccess.h> /* For copy_to_user/put_user/... */ > +#include <linux/poll.h> /* For poll_table/... */ > +#include <linux/sched/signal.h> /* For signal_pending */ > > #include <uapi/linux/sched/types.h> /* For struct sched_param */ > > @@ -929,12 +931,120 @@ static int watchdog_release(struct inode *inode, struct file *file) > return 0; > } > > +static ssize_t watchdog_read(struct file *file, > + char __user *buf, > + size_t count, > + loff_t *ppos) > +{ > + struct watchdog_core_data *wd_data = file->private_data; > + struct watchdog_device *wdd; > + int err = 0; > + wait_queue_entry_t wait; > + char dummy = 1; > + > + if (count <= 0) > + return 0; > + > + mutex_lock(&wd_data->lock); > + > + wdd = wd_data->wdd; > + if (!wdd) > + goto done; > + > + /* > + * Reading returns if the pretimeout has gone off, and it only does > + * it once per pretimeout. > + */ > + spin_lock_irq(&wdd->readlock); > + while (!wdd->data_to_read) { > + if (file->f_flags & O_NONBLOCK) { > + err = -EAGAIN; > + goto out; > + } > + > + init_waitqueue_entry(&wait, current); > + add_wait_queue(&wdd->read_q, &wait); > + set_current_state(TASK_INTERRUPTIBLE); > + spin_unlock_irq(&wdd->readlock); > + schedule(); > + spin_lock_irq(&wdd->readlock); > + remove_wait_queue(&wdd->read_q, &wait); > + > + if (signal_pending(current)) { > + err = -ERESTARTSYS; > + goto out; > + } > + } > + dummy = wdd->data_to_read; > + wdd->data_to_read = 0; > + > + out: > + spin_unlock_irq(&wdd->readlock); > + > + if (err == 0) { > + if (copy_to_user(buf, &dummy, 1)) > + err = -EFAULT; > + else > + err = 1; > + } > + > + done: > + mutex_unlock(&wd_data->lock); > + > + return err; > +} > + > +static __poll_t watchdog_poll(struct file *file, poll_table *wait) > +{ > + struct watchdog_core_data *wd_data = file->private_data; > + struct watchdog_device *wdd; > + __poll_t mask = 0; > + > + mutex_lock(&wd_data->lock); > + > + wdd = wd_data->wdd; > + if (!wdd) > + goto done; > + > + poll_wait(file, &wdd->read_q, wait); > + > + spin_lock_irq(&wdd->readlock); > + if (wdd->data_to_read) > + mask |= (EPOLLIN | EPOLLRDNORM); > + spin_unlock_irq(&wdd->readlock); > + > +done: > + mutex_unlock(&wd_data->lock); > + return mask; > +} > + > +static int watchdog_fasync(int fd, struct file *file, int on) > +{ > + struct watchdog_core_data *wd_data = file->private_data; > + struct watchdog_device *wdd; > + int err = -ENODEV; > + > + mutex_lock(&wd_data->lock); > + > + wdd = wd_data->wdd; > + if (!wdd) > + goto done; > + > + err = fasync_helper(fd, file, on, &wdd->fasync_q); > +done: > + mutex_unlock(&wd_data->lock); > + return err; > +} > + > static const struct file_operations watchdog_fops = { > .owner = THIS_MODULE, > .write = watchdog_write, > .unlocked_ioctl = watchdog_ioctl, > .open = watchdog_open, > .release = watchdog_release, > + .read = watchdog_read, > + .poll = watchdog_poll, > + .fasync = watchdog_fasync, > }; > > static struct miscdevice watchdog_miscdev = { > @@ -970,6 +1080,9 @@ static int watchdog_cdev_register(struct watchdog_device *wdd, dev_t devno) > if (IS_ERR_OR_NULL(watchdog_kworker)) > return -ENODEV; > > + spin_lock_init(&wdd->readlock); > + init_waitqueue_head(&wdd->read_q); > + > kthread_init_work(&wd_data->work, watchdog_ping_work); > hrtimer_init(&wd_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); > wd_data->timer.function = watchdog_timer_expired; > diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h > index 417d9f37077a..e34501a822f0 100644 > --- a/include/linux/watchdog.h > +++ b/include/linux/watchdog.h > @@ -117,6 +117,11 @@ struct watchdog_device { > #define WDOG_HW_RUNNING 3 /* True if HW watchdog running */ > #define WDOG_STOP_ON_UNREGISTER 4 /* Should be stopped on unregister */ > struct list_head deferred; > + > + spinlock_t readlock; > + bool data_to_read; > + struct wait_queue_head read_q; > + struct fasync_struct *fasync_q; > }; > > #define WATCHDOG_NOWAYOUT IS_BUILTIN(CONFIG_WATCHDOG_NOWAYOUT) > -- > 2.17.1 >
On Mon, Aug 19, 2019 at 03:43:45PM -0700, Guenter Roeck wrote: > On Mon, Aug 19, 2019 at 03:37:01PM -0500, minyard@acm.org wrote: > > From: Corey Minyard <cminyard@mvista.com> > > > > This is for the read data pretimeout governor. > > > > Signed-off-by: Corey Minyard <cminyard@mvista.com> > > On further thought, I think it would be a bad idea to add this > functionality: It changes the userspace ABI for accessing the watchdog > device. Today, when a watchdog device is opened, it does not provide > read data, it does not hang, and returns immediately. A "cat" from it > is an easy and quick means to test if a watchdog works. Umm, why would a "cat" from a watchdog tell you if a watchdog works? If you want to know if the device exists, I would think that "if [ -c /dev/watchdog ]..." would be a lot more clear and wouldn't print a useless error and wouldn't start the watchdog. That said, this is useful if a userland program wants to know if a pretimeout occurred. I'm not sure if anyone is using this with the IPMI watchdog, but to preserve the IPMI interface function, this will be needed. It might be better to set it to immediately return an error if the pretimeout governer is not one that reads data. I didn't think of that before and it will be better to do that. -corey > > Guenter > > > --- > > drivers/watchdog/watchdog_core.c | 3 + > > drivers/watchdog/watchdog_dev.c | 113 +++++++++++++++++++++++++++++++ > > include/linux/watchdog.h | 5 ++ > > 3 files changed, 121 insertions(+) > > > > diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c > > index 21e8085b848b..80149ac229fc 100644 > > --- a/drivers/watchdog/watchdog_core.c > > +++ b/drivers/watchdog/watchdog_core.c > > @@ -216,6 +216,9 @@ static int __watchdog_register_device(struct watchdog_device *wdd) > > return id; > > wdd->id = id; > > > > + spin_lock_init(&wdd->readlock); > > + init_waitqueue_head(&wdd->read_q); > > + > > ret = watchdog_dev_register(wdd); > > if (ret) { > > ida_simple_remove(&watchdog_ida, id); > > diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c > > index dbd2ad4c9294..8e8304607a8c 100644 > > --- a/drivers/watchdog/watchdog_dev.c > > +++ b/drivers/watchdog/watchdog_dev.c > > @@ -44,6 +44,8 @@ > > #include <linux/types.h> /* For standard types (like size_t) */ > > #include <linux/watchdog.h> /* For watchdog specific items */ > > #include <linux/uaccess.h> /* For copy_to_user/put_user/... */ > > +#include <linux/poll.h> /* For poll_table/... */ > > +#include <linux/sched/signal.h> /* For signal_pending */ > > > > #include <uapi/linux/sched/types.h> /* For struct sched_param */ > > > > @@ -929,12 +931,120 @@ static int watchdog_release(struct inode *inode, struct file *file) > > return 0; > > } > > > > +static ssize_t watchdog_read(struct file *file, > > + char __user *buf, > > + size_t count, > > + loff_t *ppos) > > +{ > > + struct watchdog_core_data *wd_data = file->private_data; > > + struct watchdog_device *wdd; > > + int err = 0; > > + wait_queue_entry_t wait; > > + char dummy = 1; > > + > > + if (count <= 0) > > + return 0; > > + > > + mutex_lock(&wd_data->lock); > > + > > + wdd = wd_data->wdd; > > + if (!wdd) > > + goto done; > > + > > + /* > > + * Reading returns if the pretimeout has gone off, and it only does > > + * it once per pretimeout. > > + */ > > + spin_lock_irq(&wdd->readlock); > > + while (!wdd->data_to_read) { > > + if (file->f_flags & O_NONBLOCK) { > > + err = -EAGAIN; > > + goto out; > > + } > > + > > + init_waitqueue_entry(&wait, current); > > + add_wait_queue(&wdd->read_q, &wait); > > + set_current_state(TASK_INTERRUPTIBLE); > > + spin_unlock_irq(&wdd->readlock); > > + schedule(); > > + spin_lock_irq(&wdd->readlock); > > + remove_wait_queue(&wdd->read_q, &wait); > > + > > + if (signal_pending(current)) { > > + err = -ERESTARTSYS; > > + goto out; > > + } > > + } > > + dummy = wdd->data_to_read; > > + wdd->data_to_read = 0; > > + > > + out: > > + spin_unlock_irq(&wdd->readlock); > > + > > + if (err == 0) { > > + if (copy_to_user(buf, &dummy, 1)) > > + err = -EFAULT; > > + else > > + err = 1; > > + } > > + > > + done: > > + mutex_unlock(&wd_data->lock); > > + > > + return err; > > +} > > + > > +static __poll_t watchdog_poll(struct file *file, poll_table *wait) > > +{ > > + struct watchdog_core_data *wd_data = file->private_data; > > + struct watchdog_device *wdd; > > + __poll_t mask = 0; > > + > > + mutex_lock(&wd_data->lock); > > + > > + wdd = wd_data->wdd; > > + if (!wdd) > > + goto done; > > + > > + poll_wait(file, &wdd->read_q, wait); > > + > > + spin_lock_irq(&wdd->readlock); > > + if (wdd->data_to_read) > > + mask |= (EPOLLIN | EPOLLRDNORM); > > + spin_unlock_irq(&wdd->readlock); > > + > > +done: > > + mutex_unlock(&wd_data->lock); > > + return mask; > > +} > > + > > +static int watchdog_fasync(int fd, struct file *file, int on) > > +{ > > + struct watchdog_core_data *wd_data = file->private_data; > > + struct watchdog_device *wdd; > > + int err = -ENODEV; > > + > > + mutex_lock(&wd_data->lock); > > + > > + wdd = wd_data->wdd; > > + if (!wdd) > > + goto done; > > + > > + err = fasync_helper(fd, file, on, &wdd->fasync_q); > > +done: > > + mutex_unlock(&wd_data->lock); > > + return err; > > +} > > + > > static const struct file_operations watchdog_fops = { > > .owner = THIS_MODULE, > > .write = watchdog_write, > > .unlocked_ioctl = watchdog_ioctl, > > .open = watchdog_open, > > .release = watchdog_release, > > + .read = watchdog_read, > > + .poll = watchdog_poll, > > + .fasync = watchdog_fasync, > > }; > > > > static struct miscdevice watchdog_miscdev = { > > @@ -970,6 +1080,9 @@ static int watchdog_cdev_register(struct watchdog_device *wdd, dev_t devno) > > if (IS_ERR_OR_NULL(watchdog_kworker)) > > return -ENODEV; > > > > + spin_lock_init(&wdd->readlock); > > + init_waitqueue_head(&wdd->read_q); > > + > > kthread_init_work(&wd_data->work, watchdog_ping_work); > > hrtimer_init(&wd_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); > > wd_data->timer.function = watchdog_timer_expired; > > diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h > > index 417d9f37077a..e34501a822f0 100644 > > --- a/include/linux/watchdog.h > > +++ b/include/linux/watchdog.h > > @@ -117,6 +117,11 @@ struct watchdog_device { > > #define WDOG_HW_RUNNING 3 /* True if HW watchdog running */ > > #define WDOG_STOP_ON_UNREGISTER 4 /* Should be stopped on unregister */ > > struct list_head deferred; > > + > > + spinlock_t readlock; > > + bool data_to_read; > > + struct wait_queue_head read_q; > > + struct fasync_struct *fasync_q; > > }; > > > > #define WATCHDOG_NOWAYOUT IS_BUILTIN(CONFIG_WATCHDOG_NOWAYOUT) > > -- > > 2.17.1 > >
On Mon, Aug 19, 2019 at 07:23:09PM -0500, Corey Minyard wrote: > On Mon, Aug 19, 2019 at 03:43:45PM -0700, Guenter Roeck wrote: > > On Mon, Aug 19, 2019 at 03:37:01PM -0500, minyard@acm.org wrote: > > > From: Corey Minyard <cminyard@mvista.com> > > > > > > This is for the read data pretimeout governor. > > > > > > Signed-off-by: Corey Minyard <cminyard@mvista.com> > > > > On further thought, I think it would be a bad idea to add this > > functionality: It changes the userspace ABI for accessing the watchdog > > device. Today, when a watchdog device is opened, it does not provide > > read data, it does not hang, and returns immediately. A "cat" from it > > is an easy and quick means to test if a watchdog works. > > Umm, why would a "cat" from a watchdog tell you if a watchdog works? cat /dev/watchdog starts the watchdog running. Then one can do useful things like monitor /sys/class/watchdog/watchdogN and see time ticking down, etc.., echo V > /dev/watchdog stops the watchdog assuming driver supports WDIOF_MAGICCLOSE. So I can test without having to reboot. One can't test magic close with the proposed change as /dev/watchdog is exclusive open.
On Mon, Aug 19, 2019 at 07:09:46PM -0600, Jerry Hoemann wrote: > On Mon, Aug 19, 2019 at 07:23:09PM -0500, Corey Minyard wrote: > > On Mon, Aug 19, 2019 at 03:43:45PM -0700, Guenter Roeck wrote: > > > On Mon, Aug 19, 2019 at 03:37:01PM -0500, minyard@acm.org wrote: > > > > From: Corey Minyard <cminyard@mvista.com> > > > > > > > > This is for the read data pretimeout governor. > > > > > > > > Signed-off-by: Corey Minyard <cminyard@mvista.com> > > > > > > On further thought, I think it would be a bad idea to add this > > > functionality: It changes the userspace ABI for accessing the watchdog > > > device. Today, when a watchdog device is opened, it does not provide > > > read data, it does not hang, and returns immediately. A "cat" from it > > > is an easy and quick means to test if a watchdog works. > > > > Umm, why would a "cat" from a watchdog tell you if a watchdog works? > > cat /dev/watchdog starts the watchdog running. > > Then one can do useful things like monitor /sys/class/watchdog/watchdogN and see > time ticking down, etc.., > > echo V > /dev/watchdog stops the watchdog assuming driver supports WDIOF_MAGICCLOSE. > > So I can test without having to reboot. > > One can't test magic close with the proposed change as /dev/watchdog > is exclusive open. Sure you can: # echo "" >/dev/watchdog0 [ 92.390649] watchdog: watchdog0: watchdog did not stop! # sleep 2 # cat /sys/class/watchdog/watchdog0/timeleft 8 # echo "V" >/dev/watchdog0 Works just fine. But I can make it so that reading returns an error unless the governor is the read one. The question is if this is required to transfer the IPMI watchdog over to the standard interface. It currently has this function, do we do an API change to move it over? -corey > > > > > -- > > ----------------------------------------------------------------------------- > Jerry Hoemann Software Engineer Hewlett Packard Enterprise > -----------------------------------------------------------------------------
On 8/20/19 5:12 AM, Corey Minyard wrote: > On Mon, Aug 19, 2019 at 07:09:46PM -0600, Jerry Hoemann wrote: >> On Mon, Aug 19, 2019 at 07:23:09PM -0500, Corey Minyard wrote: >>> On Mon, Aug 19, 2019 at 03:43:45PM -0700, Guenter Roeck wrote: >>>> On Mon, Aug 19, 2019 at 03:37:01PM -0500, minyard@acm.org wrote: >>>>> From: Corey Minyard <cminyard@mvista.com> >>>>> >>>>> This is for the read data pretimeout governor. >>>>> >>>>> Signed-off-by: Corey Minyard <cminyard@mvista.com> >>>> >>>> On further thought, I think it would be a bad idea to add this >>>> functionality: It changes the userspace ABI for accessing the watchdog >>>> device. Today, when a watchdog device is opened, it does not provide >>>> read data, it does not hang, and returns immediately. A "cat" from it >>>> is an easy and quick means to test if a watchdog works. >>> >>> Umm, why would a "cat" from a watchdog tell you if a watchdog works? >> >> cat /dev/watchdog starts the watchdog running. >> >> Then one can do useful things like monitor /sys/class/watchdog/watchdogN and see >> time ticking down, etc.., >> >> echo V > /dev/watchdog stops the watchdog assuming driver supports WDIOF_MAGICCLOSE. >> >> So I can test without having to reboot. >> >> One can't test magic close with the proposed change as /dev/watchdog >> is exclusive open. > > Sure you can: > > # echo "" >/dev/watchdog0 > [ 92.390649] watchdog: watchdog0: watchdog did not stop! > # sleep 2 > # cat /sys/class/watchdog/watchdog0/timeleft > 8 > # echo "V" >/dev/watchdog0 > > Works just fine. But I can make it so that reading returns an error > unless the governor is the read one. > > The question is if this is required to transfer the IPMI watchdog > over to the standard interface. It currently has this function, > do we do an API change to move it over? > Having to change the standard watchdog API to accommodate a non-standard driver is most definitely not the right approach. If it was, anyone could use it to force standard API/ABI changes. Just implement driver X outside its subsystem and then claim you need to change the subsystem to accommodate it. On a side note, a standard watchdog driver can implement its own ioctl functions. Guenter > -corey > >> >> >> >> >> -- >> >> ----------------------------------------------------------------------------- >> Jerry Hoemann Software Engineer Hewlett Packard Enterprise >> ----------------------------------------------------------------------------- >
On Tue, Aug 20, 2019 at 06:53:40AM -0700, Guenter Roeck wrote: > On 8/20/19 5:12 AM, Corey Minyard wrote: > > On Mon, Aug 19, 2019 at 07:09:46PM -0600, Jerry Hoemann wrote: > > > On Mon, Aug 19, 2019 at 07:23:09PM -0500, Corey Minyard wrote: > > > > On Mon, Aug 19, 2019 at 03:43:45PM -0700, Guenter Roeck wrote: > > > > > On Mon, Aug 19, 2019 at 03:37:01PM -0500, minyard@acm.org wrote: > > > > > > From: Corey Minyard <cminyard@mvista.com> > > > > > > > > > > > > This is for the read data pretimeout governor. > > > > > > > > > > > > Signed-off-by: Corey Minyard <cminyard@mvista.com> > > > > > > > > > > On further thought, I think it would be a bad idea to add this > > > > > functionality: It changes the userspace ABI for accessing the watchdog > > > > > device. Today, when a watchdog device is opened, it does not provide > > > > > read data, it does not hang, and returns immediately. A "cat" from it > > > > > is an easy and quick means to test if a watchdog works. > > > > > > > > Umm, why would a "cat" from a watchdog tell you if a watchdog works? > > > > > > cat /dev/watchdog starts the watchdog running. > > > > > > Then one can do useful things like monitor /sys/class/watchdog/watchdogN and see > > > time ticking down, etc.., > > > > > > echo V > /dev/watchdog stops the watchdog assuming driver supports WDIOF_MAGICCLOSE. > > > > > > So I can test without having to reboot. > > > > > > One can't test magic close with the proposed change as /dev/watchdog > > > is exclusive open. > > > > Sure you can: > > > > # echo "" >/dev/watchdog0 > > [ 92.390649] watchdog: watchdog0: watchdog did not stop! > > # sleep 2 > > # cat /sys/class/watchdog/watchdog0/timeleft > > 8 > > # echo "V" >/dev/watchdog0 > > > > Works just fine. But I can make it so that reading returns an error > > unless the governor is the read one. > > > > The question is if this is required to transfer the IPMI watchdog > > over to the standard interface. It currently has this function, > > do we do an API change to move it over? > > > Having to change the standard watchdog API to accommodate a non-standard driver > is most definitely not the right approach. If it was, anyone could use it to > force standard API/ABI changes. Just implement driver X outside its subsystem > and then claim you need to change the subsystem to accommodate it. I'm not advocating anything of the sort. I think it can be done in a way that keeps the API the same unless you enable a new pretimeout governor. I would not suggest that the API be changed, and I should have handled that in the original design. > > On a side note, a standard watchdog driver can implement its own ioctl functions. I am aware of that, but you can't provide read data on a file descriptor through that interface. The actions and preactions could be done that way, but that seemed a more general function that could benefit other drivers. The function to provide read data might be useful, I don't know, but it could be used with any driver that did a normal interrupt pretimeout. I can't remember why it was originally done. I vaguely remember someone asking for it, but that was 17 years ago. It could just be left out and added back if someone complains. That's not very friendly since it's an API change, but then we would know if anyone was using it. -corey > > Guenter > > > -corey > > > > > > > > > > > > > > > > > -- > > > > > > ----------------------------------------------------------------------------- > > > Jerry Hoemann Software Engineer Hewlett Packard Enterprise > > > ----------------------------------------------------------------------------- > > >
On 8/20/19 8:58 AM, Corey Minyard wrote: > On Tue, Aug 20, 2019 at 06:53:40AM -0700, Guenter Roeck wrote: >> On 8/20/19 5:12 AM, Corey Minyard wrote: >>> On Mon, Aug 19, 2019 at 07:09:46PM -0600, Jerry Hoemann wrote: >>>> On Mon, Aug 19, 2019 at 07:23:09PM -0500, Corey Minyard wrote: >>>>> On Mon, Aug 19, 2019 at 03:43:45PM -0700, Guenter Roeck wrote: >>>>>> On Mon, Aug 19, 2019 at 03:37:01PM -0500, minyard@acm.org wrote: >>>>>>> From: Corey Minyard <cminyard@mvista.com> >>>>>>> >>>>>>> This is for the read data pretimeout governor. >>>>>>> >>>>>>> Signed-off-by: Corey Minyard <cminyard@mvista.com> >>>>>> >>>>>> On further thought, I think it would be a bad idea to add this >>>>>> functionality: It changes the userspace ABI for accessing the watchdog >>>>>> device. Today, when a watchdog device is opened, it does not provide >>>>>> read data, it does not hang, and returns immediately. A "cat" from it >>>>>> is an easy and quick means to test if a watchdog works. >>>>> >>>>> Umm, why would a "cat" from a watchdog tell you if a watchdog works? >>>> >>>> cat /dev/watchdog starts the watchdog running. >>>> >>>> Then one can do useful things like monitor /sys/class/watchdog/watchdogN and see >>>> time ticking down, etc.., >>>> >>>> echo V > /dev/watchdog stops the watchdog assuming driver supports WDIOF_MAGICCLOSE. >>>> >>>> So I can test without having to reboot. >>>> >>>> One can't test magic close with the proposed change as /dev/watchdog >>>> is exclusive open. >>> >>> Sure you can: >>> >>> # echo "" >/dev/watchdog0 >>> [ 92.390649] watchdog: watchdog0: watchdog did not stop! >>> # sleep 2 >>> # cat /sys/class/watchdog/watchdog0/timeleft >>> 8 >>> # echo "V" >/dev/watchdog0 >>> >>> Works just fine. But I can make it so that reading returns an error >>> unless the governor is the read one. >>> >>> The question is if this is required to transfer the IPMI watchdog >>> over to the standard interface. It currently has this function, >>> do we do an API change to move it over? >>> >> Having to change the standard watchdog API to accommodate a non-standard driver >> is most definitely not the right approach. If it was, anyone could use it to >> force standard API/ABI changes. Just implement driver X outside its subsystem >> and then claim you need to change the subsystem to accommodate it. > > I'm not advocating anything of the sort. I think it can be done in > a way that keeps the API the same unless you enable a new pretimeout > governor. I would not suggest that the API be changed, and I should > have handled that in the original design. > >> >> On a side note, a standard watchdog driver can implement its own ioctl functions. > > I am aware of that, but you can't provide read data on a file descriptor > through that interface. The actions and preactions could be done that > way, but that seemed a more general function that could benefit other > drivers. > That comment was more directed towards the ioctls you are adding to the watchdog core, which I think would require more discussion. > The function to provide read data might be useful, I don't know, but > it could be used with any driver that did a normal interrupt pretimeout. > I can't remember why it was originally done. I vaguely remember someone > asking for it, but that was 17 years ago. > I find it odd. Only one driver can have the watchdog device open, and that open file should be used to ping the watchdog. It can't do that while waiting for a pretimeout. This almost sounds like the user wrote an application which waited for the pretimeout to happen before pinging the watchdog. Talking about a standardized ABI to inform userspace if a pretimeout occurred... if there is a use case for this, I'd rather trigger a udev event on the "pretimeout" sysfs attribute. That would make much more sense to me than sending a random data byte to the "read" function. The WDIOC_GETSTATUS ioctl could then report that a pretimeout occurred. Or, depending on the use case, just report the fact that a pretimeout occurred with WDIOC_GETSTATUS. That would be really simple to add, and I would support it. > It could just be left out and added back if someone complains. That's > not very friendly since it's an API change, but then we would know if > anyone was using it. It is still better than an API change for standard watchdog drivers, and a somewhat awkward interface to inform userspace about pretimeout events. Thanks, Guenter > > -corey > >> >> Guenter >> >>> -corey >>> >>>> >>>> >>>> >>>> >>>> -- >>>> >>>> ----------------------------------------------------------------------------- >>>> Jerry Hoemann Software Engineer Hewlett Packard Enterprise >>>> ----------------------------------------------------------------------------- >>> >> >
On Tue, Aug 20, 2019 at 10:14:50AM -0700, Guenter Roeck wrote: > On 8/20/19 8:58 AM, Corey Minyard wrote: > > On Tue, Aug 20, 2019 at 06:53:40AM -0700, Guenter Roeck wrote: > > > On 8/20/19 5:12 AM, Corey Minyard wrote: > > > > On Mon, Aug 19, 2019 at 07:09:46PM -0600, Jerry Hoemann wrote: > > > > > On Mon, Aug 19, 2019 at 07:23:09PM -0500, Corey Minyard wrote: > > > > > > On Mon, Aug 19, 2019 at 03:43:45PM -0700, Guenter Roeck wrote: > > > > > > > On Mon, Aug 19, 2019 at 03:37:01PM -0500, minyard@acm.org wrote: > > > > > > > > From: Corey Minyard <cminyard@mvista.com> > > > > > > > > > > > > > > > > This is for the read data pretimeout governor. > > > > > > > > > > > > > > > > Signed-off-by: Corey Minyard <cminyard@mvista.com> > > > > > > > > > > > > > > On further thought, I think it would be a bad idea to add this > > > > > > > functionality: It changes the userspace ABI for accessing the watchdog > > > > > > > device. Today, when a watchdog device is opened, it does not provide > > > > > > > read data, it does not hang, and returns immediately. A "cat" from it > > > > > > > is an easy and quick means to test if a watchdog works. > > > > > > > > > > > > Umm, why would a "cat" from a watchdog tell you if a watchdog works? > > > > > > > > > > cat /dev/watchdog starts the watchdog running. > > > > > > > > > > Then one can do useful things like monitor /sys/class/watchdog/watchdogN and see > > > > > time ticking down, etc.., > > > > > > > > > > echo V > /dev/watchdog stops the watchdog assuming driver supports WDIOF_MAGICCLOSE. > > > > > > > > > > So I can test without having to reboot. > > > > > > > > > > One can't test magic close with the proposed change as /dev/watchdog > > > > > is exclusive open. > > > > > > > > Sure you can: > > > > > > > > # echo "" >/dev/watchdog0 > > > > [ 92.390649] watchdog: watchdog0: watchdog did not stop! > > > > # sleep 2 > > > > # cat /sys/class/watchdog/watchdog0/timeleft > > > > 8 > > > > # echo "V" >/dev/watchdog0 > > > > > > > > Works just fine. But I can make it so that reading returns an error > > > > unless the governor is the read one. > > > > > > > > The question is if this is required to transfer the IPMI watchdog > > > > over to the standard interface. It currently has this function, > > > > do we do an API change to move it over? > > > > > > > Having to change the standard watchdog API to accommodate a non-standard driver > > > is most definitely not the right approach. If it was, anyone could use it to > > > force standard API/ABI changes. Just implement driver X outside its subsystem > > > and then claim you need to change the subsystem to accommodate it. > > > > I'm not advocating anything of the sort. I think it can be done in > > a way that keeps the API the same unless you enable a new pretimeout > > governor. I would not suggest that the API be changed, and I should > > have handled that in the original design. > > > > > > > > On a side note, a standard watchdog driver can implement its own ioctl functions. > > > > I am aware of that, but you can't provide read data on a file descriptor > > through that interface. The actions and preactions could be done that > > way, but that seemed a more general function that could benefit other > > drivers. > > > That comment was more directed towards the ioctls you are adding to the > watchdog core, which I think would require more discussion. Ok, that's kind of a separate discussion. We should probably have it on that email. > > > The function to provide read data might be useful, I don't know, but > > it could be used with any driver that did a normal interrupt pretimeout. > > I can't remember why it was originally done. I vaguely remember someone > > asking for it, but that was 17 years ago. > > > > I find it odd. Only one driver can have the watchdog device open, > and that open file should be used to ping the watchdog. It can't do that > while waiting for a pretimeout. This almost sounds like the user wrote > an application which waited for the pretimeout to happen before pinging > the watchdog. No, poll() is also implemented, so you can wait for poll input and also wait for a timeout. And fasync is also implemented. And you can have multiple threads in the same program, one for pinging and one for reading. Single open does not mean single thread through the driver. > > Talking about a standardized ABI to inform userspace if a pretimeout > occurred... if there is a use case for this, I'd rather trigger a udev > event on the "pretimeout" sysfs attribute. That would make much more > sense to me than sending a random data byte to the "read" function. > The WDIOC_GETSTATUS ioctl could then report that a pretimeout occurred. > Or, depending on the use case, just report the fact that a pretimeout > occurred with WDIOC_GETSTATUS. That would be really simple to add, > and I would support it. I'd agree on the udev event, but none of that existed when this was originally written. I'd prefer to not implement interfaces that require periodic polling. To me the only sensible thing to do on a pretimeout is to panic. But people come up with all kinds of wild things, and I almost certainly had some sort of external impetus to add this. > > > It could just be left out and added back if someone complains. That's > > not very friendly since it's an API change, but then we would know if > > anyone was using it. > > It is still better than an API change for standard watchdog drivers, > and a somewhat awkward interface to inform userspace about pretimeout > events. Like I said before, I think we can avoid an API change. It's not an API change if you add a new function that has to be enabled separately that then makes it work differently. Othersize you could never add any new function. And one can argue whether it's an API change, really, as API changes only affect non-erroneous uses of an API. Whether reading from a write-only device is erroneous is questionable in my mind ;-). But I'm fine with leaving it out, then we can discuss how to add something back in if someone complains. That's probably the best way forward. I separated it out so those changes can be dropped without affecting anything else. And if someone complains, we would get a real user who would know why they are using it. Thanks, -corey > > Thanks, > Guenter > > > > > -corey > > > > > > > > Guenter > > > > > > > -corey > > > > > > > > > > > > > > > > > > > > > > > > > > > > > -- > > > > > > > > > > ----------------------------------------------------------------------------- > > > > > Jerry Hoemann Software Engineer Hewlett Packard Enterprise > > > > > ----------------------------------------------------------------------------- > > > > > > > > > >
diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index 21e8085b848b..80149ac229fc 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -216,6 +216,9 @@ static int __watchdog_register_device(struct watchdog_device *wdd) return id; wdd->id = id; + spin_lock_init(&wdd->readlock); + init_waitqueue_head(&wdd->read_q); + ret = watchdog_dev_register(wdd); if (ret) { ida_simple_remove(&watchdog_ida, id); diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index dbd2ad4c9294..8e8304607a8c 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -44,6 +44,8 @@ #include <linux/types.h> /* For standard types (like size_t) */ #include <linux/watchdog.h> /* For watchdog specific items */ #include <linux/uaccess.h> /* For copy_to_user/put_user/... */ +#include <linux/poll.h> /* For poll_table/... */ +#include <linux/sched/signal.h> /* For signal_pending */ #include <uapi/linux/sched/types.h> /* For struct sched_param */ @@ -929,12 +931,120 @@ static int watchdog_release(struct inode *inode, struct file *file) return 0; } +static ssize_t watchdog_read(struct file *file, + char __user *buf, + size_t count, + loff_t *ppos) +{ + struct watchdog_core_data *wd_data = file->private_data; + struct watchdog_device *wdd; + int err = 0; + wait_queue_entry_t wait; + char dummy = 1; + + if (count <= 0) + return 0; + + mutex_lock(&wd_data->lock); + + wdd = wd_data->wdd; + if (!wdd) + goto done; + + /* + * Reading returns if the pretimeout has gone off, and it only does + * it once per pretimeout. + */ + spin_lock_irq(&wdd->readlock); + while (!wdd->data_to_read) { + if (file->f_flags & O_NONBLOCK) { + err = -EAGAIN; + goto out; + } + + init_waitqueue_entry(&wait, current); + add_wait_queue(&wdd->read_q, &wait); + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_irq(&wdd->readlock); + schedule(); + spin_lock_irq(&wdd->readlock); + remove_wait_queue(&wdd->read_q, &wait); + + if (signal_pending(current)) { + err = -ERESTARTSYS; + goto out; + } + } + dummy = wdd->data_to_read; + wdd->data_to_read = 0; + + out: + spin_unlock_irq(&wdd->readlock); + + if (err == 0) { + if (copy_to_user(buf, &dummy, 1)) + err = -EFAULT; + else + err = 1; + } + + done: + mutex_unlock(&wd_data->lock); + + return err; +} + +static __poll_t watchdog_poll(struct file *file, poll_table *wait) +{ + struct watchdog_core_data *wd_data = file->private_data; + struct watchdog_device *wdd; + __poll_t mask = 0; + + mutex_lock(&wd_data->lock); + + wdd = wd_data->wdd; + if (!wdd) + goto done; + + poll_wait(file, &wdd->read_q, wait); + + spin_lock_irq(&wdd->readlock); + if (wdd->data_to_read) + mask |= (EPOLLIN | EPOLLRDNORM); + spin_unlock_irq(&wdd->readlock); + +done: + mutex_unlock(&wd_data->lock); + return mask; +} + +static int watchdog_fasync(int fd, struct file *file, int on) +{ + struct watchdog_core_data *wd_data = file->private_data; + struct watchdog_device *wdd; + int err = -ENODEV; + + mutex_lock(&wd_data->lock); + + wdd = wd_data->wdd; + if (!wdd) + goto done; + + err = fasync_helper(fd, file, on, &wdd->fasync_q); +done: + mutex_unlock(&wd_data->lock); + return err; +} + static const struct file_operations watchdog_fops = { .owner = THIS_MODULE, .write = watchdog_write, .unlocked_ioctl = watchdog_ioctl, .open = watchdog_open, .release = watchdog_release, + .read = watchdog_read, + .poll = watchdog_poll, + .fasync = watchdog_fasync, }; static struct miscdevice watchdog_miscdev = { @@ -970,6 +1080,9 @@ static int watchdog_cdev_register(struct watchdog_device *wdd, dev_t devno) if (IS_ERR_OR_NULL(watchdog_kworker)) return -ENODEV; + spin_lock_init(&wdd->readlock); + init_waitqueue_head(&wdd->read_q); + kthread_init_work(&wd_data->work, watchdog_ping_work); hrtimer_init(&wd_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); wd_data->timer.function = watchdog_timer_expired; diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 417d9f37077a..e34501a822f0 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -117,6 +117,11 @@ struct watchdog_device { #define WDOG_HW_RUNNING 3 /* True if HW watchdog running */ #define WDOG_STOP_ON_UNREGISTER 4 /* Should be stopped on unregister */ struct list_head deferred; + + spinlock_t readlock; + bool data_to_read; + struct wait_queue_head read_q; + struct fasync_struct *fasync_q; }; #define WATCHDOG_NOWAYOUT IS_BUILTIN(CONFIG_WATCHDOG_NOWAYOUT)