diff mbox series

drivers: watchdog: Add ChromeOS EC watchdog

Message ID 20240117102450.4080839-1-lma@chromium.org (mailing list archive)
State New
Headers show
Series drivers: watchdog: Add ChromeOS EC watchdog | expand

Commit Message

Łukasz Majczak Jan. 17, 2024, 10:24 a.m. UTC
This adds EC-based watchdog support for ChromeOS
based devices equipped with embedded controller (EC).

Signed-off-by: Lukasz Majczak <lma@chromium.org>
---
 MAINTAINERS                                   |   6 +
 drivers/mfd/cros_ec_dev.c                     |   9 +
 drivers/watchdog/Kconfig                      |  15 +
 drivers/watchdog/Makefile                     |   3 +
 drivers/watchdog/cros_ec_wdt.c                | 303 ++++++++++++++++++
 .../linux/platform_data/cros_ec_commands.h    |  78 ++---
 6 files changed, 370 insertions(+), 44 deletions(-)
 create mode 100644 drivers/watchdog/cros_ec_wdt.c

Comments

Krzysztof Kozlowski Jan. 17, 2024, 3:27 p.m. UTC | #1
On 17/01/2024 11:24, Lukasz Majczak wrote:
> This adds EC-based watchdog support for ChromeOS
> based devices equipped with embedded controller (EC).
> 
> Signed-off-by: Lukasz Majczak <lma@chromium.org>
> ---
>  MAINTAINERS                                   |   6 +
>  drivers/mfd/cros_ec_dev.c                     |   9 +
>  drivers/watchdog/Kconfig                      |  15 +
>  drivers/watchdog/Makefile                     |   3 +
>  drivers/watchdog/cros_ec_wdt.c                | 303 ++++++++++++++++++
>  .../linux/platform_data/cros_ec_commands.h    |  78 ++---
>  6 files changed, 370 insertions(+), 44 deletions(-)
>  create mode 100644 drivers/watchdog/cros_ec_wdt.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 391bbb855cbe..55cd626a525f 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -4952,6 +4952,12 @@ R:	Sami Kyöstilä <skyostil@chromium.org>
>  S:	Maintained
>  F:	drivers/platform/chrome/cros_hps_i2c.c
>  
> +CHROMEOS EC WATCHDOG
> +M:	Lukasz Majczak <lma@chromium.org>
> +L:	chrome-platform@lists.linux.dev
> +S:	Maintained
> +F:	drivers/watchdog/cros_ec_wdt.c
> +
>  CHRONTEL CH7322 CEC DRIVER
>  M:	Joe Tessler <jrt@google.com>
>  L:	linux-media@vger.kernel.org
> diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
> index 79d393b602bf..60fe831cf30a 100644
> --- a/drivers/mfd/cros_ec_dev.c
> +++ b/drivers/mfd/cros_ec_dev.c
> @@ -91,6 +91,10 @@ static const struct mfd_cell cros_usbpd_notify_cells[] = {
>  	{ .name = "cros-usbpd-notify", },
>  };
>  
> +static const struct mfd_cell cros_ec_wdt_cells[] = {
> +	{ .name = "cros-ec-wdt-drv", }
> +};
> +
>  static const struct cros_feature_to_cells cros_subdevices[] = {
>  	{
>  		.id		= EC_FEATURE_CEC,
> @@ -107,6 +111,11 @@ static const struct cros_feature_to_cells cros_subdevices[] = {
>  		.mfd_cells	= cros_usbpd_charger_cells,
>  		.num_cells	= ARRAY_SIZE(cros_usbpd_charger_cells),
>  	},
> +	{
> +		.id		= EC_FEATURE_HANG_DETECT,
> +		.mfd_cells	= cros_ec_wdt_cells,
> +		.num_cells	= ARRAY_SIZE(cros_ec_wdt_cells),
> +	},
>  };
>  
>  static const struct mfd_cell cros_ec_platform_cells[] = {
> diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
> index 7d22051b15a2..1da4be661be8 100644
> --- a/drivers/watchdog/Kconfig
> +++ b/drivers/watchdog/Kconfig
> @@ -2251,4 +2251,19 @@ config KEEMBAY_WATCHDOG
>  	  To compile this driver as a module, choose M here: the
>  	  module will be called keembay_wdt.
>  
> +#
> +# ChromeOS EC-based Watchdog
> +#

Drop comment, useless, copies what's below.

> +
> +config CROS_EC_WATCHDOG
> +	tristate "ChromeOS EC-based watchdog driver"
> +	select WATCHDOG_CORE
> +	depends on CROS_EC
> +	help
> +	  This is the watchdog driver for ChromeOS devices equipped with EC.
> +	  The EC watchdog - when enabled - expects to be kicked within a given
> +	  time (default set to 30 seconds) otherwise it will simple reboot
> +	  the AP. Priori to that it can also send an event (configurable timeout)
> +	  about upcoming reboot.

Instead you could say what will be the name of the module.

> +
>  endif # WATCHDOG
> diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
> index 7cbc34514ec1..8295c209ddb0 100644
> --- a/drivers/watchdog/Makefile
> +++ b/drivers/watchdog/Makefile
> @@ -234,3 +234,6 @@ obj-$(CONFIG_MENZ069_WATCHDOG) += menz69_wdt.o
>  obj-$(CONFIG_RAVE_SP_WATCHDOG) += rave-sp-wdt.o
>  obj-$(CONFIG_STPMIC1_WATCHDOG) += stpmic1_wdt.o
>  obj-$(CONFIG_SL28CPLD_WATCHDOG) += sl28cpld_wdt.o
> +
> +# Cros EC watchdog

Drop comment.

Also, are you sure you placed it in appropriate place, not just at the
end of both files?

> +obj-$(CONFIG_CROS_EC_WATCHDOG) += cros_ec_wdt.o
> diff --git a/drivers/watchdog/cros_ec_wdt.c b/drivers/watchdog/cros_ec_wdt.c
> new file mode 100644
> index 000000000000..b461c0613269
> --- /dev/null
> +++ b/drivers/watchdog/cros_ec_wdt.c
> @@ -0,0 +1,303 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +#include <linux/slab.h>
> +#include <linux/err.h>
> +#include <linux/of_device.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/platform_data/cros_ec_commands.h>
> +#include <linux/platform_data/cros_ec_proto.h>
> +#include <linux/platform_device.h>
> +#include <linux/watchdog.h>
> +#include <linux/uaccess.h>
> +
> +#define CROS_EC_WATCHDOG_DEFAULT_TIME 30 /* seconds */
> +
> +#define DEV_NAME "cros-ec-wdt-dev"

Drop unused defines.

> +#define DRV_NAME "cros-ec-wdt-drv"
> +
> +static int cros_ec_wdt_ping(struct watchdog_device *wdd);
> +static int cros_ec_wdt_start(struct watchdog_device *wdd);
> +static int cros_ec_wdt_stop(struct watchdog_device *wdd);
> +static int cros_ec_wdt_set_timeout(struct watchdog_device *wdd, unsigned int t);
> +

...

> +
> +static int cros_ec_wdt_probe(struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	/* We need to get a reference to cros_ec_devices
> +	 * (provides communication layer) which is a parent of
> +	 * the cros-ec-dev (our parent)
> +	 */
> +	struct cros_ec_device *cros_ec = dev_get_drvdata(dev->parent->parent);
> +	int ret = 0;
> +	uint32_t bootstatus;
> +
> +	if (!cros_ec) {
> +		ret = -ENODEV;
> +		dev_err_probe(dev, ret, "There is no coresponding EC device!");

Syntax is return dev_err_probe

> +		return ret;
> +	}
> +
> +	cros_ec_wdd.parent = &pdev->dev;
> +	wd_data.cros_ec = cros_ec;
> +	wd_data.wdd = &cros_ec_wdd;
> +	wd_data.start_on_resume = false;
> +	wd_data.keepalive_on = false;
> +	wd_data.wdd->timeout = CROS_EC_WATCHDOG_DEFAULT_TIME;
> +
> +	watchdog_stop_on_reboot(&cros_ec_wdd);
> +	watchdog_stop_on_unregister(&cros_ec_wdd);
> +	watchdog_set_drvdata(&cros_ec_wdd, &wd_data);
> +	platform_set_drvdata(pdev, &wd_data);
> +
> +	/* Get current AP boot status */
> +	ret = cros_ec_wdt_send_hang_detect(&wd_data, EC_HANG_DETECT_CMD_GET_STATUS, 0,
> +					   &bootstatus);
> +	if (ret < 0) {
> +		dev_err_probe(dev, ret, "Couldn't get AP boot status from EC");

Syntax is return dev_err_probe

> +		return ret;
> +	}
> +
> +	/*
> +	 * If bootstatus is not EC_HANG_DETECT_AP_BOOT_NORMAL
> +	 * it mens EC has rebooted the AP due to watchdog timeout.
> +	 * Lets translate it to watchdog core status code.
> +	 */
> +	if (bootstatus != EC_HANG_DETECT_AP_BOOT_NORMAL)
> +		wd_data.wdd->bootstatus = WDIOF_CARDRESET;
> +
> +	ret = watchdog_register_device(&cros_ec_wdd);
> +	if (ret < 0)
> +		dev_err_probe(dev, ret, "Couldn't get AP boot status from EC");

Syntax is return dev_err_probe

> +
> +	return ret;

return 0

> +}
> +
> +static int cros_ec_wdt_remove(struct platform_device *pdev)
> +{
> +	struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
> +
> +	watchdog_unregister_device(wd_data->wdd);
> +
> +	return 0;
> +}
> +
> +static void cros_ec_wdt_shutdown(struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
> +	int ret;
> +
> +	/*
> +	 * Clean only bootstatus flag.
> +	 * EC wdt is are already stopped by watchdog framework.
> +	 */
> +	ret = cros_ec_wdt_send_hang_detect(wd_data,
> +					   EC_HANG_DETECT_CMD_CLEAR_STATUS, 0, NULL);
> +	if (ret < 0)
> +		dev_err(dev, "%s failed (%d)", __func__, ret);
> +
> +	watchdog_unregister_device(wd_data->wdd);
> +}
> +
> +static int __maybe_unused cros_ec_wdt_suspend(struct platform_device *pdev, pm_message_t state)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
> +	int ret;
> +
> +	if (watchdog_active(wd_data->wdd)) {
> +		ret = cros_ec_wdt_send_hang_detect(wd_data,
> +						   EC_HANG_DETECT_CMD_CANCEL, 0, NULL);
> +		if (ret < 0)
> +			dev_err(dev, "%s failed (%d)", __func__, ret);
> +		wd_data->start_on_resume = true;
> +	}
> +
> +	return 0;
> +}
> +
> +static int __maybe_unused cros_ec_wdt_resume(struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
> +	int ret;
> +
> +	/* start_on_resume is only set if watchdog was active
> +	 * when device was going to sleep
> +	 */
> +	if (wd_data->start_on_resume) {
> +		/* On resume we just need to setup a EC watchdog the same way as

Use comment style from Linux coding style.

> +		 * in cros_ec_wdt_start(). When userspace resumes from suspend
> +		 * the watchdog app should just start petting the watchdog again.
> +		 */
> +		ret = cros_ec_wdt_start(wd_data->wdd);
> +		if (ret < 0)
> +			dev_err(dev, "%s failed (%d)", __func__, ret);

That's not really helpful. This applies everywhere. You have all over
the place debugging prints with __func__. This is not useful pattern.
Print something useful, not function name.

> +
> +		wd_data->start_on_resume = false;
> +	}
> +
> +	return 0;
> +}
> +
> +static struct platform_driver cros_ec_wdt_driver = {
> +	.probe		= cros_ec_wdt_probe,
> +	.remove		= cros_ec_wdt_remove,
> +	.shutdown	= cros_ec_wdt_shutdown,
> +	.suspend	= pm_ptr(cros_ec_wdt_suspend),
> +	.resume		= pm_ptr(cros_ec_wdt_resume),
> +	.driver		= {
> +		.name	= DRV_NAME,
> +	},
> +};
> +
> +module_platform_driver(cros_ec_wdt_driver);
> +
> +MODULE_ALIAS("platform:" DRV_NAME);

You should not need MODULE_ALIAS() in normal cases. If you need it,
usually it means your device ID table is wrong (e.g. misses either
entries or MODULE_DEVICE_TABLE()). MODULE_ALIAS() is not a substitute
for incomplete ID table.


> +MODULE_DESCRIPTION("Cros EC Watchdog Device Driver");
> +MODULE_LICENSE("GPL");
> diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h
> index 7dae17b62a4d..35a7a2d32819 100644
> --- a/include/linux/platform_data/cros_ec_commands.h
> +++ b/include/linux/platform_data/cros_ec_commands.h
> @@ -3961,60 +3961,50 @@ struct ec_response_i2c_passthru {
>  } __ec_align1;
>  
>  /*****************************************************************************/
> -/* Power button hang detect */
> -
> +/* AP hang detect */

I don't understand how this change is related to the new driver. This
entire hunk is confusing.


Best regards,
Krzysztof
Krzysztof Kozlowski Jan. 17, 2024, 3:29 p.m. UTC | #2
On 17/01/2024 11:24, Lukasz Majczak wrote:

Few more nits:

> This adds EC-based watchdog support for ChromeOS

Please do not use "This commit/patch/change", but imperative mood. See
longer explanation here:
https://elixir.bootlin.com/linux/v5.17.1/source/Documentation/process/submitting-patches.rst#L95

> based devices equipped with embedded controller (EC).
> 

Please use subject prefixes matching the subsystem. You can get them for
example with `git log --oneline -- DIRECTORY_OR_FILE` on the directory
your patch is touching.

And not a nit - important:

Please use scripts/get_maintainers.pl to get a list of necessary people
and lists to CC (and consider --no-git-fallback argument). It might
happen, that command when run on an older kernel, gives you outdated
entries. Therefore please be sure you base your patches on recent Linux
kernel.

Tools are doing it automatically, so either you work on some ancient
tree (don't), on fork of kernel (don't) or ignore some maintainers
(really don't).

Best regards,
Krzysztof
Guenter Roeck Jan. 17, 2024, 4:23 p.m. UTC | #3
On 1/17/24 02:24, Lukasz Majczak wrote:
> This adds EC-based watchdog support for ChromeOS
> based devices equipped with embedded controller (EC).
> 
> Signed-off-by: Lukasz Majczak <lma@chromium.org>

checkpatch --strict says:

total: 0 errors, 0 warnings, 14 checks, 455 lines checked

Please fix.

> ---
>   MAINTAINERS                                   |   6 +
>   drivers/mfd/cros_ec_dev.c                     |   9 +
>   drivers/watchdog/Kconfig                      |  15 +
>   drivers/watchdog/Makefile                     |   3 +
>   drivers/watchdog/cros_ec_wdt.c                | 303 ++++++++++++++++++
>   .../linux/platform_data/cros_ec_commands.h    |  78 ++---
>   6 files changed, 370 insertions(+), 44 deletions(-)
>   create mode 100644 drivers/watchdog/cros_ec_wdt.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 391bbb855cbe..55cd626a525f 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -4952,6 +4952,12 @@ R:	Sami Kyöstilä <skyostil@chromium.org>
>   S:	Maintained
>   F:	drivers/platform/chrome/cros_hps_i2c.c
>   
> +CHROMEOS EC WATCHDOG
> +M:	Lukasz Majczak <lma@chromium.org>
> +L:	chrome-platform@lists.linux.dev
> +S:	Maintained
> +F:	drivers/watchdog/cros_ec_wdt.c
> +
>   CHRONTEL CH7322 CEC DRIVER
>   M:	Joe Tessler <jrt@google.com>
>   L:	linux-media@vger.kernel.org
> diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
> index 79d393b602bf..60fe831cf30a 100644
> --- a/drivers/mfd/cros_ec_dev.c
> +++ b/drivers/mfd/cros_ec_dev.c
> @@ -91,6 +91,10 @@ static const struct mfd_cell cros_usbpd_notify_cells[] = {
>   	{ .name = "cros-usbpd-notify", },
>   };
>   
> +static const struct mfd_cell cros_ec_wdt_cells[] = {
> +	{ .name = "cros-ec-wdt-drv", }
> +};
> +
>   static const struct cros_feature_to_cells cros_subdevices[] = {
>   	{
>   		.id		= EC_FEATURE_CEC,
> @@ -107,6 +111,11 @@ static const struct cros_feature_to_cells cros_subdevices[] = {
>   		.mfd_cells	= cros_usbpd_charger_cells,
>   		.num_cells	= ARRAY_SIZE(cros_usbpd_charger_cells),
>   	},
> +	{
> +		.id		= EC_FEATURE_HANG_DETECT,
> +		.mfd_cells	= cros_ec_wdt_cells,
> +		.num_cells	= ARRAY_SIZE(cros_ec_wdt_cells),
> +	},
>   };
>   
>   static const struct mfd_cell cros_ec_platform_cells[] = {

The above should be a separate patch.

> diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
> index 7d22051b15a2..1da4be661be8 100644
> --- a/drivers/watchdog/Kconfig
> +++ b/drivers/watchdog/Kconfig
> @@ -2251,4 +2251,19 @@ config KEEMBAY_WATCHDOG
>   	  To compile this driver as a module, choose M here: the
>   	  module will be called keembay_wdt.
>   
> +#
> +# ChromeOS EC-based Watchdog
> +#

Pointless comment.

> +
> +config CROS_EC_WATCHDOG
> +	tristate "ChromeOS EC-based watchdog driver"
> +	select WATCHDOG_CORE
> +	depends on CROS_EC
> +	help
> +	  This is the watchdog driver for ChromeOS devices equipped with EC.
> +	  The EC watchdog - when enabled - expects to be kicked within a given
> +	  time (default set to 30 seconds) otherwise it will simple reboot
> +	  the AP. Priori to that it can also send an event (configurable timeout)
> +	  about upcoming reboot.
> +

In other words, it is a watchdog driver. I don't think it adds value
to describe how a watchdog works here, nor to describe what other (unused)
functionality the hardware may have.

Also, please try to insert in alphabetic order.

>   endif # WATCHDOG
> diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
> index 7cbc34514ec1..8295c209ddb0 100644
> --- a/drivers/watchdog/Makefile
> +++ b/drivers/watchdog/Makefile
> @@ -234,3 +234,6 @@ obj-$(CONFIG_MENZ069_WATCHDOG) += menz69_wdt.o
>   obj-$(CONFIG_RAVE_SP_WATCHDOG) += rave-sp-wdt.o
>   obj-$(CONFIG_STPMIC1_WATCHDOG) += stpmic1_wdt.o
>   obj-$(CONFIG_SL28CPLD_WATCHDOG) += sl28cpld_wdt.o
> +
> +# Cros EC watchdog

We don't add individual comments for each watchdog, and we should not start
doing so. Also, please try to insert in alphabetic order.

> +obj-$(CONFIG_CROS_EC_WATCHDOG) += cros_ec_wdt.o
> diff --git a/drivers/watchdog/cros_ec_wdt.c b/drivers/watchdog/cros_ec_wdt.c
> new file mode 100644
> index 000000000000..b461c0613269
> --- /dev/null
> +++ b/drivers/watchdog/cros_ec_wdt.c
> @@ -0,0 +1,303 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +#include <linux/slab.h>
> +#include <linux/err.h>
> +#include <linux/of_device.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/platform_data/cros_ec_commands.h>
> +#include <linux/platform_data/cros_ec_proto.h>
> +#include <linux/platform_device.h>
> +#include <linux/watchdog.h>
> +#include <linux/uaccess.h>

Alphabetic include file order, please. Also, I don't immediately see where
uaccess.h or of_device.h are used. Please drop unused includes.

> +
> +#define CROS_EC_WATCHDOG_DEFAULT_TIME 30 /* seconds */
> +
> +#define DEV_NAME "cros-ec-wdt-dev"
> +#define DRV_NAME "cros-ec-wdt-drv"

One of those is unused.

> +
> +static int cros_ec_wdt_ping(struct watchdog_device *wdd);
> +static int cros_ec_wdt_start(struct watchdog_device *wdd);
> +static int cros_ec_wdt_stop(struct watchdog_device *wdd);
> +static int cros_ec_wdt_set_timeout(struct watchdog_device *wdd, unsigned int t);
> +

Please rearrange to avoid forward declarations.

> +struct cros_ec_wdt_data {
> +	bool start_on_resume;
> +	bool keepalive_on;
> +	struct cros_ec_device *cros_ec;
> +	struct watchdog_device *wdd;
> +};
> +static struct cros_ec_wdt_data wd_data;

Please allocate and avoid static variables.

> +
> +static const struct watchdog_info cros_ec_wdt_ident = {
> +	.options          = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
> +	.firmware_version = 0,
> +	.identity         = DRV_NAME,
> +};
> +
> +static const struct watchdog_ops cros_ec_wdt_ops = {
> +	.owner		 = THIS_MODULE,
> +	.ping		 = cros_ec_wdt_ping,
> +	.start		 = cros_ec_wdt_start,
> +	.stop		 = cros_ec_wdt_stop,
> +	.set_timeout = cros_ec_wdt_set_timeout,
> +};
> +
> +static struct watchdog_device cros_ec_wdd = {
> +	.info = &cros_ec_wdt_ident,
> +	.ops = &cros_ec_wdt_ops,
> +	.timeout = CROS_EC_WATCHDOG_DEFAULT_TIME,
> +	.bootstatus = EC_HANG_DETECT_AP_BOOT_NORMAL

Consider allocating and embedding in struct cros_ec_wdt_data.
.timeout and .bootstatus are overwritten anyway, and it does not make sense
to initialize those elements here.

> +};
> +
> +static int cros_ec_wdt_send_hang_detect(struct cros_ec_wdt_data *wd_data,

Seems to me that cros_ec_wdt_send_cmd or similar would be a better function name,
but your call.

> +					uint16_t command,
> +					uint16_t reboot_timeout_sec,
> +					uint32_t *status)
> +{
> +	int ret;
> +
> +	struct {
> +		struct cros_ec_command msg;
> +		union {
> +			struct ec_params_hang_detect req;
> +			struct ec_response_hang_detect resp;
> +		};
> +	} __packed buf = {
> +		.msg = {
> +			.version = 0,
> +			.command = EC_CMD_HANG_DETECT,
> +			.insize  = (command == EC_HANG_DETECT_CMD_GET_STATUS) ?
> +				   sizeof(struct ec_response_hang_detect) :
> +				   0,
> +			.outsize = sizeof(struct ec_params_hang_detect),
> +		},
> +		.req = {
> +			.command = command,
> +			.reboot_timeout_sec = reboot_timeout_sec,
> +		}
> +	};
> +
> +	ret = cros_ec_cmd_xfer_status(wd_data->cros_ec, &buf.msg);
> +	if (ret < 0) {
> +		dev_warn(wd_data->wdd->parent,

Given that wdd is needed anyway,

> +				 "cros_ec_cmd_xfer_status failed(%d) command (%04x) reboot_timeout_sec(%ds)",
> +				 ret, command, reboot_timeout_sec);

That message, if ever seen, does not make much sense if the command was
one that does not set the timeout. On top of that, this code
dumps a warning, and the calling code dumps an error message. Is the EC
really that fragile that it is necessary to dump all those error messages ?

If not, please consider replacing most if not all of those log messages
with dev_dbg().

> +		return ret;
> +	}
> +
> +	if (status && (command == EC_HANG_DETECT_CMD_GET_STATUS)) {

Unnecessary (). Besides, it doesn't make much sense. Why provide
a status pointer but only use it if the command is a specific one ?

> +		*status = buf.resp.status;
> +		dev_info(wd_data->wdd->parent, "EC Watchdog boot status (%d)",
> +				 buf.resp.status);

That message does not belong here.

> +	}
> +
> +	return 0;
> +}
> +
> +static int cros_ec_wdt_ping(struct watchdog_device *wdd)
> +{
> +	struct cros_ec_wdt_data *wd_data = watchdog_get_drvdata(wdd);
> +	int ret;
> +
> +	ret = cros_ec_wdt_send_hang_detect(wd_data, EC_HANG_DETECT_CMD_RELOAD,
> +					   0, NULL);
> +	if (ret < 0)
> +		dev_err(wdd->parent, "%s failed (%d)", __func__, ret);

return here and drop the else.

Nore that this driver is extremely noisy. If the EC for some reason doesn't
respond as expected the error messages will drown all other kernel messages.
I really do not see the point of this.

> +	else
> +		wd_data->keepalive_on = true;

This is odd and needs a detailed explanation. More on that see below.

> +
> +	return ret;
> +}
> +
> +static int cros_ec_wdt_start(struct watchdog_device *wdd)
> +{
> +	struct cros_ec_wdt_data *wd_data = watchdog_get_drvdata(wdd);
> +	int ret = 0;
> +
> +	/* Prepare watchdog on EC side */
> +	ret = cros_ec_wdt_send_hang_detect(wd_data,
> +					EC_HANG_DETECT_CMD_SET_TIMEOUT,
> +					wdd->timeout,
> +					NULL);
> +	if (ret < 0)
> +		dev_err(wdd->parent, "%s failed (%d)", __func__, ret);
> +
> +	return ret;
> +}
> +
> +static int cros_ec_wdt_stop(struct watchdog_device *wdd)
> +{
> +	struct cros_ec_wdt_data *wd_data = watchdog_get_drvdata(wdd);
> +	int ret = 0;
> +
> +	if (wd_data->keepalive_on) {
> +		wd_data->keepalive_on = false;
> +		ret = cros_ec_wdt_send_hang_detect(wd_data, EC_HANG_DETECT_CMD_CANCEL,
> +						0, NULL);

This needs explanation. Why is it only needed here after the first ping is sent,
but on suspend it is unconditional ? In other words, why does the watchdog
behave differently after the first ping ? Why does the watchdog not have to be
stopped if it never received a ping ? Logically that seems odd, if not plain
wrong. The watchdog should behave exactly the same after it was started, no matter
if it ever received a ping or not.

> +		if (ret < 0)
> +			dev_err(wdd->parent, "%s failed (%d)", __func__, ret);
> +	}
> +
> +	return ret;
> +}
> +
> +static int cros_ec_wdt_set_timeout(struct watchdog_device *wdd, unsigned int t)
> +{
> +	struct cros_ec_wdt_data *wd_data = watchdog_get_drvdata(wdd);
> +	int ret;
> +
> +	if (t < EC_HANG_DETECT_MIN_TIMEOUT) {
> +		dev_err(wdd->parent,
> +				"%s failed, requested timeout is lower than min(%d < %d)",
> +				__func__, t, EC_HANG_DETECT_MIN_TIMEOUT);
> +		return -EINVAL;

This should be handled in the watchdog core. If it isn't the driver is misconfigured.

> +	}
> +
> +	ret = cros_ec_wdt_send_hang_detect(wd_data,
> +					   EC_HANG_DETECT_CMD_SET_TIMEOUT,
> +					   t, NULL);
> +	if (ret < 0)
> +		dev_err(wdd->parent, "%s failed (%d)", __func__, ret);

return here.

> +	else
> +		wdd->timeout = t;
> +
> +	return ret;
> +}
> +
> +static int cros_ec_wdt_probe(struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	/* We need to get a reference to cros_ec_devices
> +	 * (provides communication layer) which is a parent of
> +	 * the cros-ec-dev (our parent)
> +	 */
> +	struct cros_ec_device *cros_ec = dev_get_drvdata(dev->parent->parent);


Other cros drivers get struct cros_ec_dev from dev->parent and then use
ec->ec_dev. Please explain why you don't use the same pattern; dereferencing
parent->parent and assuming to know its driver data seems to be (much) more
risky than the existing pattern.

Also, the comment seems obvious. None of the other cros_ec drivers needed it,
and I don't see why it would add value here.

> +	int ret = 0;
> +	uint32_t bootstatus;
> +
> +	if (!cros_ec) {
> +		ret = -ENODEV;
> +		dev_err_probe(dev, ret, "There is no coresponding EC device!");
> +		return ret;

return dev_err_probe(). Same elsewhere.

> +	}
> +
> +	cros_ec_wdd.parent = &pdev->dev;
> +	wd_data.cros_ec = cros_ec;
> +	wd_data.wdd = &cros_ec_wdd;
> +	wd_data.start_on_resume = false;
> +	wd_data.keepalive_on = false;
> +	wd_data.wdd->timeout = CROS_EC_WATCHDOG_DEFAULT_TIME;

Ah, I guess the reason for the error handling when setting the timeout is
the lack of setting boundaries here. That is wrong. min_timeout
and max_timeout need to be set.

> +
> +	watchdog_stop_on_reboot(&cros_ec_wdd);
> +	watchdog_stop_on_unregister(&cros_ec_wdd);
> +	watchdog_set_drvdata(&cros_ec_wdd, &wd_data);
> +	platform_set_drvdata(pdev, &wd_data);
> +
> +	/* Get current AP boot status */
> +	ret = cros_ec_wdt_send_hang_detect(&wd_data, EC_HANG_DETECT_CMD_GET_STATUS, 0,
> +					   &bootstatus);
> +	if (ret < 0) {
> +		dev_err_probe(dev, ret, "Couldn't get AP boot status from EC");
> +		return ret;
> +	}
> +
> +	/*
> +	 * If bootstatus is not EC_HANG_DETECT_AP_BOOT_NORMAL
> +	 * it mens EC has rebooted the AP due to watchdog timeout.

means

> +	 * Lets translate it to watchdog core status code.
> +	 */
> +	if (bootstatus != EC_HANG_DETECT_AP_BOOT_NORMAL)
> +		wd_data.wdd->bootstatus = WDIOF_CARDRESET;
> +
> +	ret = watchdog_register_device(&cros_ec_wdd);
> +	if (ret < 0)
> +		dev_err_probe(dev, ret, "Couldn't get AP boot status from EC");

Wrong error message.

> +
> +	return ret;
> +}
> +
> +static int cros_ec_wdt_remove(struct platform_device *pdev)
> +{
> +	struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
> +
> +	watchdog_unregister_device(wd_data->wdd);

Why not use devm_watchdog_register_device() ? Please explain.

> +
> +	return 0;
> +}
> +
> +static void cros_ec_wdt_shutdown(struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
> +	int ret;
> +
> +	/*
> +	 * Clean only bootstatus flag.
> +	 * EC wdt is are already stopped by watchdog framework.
> +	 */

Why is this necessary if the watchdog is stopped ?

> +	ret = cros_ec_wdt_send_hang_detect(wd_data,
> +					   EC_HANG_DETECT_CMD_CLEAR_STATUS, 0, NULL);
> +	if (ret < 0)
> +		dev_err(dev, "%s failed (%d)", __func__, ret);
> +
> +	watchdog_unregister_device(wd_data->wdd);

This doesn't make sense to me. Explain why it is necessary.

> +}
> +
> +static int __maybe_unused cros_ec_wdt_suspend(struct platform_device *pdev, pm_message_t state)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
> +	int ret;
> +
> +	if (watchdog_active(wd_data->wdd)) {
> +		ret = cros_ec_wdt_send_hang_detect(wd_data,
> +						   EC_HANG_DETECT_CMD_CANCEL, 0, NULL);
> +		if (ret < 0)
> +			dev_err(dev, "%s failed (%d)", __func__, ret);
> +		wd_data->start_on_resume = true;

Needs explanation. See below.

> +	}
> +
> +	return 0;
> +}
> +
> +static int __maybe_unused cros_ec_wdt_resume(struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
> +	int ret;
> +
> +	/* start_on_resume is only set if watchdog was active
> +	 * when device was going to sleep
> +	 */

Please use standard multi-line comments. This is not the networking subsystem.
Even more important, use _consistent_ multi-line comments.

> +	if (wd_data->start_on_resume) {

Please explain why watchdog_active() does not work here.

> +		/* On resume we just need to setup a EC watchdog the same way as
> +		 * in cros_ec_wdt_start(). When userspace resumes from suspend
> +		 * the watchdog app should just start petting the watchdog again.
> +		 */

Isn't that obvious ?

> +		ret = cros_ec_wdt_start(wd_data->wdd);
> +		if (ret < 0)
> +			dev_err(dev, "%s failed (%d)", __func__, ret);
> +
> +		wd_data->start_on_resume = false;
> +	}
> +
> +	return 0;
> +}
> +
> +static struct platform_driver cros_ec_wdt_driver = {
> +	.probe		= cros_ec_wdt_probe,
> +	.remove		= cros_ec_wdt_remove,
> +	.shutdown	= cros_ec_wdt_shutdown,
> +	.suspend	= pm_ptr(cros_ec_wdt_suspend),
> +	.resume		= pm_ptr(cros_ec_wdt_resume),
> +	.driver		= {
> +		.name	= DRV_NAME,
> +	},
> +};
> +
> +module_platform_driver(cros_ec_wdt_driver);
> +
> +MODULE_ALIAS("platform:" DRV_NAME);
> +MODULE_DESCRIPTION("Cros EC Watchdog Device Driver");
> +MODULE_LICENSE("GPL");
> diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h
> index 7dae17b62a4d..35a7a2d32819 100644
> --- a/include/linux/platform_data/cros_ec_commands.h
> +++ b/include/linux/platform_data/cros_ec_commands.h
> @@ -3961,60 +3961,50 @@ struct ec_response_i2c_passthru {
>   } __ec_align1;
>   
>   /*****************************************************************************/
> -/* Power button hang detect */
> -
> +/* AP hang detect */
>   #define EC_CMD_HANG_DETECT 0x009F
>   
> -/* Reasons to start hang detection timer */
> -/* Power button pressed */
> -#define EC_HANG_START_ON_POWER_PRESS  BIT(0)
> -
> -/* Lid closed */
> -#define EC_HANG_START_ON_LID_CLOSE    BIT(1)
> -
> - /* Lid opened */
> -#define EC_HANG_START_ON_LID_OPEN     BIT(2)
> -
> -/* Start of AP S3->S0 transition (booting or resuming from suspend) */
> -#define EC_HANG_START_ON_RESUME       BIT(3)
> -
> -/* Reasons to cancel hang detection */
> +#define EC_HANG_DETECT_MIN_TIMEOUT 5
>   
> -/* Power button released */
> -#define EC_HANG_STOP_ON_POWER_RELEASE BIT(8)
> +/* EC hang detect commands */
> +enum ec_hang_detect_cmds {
> +	/* Reload AP hang detect timer. */
> +	EC_HANG_DETECT_CMD_RELOAD = 0x0,
>   
> -/* Any host command from AP received */
> -#define EC_HANG_STOP_ON_HOST_COMMAND  BIT(9)
> +	/* Stop AP hang detect timer. */
> +	EC_HANG_DETECT_CMD_CANCEL = 0x1,
>   

This is making unrelated changes. Please make those in a separate patch.
It might be best to update this file in a separate patch to start with.

> -/* Stop on end of AP S0->S3 transition (suspending or shutting down) */
> -#define EC_HANG_STOP_ON_SUSPEND       BIT(10)
> +	/* Configure watchdog with given reboot timeout and
> +	 * cancel currently running AP hand detect timer.
> +	 */
> +	EC_HANG_DETECT_CMD_SET_TIMEOUT = 0x2,
>   
> -/*
> - * If this flag is set, all the other fields are ignored, and the hang detect
> - * timer is started.  This provides the AP a way to start the hang timer
> - * without reconfiguring any of the other hang detect settings.  Note that
> - * you must previously have configured the timeouts.
> - */
> -#define EC_HANG_START_NOW             BIT(30)
> +	/* Get last hang status - whether the AP boot was clear or not */
> +	EC_HANG_DETECT_CMD_GET_STATUS = 0x3,
>   
> -/*
> - * If this flag is set, all the other fields are ignored (including
> - * EC_HANG_START_NOW).  This provides the AP a way to stop the hang timer
> - * without reconfiguring any of the other hang detect settings.
> - */
> -#define EC_HANG_STOP_NOW              BIT(31)
> +	/* Clear last hang status. Called when AP is rebooting/shutting down
> +	 * gracefully.
> +	 */
> +	EC_HANG_DETECT_CMD_CLEAR_STATUS = 0x4
> +};
>   
>   struct ec_params_hang_detect {
> -	/* Flags; see EC_HANG_* */
> -	uint32_t flags;
> -
> -	/* Timeout in msec before generating host event, if enabled */
> -	uint16_t host_event_timeout_msec;
> -
> -	/* Timeout in msec before generating warm reboot, if enabled */
> -	uint16_t warm_reboot_timeout_msec;
> -} __ec_align4;
> +	uint16_t command; /* enum ec_hang_detect_cmds */
> +	/* Timeout in seconds before generating reboot */
> +	uint16_t reboot_timeout_sec;
> +} __ec_align2;
>   
> +/* Status codes that describe whether AP has boot normally or the hang has been
> + * detected and EC has reset AP
> + */
> +enum ec_hang_detect_status {
> +	EC_HANG_DETECT_AP_BOOT_NORMAL = 0x0,
> +	EC_HANG_DETECT_AP_BOOT_EC_WDT = 0x1,
> +	EC_HANG_DETECT_AP_BOOT_COUNT,
> +};
> +struct ec_response_hang_detect {
> +	uint8_t status; /* enum ec_hang_detect_status */
> +} __ec_align1;
>   /*****************************************************************************/
>   /* Commands for battery charging */
>
Łukasz Majczak Jan. 18, 2024, 5:28 p.m. UTC | #4
Hi Krzysztof,

Please find the answers below.

On Wed, Jan 17, 2024 at 4:27 PM Krzysztof Kozlowski <krzk@kernel.org> wrote:
>
> On 17/01/2024 11:24, Lukasz Majczak wrote:
> > This adds EC-based watchdog support for ChromeOS
> > based devices equipped with embedded controller (EC).
> >
> > Signed-off-by: Lukasz Majczak <lma@chromium.org>
> > ---
> >  MAINTAINERS                                   |   6 +
> >  drivers/mfd/cros_ec_dev.c                     |   9 +
> >  drivers/watchdog/Kconfig                      |  15 +
> >  drivers/watchdog/Makefile                     |   3 +
> >  drivers/watchdog/cros_ec_wdt.c                | 303 ++++++++++++++++++
> >  .../linux/platform_data/cros_ec_commands.h    |  78 ++---
> >  6 files changed, 370 insertions(+), 44 deletions(-)
> >  create mode 100644 drivers/watchdog/cros_ec_wdt.c
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 391bbb855cbe..55cd626a525f 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -4952,6 +4952,12 @@ R:     Sami Kyöstilä <skyostil@chromium.org>
> >  S:   Maintained
> >  F:   drivers/platform/chrome/cros_hps_i2c.c
> >
> > +CHROMEOS EC WATCHDOG
> > +M:   Lukasz Majczak <lma@chromium.org>
> > +L:   chrome-platform@lists.linux.dev
> > +S:   Maintained
> > +F:   drivers/watchdog/cros_ec_wdt.c
> > +
> >  CHRONTEL CH7322 CEC DRIVER
> >  M:   Joe Tessler <jrt@google.com>
> >  L:   linux-media@vger.kernel.org
> > diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
> > index 79d393b602bf..60fe831cf30a 100644
> > --- a/drivers/mfd/cros_ec_dev.c
> > +++ b/drivers/mfd/cros_ec_dev.c
> > @@ -91,6 +91,10 @@ static const struct mfd_cell cros_usbpd_notify_cells[] = {
> >       { .name = "cros-usbpd-notify", },
> >  };
> >
> > +static const struct mfd_cell cros_ec_wdt_cells[] = {
> > +     { .name = "cros-ec-wdt-drv", }
> > +};
> > +
> >  static const struct cros_feature_to_cells cros_subdevices[] = {
> >       {
> >               .id             = EC_FEATURE_CEC,
> > @@ -107,6 +111,11 @@ static const struct cros_feature_to_cells cros_subdevices[] = {
> >               .mfd_cells      = cros_usbpd_charger_cells,
> >               .num_cells      = ARRAY_SIZE(cros_usbpd_charger_cells),
> >       },
> > +     {
> > +             .id             = EC_FEATURE_HANG_DETECT,
> > +             .mfd_cells      = cros_ec_wdt_cells,
> > +             .num_cells      = ARRAY_SIZE(cros_ec_wdt_cells),
> > +     },
> >  };
> >
> >  static const struct mfd_cell cros_ec_platform_cells[] = {
> > diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
> > index 7d22051b15a2..1da4be661be8 100644
> > --- a/drivers/watchdog/Kconfig
> > +++ b/drivers/watchdog/Kconfig
> > @@ -2251,4 +2251,19 @@ config KEEMBAY_WATCHDOG
> >         To compile this driver as a module, choose M here: the
> >         module will be called keembay_wdt.
> >
> > +#
> > +# ChromeOS EC-based Watchdog
> > +#
>
> Drop comment, useless, copies what's below.

ACK

>
> > +
> > +config CROS_EC_WATCHDOG
> > +     tristate "ChromeOS EC-based watchdog driver"
> > +     select WATCHDOG_CORE
> > +     depends on CROS_EC
> > +     help
> > +       This is the watchdog driver for ChromeOS devices equipped with EC.
> > +       The EC watchdog - when enabled - expects to be kicked within a given
> > +       time (default set to 30 seconds) otherwise it will simple reboot
> > +       the AP. Priori to that it can also send an event (configurable timeout)
> > +       about upcoming reboot.
>
> Instead you could say what will be the name of the module.
>

ACK

> > +
> >  endif # WATCHDOG
> > diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
> > index 7cbc34514ec1..8295c209ddb0 100644
> > --- a/drivers/watchdog/Makefile
> > +++ b/drivers/watchdog/Makefile
> > @@ -234,3 +234,6 @@ obj-$(CONFIG_MENZ069_WATCHDOG) += menz69_wdt.o
> >  obj-$(CONFIG_RAVE_SP_WATCHDOG) += rave-sp-wdt.o
> >  obj-$(CONFIG_STPMIC1_WATCHDOG) += stpmic1_wdt.o
> >  obj-$(CONFIG_SL28CPLD_WATCHDOG) += sl28cpld_wdt.o
> > +
> > +# Cros EC watchdog
>
> Drop comment.
>
> Also, are you sure you placed it in appropriate place, not just at the
> end of both files?
>

ACK, I will find a better place for above (both in Makefile and
Kconfig) and will try to keep alphabetic order.

> > +obj-$(CONFIG_CROS_EC_WATCHDOG) += cros_ec_wdt.o
> > diff --git a/drivers/watchdog/cros_ec_wdt.c b/drivers/watchdog/cros_ec_wdt.c
> > new file mode 100644
> > index 000000000000..b461c0613269
> > --- /dev/null
> > +++ b/drivers/watchdog/cros_ec_wdt.c
> > @@ -0,0 +1,303 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +
> > +#include <linux/slab.h>
> > +#include <linux/err.h>
> > +#include <linux/of_device.h>
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/platform_data/cros_ec_commands.h>
> > +#include <linux/platform_data/cros_ec_proto.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/watchdog.h>
> > +#include <linux/uaccess.h>
> > +
> > +#define CROS_EC_WATCHDOG_DEFAULT_TIME 30 /* seconds */
> > +
> > +#define DEV_NAME "cros-ec-wdt-dev"
>
> Drop unused defines.

ACK

>
> > +#define DRV_NAME "cros-ec-wdt-drv"
> > +
> > +static int cros_ec_wdt_ping(struct watchdog_device *wdd);
> > +static int cros_ec_wdt_start(struct watchdog_device *wdd);
> > +static int cros_ec_wdt_stop(struct watchdog_device *wdd);
> > +static int cros_ec_wdt_set_timeout(struct watchdog_device *wdd, unsigned int t);
> > +
>
> ...
>
> > +
> > +static int cros_ec_wdt_probe(struct platform_device *pdev)
> > +{
> > +     struct device *dev = &pdev->dev;
> > +     /* We need to get a reference to cros_ec_devices
> > +      * (provides communication layer) which is a parent of
> > +      * the cros-ec-dev (our parent)
> > +      */
> > +     struct cros_ec_device *cros_ec = dev_get_drvdata(dev->parent->parent);
> > +     int ret = 0;
> > +     uint32_t bootstatus;
> > +
> > +     if (!cros_ec) {
> > +             ret = -ENODEV;
> > +             dev_err_probe(dev, ret, "There is no coresponding EC device!");
>
> Syntax is return dev_err_probe
>

ACK

> > +             return ret;
> > +     }
> > +
> > +     cros_ec_wdd.parent = &pdev->dev;
> > +     wd_data.cros_ec = cros_ec;
> > +     wd_data.wdd = &cros_ec_wdd;
> > +     wd_data.start_on_resume = false;
> > +     wd_data.keepalive_on = false;
> > +     wd_data.wdd->timeout = CROS_EC_WATCHDOG_DEFAULT_TIME;
> > +
> > +     watchdog_stop_on_reboot(&cros_ec_wdd);
> > +     watchdog_stop_on_unregister(&cros_ec_wdd);
> > +     watchdog_set_drvdata(&cros_ec_wdd, &wd_data);
> > +     platform_set_drvdata(pdev, &wd_data);
> > +
> > +     /* Get current AP boot status */
> > +     ret = cros_ec_wdt_send_hang_detect(&wd_data, EC_HANG_DETECT_CMD_GET_STATUS, 0,
> > +                                        &bootstatus);
> > +     if (ret < 0) {
> > +             dev_err_probe(dev, ret, "Couldn't get AP boot status from EC");
>
> Syntax is return dev_err_probe

ACK

>
> > +             return ret;
> > +     }
> > +
> > +     /*
> > +      * If bootstatus is not EC_HANG_DETECT_AP_BOOT_NORMAL
> > +      * it mens EC has rebooted the AP due to watchdog timeout.
> > +      * Lets translate it to watchdog core status code.
> > +      */
> > +     if (bootstatus != EC_HANG_DETECT_AP_BOOT_NORMAL)
> > +             wd_data.wdd->bootstatus = WDIOF_CARDRESET;
> > +
> > +     ret = watchdog_register_device(&cros_ec_wdd);
> > +     if (ret < 0)
> > +             dev_err_probe(dev, ret, "Couldn't get AP boot status from EC");
>
> Syntax is return dev_err_probe
>

ACK

> > +
> > +     return ret;
>
> return 0
>

ACK

> > +}
> > +
> > +static int cros_ec_wdt_remove(struct platform_device *pdev)
> > +{
> > +     struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
> > +
> > +     watchdog_unregister_device(wd_data->wdd);
> > +
> > +     return 0;
> > +}
> > +
> > +static void cros_ec_wdt_shutdown(struct platform_device *pdev)
> > +{
> > +     struct device *dev = &pdev->dev;
> > +     struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
> > +     int ret;
> > +
> > +     /*
> > +      * Clean only bootstatus flag.
> > +      * EC wdt is are already stopped by watchdog framework.
> > +      */
> > +     ret = cros_ec_wdt_send_hang_detect(wd_data,
> > +                                        EC_HANG_DETECT_CMD_CLEAR_STATUS, 0, NULL);
> > +     if (ret < 0)
> > +             dev_err(dev, "%s failed (%d)", __func__, ret);
> > +
> > +     watchdog_unregister_device(wd_data->wdd);
> > +}
> > +
> > +static int __maybe_unused cros_ec_wdt_suspend(struct platform_device *pdev, pm_message_t state)
> > +{
> > +     struct device *dev = &pdev->dev;
> > +     struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
> > +     int ret;
> > +
> > +     if (watchdog_active(wd_data->wdd)) {
> > +             ret = cros_ec_wdt_send_hang_detect(wd_data,
> > +                                                EC_HANG_DETECT_CMD_CANCEL, 0, NULL);
> > +             if (ret < 0)
> > +                     dev_err(dev, "%s failed (%d)", __func__, ret);
> > +             wd_data->start_on_resume = true;
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +static int __maybe_unused cros_ec_wdt_resume(struct platform_device *pdev)
> > +{
> > +     struct device *dev = &pdev->dev;
> > +     struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
> > +     int ret;
> > +
> > +     /* start_on_resume is only set if watchdog was active
> > +      * when device was going to sleep
> > +      */
> > +     if (wd_data->start_on_resume) {
> > +             /* On resume we just need to setup a EC watchdog the same way as
>
> Use comment style from Linux coding style.
>

ACK

> > +              * in cros_ec_wdt_start(). When userspace resumes from suspend
> > +              * the watchdog app should just start petting the watchdog again.
> > +              */
> > +             ret = cros_ec_wdt_start(wd_data->wdd);
> > +             if (ret < 0)
> > +                     dev_err(dev, "%s failed (%d)", __func__, ret);
>
> That's not really helpful. This applies everywhere. You have all over
> the place debugging prints with __func__. This is not useful pattern.
> Print something useful, not function name.
>

ACK, I will revisit the logs messages (both the meanings and log levels)

> > +
> > +             wd_data->start_on_resume = false;
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +static struct platform_driver cros_ec_wdt_driver = {
> > +     .probe          = cros_ec_wdt_probe,
> > +     .remove         = cros_ec_wdt_remove,
> > +     .shutdown       = cros_ec_wdt_shutdown,
> > +     .suspend        = pm_ptr(cros_ec_wdt_suspend),
> > +     .resume         = pm_ptr(cros_ec_wdt_resume),
> > +     .driver         = {
> > +             .name   = DRV_NAME,
> > +     },
> > +};
> > +
> > +module_platform_driver(cros_ec_wdt_driver);
> > +
> > +MODULE_ALIAS("platform:" DRV_NAME);
>
> You should not need MODULE_ALIAS() in normal cases. If you need it,
> usually it means your device ID table is wrong (e.g. misses either
> entries or MODULE_DEVICE_TABLE()). MODULE_ALIAS() is not a substitute
> for incomplete ID table.
>
>

ACK, I will use MODULE_DEVICE_TABLE().

> > +MODULE_DESCRIPTION("Cros EC Watchdog Device Driver");
> > +MODULE_LICENSE("GPL");
> > diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h
> > index 7dae17b62a4d..35a7a2d32819 100644
> > --- a/include/linux/platform_data/cros_ec_commands.h
> > +++ b/include/linux/platform_data/cros_ec_commands.h
> > @@ -3961,60 +3961,50 @@ struct ec_response_i2c_passthru {
> >  } __ec_align1;
> >
> >  /*****************************************************************************/
> > -/* Power button hang detect */
> > -
> > +/* AP hang detect */
>
> I don't understand how this change is related to the new driver. This
> entire hunk is confusing.
>

Yes, that updates the API and definition for embedded controller (EC).
It should go in the separate patch.

>
> Best regards,
> Krzysztof
>

Best regards,
Lukasz
Łukasz Majczak Jan. 18, 2024, 6:08 p.m. UTC | #5
Hi Guenter

Please see my comments below

On Wed, Jan 17, 2024 at 5:23 PM Guenter Roeck <linux@roeck-us.net> wrote:
>
> On 1/17/24 02:24, Lukasz Majczak wrote:
> > This adds EC-based watchdog support for ChromeOS
> > based devices equipped with embedded controller (EC).
> >
> > Signed-off-by: Lukasz Majczak <lma@chromium.org>
>
> checkpatch --strict says:
>
> total: 0 errors, 0 warnings, 14 checks, 455 lines checked
>
> Please fix.

ACK, Although I would keep uint16_t/uint32_t types in
cros_ec_commands.h as those are shared with EC firmware.

> > ---
> >   MAINTAINERS                                   |   6 +
> >   drivers/mfd/cros_ec_dev.c                     |   9 +
> >   drivers/watchdog/Kconfig                      |  15 +
> >   drivers/watchdog/Makefile                     |   3 +
> >   drivers/watchdog/cros_ec_wdt.c                | 303 ++++++++++++++++++
> >   .../linux/platform_data/cros_ec_commands.h    |  78 ++---
> >   6 files changed, 370 insertions(+), 44 deletions(-)
> >   create mode 100644 drivers/watchdog/cros_ec_wdt.c
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 391bbb855cbe..55cd626a525f 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -4952,6 +4952,12 @@ R:     Sami Kyöstilä <skyostil@chromium.org>
> >   S:  Maintained
> >   F:  drivers/platform/chrome/cros_hps_i2c.c
> >
> > +CHROMEOS EC WATCHDOG
> > +M:   Lukasz Majczak <lma@chromium.org>
> > +L:   chrome-platform@lists.linux.dev
> > +S:   Maintained
> > +F:   drivers/watchdog/cros_ec_wdt.c
> > +
> >   CHRONTEL CH7322 CEC DRIVER
> >   M:  Joe Tessler <jrt@google.com>
> >   L:  linux-media@vger.kernel.org
> > diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
> > index 79d393b602bf..60fe831cf30a 100644
> > --- a/drivers/mfd/cros_ec_dev.c
> > +++ b/drivers/mfd/cros_ec_dev.c
> > @@ -91,6 +91,10 @@ static const struct mfd_cell cros_usbpd_notify_cells[] = {
> >       { .name = "cros-usbpd-notify", },
> >   };
> >
> > +static const struct mfd_cell cros_ec_wdt_cells[] = {
> > +     { .name = "cros-ec-wdt-drv", }
> > +};
> > +
> >   static const struct cros_feature_to_cells cros_subdevices[] = {
> >       {
> >               .id             = EC_FEATURE_CEC,
> > @@ -107,6 +111,11 @@ static const struct cros_feature_to_cells cros_subdevices[] = {
> >               .mfd_cells      = cros_usbpd_charger_cells,
> >               .num_cells      = ARRAY_SIZE(cros_usbpd_charger_cells),
> >       },
> > +     {
> > +             .id             = EC_FEATURE_HANG_DETECT,
> > +             .mfd_cells      = cros_ec_wdt_cells,
> > +             .num_cells      = ARRAY_SIZE(cros_ec_wdt_cells),
> > +     },
> >   };
> >
> >   static const struct mfd_cell cros_ec_platform_cells[] = {
>
> The above should be a separate patch.
>
ACK, I will create a separate one for this.

> > diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
> > index 7d22051b15a2..1da4be661be8 100644
> > --- a/drivers/watchdog/Kconfig
> > +++ b/drivers/watchdog/Kconfig
> > @@ -2251,4 +2251,19 @@ config KEEMBAY_WATCHDOG
> >         To compile this driver as a module, choose M here: the
> >         module will be called keembay_wdt.
> >
> > +#
> > +# ChromeOS EC-based Watchdog
> > +#
>
> Pointless comment.
>
ACK
> > +
> > +config CROS_EC_WATCHDOG
> > +     tristate "ChromeOS EC-based watchdog driver"
> > +     select WATCHDOG_CORE
> > +     depends on CROS_EC
> > +     help
> > +       This is the watchdog driver for ChromeOS devices equipped with EC.
> > +       The EC watchdog - when enabled - expects to be kicked within a given
> > +       time (default set to 30 seconds) otherwise it will simple reboot
> > +       the AP. Priori to that it can also send an event (configurable timeout)
> > +       about upcoming reboot.
> > +
>
> In other words, it is a watchdog driver. I don't think it adds value
> to describe how a watchdog works here, nor to describe what other (unused)
> functionality the hardware may have.
>
> Also, please try to insert in alphabetic order.
ACK
>
> >   endif # WATCHDOG
> > diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
> > index 7cbc34514ec1..8295c209ddb0 100644
> > --- a/drivers/watchdog/Makefile
> > +++ b/drivers/watchdog/Makefile
> > @@ -234,3 +234,6 @@ obj-$(CONFIG_MENZ069_WATCHDOG) += menz69_wdt.o
> >   obj-$(CONFIG_RAVE_SP_WATCHDOG) += rave-sp-wdt.o
> >   obj-$(CONFIG_STPMIC1_WATCHDOG) += stpmic1_wdt.o
> >   obj-$(CONFIG_SL28CPLD_WATCHDOG) += sl28cpld_wdt.o
> > +
> > +# Cros EC watchdog
>
> We don't add individual comments for each watchdog, and we should not start
> doing so. Also, please try to insert in alphabetic order.
>
ACK
> > +obj-$(CONFIG_CROS_EC_WATCHDOG) += cros_ec_wdt.o
> > diff --git a/drivers/watchdog/cros_ec_wdt.c b/drivers/watchdog/cros_ec_wdt.c
> > new file mode 100644
> > index 000000000000..b461c0613269
> > --- /dev/null
> > +++ b/drivers/watchdog/cros_ec_wdt.c
> > @@ -0,0 +1,303 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +
> > +#include <linux/slab.h>
> > +#include <linux/err.h>
> > +#include <linux/of_device.h>
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/platform_data/cros_ec_commands.h>
> > +#include <linux/platform_data/cros_ec_proto.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/watchdog.h>
> > +#include <linux/uaccess.h>
>
> Alphabetic include file order, please. Also, I don't immediately see where
> uaccess.h or of_device.h are used. Please drop unused includes.
>
ACK, in fact uaccess.h or of_device.h are unnecessary
> > +
> > +#define CROS_EC_WATCHDOG_DEFAULT_TIME 30 /* seconds */
> > +
> > +#define DEV_NAME "cros-ec-wdt-dev"
> > +#define DRV_NAME "cros-ec-wdt-drv"
>
> One of those is unused.
>
ACK
> > +
> > +static int cros_ec_wdt_ping(struct watchdog_device *wdd);
> > +static int cros_ec_wdt_start(struct watchdog_device *wdd);
> > +static int cros_ec_wdt_stop(struct watchdog_device *wdd);
> > +static int cros_ec_wdt_set_timeout(struct watchdog_device *wdd, unsigned int t);
> > +
>
> Please rearrange to avoid forward declarations.
>
ACK
> > +struct cros_ec_wdt_data {
> > +     bool start_on_resume;
> > +     bool keepalive_on;
> > +     struct cros_ec_device *cros_ec;
> > +     struct watchdog_device *wdd;
> > +};
> > +static struct cros_ec_wdt_data wd_data;
>
> Please allocate and avoid static variables.
>
ACK
> > +
> > +static const struct watchdog_info cros_ec_wdt_ident = {
> > +     .options          = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
> > +     .firmware_version = 0,
> > +     .identity         = DRV_NAME,
> > +};
> > +
> > +static const struct watchdog_ops cros_ec_wdt_ops = {
> > +     .owner           = THIS_MODULE,
> > +     .ping            = cros_ec_wdt_ping,
> > +     .start           = cros_ec_wdt_start,
> > +     .stop            = cros_ec_wdt_stop,
> > +     .set_timeout = cros_ec_wdt_set_timeout,
> > +};
> > +
> > +static struct watchdog_device cros_ec_wdd = {
> > +     .info = &cros_ec_wdt_ident,
> > +     .ops = &cros_ec_wdt_ops,
> > +     .timeout = CROS_EC_WATCHDOG_DEFAULT_TIME,
> > +     .bootstatus = EC_HANG_DETECT_AP_BOOT_NORMAL
>
> Consider allocating and embedding in struct cros_ec_wdt_data.
> .timeout and .bootstatus are overwritten anyway, and it does not make sense
> to initialize those elements here.
>
ACK, Agree, in fact cros_ec_wdt_data might be not necessary at all and
I might be able to drop it.
> > +};
> > +
> > +static int cros_ec_wdt_send_hang_detect(struct cros_ec_wdt_data *wd_data,
>
> Seems to me that cros_ec_wdt_send_cmd or similar would be a better function name,
> but your call.
ACK, cros_ec_wdt_send_cmd looks better - thank you,
>
> > +                                     uint16_t command,
> > +                                     uint16_t reboot_timeout_sec,
> > +                                     uint32_t *status)
> > +{
> > +     int ret;
> > +
> > +     struct {
> > +             struct cros_ec_command msg;
> > +             union {
> > +                     struct ec_params_hang_detect req;
> > +                     struct ec_response_hang_detect resp;
> > +             };
> > +     } __packed buf = {
> > +             .msg = {
> > +                     .version = 0,
> > +                     .command = EC_CMD_HANG_DETECT,
> > +                     .insize  = (command == EC_HANG_DETECT_CMD_GET_STATUS) ?
> > +                                sizeof(struct ec_response_hang_detect) :
> > +                                0,
> > +                     .outsize = sizeof(struct ec_params_hang_detect),
> > +             },
> > +             .req = {
> > +                     .command = command,
> > +                     .reboot_timeout_sec = reboot_timeout_sec,
> > +             }
> > +     };
> > +
> > +     ret = cros_ec_cmd_xfer_status(wd_data->cros_ec, &buf.msg);
> > +     if (ret < 0) {
> > +             dev_warn(wd_data->wdd->parent,
>
> Given that wdd is needed anyway,
>
> > +                              "cros_ec_cmd_xfer_status failed(%d) command (%04x) reboot_timeout_sec(%ds)",
> > +                              ret, command, reboot_timeout_sec);
>
> That message, if ever seen, does not make much sense if the command was
> one that does not set the timeout. On top of that, this code
> dumps a warning, and the calling code dumps an error message. Is the EC
> really that fragile that it is necessary to dump all those error messages ?
>
> If not, please consider replacing most if not all of those log messages
> with dev_dbg().
ACK, I will revisit log messages (and levels)
>
> > +             return ret;
> > +     }
> > +
> > +     if (status && (command == EC_HANG_DETECT_CMD_GET_STATUS)) {
>
> Unnecessary (). Besides, it doesn't make much sense. Why provide
> a status pointer but only use it if the command is a specific one ?
>
ACK, I will rework the whole function to be more generic.
> > +             *status = buf.resp.status;
> > +             dev_info(wd_data->wdd->parent, "EC Watchdog boot status (%d)",
> > +                              buf.resp.status);
>
> That message does not belong here.
>
ACK
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +static int cros_ec_wdt_ping(struct watchdog_device *wdd)
> > +{
> > +     struct cros_ec_wdt_data *wd_data = watchdog_get_drvdata(wdd);
> > +     int ret;
> > +
> > +     ret = cros_ec_wdt_send_hang_detect(wd_data, EC_HANG_DETECT_CMD_RELOAD,
> > +                                        0, NULL);
> > +     if (ret < 0)
> > +             dev_err(wdd->parent, "%s failed (%d)", __func__, ret);
>
> return here and drop the else.
>
ACK
> Nore that this driver is extremely noisy. If the EC for some reason doesn't
> respond as expected the error messages will drown all other kernel messages.
> I really do not see the point of this.
>
ACK, I will try to clean it up.
> > +     else
> > +             wd_data->keepalive_on = true;
>
> This is odd and needs a detailed explanation. More on that see below.
>
I have revisited the code again and I think I can drop struc
cros_ec_wdt_data and rely purely on structures provided by framework.
> > +
> > +     return ret;
> > +}
> > +
> > +static int cros_ec_wdt_start(struct watchdog_device *wdd)
> > +{
> > +     struct cros_ec_wdt_data *wd_data = watchdog_get_drvdata(wdd);
> > +     int ret = 0;
> > +
> > +     /* Prepare watchdog on EC side */
> > +     ret = cros_ec_wdt_send_hang_detect(wd_data,
> > +                                     EC_HANG_DETECT_CMD_SET_TIMEOUT,
> > +                                     wdd->timeout,
> > +                                     NULL);
> > +     if (ret < 0)
> > +             dev_err(wdd->parent, "%s failed (%d)", __func__, ret);
> > +
> > +     return ret;
> > +}
> > +
> > +static int cros_ec_wdt_stop(struct watchdog_device *wdd)
> > +{
> > +     struct cros_ec_wdt_data *wd_data = watchdog_get_drvdata(wdd);
> > +     int ret = 0;
> > +
> > +     if (wd_data->keepalive_on) {
> > +             wd_data->keepalive_on = false;
> > +             ret = cros_ec_wdt_send_hang_detect(wd_data, EC_HANG_DETECT_CMD_CANCEL,
> > +                                             0, NULL);
>
> This needs explanation. Why is it only needed here after the first ping is sent,
> but on suspend it is unconditional ? In other words, why does the watchdog
> behave differently after the first ping ? Why does the watchdog not have to be
> stopped if it never received a ping ? Logically that seems odd, if not plain
> wrong. The watchdog should behave exactly the same after it was started, no matter
> if it ever received a ping or not.
>
ACK, Same as with "keepalive_on" I will drop the struct
cros_ec_wdt_data. Agree with the confusion here, sorry for that, I
will simplify it.
> > +             if (ret < 0)
> > +                     dev_err(wdd->parent, "%s failed (%d)", __func__, ret);
> > +     }
> > +
> > +     return ret;
> > +}
> > +
> > +static int cros_ec_wdt_set_timeout(struct watchdog_device *wdd, unsigned int t)
> > +{
> > +     struct cros_ec_wdt_data *wd_data = watchdog_get_drvdata(wdd);
> > +     int ret;
> > +
> > +     if (t < EC_HANG_DETECT_MIN_TIMEOUT) {
> > +             dev_err(wdd->parent,
> > +                             "%s failed, requested timeout is lower than min(%d < %d)",
> > +                             __func__, t, EC_HANG_DETECT_MIN_TIMEOUT);
> > +             return -EINVAL;
>
> This should be handled in the watchdog core. If it isn't the driver is misconfigured.
>
ACK, yes, both min and max values should be set.
> > +     }
> > +
> > +     ret = cros_ec_wdt_send_hang_detect(wd_data,
> > +                                        EC_HANG_DETECT_CMD_SET_TIMEOUT,
> > +                                        t, NULL);
> > +     if (ret < 0)
> > +             dev_err(wdd->parent, "%s failed (%d)", __func__, ret);
>
> return here.
>
ACK
> > +     else
> > +             wdd->timeout = t;
> > +
> > +     return ret;
> > +}
> > +
> > +static int cros_ec_wdt_probe(struct platform_device *pdev)
> > +{
> > +     struct device *dev = &pdev->dev;
> > +     /* We need to get a reference to cros_ec_devices
> > +      * (provides communication layer) which is a parent of
> > +      * the cros-ec-dev (our parent)
> > +      */
> > +     struct cros_ec_device *cros_ec = dev_get_drvdata(dev->parent->parent);
>
>
> Other cros drivers get struct cros_ec_dev from dev->parent and then use
> ec->ec_dev. Please explain why you don't use the same pattern; dereferencing
> parent->parent and assuming to know its driver data seems to be (much) more
> risky than the existing pattern.
>
ACK, I will look at other cros_ec driver as an example.
> Also, the comment seems obvious. None of the other cros_ec drivers needed it,
> and I don't see why it would add value here.
>
ACK
> > +     int ret = 0;
> > +     uint32_t bootstatus;
> > +
> > +     if (!cros_ec) {
> > +             ret = -ENODEV;
> > +             dev_err_probe(dev, ret, "There is no coresponding EC device!");
> > +             return ret;
>
> return dev_err_probe(). Same elsewhere.
>
ACK
> > +     }
> > +
> > +     cros_ec_wdd.parent = &pdev->dev;
> > +     wd_data.cros_ec = cros_ec;
> > +     wd_data.wdd = &cros_ec_wdd;
> > +     wd_data.start_on_resume = false;
> > +     wd_data.keepalive_on = false;
> > +     wd_data.wdd->timeout = CROS_EC_WATCHDOG_DEFAULT_TIME;
>
> Ah, I guess the reason for the error handling when setting the timeout is
> the lack of setting boundaries here. That is wrong. min_timeout
> and max_timeout need to be set.
>
ACK
> > +
> > +     watchdog_stop_on_reboot(&cros_ec_wdd);
> > +     watchdog_stop_on_unregister(&cros_ec_wdd);
> > +     watchdog_set_drvdata(&cros_ec_wdd, &wd_data);
> > +     platform_set_drvdata(pdev, &wd_data);
> > +
> > +     /* Get current AP boot status */
> > +     ret = cros_ec_wdt_send_hang_detect(&wd_data, EC_HANG_DETECT_CMD_GET_STATUS, 0,
> > +                                        &bootstatus);
> > +     if (ret < 0) {
> > +             dev_err_probe(dev, ret, "Couldn't get AP boot status from EC");
> > +             return ret;
> > +     }
> > +
> > +     /*
> > +      * If bootstatus is not EC_HANG_DETECT_AP_BOOT_NORMAL
> > +      * it mens EC has rebooted the AP due to watchdog timeout.
>
> means
>
ACK
> > +      * Lets translate it to watchdog core status code.
> > +      */
> > +     if (bootstatus != EC_HANG_DETECT_AP_BOOT_NORMAL)
> > +             wd_data.wdd->bootstatus = WDIOF_CARDRESET;
> > +
> > +     ret = watchdog_register_device(&cros_ec_wdd);
> > +     if (ret < 0)
> > +             dev_err_probe(dev, ret, "Couldn't get AP boot status from EC");
>
> Wrong error message.
>
ACK
> > +
> > +     return ret;
> > +}
> > +
> > +static int cros_ec_wdt_remove(struct platform_device *pdev)
> > +{
> > +     struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
> > +
> > +     watchdog_unregister_device(wd_data->wdd);
>
> Why not use devm_watchdog_register_device() ? Please explain.
ACK, good point, thank you.
> > +
> > +     return 0;
> > +}
> > +
> > +static void cros_ec_wdt_shutdown(struct platform_device *pdev)
> > +{
> > +     struct device *dev = &pdev->dev;
> > +     struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
> > +     int ret;
> > +
> > +     /*
> > +      * Clean only bootstatus flag.
> > +      * EC wdt is are already stopped by watchdog framework.
> > +      */
>
> Why is this necessary if the watchdog is stopped ?
>
ACK, The idea was to clear the bootstatus (on EC side) when AP was
shutting down gracefully, to address the scenario:
1. EC kicks AP and sets bootstatus for doing so
2. AP boots and in sometime gracefully shutdown/reboot (EC still keeps
bootstatus that it's kicked AP)
3. AP boots again and read the bootstatus from EC and thinks it was kicked by EC
...but yeah much easier is to read-and-clear the boot status during
probe and keep it in the "bootstatus" field of thestruct
watchdog_device (where it was meant to be ) that will simplify the
code.
> > +     ret = cros_ec_wdt_send_hang_detect(wd_data,
> > +                                        EC_HANG_DETECT_CMD_CLEAR_STATUS, 0, NULL);
> > +     if (ret < 0)
> > +             dev_err(dev, "%s failed (%d)", __func__, ret);
> > +
> > +     watchdog_unregister_device(wd_data->wdd);
>
> This doesn't make sense to me. Explain why it is necessary.
>
ACK,  this will be removed.
> > +}
> > +
> > +static int __maybe_unused cros_ec_wdt_suspend(struct platform_device *pdev, pm_message_t state)
> > +{
> > +     struct device *dev = &pdev->dev;
> > +     struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
> > +     int ret;
> > +
> > +     if (watchdog_active(wd_data->wdd)) {
> > +             ret = cros_ec_wdt_send_hang_detect(wd_data,
> > +                                                EC_HANG_DETECT_CMD_CANCEL, 0, NULL);
> > +             if (ret < 0)
> > +                     dev_err(dev, "%s failed (%d)", __func__, ret);
> > +             wd_data->start_on_resume = true;
>
> Needs explanation. See below.
>
ACK, same as with "keepalive_on" - will drop the struct
cros_ec_wdt_data and rely on watchdog_active().
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +static int __maybe_unused cros_ec_wdt_resume(struct platform_device *pdev)
> > +{
> > +     struct device *dev = &pdev->dev;
> > +     struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
> > +     int ret;
> > +
> > +     /* start_on_resume is only set if watchdog was active
> > +      * when device was going to sleep
> > +      */
>
> Please use standard multi-line comments. This is not the networking subsystem.
> Even more important, use _consistent_ multi-line comments.
>
ACK
> > +     if (wd_data->start_on_resume) {
>
> Please explain why watchdog_active() does not work here.
>
> > +             /* On resume we just need to setup a EC watchdog the same way as
> > +              * in cros_ec_wdt_start(). When userspace resumes from suspend
> > +              * the watchdog app should just start petting the watchdog again.
> > +              */
>
> Isn't that obvious ?
ACK, Right
>
> > +             ret = cros_ec_wdt_start(wd_data->wdd);
> > +             if (ret < 0)
> > +                     dev_err(dev, "%s failed (%d)", __func__, ret);
> > +
> > +             wd_data->start_on_resume = false;
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +static struct platform_driver cros_ec_wdt_driver = {
> > +     .probe          = cros_ec_wdt_probe,
> > +     .remove         = cros_ec_wdt_remove,
> > +     .shutdown       = cros_ec_wdt_shutdown,
> > +     .suspend        = pm_ptr(cros_ec_wdt_suspend),
> > +     .resume         = pm_ptr(cros_ec_wdt_resume),
> > +     .driver         = {
> > +             .name   = DRV_NAME,
> > +     },
> > +};
> > +
> > +module_platform_driver(cros_ec_wdt_driver);
> > +
> > +MODULE_ALIAS("platform:" DRV_NAME);
> > +MODULE_DESCRIPTION("Cros EC Watchdog Device Driver");
> > +MODULE_LICENSE("GPL");
> > diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h
> > index 7dae17b62a4d..35a7a2d32819 100644
> > --- a/include/linux/platform_data/cros_ec_commands.h
> > +++ b/include/linux/platform_data/cros_ec_commands.h
> > @@ -3961,60 +3961,50 @@ struct ec_response_i2c_passthru {
> >   } __ec_align1;
> >
> >   /*****************************************************************************/
> > -/* Power button hang detect */
> > -
> > +/* AP hang detect */
> >   #define EC_CMD_HANG_DETECT 0x009F
> >
> > -/* Reasons to start hang detection timer */
> > -/* Power button pressed */
> > -#define EC_HANG_START_ON_POWER_PRESS  BIT(0)
> > -
> > -/* Lid closed */
> > -#define EC_HANG_START_ON_LID_CLOSE    BIT(1)
> > -
> > - /* Lid opened */
> > -#define EC_HANG_START_ON_LID_OPEN     BIT(2)
> > -
> > -/* Start of AP S3->S0 transition (booting or resuming from suspend) */
> > -#define EC_HANG_START_ON_RESUME       BIT(3)
> > -
> > -/* Reasons to cancel hang detection */
> > +#define EC_HANG_DETECT_MIN_TIMEOUT 5
> >
> > -/* Power button released */
> > -#define EC_HANG_STOP_ON_POWER_RELEASE BIT(8)
> > +/* EC hang detect commands */
> > +enum ec_hang_detect_cmds {
> > +     /* Reload AP hang detect timer. */
> > +     EC_HANG_DETECT_CMD_RELOAD = 0x0,
> >
> > -/* Any host command from AP received */
> > -#define EC_HANG_STOP_ON_HOST_COMMAND  BIT(9)
> > +     /* Stop AP hang detect timer. */
> > +     EC_HANG_DETECT_CMD_CANCEL = 0x1,
> >
>
> This is making unrelated changes. Please make those in a separate patch.
> It might be best to update this file in a separate patch to start with.
>
ACK, I will create a separate one for this.
> > -/* Stop on end of AP S0->S3 transition (suspending or shutting down) */
> > -#define EC_HANG_STOP_ON_SUSPEND       BIT(10)
> > +     /* Configure watchdog with given reboot timeout and
> > +      * cancel currently running AP hand detect timer.
> > +      */
> > +     EC_HANG_DETECT_CMD_SET_TIMEOUT = 0x2,
> >
> > -/*
> > - * If this flag is set, all the other fields are ignored, and the hang detect
> > - * timer is started.  This provides the AP a way to start the hang timer
> > - * without reconfiguring any of the other hang detect settings.  Note that
> > - * you must previously have configured the timeouts.
> > - */
> > -#define EC_HANG_START_NOW             BIT(30)
> > +     /* Get last hang status - whether the AP boot was clear or not */
> > +     EC_HANG_DETECT_CMD_GET_STATUS = 0x3,
> >
> > -/*
> > - * If this flag is set, all the other fields are ignored (including
> > - * EC_HANG_START_NOW).  This provides the AP a way to stop the hang timer
> > - * without reconfiguring any of the other hang detect settings.
> > - */
> > -#define EC_HANG_STOP_NOW              BIT(31)
> > +     /* Clear last hang status. Called when AP is rebooting/shutting down
> > +      * gracefully.
> > +      */
> > +     EC_HANG_DETECT_CMD_CLEAR_STATUS = 0x4
> > +};
> >
> >   struct ec_params_hang_detect {
> > -     /* Flags; see EC_HANG_* */
> > -     uint32_t flags;
> > -
> > -     /* Timeout in msec before generating host event, if enabled */
> > -     uint16_t host_event_timeout_msec;
> > -
> > -     /* Timeout in msec before generating warm reboot, if enabled */
> > -     uint16_t warm_reboot_timeout_msec;
> > -} __ec_align4;
> > +     uint16_t command; /* enum ec_hang_detect_cmds */
> > +     /* Timeout in seconds before generating reboot */
> > +     uint16_t reboot_timeout_sec;
> > +} __ec_align2;
> >
> > +/* Status codes that describe whether AP has boot normally or the hang has been
> > + * detected and EC has reset AP
> > + */
> > +enum ec_hang_detect_status {
> > +     EC_HANG_DETECT_AP_BOOT_NORMAL = 0x0,
> > +     EC_HANG_DETECT_AP_BOOT_EC_WDT = 0x1,
> > +     EC_HANG_DETECT_AP_BOOT_COUNT,
> > +};
> > +struct ec_response_hang_detect {
> > +     uint8_t status; /* enum ec_hang_detect_status */
> > +} __ec_align1;
> >   /*****************************************************************************/
> >   /* Commands for battery charging */
> >
>

Thank you Guenter for all the comments, I will try to address them
best I can sith V2 and thank you for your time reading all of this and
answering.

Best regards,
Lukasz
Guenter Roeck Jan. 18, 2024, 6:17 p.m. UTC | #6
On 1/18/24 10:08, Łukasz Majczak wrote:
> Hi Guenter
> 
> Please see my comments below
> 
> On Wed, Jan 17, 2024 at 5:23 PM Guenter Roeck <linux@roeck-us.net> wrote:
>>
>> On 1/17/24 02:24, Lukasz Majczak wrote:
>>> This adds EC-based watchdog support for ChromeOS
>>> based devices equipped with embedded controller (EC).
>>>
>>> Signed-off-by: Lukasz Majczak <lma@chromium.org>
>>
>> checkpatch --strict says:
>>
>> total: 0 errors, 0 warnings, 14 checks, 455 lines checked
>>
>> Please fix.
> 
> ACK, Although I would keep uint16_t/uint32_t types in
> cros_ec_commands.h as those are shared with EC firmware.
> 

Makes sense. Thanks!

Guenter
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 391bbb855cbe..55cd626a525f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4952,6 +4952,12 @@  R:	Sami Kyöstilä <skyostil@chromium.org>
 S:	Maintained
 F:	drivers/platform/chrome/cros_hps_i2c.c
 
+CHROMEOS EC WATCHDOG
+M:	Lukasz Majczak <lma@chromium.org>
+L:	chrome-platform@lists.linux.dev
+S:	Maintained
+F:	drivers/watchdog/cros_ec_wdt.c
+
 CHRONTEL CH7322 CEC DRIVER
 M:	Joe Tessler <jrt@google.com>
 L:	linux-media@vger.kernel.org
diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
index 79d393b602bf..60fe831cf30a 100644
--- a/drivers/mfd/cros_ec_dev.c
+++ b/drivers/mfd/cros_ec_dev.c
@@ -91,6 +91,10 @@  static const struct mfd_cell cros_usbpd_notify_cells[] = {
 	{ .name = "cros-usbpd-notify", },
 };
 
+static const struct mfd_cell cros_ec_wdt_cells[] = {
+	{ .name = "cros-ec-wdt-drv", }
+};
+
 static const struct cros_feature_to_cells cros_subdevices[] = {
 	{
 		.id		= EC_FEATURE_CEC,
@@ -107,6 +111,11 @@  static const struct cros_feature_to_cells cros_subdevices[] = {
 		.mfd_cells	= cros_usbpd_charger_cells,
 		.num_cells	= ARRAY_SIZE(cros_usbpd_charger_cells),
 	},
+	{
+		.id		= EC_FEATURE_HANG_DETECT,
+		.mfd_cells	= cros_ec_wdt_cells,
+		.num_cells	= ARRAY_SIZE(cros_ec_wdt_cells),
+	},
 };
 
 static const struct mfd_cell cros_ec_platform_cells[] = {
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 7d22051b15a2..1da4be661be8 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -2251,4 +2251,19 @@  config KEEMBAY_WATCHDOG
 	  To compile this driver as a module, choose M here: the
 	  module will be called keembay_wdt.
 
+#
+# ChromeOS EC-based Watchdog
+#
+
+config CROS_EC_WATCHDOG
+	tristate "ChromeOS EC-based watchdog driver"
+	select WATCHDOG_CORE
+	depends on CROS_EC
+	help
+	  This is the watchdog driver for ChromeOS devices equipped with EC.
+	  The EC watchdog - when enabled - expects to be kicked within a given
+	  time (default set to 30 seconds) otherwise it will simple reboot
+	  the AP. Priori to that it can also send an event (configurable timeout)
+	  about upcoming reboot.
+
 endif # WATCHDOG
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 7cbc34514ec1..8295c209ddb0 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -234,3 +234,6 @@  obj-$(CONFIG_MENZ069_WATCHDOG) += menz69_wdt.o
 obj-$(CONFIG_RAVE_SP_WATCHDOG) += rave-sp-wdt.o
 obj-$(CONFIG_STPMIC1_WATCHDOG) += stpmic1_wdt.o
 obj-$(CONFIG_SL28CPLD_WATCHDOG) += sl28cpld_wdt.o
+
+# Cros EC watchdog
+obj-$(CONFIG_CROS_EC_WATCHDOG) += cros_ec_wdt.o
diff --git a/drivers/watchdog/cros_ec_wdt.c b/drivers/watchdog/cros_ec_wdt.c
new file mode 100644
index 000000000000..b461c0613269
--- /dev/null
+++ b/drivers/watchdog/cros_ec_wdt.c
@@ -0,0 +1,303 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/of_device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+#include <linux/uaccess.h>
+
+#define CROS_EC_WATCHDOG_DEFAULT_TIME 30 /* seconds */
+
+#define DEV_NAME "cros-ec-wdt-dev"
+#define DRV_NAME "cros-ec-wdt-drv"
+
+static int cros_ec_wdt_ping(struct watchdog_device *wdd);
+static int cros_ec_wdt_start(struct watchdog_device *wdd);
+static int cros_ec_wdt_stop(struct watchdog_device *wdd);
+static int cros_ec_wdt_set_timeout(struct watchdog_device *wdd, unsigned int t);
+
+struct cros_ec_wdt_data {
+	bool start_on_resume;
+	bool keepalive_on;
+	struct cros_ec_device *cros_ec;
+	struct watchdog_device *wdd;
+};
+static struct cros_ec_wdt_data wd_data;
+
+static const struct watchdog_info cros_ec_wdt_ident = {
+	.options          = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
+	.firmware_version = 0,
+	.identity         = DRV_NAME,
+};
+
+static const struct watchdog_ops cros_ec_wdt_ops = {
+	.owner		 = THIS_MODULE,
+	.ping		 = cros_ec_wdt_ping,
+	.start		 = cros_ec_wdt_start,
+	.stop		 = cros_ec_wdt_stop,
+	.set_timeout = cros_ec_wdt_set_timeout,
+};
+
+static struct watchdog_device cros_ec_wdd = {
+	.info = &cros_ec_wdt_ident,
+	.ops = &cros_ec_wdt_ops,
+	.timeout = CROS_EC_WATCHDOG_DEFAULT_TIME,
+	.bootstatus = EC_HANG_DETECT_AP_BOOT_NORMAL
+};
+
+static int cros_ec_wdt_send_hang_detect(struct cros_ec_wdt_data *wd_data,
+					uint16_t command,
+					uint16_t reboot_timeout_sec,
+					uint32_t *status)
+{
+	int ret;
+
+	struct {
+		struct cros_ec_command msg;
+		union {
+			struct ec_params_hang_detect req;
+			struct ec_response_hang_detect resp;
+		};
+	} __packed buf = {
+		.msg = {
+			.version = 0,
+			.command = EC_CMD_HANG_DETECT,
+			.insize  = (command == EC_HANG_DETECT_CMD_GET_STATUS) ?
+				   sizeof(struct ec_response_hang_detect) :
+				   0,
+			.outsize = sizeof(struct ec_params_hang_detect),
+		},
+		.req = {
+			.command = command,
+			.reboot_timeout_sec = reboot_timeout_sec,
+		}
+	};
+
+	ret = cros_ec_cmd_xfer_status(wd_data->cros_ec, &buf.msg);
+	if (ret < 0) {
+		dev_warn(wd_data->wdd->parent,
+				 "cros_ec_cmd_xfer_status failed(%d) command (%04x) reboot_timeout_sec(%ds)",
+				 ret, command, reboot_timeout_sec);
+		return ret;
+	}
+
+	if (status && (command == EC_HANG_DETECT_CMD_GET_STATUS)) {
+		*status = buf.resp.status;
+		dev_info(wd_data->wdd->parent, "EC Watchdog boot status (%d)",
+				 buf.resp.status);
+	}
+
+	return 0;
+}
+
+static int cros_ec_wdt_ping(struct watchdog_device *wdd)
+{
+	struct cros_ec_wdt_data *wd_data = watchdog_get_drvdata(wdd);
+	int ret;
+
+	ret = cros_ec_wdt_send_hang_detect(wd_data, EC_HANG_DETECT_CMD_RELOAD,
+					   0, NULL);
+	if (ret < 0)
+		dev_err(wdd->parent, "%s failed (%d)", __func__, ret);
+	else
+		wd_data->keepalive_on = true;
+
+	return ret;
+}
+
+static int cros_ec_wdt_start(struct watchdog_device *wdd)
+{
+	struct cros_ec_wdt_data *wd_data = watchdog_get_drvdata(wdd);
+	int ret = 0;
+
+	/* Prepare watchdog on EC side */
+	ret = cros_ec_wdt_send_hang_detect(wd_data,
+					EC_HANG_DETECT_CMD_SET_TIMEOUT,
+					wdd->timeout,
+					NULL);
+	if (ret < 0)
+		dev_err(wdd->parent, "%s failed (%d)", __func__, ret);
+
+	return ret;
+}
+
+static int cros_ec_wdt_stop(struct watchdog_device *wdd)
+{
+	struct cros_ec_wdt_data *wd_data = watchdog_get_drvdata(wdd);
+	int ret = 0;
+
+	if (wd_data->keepalive_on) {
+		wd_data->keepalive_on = false;
+		ret = cros_ec_wdt_send_hang_detect(wd_data, EC_HANG_DETECT_CMD_CANCEL,
+						0, NULL);
+		if (ret < 0)
+			dev_err(wdd->parent, "%s failed (%d)", __func__, ret);
+	}
+
+	return ret;
+}
+
+static int cros_ec_wdt_set_timeout(struct watchdog_device *wdd, unsigned int t)
+{
+	struct cros_ec_wdt_data *wd_data = watchdog_get_drvdata(wdd);
+	int ret;
+
+	if (t < EC_HANG_DETECT_MIN_TIMEOUT) {
+		dev_err(wdd->parent,
+				"%s failed, requested timeout is lower than min(%d < %d)",
+				__func__, t, EC_HANG_DETECT_MIN_TIMEOUT);
+		return -EINVAL;
+	}
+
+	ret = cros_ec_wdt_send_hang_detect(wd_data,
+					   EC_HANG_DETECT_CMD_SET_TIMEOUT,
+					   t, NULL);
+	if (ret < 0)
+		dev_err(wdd->parent, "%s failed (%d)", __func__, ret);
+	else
+		wdd->timeout = t;
+
+	return ret;
+}
+
+static int cros_ec_wdt_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	/* We need to get a reference to cros_ec_devices
+	 * (provides communication layer) which is a parent of
+	 * the cros-ec-dev (our parent)
+	 */
+	struct cros_ec_device *cros_ec = dev_get_drvdata(dev->parent->parent);
+	int ret = 0;
+	uint32_t bootstatus;
+
+	if (!cros_ec) {
+		ret = -ENODEV;
+		dev_err_probe(dev, ret, "There is no coresponding EC device!");
+		return ret;
+	}
+
+	cros_ec_wdd.parent = &pdev->dev;
+	wd_data.cros_ec = cros_ec;
+	wd_data.wdd = &cros_ec_wdd;
+	wd_data.start_on_resume = false;
+	wd_data.keepalive_on = false;
+	wd_data.wdd->timeout = CROS_EC_WATCHDOG_DEFAULT_TIME;
+
+	watchdog_stop_on_reboot(&cros_ec_wdd);
+	watchdog_stop_on_unregister(&cros_ec_wdd);
+	watchdog_set_drvdata(&cros_ec_wdd, &wd_data);
+	platform_set_drvdata(pdev, &wd_data);
+
+	/* Get current AP boot status */
+	ret = cros_ec_wdt_send_hang_detect(&wd_data, EC_HANG_DETECT_CMD_GET_STATUS, 0,
+					   &bootstatus);
+	if (ret < 0) {
+		dev_err_probe(dev, ret, "Couldn't get AP boot status from EC");
+		return ret;
+	}
+
+	/*
+	 * If bootstatus is not EC_HANG_DETECT_AP_BOOT_NORMAL
+	 * it mens EC has rebooted the AP due to watchdog timeout.
+	 * Lets translate it to watchdog core status code.
+	 */
+	if (bootstatus != EC_HANG_DETECT_AP_BOOT_NORMAL)
+		wd_data.wdd->bootstatus = WDIOF_CARDRESET;
+
+	ret = watchdog_register_device(&cros_ec_wdd);
+	if (ret < 0)
+		dev_err_probe(dev, ret, "Couldn't get AP boot status from EC");
+
+	return ret;
+}
+
+static int cros_ec_wdt_remove(struct platform_device *pdev)
+{
+	struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
+
+	watchdog_unregister_device(wd_data->wdd);
+
+	return 0;
+}
+
+static void cros_ec_wdt_shutdown(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
+	int ret;
+
+	/*
+	 * Clean only bootstatus flag.
+	 * EC wdt is are already stopped by watchdog framework.
+	 */
+	ret = cros_ec_wdt_send_hang_detect(wd_data,
+					   EC_HANG_DETECT_CMD_CLEAR_STATUS, 0, NULL);
+	if (ret < 0)
+		dev_err(dev, "%s failed (%d)", __func__, ret);
+
+	watchdog_unregister_device(wd_data->wdd);
+}
+
+static int __maybe_unused cros_ec_wdt_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct device *dev = &pdev->dev;
+	struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
+	int ret;
+
+	if (watchdog_active(wd_data->wdd)) {
+		ret = cros_ec_wdt_send_hang_detect(wd_data,
+						   EC_HANG_DETECT_CMD_CANCEL, 0, NULL);
+		if (ret < 0)
+			dev_err(dev, "%s failed (%d)", __func__, ret);
+		wd_data->start_on_resume = true;
+	}
+
+	return 0;
+}
+
+static int __maybe_unused cros_ec_wdt_resume(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct cros_ec_wdt_data *wd_data = platform_get_drvdata(pdev);
+	int ret;
+
+	/* start_on_resume is only set if watchdog was active
+	 * when device was going to sleep
+	 */
+	if (wd_data->start_on_resume) {
+		/* On resume we just need to setup a EC watchdog the same way as
+		 * in cros_ec_wdt_start(). When userspace resumes from suspend
+		 * the watchdog app should just start petting the watchdog again.
+		 */
+		ret = cros_ec_wdt_start(wd_data->wdd);
+		if (ret < 0)
+			dev_err(dev, "%s failed (%d)", __func__, ret);
+
+		wd_data->start_on_resume = false;
+	}
+
+	return 0;
+}
+
+static struct platform_driver cros_ec_wdt_driver = {
+	.probe		= cros_ec_wdt_probe,
+	.remove		= cros_ec_wdt_remove,
+	.shutdown	= cros_ec_wdt_shutdown,
+	.suspend	= pm_ptr(cros_ec_wdt_suspend),
+	.resume		= pm_ptr(cros_ec_wdt_resume),
+	.driver		= {
+		.name	= DRV_NAME,
+	},
+};
+
+module_platform_driver(cros_ec_wdt_driver);
+
+MODULE_ALIAS("platform:" DRV_NAME);
+MODULE_DESCRIPTION("Cros EC Watchdog Device Driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h
index 7dae17b62a4d..35a7a2d32819 100644
--- a/include/linux/platform_data/cros_ec_commands.h
+++ b/include/linux/platform_data/cros_ec_commands.h
@@ -3961,60 +3961,50 @@  struct ec_response_i2c_passthru {
 } __ec_align1;
 
 /*****************************************************************************/
-/* Power button hang detect */
-
+/* AP hang detect */
 #define EC_CMD_HANG_DETECT 0x009F
 
-/* Reasons to start hang detection timer */
-/* Power button pressed */
-#define EC_HANG_START_ON_POWER_PRESS  BIT(0)
-
-/* Lid closed */
-#define EC_HANG_START_ON_LID_CLOSE    BIT(1)
-
- /* Lid opened */
-#define EC_HANG_START_ON_LID_OPEN     BIT(2)
-
-/* Start of AP S3->S0 transition (booting or resuming from suspend) */
-#define EC_HANG_START_ON_RESUME       BIT(3)
-
-/* Reasons to cancel hang detection */
+#define EC_HANG_DETECT_MIN_TIMEOUT 5
 
-/* Power button released */
-#define EC_HANG_STOP_ON_POWER_RELEASE BIT(8)
+/* EC hang detect commands */
+enum ec_hang_detect_cmds {
+	/* Reload AP hang detect timer. */
+	EC_HANG_DETECT_CMD_RELOAD = 0x0,
 
-/* Any host command from AP received */
-#define EC_HANG_STOP_ON_HOST_COMMAND  BIT(9)
+	/* Stop AP hang detect timer. */
+	EC_HANG_DETECT_CMD_CANCEL = 0x1,
 
-/* Stop on end of AP S0->S3 transition (suspending or shutting down) */
-#define EC_HANG_STOP_ON_SUSPEND       BIT(10)
+	/* Configure watchdog with given reboot timeout and
+	 * cancel currently running AP hand detect timer.
+	 */
+	EC_HANG_DETECT_CMD_SET_TIMEOUT = 0x2,
 
-/*
- * If this flag is set, all the other fields are ignored, and the hang detect
- * timer is started.  This provides the AP a way to start the hang timer
- * without reconfiguring any of the other hang detect settings.  Note that
- * you must previously have configured the timeouts.
- */
-#define EC_HANG_START_NOW             BIT(30)
+	/* Get last hang status - whether the AP boot was clear or not */
+	EC_HANG_DETECT_CMD_GET_STATUS = 0x3,
 
-/*
- * If this flag is set, all the other fields are ignored (including
- * EC_HANG_START_NOW).  This provides the AP a way to stop the hang timer
- * without reconfiguring any of the other hang detect settings.
- */
-#define EC_HANG_STOP_NOW              BIT(31)
+	/* Clear last hang status. Called when AP is rebooting/shutting down
+	 * gracefully.
+	 */
+	EC_HANG_DETECT_CMD_CLEAR_STATUS = 0x4
+};
 
 struct ec_params_hang_detect {
-	/* Flags; see EC_HANG_* */
-	uint32_t flags;
-
-	/* Timeout in msec before generating host event, if enabled */
-	uint16_t host_event_timeout_msec;
-
-	/* Timeout in msec before generating warm reboot, if enabled */
-	uint16_t warm_reboot_timeout_msec;
-} __ec_align4;
+	uint16_t command; /* enum ec_hang_detect_cmds */
+	/* Timeout in seconds before generating reboot */
+	uint16_t reboot_timeout_sec;
+} __ec_align2;
 
+/* Status codes that describe whether AP has boot normally or the hang has been
+ * detected and EC has reset AP
+ */
+enum ec_hang_detect_status {
+	EC_HANG_DETECT_AP_BOOT_NORMAL = 0x0,
+	EC_HANG_DETECT_AP_BOOT_EC_WDT = 0x1,
+	EC_HANG_DETECT_AP_BOOT_COUNT,
+};
+struct ec_response_hang_detect {
+	uint8_t status; /* enum ec_hang_detect_status */
+} __ec_align1;
 /*****************************************************************************/
 /* Commands for battery charging */