diff mbox series

[v2,1/2] hwmon: add ChromeOS EC driver

Message ID 20240507-cros_ec-hwmon-v2-1-1222c5fca0f7@weissschuh.net (mailing list archive)
State Superseded
Headers show
Series ChromeOS Embedded controller hwmon driver | expand

Commit Message

Thomas Weißschuh May 7, 2024, 4:29 p.m. UTC
The ChromeOS Embedded Controller exposes fan speed and temperature
readings.
Expose this data through the hwmon subsystem.

The driver is designed to be probed via the cros_ec mfd device.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
---
 Documentation/hwmon/cros_ec_hwmon.rst |  26 ++++
 Documentation/hwmon/index.rst         |   1 +
 MAINTAINERS                           |   8 +
 drivers/hwmon/Kconfig                 |  11 ++
 drivers/hwmon/Makefile                |   1 +
 drivers/hwmon/cros_ec_hwmon.c         | 269 ++++++++++++++++++++++++++++++++++
 6 files changed, 316 insertions(+)

Comments

Guenter Roeck May 7, 2024, 8:55 p.m. UTC | #1
On Tue, May 7, 2024 at 10:29 AM Thomas Weißschuh <linux@weissschuh.net> wrote:
>
> The ChromeOS Embedded Controller exposes fan speed and temperature
> readings.
> Expose this data through the hwmon subsystem.
>
> The driver is designed to be probed via the cros_ec mfd device.
>
> Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
> ---
>  Documentation/hwmon/cros_ec_hwmon.rst |  26 ++++
>  Documentation/hwmon/index.rst         |   1 +
>  MAINTAINERS                           |   8 +
>  drivers/hwmon/Kconfig                 |  11 ++
>  drivers/hwmon/Makefile                |   1 +
>  drivers/hwmon/cros_ec_hwmon.c         | 269 ++++++++++++++++++++++++++++++++++
>  6 files changed, 316 insertions(+)
>
> diff --git a/Documentation/hwmon/cros_ec_hwmon.rst b/Documentation/hwmon/cros_ec_hwmon.rst
> new file mode 100644
> index 000000000000..aeb88c79d11b
> --- /dev/null
> +++ b/Documentation/hwmon/cros_ec_hwmon.rst
> @@ -0,0 +1,26 @@
> +.. SPDX-License-Identifier: GPL-2.0-or-later
> +
> +Kernel driver cros_ec_hwmon
> +===========================
> +
> +Supported chips:
> +
> +  * ChromeOS embedded controllers connected via LPC
> +
> +    Prefix: 'cros_ec'
> +
> +    Addresses scanned: -
> +
> +Author:
> +
> +  - Thomas Weißschuh <linux@weissschuh.net>
> +
> +Description
> +-----------
> +
> +This driver implements support for hardware monitoring commands exposed by the
> +ChromeOS embedded controller used in Chromebooks and other devices.
> +
> +The channel labels exposed via hwmon are retrieved from the EC itself.
> +
> +Fan and temperature readings are supported.
> diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
> index 1ca7a4fe1f8f..355a83e66928 100644
> --- a/Documentation/hwmon/index.rst
> +++ b/Documentation/hwmon/index.rst
> @@ -57,6 +57,7 @@ Hardware Monitoring Kernel Drivers
>     coretemp
>     corsair-cpro
>     corsair-psu
> +   cros_ec_hwmon
>     da9052
>     da9055
>     dell-smm-hwmon
> diff --git a/MAINTAINERS b/MAINTAINERS
> index c23fda1aa1f0..aa5689169eca 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -4988,6 +4988,14 @@ S:       Maintained
>  F:     Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml
>  F:     sound/soc/codecs/cros_ec_codec.*
>
> +CHROMEOS EC HARDWARE MONITORING
> +M:     Thomas Weißschuh <thomas@weissschuh.net>
> +L:     chrome-platform@lists.linux.dev
> +L:     linux-hwmon@vger.kernel.org
> +S:     Maintained
> +F:     Documentation/hwmon/chros_ec_hwmon.rst
> +F:     drivers/hwmon/cros_ec_hwmon.c
> +
>  CHROMEOS EC SUBDRIVERS
>  M:     Benson Leung <bleung@chromium.org>
>  R:     Guenter Roeck <groeck@chromium.org>
> diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> index 83945397b6eb..c1284d42697f 100644
> --- a/drivers/hwmon/Kconfig
> +++ b/drivers/hwmon/Kconfig
> @@ -506,6 +506,17 @@ config SENSORS_CORSAIR_PSU
>           This driver can also be built as a module. If so, the module
>           will be called corsair-psu.
>
> +config SENSORS_CROS_EC
> +       tristate "ChromeOS Embedded Controller sensors"
> +       depends on MFD_CROS_EC_DEV
> +       default MFD_CROS_EC_DEV
> +       help
> +         If you say yes here you get support for ChromeOS Embedded Controller
> +         sensors.
> +
> +         This driver can also be built as a module. If so, the module
> +         will be called cros_ec_hwmon.
> +
>  config SENSORS_DRIVETEMP
>         tristate "Hard disk drives with temperature sensors"
>         depends on SCSI && ATA
> diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
> index 5c31808f6378..8519a6b36c00 100644
> --- a/drivers/hwmon/Makefile
> +++ b/drivers/hwmon/Makefile
> @@ -64,6 +64,7 @@ obj-$(CONFIG_SENSORS_CHIPCAP2) += chipcap2.o
>  obj-$(CONFIG_SENSORS_CORETEMP) += coretemp.o
>  obj-$(CONFIG_SENSORS_CORSAIR_CPRO) += corsair-cpro.o
>  obj-$(CONFIG_SENSORS_CORSAIR_PSU) += corsair-psu.o
> +obj-$(CONFIG_SENSORS_CROS_EC)  += cros_ec_hwmon.o
>  obj-$(CONFIG_SENSORS_DA9052_ADC)+= da9052-hwmon.o
>  obj-$(CONFIG_SENSORS_DA9055)+= da9055-hwmon.o
>  obj-$(CONFIG_SENSORS_DELL_SMM) += dell-smm-hwmon.o
> diff --git a/drivers/hwmon/cros_ec_hwmon.c b/drivers/hwmon/cros_ec_hwmon.c
> new file mode 100644
> index 000000000000..d59d39df2ac4
> --- /dev/null
> +++ b/drivers/hwmon/cros_ec_hwmon.c
> @@ -0,0 +1,269 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + *  ChromesOS EC driver for hwmon
> + *
> + *  Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net>
> + */
> +
> +#include <linux/device.h>
> +#include <linux/hwmon.h>
> +#include <linux/kernel.h>
> +#include <linux/mod_devicetable.h>
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/platform_data/cros_ec_commands.h>
> +#include <linux/platform_data/cros_ec_proto.h>
> +#include <linux/units.h>
> +
> +#define DRV_NAME       "cros-ec-hwmon"
> +
> +struct cros_ec_hwmon_priv {
> +       struct cros_ec_device *cros_ec;
> +       u8 thermal_version;
> +       const char *temp_sensor_names[EC_TEMP_SENSOR_ENTRIES + EC_TEMP_SENSOR_B_ENTRIES];
> +};
> +
> +static int cros_ec_hwmon_read_fan_speed(struct cros_ec_device *cros_ec, u8 index, u16 *speed)
> +{
> +       u16 data;
> +       int ret;
> +
> +       ret = cros_ec->cmd_readmem(cros_ec, EC_MEMMAP_FAN + index * 2, 2, &data);
> +       if (ret < 0)
> +               return ret;
> +
> +       data = le16_to_cpu(data);
> +
> +       if (data == EC_FAN_SPEED_NOT_PRESENT)
> +               return -ENODEV;
> +
I don't have time for a full review right now, but this looks wrong:
If a fan is not present, its sysfs attribute should not be instantiated.
Returning -ENODEV here is most definitely wrong.

> +       *speed = data;
> +       return 0;
> +}
> +
> +static int cros_ec_hwmon_read_temp(struct cros_ec_device *cros_ec, u8 thermal_version,
> +                                  u8 index, u8 *data)
> +{
> +       unsigned int offset;
> +       int ret;
> +
> +       if (index < EC_TEMP_SENSOR_ENTRIES)
> +               offset = EC_MEMMAP_TEMP_SENSOR + index;
> +       else
> +               offset = EC_MEMMAP_TEMP_SENSOR_B + index - EC_TEMP_SENSOR_ENTRIES;
> +
> +       ret = cros_ec->cmd_readmem(cros_ec, offset, 1, data);
> +       if (ret < 0)
> +               return ret;
> +
> +       if (*data == EC_TEMP_SENSOR_NOT_PRESENT ||
> +           *data == EC_TEMP_SENSOR_ERROR ||
> +           *data == EC_TEMP_SENSOR_NOT_POWERED ||
> +           *data == EC_TEMP_SENSOR_NOT_CALIBRATED)
> +               return -ENODEV;
> +
Same as above.

Guenter

> +       return 0;
> +}
> +
> +static int cros_ec_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> +                             u32 attr, int channel, long *val)
> +{
> +       struct cros_ec_hwmon_priv *priv = dev_get_drvdata(dev);
> +       int ret = -ENODATA;
> +       u16 speed;
> +       u8 temp;
> +
> +       if (type == hwmon_fan) {
> +               ret = cros_ec_hwmon_read_fan_speed(priv->cros_ec, channel, &speed);
> +               if (ret == 0)
> +                       *val = speed;
> +       } else if (type == hwmon_temp) {
> +               ret = cros_ec_hwmon_read_temp(priv->cros_ec, priv->thermal_version, channel, &temp);
> +               if (ret == 0)
> +                       *val = kelvin_to_millicelsius((((long)temp) + EC_TEMP_SENSOR_OFFSET));
> +       }
> +
> +       return ret;
> +}
> +
> +static int cros_ec_hwmon_get_temp_sensor_info(struct cros_ec_device *cros_ec, u8 id,
> +                                             struct ec_response_temp_sensor_get_info *resp)
> +{
> +       int ret;
> +       struct {
> +               struct cros_ec_command msg;
> +               union {
> +                       struct ec_params_temp_sensor_get_info req;
> +                       struct ec_response_temp_sensor_get_info resp;
> +               } __packed data;
> +       } __packed buf = {
> +               .msg = {
> +                       .version = 0,
> +                       .command = EC_CMD_TEMP_SENSOR_GET_INFO,
> +                       .insize  = sizeof(buf.data.resp),
> +                       .outsize = sizeof(buf.data.req),
> +               },
> +               .data.req.id = id,
> +       };
> +
> +       ret = cros_ec_cmd_xfer_status(cros_ec, &buf.msg);
> +       if (ret < 0)
> +               return ret;
> +
> +       *resp = buf.data.resp;
> +       return 0;
> +}
> +
> +static int cros_ec_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type,
> +                                    u32 attr, int channel, const char **str)
> +{
> +       struct cros_ec_hwmon_priv *priv = dev_get_drvdata(dev);
> +
> +       if (type == hwmon_temp && attr == hwmon_temp_label) {
> +               *str = priv->temp_sensor_names[channel];
> +               return 0;
> +       }
> +
> +       return -ENODATA;
> +}
> +
> +static umode_t cros_ec_hwmon_is_visible(const void *data, enum hwmon_sensor_types type,
> +                                       u32 attr, int channel)
> +{
> +       const struct cros_ec_hwmon_priv *priv = data;
> +       u16 speed;
> +
> +       if (type == hwmon_fan) {
> +               if (cros_ec_hwmon_read_fan_speed(priv->cros_ec, channel, &speed) == 0)
> +                       return 0444;
> +       } else if (type == hwmon_temp) {
> +               if (priv->temp_sensor_names[channel])
> +                       return 0444;
> +       }
> +
> +       return 0;
> +}
> +
> +static const struct hwmon_channel_info * const cros_ec_hwmon_info[] = {
> +       HWMON_CHANNEL_INFO(fan,
> +                          HWMON_F_INPUT,
> +                          HWMON_F_INPUT,
> +                          HWMON_F_INPUT,
> +                          HWMON_F_INPUT),
> +       HWMON_CHANNEL_INFO(temp,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL,
> +                          HWMON_T_INPUT | HWMON_T_LABEL),
> +       NULL
> +};
> +
> +static const struct hwmon_ops cros_ec_hwmon_ops = {
> +       .read = cros_ec_hwmon_read,
> +       .read_string = cros_ec_hwmon_read_string,
> +       .is_visible = cros_ec_hwmon_is_visible,
> +};
> +
> +static const struct hwmon_chip_info cros_ec_hwmon_chip_info = {
> +       .ops = &cros_ec_hwmon_ops,
> +       .info = cros_ec_hwmon_info,
> +};
> +
> +static void cros_ec_hwmon_probe_temp_sensors(struct device *dev, struct cros_ec_hwmon_priv *priv)
> +{
> +       struct ec_response_temp_sensor_get_info info;
> +       size_t candidates, i;
> +       int ret;
> +       u8 temp;
> +
> +       if (priv->thermal_version < 2)
> +               candidates = EC_TEMP_SENSOR_ENTRIES;
> +       else
> +               candidates = ARRAY_SIZE(priv->temp_sensor_names);
> +
> +       for (i = 0; i < candidates; i++) {
> +               if (cros_ec_hwmon_read_temp(priv->cros_ec, priv->thermal_version, i, &temp) != 0)
> +                       continue;
> +
> +               ret = cros_ec_hwmon_get_temp_sensor_info(priv->cros_ec, i, &info);
> +               if (ret < 0)
> +                       continue;
> +
> +               priv->temp_sensor_names[i] = devm_kasprintf(dev, GFP_KERNEL, "%*s",
> +                                                           (int)sizeof(info.sensor_name),
> +                                                           info.sensor_name);
> +       }
> +}
> +
> +static int cros_ec_hwmon_probe(struct platform_device *pdev)
> +{
> +       struct device *dev = &pdev->dev;
> +       struct cros_ec_dev *ec_dev = dev_get_drvdata(dev->parent);
> +       struct cros_ec_device *cros_ec = ec_dev->ec_dev;
> +       struct cros_ec_hwmon_priv *priv;
> +       struct device *hwmon_dev;
> +       int ret;
> +
> +       /* Not every platform supports direct reads */
> +       if (!cros_ec->cmd_readmem)
> +               return -ENODEV;
> +
> +       priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
> +       if (!priv)
> +               return -ENOMEM;
> +
> +       priv->cros_ec = cros_ec;
> +
> +       ret = priv->cros_ec->cmd_readmem(cros_ec, EC_MEMMAP_THERMAL_VERSION,
> +                                        1, &priv->thermal_version);
> +       if (ret < 0)
> +               return ret;
> +
> +       /* Covers both fan and temp sensors */
> +       if (!priv->thermal_version)
> +               return -ENODEV;
> +
> +       cros_ec_hwmon_probe_temp_sensors(dev, priv);
> +
> +       hwmon_dev = devm_hwmon_device_register_with_info(dev, "cros_ec", priv,
> +                                                        &cros_ec_hwmon_chip_info, NULL);
> +
> +       return PTR_ERR_OR_ZERO(hwmon_dev);
> +}
> +
> +static const struct platform_device_id cros_ec_hwmon_id[] = {
> +       { DRV_NAME, 0 },
> +       { }
> +};
> +
> +static struct platform_driver cros_ec_hwmon_driver = {
> +       .driver.name    = DRV_NAME,
> +       .probe          = cros_ec_hwmon_probe,
> +       .id_table       = cros_ec_hwmon_id,
> +};
> +module_platform_driver(cros_ec_hwmon_driver);
> +
> +MODULE_DEVICE_TABLE(platform, cros_ec_hwmon_id);
> +MODULE_DESCRIPTION("ChromeOS EC Hardware Monitoring Driver");
> +MODULE_AUTHOR("Thomas Weißschuh <linux@weissschuh.net");
> +MODULE_LICENSE("GPL");
>
> --
> 2.45.0
>
Thomas Weißschuh May 7, 2024, 9:59 p.m. UTC | #2
Hi Guenter,

thanks for your quick responses!

On 2024-05-07 14:55:44+0000, Guenter Roeck wrote:
> On Tue, May 7, 2024 at 10:29 AM Thomas Weißschuh <linux@weissschuh.net> wrote:
> >
> > The ChromeOS Embedded Controller exposes fan speed and temperature
> > readings.
> > Expose this data through the hwmon subsystem.
> >
> > The driver is designed to be probed via the cros_ec mfd device.
> >
> > Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
> > ---
> >  Documentation/hwmon/cros_ec_hwmon.rst |  26 ++++
> >  Documentation/hwmon/index.rst         |   1 +
> >  MAINTAINERS                           |   8 +
> >  drivers/hwmon/Kconfig                 |  11 ++
> >  drivers/hwmon/Makefile                |   1 +
> >  drivers/hwmon/cros_ec_hwmon.c         | 269 ++++++++++++++++++++++++++++++++++
> >  6 files changed, 316 insertions(+)
> >

[..]

> > diff --git a/drivers/hwmon/cros_ec_hwmon.c b/drivers/hwmon/cros_ec_hwmon.c
> > new file mode 100644
> > index 000000000000..d59d39df2ac4
> > --- /dev/null
> > +++ b/drivers/hwmon/cros_ec_hwmon.c
> > @@ -0,0 +1,269 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + *  ChromesOS EC driver for hwmon
> > + *
> > + *  Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net>
> > + */
> > +
> > +#include <linux/device.h>
> > +#include <linux/hwmon.h>
> > +#include <linux/kernel.h>
> > +#include <linux/mod_devicetable.h>
> > +#include <linux/module.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/platform_data/cros_ec_commands.h>
> > +#include <linux/platform_data/cros_ec_proto.h>
> > +#include <linux/units.h>
> > +
> > +#define DRV_NAME       "cros-ec-hwmon"
> > +
> > +struct cros_ec_hwmon_priv {
> > +       struct cros_ec_device *cros_ec;
> > +       u8 thermal_version;
> > +       const char *temp_sensor_names[EC_TEMP_SENSOR_ENTRIES + EC_TEMP_SENSOR_B_ENTRIES];
> > +};
> > +
> > +static int cros_ec_hwmon_read_fan_speed(struct cros_ec_device *cros_ec, u8 index, u16 *speed)
> > +{
> > +       u16 data;
> > +       int ret;
> > +
> > +       ret = cros_ec->cmd_readmem(cros_ec, EC_MEMMAP_FAN + index * 2, 2, &data);
> > +       if (ret < 0)
> > +               return ret;
> > +
> > +       data = le16_to_cpu(data);
> > +
> > +       if (data == EC_FAN_SPEED_NOT_PRESENT)
> > +               return -ENODEV;
> > +
> I don't have time for a full review right now, but this looks wrong:
> If a fan is not present, its sysfs attribute should not be instantiated.
> Returning -ENODEV here is most definitely wrong.

This is also called from cros_ec_hwmon_is_visible(),

I think the special value should be handled in the read function
anyways, although it should never happen after probing.

Otherwise the magic, nonsensical value will be shown to the user as
sensor reading.

The sysfs hiding works.
Out of 4 fans and 24 temp sensors known by the driver,
on my device only 1 fan and 4 temp sensors exist and are probed.

If you prefer another error code please let me know.

Please let me know if I got something wrong.

> > +       *speed = data;
> > +       return 0;
> > +}
> > +
> > +static int cros_ec_hwmon_read_temp(struct cros_ec_device *cros_ec, u8 thermal_version,
> > +                                  u8 index, u8 *data)
> > +{
> > +       unsigned int offset;
> > +       int ret;
> > +
> > +       if (index < EC_TEMP_SENSOR_ENTRIES)
> > +               offset = EC_MEMMAP_TEMP_SENSOR + index;
> > +       else
> > +               offset = EC_MEMMAP_TEMP_SENSOR_B + index - EC_TEMP_SENSOR_ENTRIES;
> > +
> > +       ret = cros_ec->cmd_readmem(cros_ec, offset, 1, data);
> > +       if (ret < 0)
> > +               return ret;
> > +
> > +       if (*data == EC_TEMP_SENSOR_NOT_PRESENT ||
> > +           *data == EC_TEMP_SENSOR_ERROR ||
> > +           *data == EC_TEMP_SENSOR_NOT_POWERED ||
> > +           *data == EC_TEMP_SENSOR_NOT_CALIBRATED)
> > +               return -ENODEV;
> > +
> Same as above.

cros_ec_hwmon_probe()
  cros_ec_hwmon_probe_temp_sensors()
    cros_ec_hwmon_read_temp()

if _read_temp() fails here, the sensor name remains NULL and
is_visible() will hide that sensor.
      
As for the general reasoning, see above.

[..]


Thanks,
Thomas
Stephen Horvath May 24, 2024, 11:13 p.m. UTC | #3
Hi Thomas,

I was the one to implement fan monitoring/control into Dustin's driver, 
and just had a quick comment for your driver:

On 8/5/24 02:29, Thomas Weißschuh wrote:
> The ChromeOS Embedded Controller exposes fan speed and temperature
> readings.
> Expose this data through the hwmon subsystem.
> 
> The driver is designed to be probed via the cros_ec mfd device.
> 
> Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
> ---
>   Documentation/hwmon/cros_ec_hwmon.rst |  26 ++++
>   Documentation/hwmon/index.rst         |   1 +
>   MAINTAINERS                           |   8 +
>   drivers/hwmon/Kconfig                 |  11 ++
>   drivers/hwmon/Makefile                |   1 +
>   drivers/hwmon/cros_ec_hwmon.c         | 269 ++++++++++++++++++++++++++++++++++
>   6 files changed, 316 insertions(+)
> 
> diff --git a/Documentation/hwmon/cros_ec_hwmon.rst b/Documentation/hwmon/cros_ec_hwmon.rst
> new file mode 100644
> index 000000000000..aeb88c79d11b
> --- /dev/null
> +++ b/Documentation/hwmon/cros_ec_hwmon.rst
> @@ -0,0 +1,26 @@
> +.. SPDX-License-Identifier: GPL-2.0-or-later
> +
> +Kernel driver cros_ec_hwmon
> +===========================
> +
> +Supported chips:
> +
> +  * ChromeOS embedded controllers connected via LPC
> +
> +    Prefix: 'cros_ec'
> +
> +    Addresses scanned: -
> +
> +Author:
> +
> +  - Thomas Weißschuh <linux@weissschuh.net>
> +
> +Description
> +-----------
> +
> +This driver implements support for hardware monitoring commands exposed by the
> +ChromeOS embedded controller used in Chromebooks and other devices.
> +
> +The channel labels exposed via hwmon are retrieved from the EC itself.
> +
> +Fan and temperature readings are supported.
> diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
> index 1ca7a4fe1f8f..355a83e66928 100644
> --- a/Documentation/hwmon/index.rst
> +++ b/Documentation/hwmon/index.rst
> @@ -57,6 +57,7 @@ Hardware Monitoring Kernel Drivers
>      coretemp
>      corsair-cpro
>      corsair-psu
> +   cros_ec_hwmon
>      da9052
>      da9055
>      dell-smm-hwmon
> diff --git a/MAINTAINERS b/MAINTAINERS
> index c23fda1aa1f0..aa5689169eca 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -4988,6 +4988,14 @@ S:	Maintained
>   F:	Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml
>   F:	sound/soc/codecs/cros_ec_codec.*
>   
> +CHROMEOS EC HARDWARE MONITORING
> +M:	Thomas Weißschuh <thomas@weissschuh.net>
> +L:	chrome-platform@lists.linux.dev
> +L:	linux-hwmon@vger.kernel.org
> +S:	Maintained
> +F:	Documentation/hwmon/chros_ec_hwmon.rst
> +F:	drivers/hwmon/cros_ec_hwmon.c
> +
>   CHROMEOS EC SUBDRIVERS
>   M:	Benson Leung <bleung@chromium.org>
>   R:	Guenter Roeck <groeck@chromium.org>
> diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> index 83945397b6eb..c1284d42697f 100644
> --- a/drivers/hwmon/Kconfig
> +++ b/drivers/hwmon/Kconfig
> @@ -506,6 +506,17 @@ config SENSORS_CORSAIR_PSU
>   	  This driver can also be built as a module. If so, the module
>   	  will be called corsair-psu.
>   
> +config SENSORS_CROS_EC
> +	tristate "ChromeOS Embedded Controller sensors"
> +	depends on MFD_CROS_EC_DEV
> +	default MFD_CROS_EC_DEV
> +	help
> +	  If you say yes here you get support for ChromeOS Embedded Controller
> +	  sensors.
> +
> +	  This driver can also be built as a module. If so, the module
> +	  will be called cros_ec_hwmon.
> +
>   config SENSORS_DRIVETEMP
>   	tristate "Hard disk drives with temperature sensors"
>   	depends on SCSI && ATA
> diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
> index 5c31808f6378..8519a6b36c00 100644
> --- a/drivers/hwmon/Makefile
> +++ b/drivers/hwmon/Makefile
> @@ -64,6 +64,7 @@ obj-$(CONFIG_SENSORS_CHIPCAP2) += chipcap2.o
>   obj-$(CONFIG_SENSORS_CORETEMP)	+= coretemp.o
>   obj-$(CONFIG_SENSORS_CORSAIR_CPRO) += corsair-cpro.o
>   obj-$(CONFIG_SENSORS_CORSAIR_PSU) += corsair-psu.o
> +obj-$(CONFIG_SENSORS_CROS_EC)	+= cros_ec_hwmon.o
>   obj-$(CONFIG_SENSORS_DA9052_ADC)+= da9052-hwmon.o
>   obj-$(CONFIG_SENSORS_DA9055)+= da9055-hwmon.o
>   obj-$(CONFIG_SENSORS_DELL_SMM)	+= dell-smm-hwmon.o
> diff --git a/drivers/hwmon/cros_ec_hwmon.c b/drivers/hwmon/cros_ec_hwmon.c
> new file mode 100644
> index 000000000000..d59d39df2ac4
> --- /dev/null
> +++ b/drivers/hwmon/cros_ec_hwmon.c
> @@ -0,0 +1,269 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + *  ChromesOS EC driver for hwmon
> + *
> + *  Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net>
> + */
> +
> +#include <linux/device.h>
> +#include <linux/hwmon.h>
> +#include <linux/kernel.h>
> +#include <linux/mod_devicetable.h>
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/platform_data/cros_ec_commands.h>
> +#include <linux/platform_data/cros_ec_proto.h>
> +#include <linux/units.h>
> +
> +#define DRV_NAME	"cros-ec-hwmon"
> +
> +struct cros_ec_hwmon_priv {
> +	struct cros_ec_device *cros_ec;
> +	u8 thermal_version;
> +	const char *temp_sensor_names[EC_TEMP_SENSOR_ENTRIES + EC_TEMP_SENSOR_B_ENTRIES];
> +};
> +
> +static int cros_ec_hwmon_read_fan_speed(struct cros_ec_device *cros_ec, u8 index, u16 *speed)
> +{
> +	u16 data;
> +	int ret;
> +
> +	ret = cros_ec->cmd_readmem(cros_ec, EC_MEMMAP_FAN + index * 2, 2, &data);
> +	if (ret < 0)
> +		return ret;
> +
> +	data = le16_to_cpu(data);
> +
> +	if (data == EC_FAN_SPEED_NOT_PRESENT)
> +		return -ENODEV;
> +

Don't forget it can also return `EC_FAN_SPEED_STALLED`.
Like Guenter, I also don't like returning `-ENODEV`, but I don't have a 
problem with checking for `EC_FAN_SPEED_NOT_PRESENT` in case it was 
removed since init or something.
My approach was to return the speed as `0`, since the fan probably isn't 
spinning, but set HWMON_F_FAULT for `EC_FAN_SPEED_NOT_PRESENT` and 
HWMON_F_ALARM for `EC_FAN_SPEED_STALLED`.
No idea if this is correct though.

> +	*speed = data;
> +	return 0;
> +}
> +
> +static int cros_ec_hwmon_read_temp(struct cros_ec_device *cros_ec, u8 thermal_version,
> +				   u8 index, u8 *data)
> +{
> +	unsigned int offset;
> +	int ret;
> +
> +	if (index < EC_TEMP_SENSOR_ENTRIES)
> +		offset = EC_MEMMAP_TEMP_SENSOR + index;
> +	else
> +		offset = EC_MEMMAP_TEMP_SENSOR_B + index - EC_TEMP_SENSOR_ENTRIES;
> +
> +	ret = cros_ec->cmd_readmem(cros_ec, offset, 1, data);
> +	if (ret < 0)
> +		return ret;
> +
> +	if (*data == EC_TEMP_SENSOR_NOT_PRESENT ||
> +	    *data == EC_TEMP_SENSOR_ERROR ||
> +	    *data == EC_TEMP_SENSOR_NOT_POWERED ||
> +	    *data == EC_TEMP_SENSOR_NOT_CALIBRATED)
> +		return -ENODEV;
> +
> +	return 0;
> +}
> +
> +static int cros_ec_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> +			      u32 attr, int channel, long *val)
> +{
> +	struct cros_ec_hwmon_priv *priv = dev_get_drvdata(dev);
> +	int ret = -ENODATA;
> +	u16 speed;
> +	u8 temp;
> +
> +	if (type == hwmon_fan) {
> +		ret = cros_ec_hwmon_read_fan_speed(priv->cros_ec, channel, &speed);
> +		if (ret == 0)
> +			*val = speed;
> +	} else if (type == hwmon_temp) {
> +		ret = cros_ec_hwmon_read_temp(priv->cros_ec, priv->thermal_version, channel, &temp);
> +		if (ret == 0)
> +			*val = kelvin_to_millicelsius((((long)temp) + EC_TEMP_SENSOR_OFFSET));
> +	}
> +
> +	return ret;
> +}
> +
> +static int cros_ec_hwmon_get_temp_sensor_info(struct cros_ec_device *cros_ec, u8 id,
> +					      struct ec_response_temp_sensor_get_info *resp)
> +{
> +	int ret;
> +	struct {
> +		struct cros_ec_command msg;
> +		union {
> +			struct ec_params_temp_sensor_get_info req;
> +			struct ec_response_temp_sensor_get_info resp;
> +		} __packed data;
> +	} __packed buf = {
> +		.msg = {
> +			.version = 0,
> +			.command = EC_CMD_TEMP_SENSOR_GET_INFO,
> +			.insize  = sizeof(buf.data.resp),
> +			.outsize = sizeof(buf.data.req),
> +		},
> +		.data.req.id = id,
> +	};
> +
> +	ret = cros_ec_cmd_xfer_status(cros_ec, &buf.msg);
> +	if (ret < 0)
> +		return ret;
> +
> +	*resp = buf.data.resp;
> +	return 0;
> +}
> +
> +static int cros_ec_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type,
> +				     u32 attr, int channel, const char **str)
> +{
> +	struct cros_ec_hwmon_priv *priv = dev_get_drvdata(dev);
> +
> +	if (type == hwmon_temp && attr == hwmon_temp_label) {
> +		*str = priv->temp_sensor_names[channel];
> +		return 0;
> +	}
> +
> +	return -ENODATA;
> +}
> +
> +static umode_t cros_ec_hwmon_is_visible(const void *data, enum hwmon_sensor_types type,
> +					u32 attr, int channel)
> +{
> +	const struct cros_ec_hwmon_priv *priv = data;
> +	u16 speed;
> +
> +	if (type == hwmon_fan) {
> +		if (cros_ec_hwmon_read_fan_speed(priv->cros_ec, channel, &speed) == 0)
> +			return 0444;
> +	} else if (type == hwmon_temp) {
> +		if (priv->temp_sensor_names[channel])
> +			return 0444;
> +	}
> +
> +	return 0;
> +}
> +
> +static const struct hwmon_channel_info * const cros_ec_hwmon_info[] = {
> +	HWMON_CHANNEL_INFO(fan,
> +			   HWMON_F_INPUT,
> +			   HWMON_F_INPUT,
> +			   HWMON_F_INPUT,
> +			   HWMON_F_INPUT),
> +	HWMON_CHANNEL_INFO(temp,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL),
> +	NULL
> +};
> +
> +static const struct hwmon_ops cros_ec_hwmon_ops = {
> +	.read = cros_ec_hwmon_read,
> +	.read_string = cros_ec_hwmon_read_string,
> +	.is_visible = cros_ec_hwmon_is_visible,
> +};
> +
> +static const struct hwmon_chip_info cros_ec_hwmon_chip_info = {
> +	.ops = &cros_ec_hwmon_ops,
> +	.info = cros_ec_hwmon_info,
> +};
> +
> +static void cros_ec_hwmon_probe_temp_sensors(struct device *dev, struct cros_ec_hwmon_priv *priv)
> +{
> +	struct ec_response_temp_sensor_get_info info;
> +	size_t candidates, i;
> +	int ret;
> +	u8 temp;
> +
> +	if (priv->thermal_version < 2)
> +		candidates = EC_TEMP_SENSOR_ENTRIES;
> +	else
> +		candidates = ARRAY_SIZE(priv->temp_sensor_names);
> +
> +	for (i = 0; i < candidates; i++) {
> +		if (cros_ec_hwmon_read_temp(priv->cros_ec, priv->thermal_version, i, &temp) != 0)
> +			continue;
> +
> +		ret = cros_ec_hwmon_get_temp_sensor_info(priv->cros_ec, i, &info);
> +		if (ret < 0)
> +			continue;
> +
> +		priv->temp_sensor_names[i] = devm_kasprintf(dev, GFP_KERNEL, "%*s",
> +							    (int)sizeof(info.sensor_name),
> +							    info.sensor_name);
> +	}
> +}
> +
> +static int cros_ec_hwmon_probe(struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct cros_ec_dev *ec_dev = dev_get_drvdata(dev->parent);
> +	struct cros_ec_device *cros_ec = ec_dev->ec_dev;
> +	struct cros_ec_hwmon_priv *priv;
> +	struct device *hwmon_dev;
> +	int ret;
> +
> +	/* Not every platform supports direct reads */
> +	if (!cros_ec->cmd_readmem)
> +		return -ENODEV;
> +
> +	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
> +	if (!priv)
> +		return -ENOMEM;
> +
> +	priv->cros_ec = cros_ec;
> +
> +	ret = priv->cros_ec->cmd_readmem(cros_ec, EC_MEMMAP_THERMAL_VERSION,
> +					 1, &priv->thermal_version);
> +	if (ret < 0)
> +		return ret;
> +
> +	/* Covers both fan and temp sensors */
> +	if (!priv->thermal_version)
> +		return -ENODEV;
> +
> +	cros_ec_hwmon_probe_temp_sensors(dev, priv);
> +
> +	hwmon_dev = devm_hwmon_device_register_with_info(dev, "cros_ec", priv,
> +							 &cros_ec_hwmon_chip_info, NULL);
> +
> +	return PTR_ERR_OR_ZERO(hwmon_dev);
> +}
> +
> +static const struct platform_device_id cros_ec_hwmon_id[] = {
> +	{ DRV_NAME, 0 },
> +	{ }
> +};
> +
> +static struct platform_driver cros_ec_hwmon_driver = {
> +	.driver.name	= DRV_NAME,
> +	.probe		= cros_ec_hwmon_probe,
> +	.id_table	= cros_ec_hwmon_id,
> +};
> +module_platform_driver(cros_ec_hwmon_driver);
> +
> +MODULE_DEVICE_TABLE(platform, cros_ec_hwmon_id);
> +MODULE_DESCRIPTION("ChromeOS EC Hardware Monitoring Driver");
> +MODULE_AUTHOR("Thomas Weißschuh <linux@weissschuh.net");
> +MODULE_LICENSE("GPL");
> 

But feel free to ignore me if I'm completly wrong about this, since I 
really don't have much experience with kernel dev.

Thanks,
Steve
Thomas Weißschuh May 27, 2024, 7:24 p.m. UTC | #4
Hi Stephen,

On 2024-05-25 09:13:09+0000, Stephen Horvath wrote:
> I was the one to implement fan monitoring/control into Dustin's driver, and
> just had a quick comment for your driver:
> 
> On 8/5/24 02:29, Thomas Weißschuh wrote:
> > The ChromeOS Embedded Controller exposes fan speed and temperature
> > readings.
> > Expose this data through the hwmon subsystem.
> > 
> > The driver is designed to be probed via the cros_ec mfd device.
> > 
> > Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
> > ---
> >   Documentation/hwmon/cros_ec_hwmon.rst |  26 ++++
> >   Documentation/hwmon/index.rst         |   1 +
> >   MAINTAINERS                           |   8 +
> >   drivers/hwmon/Kconfig                 |  11 ++
> >   drivers/hwmon/Makefile                |   1 +
> >   drivers/hwmon/cros_ec_hwmon.c         | 269 ++++++++++++++++++++++++++++++++++
> >   6 files changed, 316 insertions(+)
> > 

<snip>

> > diff --git a/drivers/hwmon/cros_ec_hwmon.c b/drivers/hwmon/cros_ec_hwmon.c
> > new file mode 100644
> > index 000000000000..d59d39df2ac4
> > --- /dev/null
> > +++ b/drivers/hwmon/cros_ec_hwmon.c
> > @@ -0,0 +1,269 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + *  ChromesOS EC driver for hwmon
> > + *
> > + *  Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net>
> > + */
> > +
> > +#include <linux/device.h>
> > +#include <linux/hwmon.h>
> > +#include <linux/kernel.h>
> > +#include <linux/mod_devicetable.h>
> > +#include <linux/module.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/platform_data/cros_ec_commands.h>
> > +#include <linux/platform_data/cros_ec_proto.h>
> > +#include <linux/units.h>
> > +
> > +#define DRV_NAME	"cros-ec-hwmon"
> > +
> > +struct cros_ec_hwmon_priv {
> > +	struct cros_ec_device *cros_ec;
> > +	u8 thermal_version;
> > +	const char *temp_sensor_names[EC_TEMP_SENSOR_ENTRIES + EC_TEMP_SENSOR_B_ENTRIES];
> > +};
> > +
> > +static int cros_ec_hwmon_read_fan_speed(struct cros_ec_device *cros_ec, u8 index, u16 *speed)
> > +{
> > +	u16 data;
> > +	int ret;
> > +
> > +	ret = cros_ec->cmd_readmem(cros_ec, EC_MEMMAP_FAN + index * 2, 2, &data);
> > +	if (ret < 0)
> > +		return ret;
> > +
> > +	data = le16_to_cpu(data);
> > +
> > +	if (data == EC_FAN_SPEED_NOT_PRESENT)
> > +		return -ENODEV;
> > +
> 
> Don't forget it can also return `EC_FAN_SPEED_STALLED`.

Thanks for the hint. I'll need to think about how to handle this better.

> Like Guenter, I also don't like returning `-ENODEV`, but I don't have a
> problem with checking for `EC_FAN_SPEED_NOT_PRESENT` in case it was removed
> since init or something.

Ok.

> My approach was to return the speed as `0`, since the fan probably isn't
> spinning, but set HWMON_F_FAULT for `EC_FAN_SPEED_NOT_PRESENT` and
> HWMON_F_ALARM for `EC_FAN_SPEED_STALLED`.
> No idea if this is correct though.

I'm not a fan of returning a speed of 0 in case of errors.
Rather -EIO which can't be mistaken.
Maybe -EIO for both EC_FAN_SPEED_NOT_PRESENT (which should never happen)
and also for EC_FAN_SPEED_STALLED.
And EC_FAN_SPEED_STALLED also sets HWMON_F_FAULT.
HWMON_F_ALARM doesn't seem right to me.

But if Guenter has a preference, that will do, too.

> 
> > +	*speed = data;
> > +	return 0;
> > +}
> > +

<snip>

> But feel free to ignore me if I'm completly wrong about this, since I really
> don't have much experience with kernel dev.

Thanks for your feedback!

Would you mind if I Cc you on further revisions?


Thomas
Stephen Horvath May 28, 2024, 12:15 a.m. UTC | #5
Hi Thomas,

On 28/5/24 05:24, Thomas Weißschuh wrote:
> Hi Stephen,
> 
> On 2024-05-25 09:13:09+0000, Stephen Horvath wrote:
>> I was the one to implement fan monitoring/control into Dustin's driver, and
>> just had a quick comment for your driver:
>>
>> On 8/5/24 02:29, Thomas Weißschuh wrote:
>>> The ChromeOS Embedded Controller exposes fan speed and temperature
>>> readings.
>>> Expose this data through the hwmon subsystem.
>>>
>>> The driver is designed to be probed via the cros_ec mfd device.
>>>
>>> Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
>>> ---
>>>    Documentation/hwmon/cros_ec_hwmon.rst |  26 ++++
>>>    Documentation/hwmon/index.rst         |   1 +
>>>    MAINTAINERS                           |   8 +
>>>    drivers/hwmon/Kconfig                 |  11 ++
>>>    drivers/hwmon/Makefile                |   1 +
>>>    drivers/hwmon/cros_ec_hwmon.c         | 269 ++++++++++++++++++++++++++++++++++
>>>    6 files changed, 316 insertions(+)
>>>
> 
> <snip>
> 
>>> diff --git a/drivers/hwmon/cros_ec_hwmon.c b/drivers/hwmon/cros_ec_hwmon.c
>>> new file mode 100644
>>> index 000000000000..d59d39df2ac4
>>> --- /dev/null
>>> +++ b/drivers/hwmon/cros_ec_hwmon.c
>>> @@ -0,0 +1,269 @@
>>> +// SPDX-License-Identifier: GPL-2.0-or-later
>>> +/*
>>> + *  ChromesOS EC driver for hwmon
>>> + *
>>> + *  Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net>
>>> + */
>>> +
>>> +#include <linux/device.h>
>>> +#include <linux/hwmon.h>
>>> +#include <linux/kernel.h>
>>> +#include <linux/mod_devicetable.h>
>>> +#include <linux/module.h>
>>> +#include <linux/platform_device.h>
>>> +#include <linux/platform_data/cros_ec_commands.h>
>>> +#include <linux/platform_data/cros_ec_proto.h>
>>> +#include <linux/units.h>
>>> +
>>> +#define DRV_NAME	"cros-ec-hwmon"
>>> +
>>> +struct cros_ec_hwmon_priv {
>>> +	struct cros_ec_device *cros_ec;
>>> +	u8 thermal_version;
>>> +	const char *temp_sensor_names[EC_TEMP_SENSOR_ENTRIES + EC_TEMP_SENSOR_B_ENTRIES];
>>> +};
>>> +
>>> +static int cros_ec_hwmon_read_fan_speed(struct cros_ec_device *cros_ec, u8 index, u16 *speed)
>>> +{
>>> +	u16 data;
>>> +	int ret;
>>> +
>>> +	ret = cros_ec->cmd_readmem(cros_ec, EC_MEMMAP_FAN + index * 2, 2, &data);
>>> +	if (ret < 0)
>>> +		return ret;
>>> +
>>> +	data = le16_to_cpu(data);
>>> +
>>> +	if (data == EC_FAN_SPEED_NOT_PRESENT)
>>> +		return -ENODEV;
>>> +
>>
>> Don't forget it can also return `EC_FAN_SPEED_STALLED`.
> 
> Thanks for the hint. I'll need to think about how to handle this better.
> 
>> Like Guenter, I also don't like returning `-ENODEV`, but I don't have a
>> problem with checking for `EC_FAN_SPEED_NOT_PRESENT` in case it was removed
>> since init or something.
> 
> Ok.
> 
>> My approach was to return the speed as `0`, since the fan probably isn't
>> spinning, but set HWMON_F_FAULT for `EC_FAN_SPEED_NOT_PRESENT` and
>> HWMON_F_ALARM for `EC_FAN_SPEED_STALLED`.
>> No idea if this is correct though.
> 
> I'm not a fan of returning a speed of 0 in case of errors.
> Rather -EIO which can't be mistaken.
> Maybe -EIO for both EC_FAN_SPEED_NOT_PRESENT (which should never happen)
> and also for EC_FAN_SPEED_STALLED.

Yeah, that's pretty reasonable.

> And EC_FAN_SPEED_STALLED also sets HWMON_F_FAULT.
> HWMON_F_ALARM doesn't seem right to me.

Fair enough, I thought I copied the behaviour off another driver, but I 
can't find which one, so maybe I just made it up. I do agree with you 
though.

> But if Guenter has a preference, that will do, too.

Of course!

>>
>>> +	*speed = data;
>>> +	return 0;
>>> +}
>>> +
> 
> <snip>
> 
>> But feel free to ignore me if I'm completly wrong about this, since I really
>> don't have much experience with kernel dev.
> 
> Thanks for your feedback!
> 
> Would you mind if I Cc you on further revisions?

Sure, I don't mind at all!
To be honest, I wouldn't mind a Cc on the other Framework related stuff 
too, but I don't mind either way.

Thanks,
Steve
Guenter Roeck May 28, 2024, 3:50 p.m. UTC | #6
On 5/27/24 17:15, Stephen Horvath wrote:
> Hi Thomas,
> 
> On 28/5/24 05:24, Thomas Weißschuh wrote:
>> Hi Stephen,
>>
>> On 2024-05-25 09:13:09+0000, Stephen Horvath wrote:
>>> I was the one to implement fan monitoring/control into Dustin's driver, and
>>> just had a quick comment for your driver:
>>>
>>> On 8/5/24 02:29, Thomas Weißschuh wrote:
>>>> The ChromeOS Embedded Controller exposes fan speed and temperature
>>>> readings.
>>>> Expose this data through the hwmon subsystem.
>>>>
>>>> The driver is designed to be probed via the cros_ec mfd device.
>>>>
>>>> Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
>>>> ---
>>>>    Documentation/hwmon/cros_ec_hwmon.rst |  26 ++++
>>>>    Documentation/hwmon/index.rst         |   1 +
>>>>    MAINTAINERS                           |   8 +
>>>>    drivers/hwmon/Kconfig                 |  11 ++
>>>>    drivers/hwmon/Makefile                |   1 +
>>>>    drivers/hwmon/cros_ec_hwmon.c         | 269 ++++++++++++++++++++++++++++++++++
>>>>    6 files changed, 316 insertions(+)
>>>>
>>
>> <snip>
>>
>>>> diff --git a/drivers/hwmon/cros_ec_hwmon.c b/drivers/hwmon/cros_ec_hwmon.c
>>>> new file mode 100644
>>>> index 000000000000..d59d39df2ac4
>>>> --- /dev/null
>>>> +++ b/drivers/hwmon/cros_ec_hwmon.c
>>>> @@ -0,0 +1,269 @@
>>>> +// SPDX-License-Identifier: GPL-2.0-or-later
>>>> +/*
>>>> + *  ChromesOS EC driver for hwmon
>>>> + *
>>>> + *  Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net>
>>>> + */
>>>> +
>>>> +#include <linux/device.h>
>>>> +#include <linux/hwmon.h>
>>>> +#include <linux/kernel.h>
>>>> +#include <linux/mod_devicetable.h>
>>>> +#include <linux/module.h>
>>>> +#include <linux/platform_device.h>
>>>> +#include <linux/platform_data/cros_ec_commands.h>
>>>> +#include <linux/platform_data/cros_ec_proto.h>
>>>> +#include <linux/units.h>
>>>> +
>>>> +#define DRV_NAME    "cros-ec-hwmon"
>>>> +
>>>> +struct cros_ec_hwmon_priv {
>>>> +    struct cros_ec_device *cros_ec;
>>>> +    u8 thermal_version;
>>>> +    const char *temp_sensor_names[EC_TEMP_SENSOR_ENTRIES + EC_TEMP_SENSOR_B_ENTRIES];
>>>> +};
>>>> +
>>>> +static int cros_ec_hwmon_read_fan_speed(struct cros_ec_device *cros_ec, u8 index, u16 *speed)
>>>> +{
>>>> +    u16 data;
>>>> +    int ret;
>>>> +
>>>> +    ret = cros_ec->cmd_readmem(cros_ec, EC_MEMMAP_FAN + index * 2, 2, &data);
>>>> +    if (ret < 0)
>>>> +        return ret;
>>>> +
>>>> +    data = le16_to_cpu(data);
>>>> +
>>>> +    if (data == EC_FAN_SPEED_NOT_PRESENT)
>>>> +        return -ENODEV;
>>>> +
>>>
>>> Don't forget it can also return `EC_FAN_SPEED_STALLED`.
>>
>> Thanks for the hint. I'll need to think about how to handle this better.
>>
>>> Like Guenter, I also don't like returning `-ENODEV`, but I don't have a
>>> problem with checking for `EC_FAN_SPEED_NOT_PRESENT` in case it was removed
>>> since init or something.
>>

That won't happen. Chromebooks are not servers, where one might be able to
replace a fan tray while the system is running.

>> Ok.
>>
>>> My approach was to return the speed as `0`, since the fan probably isn't
>>> spinning, but set HWMON_F_FAULT for `EC_FAN_SPEED_NOT_PRESENT` and
>>> HWMON_F_ALARM for `EC_FAN_SPEED_STALLED`.
>>> No idea if this is correct though.
>>
>> I'm not a fan of returning a speed of 0 in case of errors.
>> Rather -EIO which can't be mistaken.
>> Maybe -EIO for both EC_FAN_SPEED_NOT_PRESENT (which should never happen)
>> and also for EC_FAN_SPEED_STALLED.
> 
> Yeah, that's pretty reasonable.
> 

-EIO is an i/o error. I have trouble reconciling that with
EC_FAN_SPEED_NOT_PRESENT or EC_FAN_SPEED_STALLED.

Looking into the EC source code [1], I see:

EC_FAN_SPEED_NOT_PRESENT means that the fan is not present.
That should return -ENODEV in the above code, but only for
the purpose of making the attribute invisible.

EC_FAN_SPEED_STALLED means exactly that, i.e., that the fan
is present but not turning. The EC code does not expect that
to happen and generates a thermal event in case it does.
Given that, it does make sense to set the fault flag.
The actual fan speed value should then be reported as 0 or
possibly -ENODATA. It should _not_ generate any other error
because that would trip up the "sensors" command for no
good reason.

Guenter

---
[1] https://chromium.googlesource.com/chromiumos/platform/ec
Thomas Weißschuh May 28, 2024, 4:15 p.m. UTC | #7
On 2024-05-28 08:50:49+0000, Guenter Roeck wrote:
> On 5/27/24 17:15, Stephen Horvath wrote:
> > On 28/5/24 05:24, Thomas Weißschuh wrote:
> > > On 2024-05-25 09:13:09+0000, Stephen Horvath wrote:
> > > > I was the one to implement fan monitoring/control into Dustin's driver, and
> > > > just had a quick comment for your driver:
> > > > 
> > > > On 8/5/24 02:29, Thomas Weißschuh wrote:
> > > > > The ChromeOS Embedded Controller exposes fan speed and temperature
> > > > > readings.
> > > > > Expose this data through the hwmon subsystem.
> > > > > 
> > > > > The driver is designed to be probed via the cros_ec mfd device.
> > > > > 
> > > > > Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
> > > > > ---
> > > > >    Documentation/hwmon/cros_ec_hwmon.rst |  26 ++++
> > > > >    Documentation/hwmon/index.rst         |   1 +
> > > > >    MAINTAINERS                           |   8 +
> > > > >    drivers/hwmon/Kconfig                 |  11 ++
> > > > >    drivers/hwmon/Makefile                |   1 +
> > > > >    drivers/hwmon/cros_ec_hwmon.c         | 269 ++++++++++++++++++++++++++++++++++
> > > > >    6 files changed, 316 insertions(+)
> > > > > 
> > > 
> > > <snip>
> > > 
> > > > > diff --git a/drivers/hwmon/cros_ec_hwmon.c b/drivers/hwmon/cros_ec_hwmon.c
> > > > > new file mode 100644
> > > > > index 000000000000..d59d39df2ac4
> > > > > --- /dev/null
> > > > > +++ b/drivers/hwmon/cros_ec_hwmon.c
> > > > > @@ -0,0 +1,269 @@
> > > > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > > > +/*
> > > > > + *  ChromesOS EC driver for hwmon
> > > > > + *
> > > > > + *  Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net>
> > > > > + */
> > > > > +
> > > > > +#include <linux/device.h>
> > > > > +#include <linux/hwmon.h>
> > > > > +#include <linux/kernel.h>
> > > > > +#include <linux/mod_devicetable.h>
> > > > > +#include <linux/module.h>
> > > > > +#include <linux/platform_device.h>
> > > > > +#include <linux/platform_data/cros_ec_commands.h>
> > > > > +#include <linux/platform_data/cros_ec_proto.h>
> > > > > +#include <linux/units.h>
> > > > > +
> > > > > +#define DRV_NAME    "cros-ec-hwmon"
> > > > > +
> > > > > +struct cros_ec_hwmon_priv {
> > > > > +    struct cros_ec_device *cros_ec;
> > > > > +    u8 thermal_version;
> > > > > +    const char *temp_sensor_names[EC_TEMP_SENSOR_ENTRIES + EC_TEMP_SENSOR_B_ENTRIES];
> > > > > +};
> > > > > +
> > > > > +static int cros_ec_hwmon_read_fan_speed(struct cros_ec_device *cros_ec, u8 index, u16 *speed)
> > > > > +{
> > > > > +    u16 data;
> > > > > +    int ret;
> > > > > +
> > > > > +    ret = cros_ec->cmd_readmem(cros_ec, EC_MEMMAP_FAN + index * 2, 2, &data);
> > > > > +    if (ret < 0)
> > > > > +        return ret;
> > > > > +
> > > > > +    data = le16_to_cpu(data);
> > > > > +
> > > > > +    if (data == EC_FAN_SPEED_NOT_PRESENT)
> > > > > +        return -ENODEV;
> > > > > +
> > > > 
> > > > Don't forget it can also return `EC_FAN_SPEED_STALLED`.
> > > 
> > > Thanks for the hint. I'll need to think about how to handle this better.
> > > 
> > > > Like Guenter, I also don't like returning `-ENODEV`, but I don't have a
> > > > problem with checking for `EC_FAN_SPEED_NOT_PRESENT` in case it was removed
> > > > since init or something.
> > > 
> 
> That won't happen. Chromebooks are not servers, where one might be able to
> replace a fan tray while the system is running.

In one of my testruns this actually happened.
When running on battery, one specific of the CPU sensors sporadically
returned EC_FAN_SPEED_NOT_PRESENT.

> > > Ok.
> > > 
> > > > My approach was to return the speed as `0`, since the fan probably isn't
> > > > spinning, but set HWMON_F_FAULT for `EC_FAN_SPEED_NOT_PRESENT` and
> > > > HWMON_F_ALARM for `EC_FAN_SPEED_STALLED`.
> > > > No idea if this is correct though.
> > > 
> > > I'm not a fan of returning a speed of 0 in case of errors.
> > > Rather -EIO which can't be mistaken.
> > > Maybe -EIO for both EC_FAN_SPEED_NOT_PRESENT (which should never happen)
> > > and also for EC_FAN_SPEED_STALLED.
> > 
> > Yeah, that's pretty reasonable.
> > 
> 
> -EIO is an i/o error. I have trouble reconciling that with
> EC_FAN_SPEED_NOT_PRESENT or EC_FAN_SPEED_STALLED.
> 
> Looking into the EC source code [1], I see:
> 
> EC_FAN_SPEED_NOT_PRESENT means that the fan is not present.
> That should return -ENODEV in the above code, but only for
> the purpose of making the attribute invisible.
> 
> EC_FAN_SPEED_STALLED means exactly that, i.e., that the fan
> is present but not turning. The EC code does not expect that
> to happen and generates a thermal event in case it does.
> Given that, it does make sense to set the fault flag.
> The actual fan speed value should then be reported as 0 or
> possibly -ENODATA. It should _not_ generate any other error
> because that would trip up the "sensors" command for no
> good reason.

Ack.

Currently I have the following logic (for both fans and temp):

if NOT_PRESENT during probing:
  make the attribute invisible.

if any error during runtime (including NOT_PRESENT):
  return -ENODATA and a FAULT

This should also handle the sporadic NOT_PRESENT failures.

What do you think?

Is there any other feedback to this revision or should I send the next?


Thanks,
Thomas
Guenter Roeck May 28, 2024, 11:29 p.m. UTC | #8
On 5/28/24 09:15, Thomas Weißschuh wrote:
> On 2024-05-28 08:50:49+0000, Guenter Roeck wrote:
>> On 5/27/24 17:15, Stephen Horvath wrote:
>>> On 28/5/24 05:24, Thomas Weißschuh wrote:
>>>> On 2024-05-25 09:13:09+0000, Stephen Horvath wrote:
>>>>> I was the one to implement fan monitoring/control into Dustin's driver, and
>>>>> just had a quick comment for your driver:
>>>>>
>>>>> On 8/5/24 02:29, Thomas Weißschuh wrote:
>>>>>> The ChromeOS Embedded Controller exposes fan speed and temperature
>>>>>> readings.
>>>>>> Expose this data through the hwmon subsystem.
>>>>>>
>>>>>> The driver is designed to be probed via the cros_ec mfd device.
>>>>>>
>>>>>> Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
>>>>>> ---
>>>>>>     Documentation/hwmon/cros_ec_hwmon.rst |  26 ++++
>>>>>>     Documentation/hwmon/index.rst         |   1 +
>>>>>>     MAINTAINERS                           |   8 +
>>>>>>     drivers/hwmon/Kconfig                 |  11 ++
>>>>>>     drivers/hwmon/Makefile                |   1 +
>>>>>>     drivers/hwmon/cros_ec_hwmon.c         | 269 ++++++++++++++++++++++++++++++++++
>>>>>>     6 files changed, 316 insertions(+)
>>>>>>
>>>>
>>>> <snip>
>>>>
>>>>>> diff --git a/drivers/hwmon/cros_ec_hwmon.c b/drivers/hwmon/cros_ec_hwmon.c
>>>>>> new file mode 100644
>>>>>> index 000000000000..d59d39df2ac4
>>>>>> --- /dev/null
>>>>>> +++ b/drivers/hwmon/cros_ec_hwmon.c
>>>>>> @@ -0,0 +1,269 @@
>>>>>> +// SPDX-License-Identifier: GPL-2.0-or-later
>>>>>> +/*
>>>>>> + *  ChromesOS EC driver for hwmon
>>>>>> + *
>>>>>> + *  Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net>
>>>>>> + */
>>>>>> +
>>>>>> +#include <linux/device.h>
>>>>>> +#include <linux/hwmon.h>
>>>>>> +#include <linux/kernel.h>
>>>>>> +#include <linux/mod_devicetable.h>
>>>>>> +#include <linux/module.h>
>>>>>> +#include <linux/platform_device.h>
>>>>>> +#include <linux/platform_data/cros_ec_commands.h>
>>>>>> +#include <linux/platform_data/cros_ec_proto.h>
>>>>>> +#include <linux/units.h>
>>>>>> +
>>>>>> +#define DRV_NAME    "cros-ec-hwmon"
>>>>>> +
>>>>>> +struct cros_ec_hwmon_priv {
>>>>>> +    struct cros_ec_device *cros_ec;
>>>>>> +    u8 thermal_version;
>>>>>> +    const char *temp_sensor_names[EC_TEMP_SENSOR_ENTRIES + EC_TEMP_SENSOR_B_ENTRIES];
>>>>>> +};
>>>>>> +
>>>>>> +static int cros_ec_hwmon_read_fan_speed(struct cros_ec_device *cros_ec, u8 index, u16 *speed)
>>>>>> +{
>>>>>> +    u16 data;
>>>>>> +    int ret;
>>>>>> +
>>>>>> +    ret = cros_ec->cmd_readmem(cros_ec, EC_MEMMAP_FAN + index * 2, 2, &data);
>>>>>> +    if (ret < 0)
>>>>>> +        return ret;
>>>>>> +
>>>>>> +    data = le16_to_cpu(data);
>>>>>> +
>>>>>> +    if (data == EC_FAN_SPEED_NOT_PRESENT)
>>>>>> +        return -ENODEV;
>>>>>> +
>>>>>
>>>>> Don't forget it can also return `EC_FAN_SPEED_STALLED`.
>>>>
>>>> Thanks for the hint. I'll need to think about how to handle this better.
>>>>
>>>>> Like Guenter, I also don't like returning `-ENODEV`, but I don't have a
>>>>> problem with checking for `EC_FAN_SPEED_NOT_PRESENT` in case it was removed
>>>>> since init or something.
>>>>
>>
>> That won't happen. Chromebooks are not servers, where one might be able to
>> replace a fan tray while the system is running.
> 
> In one of my testruns this actually happened.
> When running on battery, one specific of the CPU sensors sporadically
> returned EC_FAN_SPEED_NOT_PRESENT.
> 

What Chromebook was that ? I can't see the code path in the EC source
that would get me there.

>>>> Ok.
>>>>
>>>>> My approach was to return the speed as `0`, since the fan probably isn't
>>>>> spinning, but set HWMON_F_FAULT for `EC_FAN_SPEED_NOT_PRESENT` and
>>>>> HWMON_F_ALARM for `EC_FAN_SPEED_STALLED`.
>>>>> No idea if this is correct though.
>>>>
>>>> I'm not a fan of returning a speed of 0 in case of errors.
>>>> Rather -EIO which can't be mistaken.
>>>> Maybe -EIO for both EC_FAN_SPEED_NOT_PRESENT (which should never happen)
>>>> and also for EC_FAN_SPEED_STALLED.
>>>
>>> Yeah, that's pretty reasonable.
>>>
>>
>> -EIO is an i/o error. I have trouble reconciling that with
>> EC_FAN_SPEED_NOT_PRESENT or EC_FAN_SPEED_STALLED.
>>
>> Looking into the EC source code [1], I see:
>>
>> EC_FAN_SPEED_NOT_PRESENT means that the fan is not present.
>> That should return -ENODEV in the above code, but only for
>> the purpose of making the attribute invisible.
>>
>> EC_FAN_SPEED_STALLED means exactly that, i.e., that the fan
>> is present but not turning. The EC code does not expect that
>> to happen and generates a thermal event in case it does.
>> Given that, it does make sense to set the fault flag.
>> The actual fan speed value should then be reported as 0 or
>> possibly -ENODATA. It should _not_ generate any other error
>> because that would trip up the "sensors" command for no
>> good reason.
> 
> Ack.
> 
> Currently I have the following logic (for both fans and temp):
> 
> if NOT_PRESENT during probing:
>    make the attribute invisible.
> 
> if any error during runtime (including NOT_PRESENT):
>    return -ENODATA and a FAULT
> 
> This should also handle the sporadic NOT_PRESENT failures.
> 
> What do you think?
> 
> Is there any other feedback to this revision or should I send the next?
> 

No, except I'd really like to know which Chromebook randomly generates
a EC_FAN_SPEED_NOT_PRESENT response because that really looks like a bug.
Also, can you reproduce the problem with the ectool command ?

Thanks,
Guenter
Stephen Horvath May 29, 2024, 12:58 a.m. UTC | #9
Hi Guenter,

On 29/5/24 09:29, Guenter Roeck wrote:
> On 5/28/24 09:15, Thomas Weißschuh wrote:
>> On 2024-05-28 08:50:49+0000, Guenter Roeck wrote:
>>> On 5/27/24 17:15, Stephen Horvath wrote:
>>>> On 28/5/24 05:24, Thomas Weißschuh wrote:
>>>>> On 2024-05-25 09:13:09+0000, Stephen Horvath wrote:
>>>>>> I was the one to implement fan monitoring/control into Dustin's 
>>>>>> driver, and
>>>>>> just had a quick comment for your driver:
>>>>>>
>>>>>> On 8/5/24 02:29, Thomas Weißschuh wrote:
>>>>>>> The ChromeOS Embedded Controller exposes fan speed and temperature
>>>>>>> readings.
>>>>>>> Expose this data through the hwmon subsystem.
>>>>>>>
>>>>>>> The driver is designed to be probed via the cros_ec mfd device.
>>>>>>>
>>>>>>> Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
>>>>>>> ---
>>>>>>>     Documentation/hwmon/cros_ec_hwmon.rst |  26 ++++
>>>>>>>     Documentation/hwmon/index.rst         |   1 +
>>>>>>>     MAINTAINERS                           |   8 +
>>>>>>>     drivers/hwmon/Kconfig                 |  11 ++
>>>>>>>     drivers/hwmon/Makefile                |   1 +
>>>>>>>     drivers/hwmon/cros_ec_hwmon.c         | 269 
>>>>>>> ++++++++++++++++++++++++++++++++++
>>>>>>>     6 files changed, 316 insertions(+)
>>>>>>>
>>>>>
>>>>> <snip>
>>>>>
>>>>>>> diff --git a/drivers/hwmon/cros_ec_hwmon.c 
>>>>>>> b/drivers/hwmon/cros_ec_hwmon.c
>>>>>>> new file mode 100644
>>>>>>> index 000000000000..d59d39df2ac4
>>>>>>> --- /dev/null
>>>>>>> +++ b/drivers/hwmon/cros_ec_hwmon.c
>>>>>>> @@ -0,0 +1,269 @@
>>>>>>> +// SPDX-License-Identifier: GPL-2.0-or-later
>>>>>>> +/*
>>>>>>> + *  ChromesOS EC driver for hwmon
>>>>>>> + *
>>>>>>> + *  Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net>
>>>>>>> + */
>>>>>>> +
>>>>>>> +#include <linux/device.h>
>>>>>>> +#include <linux/hwmon.h>
>>>>>>> +#include <linux/kernel.h>
>>>>>>> +#include <linux/mod_devicetable.h>
>>>>>>> +#include <linux/module.h>
>>>>>>> +#include <linux/platform_device.h>
>>>>>>> +#include <linux/platform_data/cros_ec_commands.h>
>>>>>>> +#include <linux/platform_data/cros_ec_proto.h>
>>>>>>> +#include <linux/units.h>
>>>>>>> +
>>>>>>> +#define DRV_NAME    "cros-ec-hwmon"
>>>>>>> +
>>>>>>> +struct cros_ec_hwmon_priv {
>>>>>>> +    struct cros_ec_device *cros_ec;
>>>>>>> +    u8 thermal_version;
>>>>>>> +    const char *temp_sensor_names[EC_TEMP_SENSOR_ENTRIES + 
>>>>>>> EC_TEMP_SENSOR_B_ENTRIES];
>>>>>>> +};
>>>>>>> +
>>>>>>> +static int cros_ec_hwmon_read_fan_speed(struct cros_ec_device 
>>>>>>> *cros_ec, u8 index, u16 *speed)
>>>>>>> +{
>>>>>>> +    u16 data;
>>>>>>> +    int ret;
>>>>>>> +
>>>>>>> +    ret = cros_ec->cmd_readmem(cros_ec, EC_MEMMAP_FAN + index * 
>>>>>>> 2, 2, &data);
>>>>>>> +    if (ret < 0)
>>>>>>> +        return ret;
>>>>>>> +
>>>>>>> +    data = le16_to_cpu(data);
>>>>>>> +
>>>>>>> +    if (data == EC_FAN_SPEED_NOT_PRESENT)
>>>>>>> +        return -ENODEV;
>>>>>>> +
>>>>>>
>>>>>> Don't forget it can also return `EC_FAN_SPEED_STALLED`.
>>>>>
>>>>> Thanks for the hint. I'll need to think about how to handle this 
>>>>> better.
>>>>>
>>>>>> Like Guenter, I also don't like returning `-ENODEV`, but I don't 
>>>>>> have a
>>>>>> problem with checking for `EC_FAN_SPEED_NOT_PRESENT` in case it 
>>>>>> was removed
>>>>>> since init or something.
>>>>>
>>>
>>> That won't happen. Chromebooks are not servers, where one might be 
>>> able to
>>> replace a fan tray while the system is running.
>>
>> In one of my testruns this actually happened.
>> When running on battery, one specific of the CPU sensors sporadically
>> returned EC_FAN_SPEED_NOT_PRESENT.
>>
> 
> What Chromebook was that ? I can't see the code path in the EC source
> that would get me there.
> 

I believe Thomas and I both have the Framework 13 AMD, the source code 
is here: 
https://github.com/FrameworkComputer/EmbeddedController/tree/lotus-zephyr

The organisation confuses me a little, but Dustin has previous said on 
the framework forums 
(https://community.frame.work/t/what-ec-is-used/38574/2):

"This one is based on the Zephyr port of the ChromeOS EC, and tracks 
mainline more closely. It is in the branch lotus-zephyr.
All of the model-specific code lives in zephyr/program/lotus.
The 13"-specific code lives in a few subdirectories off the main tree 
named azalea."


Also I just unplugged my fan and you are definitely correct, the EC only 
generates EC_FAN_SPEED_NOT_PRESENT for fans it does not have the 
capability to support. Even after a reboot it just returns 0 RPM for an 
unplugged fan. I thought about simulating a stall too, but I was mildly 
scared I was going to break one of the tiny blades.

>>>>> Ok.
>>>>>
>>>>>> My approach was to return the speed as `0`, since the fan probably 
>>>>>> isn't
>>>>>> spinning, but set HWMON_F_FAULT for `EC_FAN_SPEED_NOT_PRESENT` and
>>>>>> HWMON_F_ALARM for `EC_FAN_SPEED_STALLED`.
>>>>>> No idea if this is correct though.
>>>>>
>>>>> I'm not a fan of returning a speed of 0 in case of errors.
>>>>> Rather -EIO which can't be mistaken.
>>>>> Maybe -EIO for both EC_FAN_SPEED_NOT_PRESENT (which should never 
>>>>> happen)
>>>>> and also for EC_FAN_SPEED_STALLED.
>>>>
>>>> Yeah, that's pretty reasonable.
>>>>
>>>
>>> -EIO is an i/o error. I have trouble reconciling that with
>>> EC_FAN_SPEED_NOT_PRESENT or EC_FAN_SPEED_STALLED.
>>>
>>> Looking into the EC source code [1], I see:
>>>
>>> EC_FAN_SPEED_NOT_PRESENT means that the fan is not present.
>>> That should return -ENODEV in the above code, but only for
>>> the purpose of making the attribute invisible.
>>>
>>> EC_FAN_SPEED_STALLED means exactly that, i.e., that the fan
>>> is present but not turning. The EC code does not expect that
>>> to happen and generates a thermal event in case it does.
>>> Given that, it does make sense to set the fault flag.
>>> The actual fan speed value should then be reported as 0 or
>>> possibly -ENODATA. It should _not_ generate any other error
>>> because that would trip up the "sensors" command for no
>>> good reason.
>>
>> Ack.
>>
>> Currently I have the following logic (for both fans and temp):
>>
>> if NOT_PRESENT during probing:
>>    make the attribute invisible.
>>
>> if any error during runtime (including NOT_PRESENT):
>>    return -ENODATA and a FAULT
>>
>> This should also handle the sporadic NOT_PRESENT failures.
>>
>> What do you think?
>>
>> Is there any other feedback to this revision or should I send the next?
>>
> 
> No, except I'd really like to know which Chromebook randomly generates
> a EC_FAN_SPEED_NOT_PRESENT response because that really looks like a bug.
> Also, can you reproduce the problem with the ectool command ?

I have a feeling it was related to the concurrency problems between ACPI 
and the CrOS code that are being fixed in another patch by Ben Walsh, I 
was also seeing some weird behaviour sometimes but I *believe* it was 
fixed by that.

Thanks,
Steve
Thomas Weißschuh May 29, 2024, 6:23 a.m. UTC | #10
On 2024-05-29 10:58:23+0000, Stephen Horvath wrote:
> On 29/5/24 09:29, Guenter Roeck wrote:
> > On 5/28/24 09:15, Thomas Weißschuh wrote:
> > > On 2024-05-28 08:50:49+0000, Guenter Roeck wrote:
> > > > On 5/27/24 17:15, Stephen Horvath wrote:
> > > > > On 28/5/24 05:24, Thomas Weißschuh wrote:
> > > > > > On 2024-05-25 09:13:09+0000, Stephen Horvath wrote:
> > > > > > > Don't forget it can also return `EC_FAN_SPEED_STALLED`.

<snip>

> > > > > > 
> > > > > > Thanks for the hint. I'll need to think about how to
> > > > > > handle this better.
> > > > > > 
> > > > > > > Like Guenter, I also don't like returning `-ENODEV`,
> > > > > > > but I don't have a
> > > > > > > problem with checking for `EC_FAN_SPEED_NOT_PRESENT`
> > > > > > > in case it was removed
> > > > > > > since init or something.
> > > > > > 
> > > > 
> > > > That won't happen. Chromebooks are not servers, where one might
> > > > be able to
> > > > replace a fan tray while the system is running.
> > > 
> > > In one of my testruns this actually happened.
> > > When running on battery, one specific of the CPU sensors sporadically
> > > returned EC_FAN_SPEED_NOT_PRESENT.
> > > 
> > 
> > What Chromebook was that ? I can't see the code path in the EC source
> > that would get me there.
> > 
> 
> I believe Thomas and I both have the Framework 13 AMD, the source code is
> here:
> https://github.com/FrameworkComputer/EmbeddedController/tree/lotus-zephyr

Correct.

> The organisation confuses me a little, but Dustin has previous said on the
> framework forums (https://community.frame.work/t/what-ec-is-used/38574/2):
> 
> "This one is based on the Zephyr port of the ChromeOS EC, and tracks
> mainline more closely. It is in the branch lotus-zephyr.
> All of the model-specific code lives in zephyr/program/lotus.
> The 13"-specific code lives in a few subdirectories off the main tree named
> azalea."

The EC code is at [0]:

$ ectool version
RO version:    azalea_v3.4.113353-ec:b4c1fb,os
RW version:    azalea_v3.4.113353-ec:b4c1fb,os
Firmware copy: RO
Build info:    azalea_v3.4.113353-ec:b4c1fb,os:7b88e1,cmsis:4aa3ff 2024-03-26 07:10:22 lotus@ip-172-26-3-226
Tool version:  0.0.1-isolate May  6 2024 none


From the build info I gather it should be commit b4c1fb, which is the
current HEAD of the lotus-zephyr branch.
Lotus is the Framework 16 AMD, which is very similar to Azalea, the
Framework 13 AMD, which I tested this against.
Both share the same codebase.

> Also I just unplugged my fan and you are definitely correct, the EC only
> generates EC_FAN_SPEED_NOT_PRESENT for fans it does not have the capability
> to support. Even after a reboot it just returns 0 RPM for an unplugged fan.
> I thought about simulating a stall too, but I was mildly scared I was going
> to break one of the tiny blades.

I get the error when unplugging *the charger*.

To be more precise:

It does not happen always.
It does not happen instantly on unplugging.
It goes away after a few seconds/minutes.
During the issue, one specific sensor reads 0xffff.

> > > > > > Ok.
> > > > > > 
> > > > > > > My approach was to return the speed as `0`, since
> > > > > > > the fan probably isn't
> > > > > > > spinning, but set HWMON_F_FAULT for `EC_FAN_SPEED_NOT_PRESENT` and
> > > > > > > HWMON_F_ALARM for `EC_FAN_SPEED_STALLED`.
> > > > > > > No idea if this is correct though.
> > > > > > 
> > > > > > I'm not a fan of returning a speed of 0 in case of errors.
> > > > > > Rather -EIO which can't be mistaken.
> > > > > > Maybe -EIO for both EC_FAN_SPEED_NOT_PRESENT (which
> > > > > > should never happen)
> > > > > > and also for EC_FAN_SPEED_STALLED.
> > > > > 
> > > > > Yeah, that's pretty reasonable.
> > > > > 
> > > > 
> > > > -EIO is an i/o error. I have trouble reconciling that with
> > > > EC_FAN_SPEED_NOT_PRESENT or EC_FAN_SPEED_STALLED.
> > > > 
> > > > Looking into the EC source code [1], I see:
> > > > 
> > > > EC_FAN_SPEED_NOT_PRESENT means that the fan is not present.
> > > > That should return -ENODEV in the above code, but only for
> > > > the purpose of making the attribute invisible.
> > > > 
> > > > EC_FAN_SPEED_STALLED means exactly that, i.e., that the fan
> > > > is present but not turning. The EC code does not expect that
> > > > to happen and generates a thermal event in case it does.
> > > > Given that, it does make sense to set the fault flag.
> > > > The actual fan speed value should then be reported as 0 or
> > > > possibly -ENODATA. It should _not_ generate any other error
> > > > because that would trip up the "sensors" command for no
> > > > good reason.
> > > 
> > > Ack.
> > > 
> > > Currently I have the following logic (for both fans and temp):
> > > 
> > > if NOT_PRESENT during probing:
> > >    make the attribute invisible.
> > > 
> > > if any error during runtime (including NOT_PRESENT):
> > >    return -ENODATA and a FAULT
> > > 
> > > This should also handle the sporadic NOT_PRESENT failures.
> > > 
> > > What do you think?
> > > 
> > > Is there any other feedback to this revision or should I send the next?
> > > 
> > 
> > No, except I'd really like to know which Chromebook randomly generates
> > a EC_FAN_SPEED_NOT_PRESENT response because that really looks like a bug.
> > Also, can you reproduce the problem with the ectool command ?

Yes, the ectool command reports the same issue at the same time.

The fan affected was always the sensor cpu@4c, which is
compatible = "amd,sb-tsi".

> I have a feeling it was related to the concurrency problems between ACPI and
> the CrOS code that are being fixed in another patch by Ben Walsh, I was also
> seeing some weird behaviour sometimes but I *believe* it was fixed by that.

I don't think it's this issue.
Ben's series at [1], is for MEC ECs which are the older Intel
Frameworks, not the Framework 13 AMD.

[0] https://github.com/FrameworkComputer/EmbeddedController
[1] https://lore.kernel.org/lkml/20240515055631.5775-1-ben@jubnut.com/
Stephen Horvath May 29, 2024, 7:40 a.m. UTC | #11
Hi Thomas,

On 29/5/24 16:23, Thomas Weißschuh wrote:
> On 2024-05-29 10:58:23+0000, Stephen Horvath wrote:
>> On 29/5/24 09:29, Guenter Roeck wrote:
>>> On 5/28/24 09:15, Thomas Weißschuh wrote:
>>>> On 2024-05-28 08:50:49+0000, Guenter Roeck wrote:
>>>>> On 5/27/24 17:15, Stephen Horvath wrote:
>>>>>> On 28/5/24 05:24, Thomas Weißschuh wrote:
>>>>>>> On 2024-05-25 09:13:09+0000, Stephen Horvath wrote:
>>>>>>>> Don't forget it can also return `EC_FAN_SPEED_STALLED`.
> 
> <snip>
> 
>>>>>>>
>>>>>>> Thanks for the hint. I'll need to think about how to
>>>>>>> handle this better.
>>>>>>>
>>>>>>>> Like Guenter, I also don't like returning `-ENODEV`,
>>>>>>>> but I don't have a
>>>>>>>> problem with checking for `EC_FAN_SPEED_NOT_PRESENT`
>>>>>>>> in case it was removed
>>>>>>>> since init or something.
>>>>>>>
>>>>>
>>>>> That won't happen. Chromebooks are not servers, where one might
>>>>> be able to
>>>>> replace a fan tray while the system is running.
>>>>
>>>> In one of my testruns this actually happened.
>>>> When running on battery, one specific of the CPU sensors sporadically
>>>> returned EC_FAN_SPEED_NOT_PRESENT.
>>>>
>>>
>>> What Chromebook was that ? I can't see the code path in the EC source
>>> that would get me there.
>>>
>>
>> I believe Thomas and I both have the Framework 13 AMD, the source code is
>> here:
>> https://github.com/FrameworkComputer/EmbeddedController/tree/lotus-zephyr
> 
> Correct.
> 
>> The organisation confuses me a little, but Dustin has previous said on the
>> framework forums (https://community.frame.work/t/what-ec-is-used/38574/2):
>>
>> "This one is based on the Zephyr port of the ChromeOS EC, and tracks
>> mainline more closely. It is in the branch lotus-zephyr.
>> All of the model-specific code lives in zephyr/program/lotus.
>> The 13"-specific code lives in a few subdirectories off the main tree named
>> azalea."
> 
> The EC code is at [0]:
> 
> $ ectool version
> RO version:    azalea_v3.4.113353-ec:b4c1fb,os
> RW version:    azalea_v3.4.113353-ec:b4c1fb,os
> Firmware copy: RO
> Build info:    azalea_v3.4.113353-ec:b4c1fb,os:7b88e1,cmsis:4aa3ff 2024-03-26 07:10:22 lotus@ip-172-26-3-226
> Tool version:  0.0.1-isolate May  6 2024 none

I can confirm mine is the same build too.

>  From the build info I gather it should be commit b4c1fb, which is the
> current HEAD of the lotus-zephyr branch.
> Lotus is the Framework 16 AMD, which is very similar to Azalea, the
> Framework 13 AMD, which I tested this against.
> Both share the same codebase.
> 
>> Also I just unplugged my fan and you are definitely correct, the EC only
>> generates EC_FAN_SPEED_NOT_PRESENT for fans it does not have the capability
>> to support. Even after a reboot it just returns 0 RPM for an unplugged fan.
>> I thought about simulating a stall too, but I was mildly scared I was going
>> to break one of the tiny blades.
> 
> I get the error when unplugging *the charger*.
> 
> To be more precise:
> 
> It does not happen always.
> It does not happen instantly on unplugging.
> It goes away after a few seconds/minutes.
> During the issue, one specific sensor reads 0xffff.
> 

Oh I see, I haven't played around with the temp sensors until now, but I 
can confirm the last temp sensor (cpu@4c / temp4) will randomly (every 
~2-15 seconds) return EC_TEMP_SENSOR_ERROR (0xfe).
Unplugging the charger doesn't seem to have any impact for me.
The related ACPI sensor also says 180.8°C.
I'll probably create an issue or something shortly.

I was mildly confused by 'CPU sensors' and 'EC_FAN_SPEED_NOT_PRESENT' in 
the same sentence, but I'm now assuming you mean the temp sensor?

>>>>>>> Ok.
>>>>>>>
>>>>>>>> My approach was to return the speed as `0`, since
>>>>>>>> the fan probably isn't
>>>>>>>> spinning, but set HWMON_F_FAULT for `EC_FAN_SPEED_NOT_PRESENT` and
>>>>>>>> HWMON_F_ALARM for `EC_FAN_SPEED_STALLED`.
>>>>>>>> No idea if this is correct though.
>>>>>>>
>>>>>>> I'm not a fan of returning a speed of 0 in case of errors.
>>>>>>> Rather -EIO which can't be mistaken.
>>>>>>> Maybe -EIO for both EC_FAN_SPEED_NOT_PRESENT (which
>>>>>>> should never happen)
>>>>>>> and also for EC_FAN_SPEED_STALLED.
>>>>>>
>>>>>> Yeah, that's pretty reasonable.
>>>>>>
>>>>>
>>>>> -EIO is an i/o error. I have trouble reconciling that with
>>>>> EC_FAN_SPEED_NOT_PRESENT or EC_FAN_SPEED_STALLED.
>>>>>
>>>>> Looking into the EC source code [1], I see:
>>>>>
>>>>> EC_FAN_SPEED_NOT_PRESENT means that the fan is not present.
>>>>> That should return -ENODEV in the above code, but only for
>>>>> the purpose of making the attribute invisible.
>>>>>
>>>>> EC_FAN_SPEED_STALLED means exactly that, i.e., that the fan
>>>>> is present but not turning. The EC code does not expect that
>>>>> to happen and generates a thermal event in case it does.
>>>>> Given that, it does make sense to set the fault flag.
>>>>> The actual fan speed value should then be reported as 0 or
>>>>> possibly -ENODATA. It should _not_ generate any other error
>>>>> because that would trip up the "sensors" command for no
>>>>> good reason.
>>>>
>>>> Ack.
>>>>
>>>> Currently I have the following logic (for both fans and temp):
>>>>
>>>> if NOT_PRESENT during probing:
>>>>     make the attribute invisible.
>>>>
>>>> if any error during runtime (including NOT_PRESENT):
>>>>     return -ENODATA and a FAULT
>>>>
>>>> This should also handle the sporadic NOT_PRESENT failures.
>>>>
>>>> What do you think?
>>>>
>>>> Is there any other feedback to this revision or should I send the next?
>>>>
>>>
>>> No, except I'd really like to know which Chromebook randomly generates
>>> a EC_FAN_SPEED_NOT_PRESENT response because that really looks like a bug.
>>> Also, can you reproduce the problem with the ectool command ?
> 
> Yes, the ectool command reports the same issue at the same time.
> 
> The fan affected was always the sensor cpu@4c, which is
> compatible = "amd,sb-tsi".
> 
>> I have a feeling it was related to the concurrency problems between ACPI and
>> the CrOS code that are being fixed in another patch by Ben Walsh, I was also
>> seeing some weird behaviour sometimes but I *believe* it was fixed by that.
> 
> I don't think it's this issue.
> Ben's series at [1], is for MEC ECs which are the older Intel
> Frameworks, not the Framework 13 AMD.

Yeah sorry, I saw it mentioned AMD and threw it into my kernel, I also 
thought it stopped the 'packet too long' messages (for 
EC_CMD_CONSOLE_SNAPSHOT) but it did not.

Thanks,
Steve
Guenter Roeck May 29, 2024, 5 p.m. UTC | #12
On Wed, May 29, 2024 at 12:40 AM Stephen Horvath
<s.horvath@outlook.com.au> wrote:
>
> Hi Thomas,
>
> On 29/5/24 16:23, Thomas Weißschuh wrote:
> > On 2024-05-29 10:58:23+0000, Stephen Horvath wrote:
> >> On 29/5/24 09:29, Guenter Roeck wrote:
> >>> On 5/28/24 09:15, Thomas Weißschuh wrote:
> >>>> On 2024-05-28 08:50:49+0000, Guenter Roeck wrote:
> >>>>> On 5/27/24 17:15, Stephen Horvath wrote:
> >>>>>> On 28/5/24 05:24, Thomas Weißschuh wrote:
> >>>>>>> On 2024-05-25 09:13:09+0000, Stephen Horvath wrote:
> >>>>>>>> Don't forget it can also return `EC_FAN_SPEED_STALLED`.
> >
> > <snip>
> >
> >>>>>>>
> >>>>>>> Thanks for the hint. I'll need to think about how to
> >>>>>>> handle this better.
> >>>>>>>
> >>>>>>>> Like Guenter, I also don't like returning `-ENODEV`,
> >>>>>>>> but I don't have a
> >>>>>>>> problem with checking for `EC_FAN_SPEED_NOT_PRESENT`
> >>>>>>>> in case it was removed
> >>>>>>>> since init or something.
> >>>>>>>
> >>>>>
> >>>>> That won't happen. Chromebooks are not servers, where one might
> >>>>> be able to
> >>>>> replace a fan tray while the system is running.
> >>>>
> >>>> In one of my testruns this actually happened.
> >>>> When running on battery, one specific of the CPU sensors sporadically
> >>>> returned EC_FAN_SPEED_NOT_PRESENT.
> >>>>
> >>>
> >>> What Chromebook was that ? I can't see the code path in the EC source
> >>> that would get me there.
> >>>
> >>
> >> I believe Thomas and I both have the Framework 13 AMD, the source code is
> >> here:
> >> https://github.com/FrameworkComputer/EmbeddedController/tree/lotus-zephyr
> >
> > Correct.
> >
> >> The organisation confuses me a little, but Dustin has previous said on the
> >> framework forums (https://community.frame.work/t/what-ec-is-used/38574/2):
> >>
> >> "This one is based on the Zephyr port of the ChromeOS EC, and tracks
> >> mainline more closely. It is in the branch lotus-zephyr.
> >> All of the model-specific code lives in zephyr/program/lotus.
> >> The 13"-specific code lives in a few subdirectories off the main tree named
> >> azalea."
> >
> > The EC code is at [0]:
> >
> > $ ectool version
> > RO version:    azalea_v3.4.113353-ec:b4c1fb,os
> > RW version:    azalea_v3.4.113353-ec:b4c1fb,os
> > Firmware copy: RO
> > Build info:    azalea_v3.4.113353-ec:b4c1fb,os:7b88e1,cmsis:4aa3ff 2024-03-26 07:10:22 lotus@ip-172-26-3-226
> > Tool version:  0.0.1-isolate May  6 2024 none
>
> I can confirm mine is the same build too.
>
> >  From the build info I gather it should be commit b4c1fb, which is the
> > current HEAD of the lotus-zephyr branch.
> > Lotus is the Framework 16 AMD, which is very similar to Azalea, the
> > Framework 13 AMD, which I tested this against.
> > Both share the same codebase.
> >
> >> Also I just unplugged my fan and you are definitely correct, the EC only
> >> generates EC_FAN_SPEED_NOT_PRESENT for fans it does not have the capability
> >> to support. Even after a reboot it just returns 0 RPM for an unplugged fan.
> >> I thought about simulating a stall too, but I was mildly scared I was going
> >> to break one of the tiny blades.
> >
> > I get the error when unplugging *the charger*.
> >
> > To be more precise:
> >
> > It does not happen always.
> > It does not happen instantly on unplugging.
> > It goes away after a few seconds/minutes.
> > During the issue, one specific sensor reads 0xffff.
> >
>
> Oh I see, I haven't played around with the temp sensors until now, but I
> can confirm the last temp sensor (cpu@4c / temp4) will randomly (every
> ~2-15 seconds) return EC_TEMP_SENSOR_ERROR (0xfe).
> Unplugging the charger doesn't seem to have any impact for me.
> The related ACPI sensor also says 180.8°C.
> I'll probably create an issue or something shortly.
>
> I was mildly confused by 'CPU sensors' and 'EC_FAN_SPEED_NOT_PRESENT' in
> the same sentence, but I'm now assuming you mean the temp sensor?
>

Same here. it might not matter as much if the values were the same,
but EC_FAN_SPEED_NOT_PRESENT == 0xffff,  and
EC_TEMP_SENSOR_NOT_PRESENT==0xff, so they must not be confused with
each other. EC_TEMP_SENSOR_NOT_PRESENT should be static as well,
though, and not be returned randomly.

Guenter

> >>>>>>> Ok.
> >>>>>>>
> >>>>>>>> My approach was to return the speed as `0`, since
> >>>>>>>> the fan probably isn't
> >>>>>>>> spinning, but set HWMON_F_FAULT for `EC_FAN_SPEED_NOT_PRESENT` and
> >>>>>>>> HWMON_F_ALARM for `EC_FAN_SPEED_STALLED`.
> >>>>>>>> No idea if this is correct though.
> >>>>>>>
> >>>>>>> I'm not a fan of returning a speed of 0 in case of errors.
> >>>>>>> Rather -EIO which can't be mistaken.
> >>>>>>> Maybe -EIO for both EC_FAN_SPEED_NOT_PRESENT (which
> >>>>>>> should never happen)
> >>>>>>> and also for EC_FAN_SPEED_STALLED.
> >>>>>>
> >>>>>> Yeah, that's pretty reasonable.
> >>>>>>
> >>>>>
> >>>>> -EIO is an i/o error. I have trouble reconciling that with
> >>>>> EC_FAN_SPEED_NOT_PRESENT or EC_FAN_SPEED_STALLED.
> >>>>>
> >>>>> Looking into the EC source code [1], I see:
> >>>>>
> >>>>> EC_FAN_SPEED_NOT_PRESENT means that the fan is not present.
> >>>>> That should return -ENODEV in the above code, but only for
> >>>>> the purpose of making the attribute invisible.
> >>>>>
> >>>>> EC_FAN_SPEED_STALLED means exactly that, i.e., that the fan
> >>>>> is present but not turning. The EC code does not expect that
> >>>>> to happen and generates a thermal event in case it does.
> >>>>> Given that, it does make sense to set the fault flag.
> >>>>> The actual fan speed value should then be reported as 0 or
> >>>>> possibly -ENODATA. It should _not_ generate any other error
> >>>>> because that would trip up the "sensors" command for no
> >>>>> good reason.
> >>>>
> >>>> Ack.
> >>>>
> >>>> Currently I have the following logic (for both fans and temp):
> >>>>
> >>>> if NOT_PRESENT during probing:
> >>>>     make the attribute invisible.
> >>>>
> >>>> if any error during runtime (including NOT_PRESENT):
> >>>>     return -ENODATA and a FAULT
> >>>>
> >>>> This should also handle the sporadic NOT_PRESENT failures.
> >>>>
> >>>> What do you think?
> >>>>
> >>>> Is there any other feedback to this revision or should I send the next?
> >>>>
> >>>
> >>> No, except I'd really like to know which Chromebook randomly generates
> >>> a EC_FAN_SPEED_NOT_PRESENT response because that really looks like a bug.
> >>> Also, can you reproduce the problem with the ectool command ?
> >
> > Yes, the ectool command reports the same issue at the same time.
> >
> > The fan affected was always the sensor cpu@4c, which is
> > compatible = "amd,sb-tsi".
> >
> >> I have a feeling it was related to the concurrency problems between ACPI and
> >> the CrOS code that are being fixed in another patch by Ben Walsh, I was also
> >> seeing some weird behaviour sometimes but I *believe* it was fixed by that.
> >
> > I don't think it's this issue.
> > Ben's series at [1], is for MEC ECs which are the older Intel
> > Frameworks, not the Framework 13 AMD.
>
> Yeah sorry, I saw it mentioned AMD and threw it into my kernel, I also
> thought it stopped the 'packet too long' messages (for
> EC_CMD_CONSOLE_SNAPSHOT) but it did not.
>
> Thanks,
> Steve
diff mbox series

Patch

diff --git a/Documentation/hwmon/cros_ec_hwmon.rst b/Documentation/hwmon/cros_ec_hwmon.rst
new file mode 100644
index 000000000000..aeb88c79d11b
--- /dev/null
+++ b/Documentation/hwmon/cros_ec_hwmon.rst
@@ -0,0 +1,26 @@ 
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+Kernel driver cros_ec_hwmon
+===========================
+
+Supported chips:
+
+  * ChromeOS embedded controllers connected via LPC
+
+    Prefix: 'cros_ec'
+
+    Addresses scanned: -
+
+Author:
+
+  - Thomas Weißschuh <linux@weissschuh.net>
+
+Description
+-----------
+
+This driver implements support for hardware monitoring commands exposed by the
+ChromeOS embedded controller used in Chromebooks and other devices.
+
+The channel labels exposed via hwmon are retrieved from the EC itself.
+
+Fan and temperature readings are supported.
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 1ca7a4fe1f8f..355a83e66928 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -57,6 +57,7 @@  Hardware Monitoring Kernel Drivers
    coretemp
    corsair-cpro
    corsair-psu
+   cros_ec_hwmon
    da9052
    da9055
    dell-smm-hwmon
diff --git a/MAINTAINERS b/MAINTAINERS
index c23fda1aa1f0..aa5689169eca 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4988,6 +4988,14 @@  S:	Maintained
 F:	Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml
 F:	sound/soc/codecs/cros_ec_codec.*
 
+CHROMEOS EC HARDWARE MONITORING
+M:	Thomas Weißschuh <thomas@weissschuh.net>
+L:	chrome-platform@lists.linux.dev
+L:	linux-hwmon@vger.kernel.org
+S:	Maintained
+F:	Documentation/hwmon/chros_ec_hwmon.rst
+F:	drivers/hwmon/cros_ec_hwmon.c
+
 CHROMEOS EC SUBDRIVERS
 M:	Benson Leung <bleung@chromium.org>
 R:	Guenter Roeck <groeck@chromium.org>
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 83945397b6eb..c1284d42697f 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -506,6 +506,17 @@  config SENSORS_CORSAIR_PSU
 	  This driver can also be built as a module. If so, the module
 	  will be called corsair-psu.
 
+config SENSORS_CROS_EC
+	tristate "ChromeOS Embedded Controller sensors"
+	depends on MFD_CROS_EC_DEV
+	default MFD_CROS_EC_DEV
+	help
+	  If you say yes here you get support for ChromeOS Embedded Controller
+	  sensors.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called cros_ec_hwmon.
+
 config SENSORS_DRIVETEMP
 	tristate "Hard disk drives with temperature sensors"
 	depends on SCSI && ATA
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 5c31808f6378..8519a6b36c00 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -64,6 +64,7 @@  obj-$(CONFIG_SENSORS_CHIPCAP2) += chipcap2.o
 obj-$(CONFIG_SENSORS_CORETEMP)	+= coretemp.o
 obj-$(CONFIG_SENSORS_CORSAIR_CPRO) += corsair-cpro.o
 obj-$(CONFIG_SENSORS_CORSAIR_PSU) += corsair-psu.o
+obj-$(CONFIG_SENSORS_CROS_EC)	+= cros_ec_hwmon.o
 obj-$(CONFIG_SENSORS_DA9052_ADC)+= da9052-hwmon.o
 obj-$(CONFIG_SENSORS_DA9055)+= da9055-hwmon.o
 obj-$(CONFIG_SENSORS_DELL_SMM)	+= dell-smm-hwmon.o
diff --git a/drivers/hwmon/cros_ec_hwmon.c b/drivers/hwmon/cros_ec_hwmon.c
new file mode 100644
index 000000000000..d59d39df2ac4
--- /dev/null
+++ b/drivers/hwmon/cros_ec_hwmon.c
@@ -0,0 +1,269 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  ChromesOS EC driver for hwmon
+ *
+ *  Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+#include <linux/device.h>
+#include <linux/hwmon.h>
+#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
+#include <linux/units.h>
+
+#define DRV_NAME	"cros-ec-hwmon"
+
+struct cros_ec_hwmon_priv {
+	struct cros_ec_device *cros_ec;
+	u8 thermal_version;
+	const char *temp_sensor_names[EC_TEMP_SENSOR_ENTRIES + EC_TEMP_SENSOR_B_ENTRIES];
+};
+
+static int cros_ec_hwmon_read_fan_speed(struct cros_ec_device *cros_ec, u8 index, u16 *speed)
+{
+	u16 data;
+	int ret;
+
+	ret = cros_ec->cmd_readmem(cros_ec, EC_MEMMAP_FAN + index * 2, 2, &data);
+	if (ret < 0)
+		return ret;
+
+	data = le16_to_cpu(data);
+
+	if (data == EC_FAN_SPEED_NOT_PRESENT)
+		return -ENODEV;
+
+	*speed = data;
+	return 0;
+}
+
+static int cros_ec_hwmon_read_temp(struct cros_ec_device *cros_ec, u8 thermal_version,
+				   u8 index, u8 *data)
+{
+	unsigned int offset;
+	int ret;
+
+	if (index < EC_TEMP_SENSOR_ENTRIES)
+		offset = EC_MEMMAP_TEMP_SENSOR + index;
+	else
+		offset = EC_MEMMAP_TEMP_SENSOR_B + index - EC_TEMP_SENSOR_ENTRIES;
+
+	ret = cros_ec->cmd_readmem(cros_ec, offset, 1, data);
+	if (ret < 0)
+		return ret;
+
+	if (*data == EC_TEMP_SENSOR_NOT_PRESENT ||
+	    *data == EC_TEMP_SENSOR_ERROR ||
+	    *data == EC_TEMP_SENSOR_NOT_POWERED ||
+	    *data == EC_TEMP_SENSOR_NOT_CALIBRATED)
+		return -ENODEV;
+
+	return 0;
+}
+
+static int cros_ec_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+			      u32 attr, int channel, long *val)
+{
+	struct cros_ec_hwmon_priv *priv = dev_get_drvdata(dev);
+	int ret = -ENODATA;
+	u16 speed;
+	u8 temp;
+
+	if (type == hwmon_fan) {
+		ret = cros_ec_hwmon_read_fan_speed(priv->cros_ec, channel, &speed);
+		if (ret == 0)
+			*val = speed;
+	} else if (type == hwmon_temp) {
+		ret = cros_ec_hwmon_read_temp(priv->cros_ec, priv->thermal_version, channel, &temp);
+		if (ret == 0)
+			*val = kelvin_to_millicelsius((((long)temp) + EC_TEMP_SENSOR_OFFSET));
+	}
+
+	return ret;
+}
+
+static int cros_ec_hwmon_get_temp_sensor_info(struct cros_ec_device *cros_ec, u8 id,
+					      struct ec_response_temp_sensor_get_info *resp)
+{
+	int ret;
+	struct {
+		struct cros_ec_command msg;
+		union {
+			struct ec_params_temp_sensor_get_info req;
+			struct ec_response_temp_sensor_get_info resp;
+		} __packed data;
+	} __packed buf = {
+		.msg = {
+			.version = 0,
+			.command = EC_CMD_TEMP_SENSOR_GET_INFO,
+			.insize  = sizeof(buf.data.resp),
+			.outsize = sizeof(buf.data.req),
+		},
+		.data.req.id = id,
+	};
+
+	ret = cros_ec_cmd_xfer_status(cros_ec, &buf.msg);
+	if (ret < 0)
+		return ret;
+
+	*resp = buf.data.resp;
+	return 0;
+}
+
+static int cros_ec_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type,
+				     u32 attr, int channel, const char **str)
+{
+	struct cros_ec_hwmon_priv *priv = dev_get_drvdata(dev);
+
+	if (type == hwmon_temp && attr == hwmon_temp_label) {
+		*str = priv->temp_sensor_names[channel];
+		return 0;
+	}
+
+	return -ENODATA;
+}
+
+static umode_t cros_ec_hwmon_is_visible(const void *data, enum hwmon_sensor_types type,
+					u32 attr, int channel)
+{
+	const struct cros_ec_hwmon_priv *priv = data;
+	u16 speed;
+
+	if (type == hwmon_fan) {
+		if (cros_ec_hwmon_read_fan_speed(priv->cros_ec, channel, &speed) == 0)
+			return 0444;
+	} else if (type == hwmon_temp) {
+		if (priv->temp_sensor_names[channel])
+			return 0444;
+	}
+
+	return 0;
+}
+
+static const struct hwmon_channel_info * const cros_ec_hwmon_info[] = {
+	HWMON_CHANNEL_INFO(fan,
+			   HWMON_F_INPUT,
+			   HWMON_F_INPUT,
+			   HWMON_F_INPUT,
+			   HWMON_F_INPUT),
+	HWMON_CHANNEL_INFO(temp,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL,
+			   HWMON_T_INPUT | HWMON_T_LABEL),
+	NULL
+};
+
+static const struct hwmon_ops cros_ec_hwmon_ops = {
+	.read = cros_ec_hwmon_read,
+	.read_string = cros_ec_hwmon_read_string,
+	.is_visible = cros_ec_hwmon_is_visible,
+};
+
+static const struct hwmon_chip_info cros_ec_hwmon_chip_info = {
+	.ops = &cros_ec_hwmon_ops,
+	.info = cros_ec_hwmon_info,
+};
+
+static void cros_ec_hwmon_probe_temp_sensors(struct device *dev, struct cros_ec_hwmon_priv *priv)
+{
+	struct ec_response_temp_sensor_get_info info;
+	size_t candidates, i;
+	int ret;
+	u8 temp;
+
+	if (priv->thermal_version < 2)
+		candidates = EC_TEMP_SENSOR_ENTRIES;
+	else
+		candidates = ARRAY_SIZE(priv->temp_sensor_names);
+
+	for (i = 0; i < candidates; i++) {
+		if (cros_ec_hwmon_read_temp(priv->cros_ec, priv->thermal_version, i, &temp) != 0)
+			continue;
+
+		ret = cros_ec_hwmon_get_temp_sensor_info(priv->cros_ec, i, &info);
+		if (ret < 0)
+			continue;
+
+		priv->temp_sensor_names[i] = devm_kasprintf(dev, GFP_KERNEL, "%*s",
+							    (int)sizeof(info.sensor_name),
+							    info.sensor_name);
+	}
+}
+
+static int cros_ec_hwmon_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct cros_ec_dev *ec_dev = dev_get_drvdata(dev->parent);
+	struct cros_ec_device *cros_ec = ec_dev->ec_dev;
+	struct cros_ec_hwmon_priv *priv;
+	struct device *hwmon_dev;
+	int ret;
+
+	/* Not every platform supports direct reads */
+	if (!cros_ec->cmd_readmem)
+		return -ENODEV;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->cros_ec = cros_ec;
+
+	ret = priv->cros_ec->cmd_readmem(cros_ec, EC_MEMMAP_THERMAL_VERSION,
+					 1, &priv->thermal_version);
+	if (ret < 0)
+		return ret;
+
+	/* Covers both fan and temp sensors */
+	if (!priv->thermal_version)
+		return -ENODEV;
+
+	cros_ec_hwmon_probe_temp_sensors(dev, priv);
+
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, "cros_ec", priv,
+							 &cros_ec_hwmon_chip_info, NULL);
+
+	return PTR_ERR_OR_ZERO(hwmon_dev);
+}
+
+static const struct platform_device_id cros_ec_hwmon_id[] = {
+	{ DRV_NAME, 0 },
+	{ }
+};
+
+static struct platform_driver cros_ec_hwmon_driver = {
+	.driver.name	= DRV_NAME,
+	.probe		= cros_ec_hwmon_probe,
+	.id_table	= cros_ec_hwmon_id,
+};
+module_platform_driver(cros_ec_hwmon_driver);
+
+MODULE_DEVICE_TABLE(platform, cros_ec_hwmon_id);
+MODULE_DESCRIPTION("ChromeOS EC Hardware Monitoring Driver");
+MODULE_AUTHOR("Thomas Weißschuh <linux@weissschuh.net");
+MODULE_LICENSE("GPL");