Message ID | 20190418164813.21053-1-jeff.dagenais@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | [v2] hwmon: max6650: add thermal cooling device capability | expand |
On Thu, Apr 18, 2019 at 12:48:13PM -0400, Jean-Francois Dagenais wrote: > This allows max6650 devices to be referenced in dts as a cooling device. > > The pwm value seems duplicated in cooling_dev_state but since pwm goes > through rounding logic into data->dac, it is modified and messes with > the thermal zone state algorithms. It's also better to serve a cache > value, thus avoiding periodic actual i2c traffic. > > Signed-off-by: Jean-Francois Dagenais <jeff.dagenais@gmail.com> > --- > Changes in v2: > Removed left-over debug printk. > > drivers/hwmon/max6650.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 90 insertions(+), 1 deletion(-) > > diff --git a/drivers/hwmon/max6650.c b/drivers/hwmon/max6650.c > index 61135a2d0cff..f1cbbaaa1206 100644 > --- a/drivers/hwmon/max6650.c > +++ b/drivers/hwmon/max6650.c > @@ -40,6 +40,7 @@ > #include <linux/hwmon-sysfs.h> > #include <linux/err.h> > #include <linux/of_device.h> > +#include <linux/thermal.h> > > /* > * Insmod parameters > @@ -113,6 +114,7 @@ module_param(clock, int, 0444); > struct max6650_data { > struct i2c_client *client; > const struct attribute_group *groups[3]; > + struct thermal_cooling_device *cooling_dev; > struct mutex update_lock; > int nr_fans; > char valid; /* zero until following fields are valid */ > @@ -125,6 +127,7 @@ struct max6650_data { > u8 count; > u8 dac; > u8 alarm; > + unsigned long cooling_dev_state; > }; > > static const u8 tach_reg[] = { > @@ -694,6 +697,63 @@ static int max6650_init_client(struct max6650_data *data, > return 0; > } > > +#if IS_ENABLED(CONFIG_THERMAL) > +/* thermal cooling device callbacks */ > +static int max6650_get_max_state(struct thermal_cooling_device *cdev, > + unsigned long *state) > +{ > + *state = 255; > + > + return 0; > +} > + > +static int max6650_get_cur_state(struct thermal_cooling_device *cdev, > + unsigned long *state) > +{ > + struct max6650_data *data = cdev->devdata; > + > + *state = data->cooling_dev_state; > + > + return 0; > +} > + > +static int > +max6650_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state) > +{ > + struct max6650_data *data = cdev->devdata; > + struct i2c_client *client = data->client; > + int err; > + > + state = clamp_val(state, 0, 255); > + > + mutex_lock(&data->update_lock); > + > + if (data->config & MAX6650_CFG_V12) > + data->dac = 180 - (180 * state)/255; > + else > + data->dac = 76 - (76 * state)/255; > + > + err = i2c_smbus_write_byte_data(client, MAX6650_REG_DAC, data->dac); > + > + if (!err) { > + max6650_set_operating_mode(data, state ? > + MAX6650_CFG_MODE_OPEN_LOOP : > + MAX6650_CFG_MODE_OFF); > + data->cooling_dev_state = state; > + } > + > + mutex_unlock(&data->update_lock); > + > + return err < 0 ? err : 0; > +} > + > +static const struct thermal_cooling_device_ops max6650_cooling_ops = { > + .get_max_state = max6650_get_max_state, > + .get_cur_state = max6650_get_cur_state, > + .set_cur_state = max6650_set_cur_state, > +}; > +#endif > + > static int max6650_probe(struct i2c_client *client, > const struct i2c_device_id *id) > { > @@ -709,6 +769,7 @@ static int max6650_probe(struct i2c_client *client, > return -ENOMEM; > > data->client = client; > + i2c_set_clientdata(client, data); > mutex_init(&data->update_lock); > data->nr_fans = of_id ? (int)(uintptr_t)of_id->data : id->driver_data; > > @@ -727,7 +788,34 @@ static int max6650_probe(struct i2c_client *client, > hwmon_dev = devm_hwmon_device_register_with_groups(dev, > client->name, data, > data->groups); > - return PTR_ERR_OR_ZERO(hwmon_dev); > + err = PTR_ERR_OR_ZERO(hwmon_dev); > + if (err) > + return err; > + > +#if IS_ENABLED(CONFIG_THERMAL) This will result in missing symbols if THERMAL is built as module and this driver is built into the kernel. You'll have to adjust Kconfig dependencies accordingly. See other drivers for examples. > + data->cooling_dev = > + thermal_of_cooling_device_register(client->dev.of_node, > + id->name, data, > + &max6650_cooling_ops); > + if (IS_ERR(data->cooling_dev)) { > + err = PTR_ERR(data->cooling_dev); > + dev_err(&client->dev, > + "Failed to register as cooling device (%d)\n", err); > + return err; Why would it be fatal for the driver if this fails ? It wasn't fatal before. > + } > + > + thermal_cdev_update(data->cooling_dev); > +#endif > + return 0; > +} > + > +static int max6650_remove(struct i2c_client *client) > +{ > + struct max6650_data *data = i2c_get_clientdata(client); > + > + thermal_cooling_device_unregister(data->cooling_dev); > + > + return 0; > } > > static const struct i2c_device_id max6650_id[] = { > @@ -743,6 +831,7 @@ static struct i2c_driver max6650_driver = { > .of_match_table = of_match_ptr(max6650_dt_match), > }, > .probe = max6650_probe, > + .remove = max6650_remove, > .id_table = max6650_id, > }; > > -- > 2.11.0 >
> On Apr 18, 2019, at 13:38, Guenter Roeck <linux@roeck-us.net> wrote: > >> +#if IS_ENABLED(CONFIG_THERMAL) > > This will result in missing symbols if THERMAL is built as module > and this driver is built into the kernel. You'll have to adjust > Kconfig dependencies accordingly. See other drivers for examples. Right! Was not a problem for me, but I do remember seing the "funny" ifdefs around. > >> + data->cooling_dev = >> + thermal_of_cooling_device_register(client->dev.of_node, >> + id->name, data, >> + &max6650_cooling_ops); >> + if (IS_ERR(data->cooling_dev)) { >> + err = PTR_ERR(data->cooling_dev); >> + dev_err(&client->dev, >> + "Failed to register as cooling device (%d)\n", err); >> + return err; > > Why would it be fatal for the driver if this fails ? It wasn't > fatal before. Mmmh, you are right. This assumes that all users of max6650 would now require to be referred to in some thermal zone. Again, this was not a problem for my test environment. Wow, two very egocentric issues. Will fix and send V3, thanks for the review!
On Thu, Apr 18, 2019 at 02:02:48PM -0400, Jean-Francois Dagenais wrote: > > > On Apr 18, 2019, at 13:38, Guenter Roeck <linux@roeck-us.net> wrote: > > > >> +#if IS_ENABLED(CONFIG_THERMAL) > > > > This will result in missing symbols if THERMAL is built as module > > and this driver is built into the kernel. You'll have to adjust > > Kconfig dependencies accordingly. See other drivers for examples. > > Right! Was not a problem for me, but I do remember seing the "funny" > ifdefs around. > I know, it is annoying. There was an effort to make THERMAL boolean instead of tristate, but it looks like that has stalled or was rejected. > > > >> + data->cooling_dev = > >> + thermal_of_cooling_device_register(client->dev.of_node, > >> + id->name, data, > >> + &max6650_cooling_ops); > >> + if (IS_ERR(data->cooling_dev)) { > >> + err = PTR_ERR(data->cooling_dev); > >> + dev_err(&client->dev, > >> + "Failed to register as cooling device (%d)\n", err); > >> + return err; > > > > Why would it be fatal for the driver if this fails ? It wasn't > > fatal before. > > Mmmh, you are right. This assumes that all users of max6650 would now require to > be referred to in some thermal zone. Again, this was not a problem for my test > environment. > > Wow, two very egocentric issues. Will fix and send V3, thanks for the review! Not really egocentric. Just keep in mind that there are existing use cases. Thanks, Guenter
> On Apr 18, 2019, at 14:06, Guenter Roeck <linux@roeck-us.net> wrote: > > On Thu, Apr 18, 2019 at 02:02:48PM -0400, Jean-Francois Dagenais wrote: >> >>> On Apr 18, 2019, at 13:38, Guenter Roeck <linux@roeck-us.net> wrote: >>> >>>> +#if IS_ENABLED(CONFIG_THERMAL) >>> >>> This will result in missing symbols if THERMAL is built as module >>> and this driver is built into the kernel. You'll have to adjust >>> Kconfig dependencies accordingly. See other drivers for examples. >> >> Right! Was not a problem for me, but I do remember seing the "funny" >> ifdefs around. >> > > I know, it is annoying. There was an effort to make THERMAL boolean > instead of tristate, but it looks like that has stalled or was > rejected. Ok, so I had used pwm-fan.c as a reference and simply forgotten to copy over the Kconfig bit which makes sure if the thermal is a module, then so is the chip driver. I will put in v3. > >>> >>>> + data->cooling_dev = >>>> + thermal_of_cooling_device_register(client->dev.of_node, >>>> + id->name, data, >>>> + &max6650_cooling_ops); >>>> + if (IS_ERR(data->cooling_dev)) { >>>> + err = PTR_ERR(data->cooling_dev); >>>> + dev_err(&client->dev, >>>> + "Failed to register as cooling device (%d)\n", err); >>>> + return err; >>> >>> Why would it be fatal for the driver if this fails ? It wasn't >>> fatal before. >> >> Mmmh, you are right. This assumes that all users of max6650 would now require to >> be referred to in some thermal zone. Again, this was not a problem for my test >> environment. And about this bit... I had confused the thermal cooling register with the thermal_of_sensor register. So if my static analysis of the code in __thermal_cooling_device_register is correct, the function cannot really fail unless something is terribly wrong in the kernel... like a malloc failure or something. If I am wrong, then pwm-fan.c should also be visited since my max6650_probe code was copy-pasted from it. On a related note, I am worried about everyone having to progressively add thermal-zone support to each fan chip in drivers/hwmon. Would it not be possible for hwmon to detect, through `enum hwmon_sensor_types` set to `hwmon_fan` in `struct hwmon_channel_info` to be registered as cooling device in thermal ? Of course for max665x to work, it would have to be modified to use `devm_hwmon_device_register_with_info` instead of `devm_hwmon_device_register_with_groups` Another possiblity would be to create a new driver in drivers/thermal for max665x as a thermal cooling device. Or even a drivers/thermal/thermal-generic-hwmon-fan.c or something, where DTS would instanciate such a cooling device which references a hwmon device's specific pwm channel. Phew! So many possibilities!
On Thu, Apr 18, 2019 at 03:54:32PM -0400, Jean-Francois Dagenais wrote: > > > > On Apr 18, 2019, at 14:06, Guenter Roeck <linux@roeck-us.net> wrote: > > > > On Thu, Apr 18, 2019 at 02:02:48PM -0400, Jean-Francois Dagenais wrote: > >> > >>> On Apr 18, 2019, at 13:38, Guenter Roeck <linux@roeck-us.net> wrote: > >>> > >>>> +#if IS_ENABLED(CONFIG_THERMAL) > >>> > >>> This will result in missing symbols if THERMAL is built as module > >>> and this driver is built into the kernel. You'll have to adjust > >>> Kconfig dependencies accordingly. See other drivers for examples. > >> > >> Right! Was not a problem for me, but I do remember seing the "funny" > >> ifdefs around. > >> > > > > I know, it is annoying. There was an effort to make THERMAL boolean > > instead of tristate, but it looks like that has stalled or was > > rejected. > > Ok, so I had used pwm-fan.c as a reference and simply forgotten to copy over the > Kconfig bit which makes sure if the thermal is a module, then so is the chip > driver. I will put in v3. > > > > >>> > >>>> + data->cooling_dev = > >>>> + thermal_of_cooling_device_register(client->dev.of_node, > >>>> + id->name, data, > >>>> + &max6650_cooling_ops); > >>>> + if (IS_ERR(data->cooling_dev)) { > >>>> + err = PTR_ERR(data->cooling_dev); > >>>> + dev_err(&client->dev, > >>>> + "Failed to register as cooling device (%d)\n", err); > >>>> + return err; > >>> > >>> Why would it be fatal for the driver if this fails ? It wasn't > >>> fatal before. > >> > >> Mmmh, you are right. This assumes that all users of max6650 would now require to > >> be referred to in some thermal zone. Again, this was not a problem for my test > >> environment. > > And about this bit... I had confused the thermal cooling register with the > thermal_of_sensor register. So if my static analysis of the code in > __thermal_cooling_device_register is correct, the function cannot really fail > unless something is terribly wrong in the kernel... like a malloc failure or > something. > That explains why the other drivers don't generate an error message. You might want to reconsider the dev_err() above; it appears it adds zero value. > If I am wrong, then pwm-fan.c should also be visited since my max6650_probe code > was copy-pasted from it. > You are correct, that should not fail either. But two wrongs don't make it right. > > On a related note, I am worried about everyone having to progressively add > thermal-zone support to each fan chip in drivers/hwmon. Would it not be possible > for hwmon to detect, through `enum hwmon_sensor_types` set to `hwmon_fan` in > `struct hwmon_channel_info` to be registered as cooling device in thermal ? Of > course for max665x to work, it would have to be modified to use > `devm_hwmon_device_register_with_info` instead of > `devm_hwmon_device_register_with_groups` > Yes, that would be desirable. Patches welcome. And, yes, the driver would have to be reworked to use the new API. > Another possiblity would be to create a new driver in drivers/thermal for > max665x as a thermal cooling device. Or even a Only if we also drop the existing driver from hwmon. Fine with me if others agree and let you do it (one less hwmon driver to maintain). Note that I just submitted a patch series to introduce devm_thermal_of_cooling_device_register(). We'll see if the thermal maintainers accept it. Guenter > drivers/thermal/thermal-generic-hwmon-fan.c or something, where DTS would > instanciate such a cooling device which references a hwmon device's specific > pwm channel. >
> On Apr 18, 2019, at 16:12, Guenter Roeck <linux@roeck-us.net> wrote: > > That explains why the other drivers don't generate an error message. > You might want to reconsider the dev_err() above; it appears it adds zero > value. > >> If I am wrong, then pwm-fan.c should also be visited since my max6650_probe code >> was copy-pasted from it. >> > > You are correct, that should not fail either. But two wrongs don't make it right. So you think any fan chip driver probe function in hwmon should succeed even if (devm_)thermal_of_cooling_device_register fails?. Mmmh... perhaps I am missing something, but I tend to disagree. That means that for example my system boots normally without any errors, yet the thermal-zone I defined, which is there to prevent my system from burning out, would remain incomplete (without required cooling device) and yet no errors came out of the kernel. I just tested it and that's exactly what's happening! Yikes. > >> >> On a related note, I am worried about everyone having to progressively add >> thermal-zone support to each fan chip in drivers/hwmon. Would it not be possible >> for hwmon to detect, through `enum hwmon_sensor_types` set to `hwmon_fan` in >> `struct hwmon_channel_info` to be registered as cooling device in thermal ? Of >> course for max665x to work, it would have to be modified to use >> `devm_hwmon_device_register_with_info` instead of >> `devm_hwmon_device_register_with_groups` >> > > Yes, that would be desirable. Patches welcome. And, yes, the driver would have > to be reworked to use the new API. > >> Another possiblity would be to create a new driver in drivers/thermal for >> max665x as a thermal cooling device. Or even a > > Only if we also drop the existing driver from hwmon. Fine with me if others > agree and let you do it (one less hwmon driver to maintain). > > Note that I just submitted a patch series to introduce > devm_thermal_of_cooling_device_register(). We'll see if the thermal maintainers > accept it. Saw that. For a minute I really thought you pulled out a series which did exactly what I was talking about (i.e. auto-registering fans in thermal cooling)!! Since my problem is fixed in the short term, and am very stretched for time with our project, I will not have time to do this now. Perhaps one day... ;)
On Thu, Apr 18, 2019 at 04:39:35PM -0400, Jean-Francois Dagenais wrote: > > > > On Apr 18, 2019, at 16:12, Guenter Roeck <linux@roeck-us.net> wrote: > > > > That explains why the other drivers don't generate an error message. > > You might want to reconsider the dev_err() above; it appears it adds zero > > value. > > > >> If I am wrong, then pwm-fan.c should also be visited since my max6650_probe code > >> was copy-pasted from it. > >> > > > > You are correct, that should not fail either. But two wrongs don't make it right. > > So you think any fan chip driver probe function in hwmon should succeed even if > (devm_)thermal_of_cooling_device_register fails?. Mmmh... perhaps I am missing > something, but I tend to disagree. That means that for example my system boots > normally without any errors, yet the thermal-zone I defined, which is there to > prevent my system from burning out, would remain incomplete (without required > cooling device) and yet no errors came out of the kernel. > > I just tested it and that's exactly what's happening! Yikes. > You would still get a warning message. Better than failing to load the driver completely. On top of that, this used to work before. Plus, you said it only happens in severe situations, ie if the kernel is out of memory, and in thact case there would be a traceback. Sorry, I don't get your point. Guenter
> On Apr 18, 2019, at 16:50, Guenter Roeck <linux@roeck-us.net> wrote: > >> normally without any errors, yet the thermal-zone I defined, which is there to >> prevent my system from burning out, would remain incomplete (without required >> cooling device) and yet no errors came out of the kernel. >> >> I just tested it and that's exactly what's happening! Yikes. >> > You would still get a warning message. Better than failing to load > the driver completely. On top of that, this used to work before. > Plus, you said it only happens in severe situations, ie if the kernel > is out of memory, and in thact case there would be a traceback. > Sorry, I don't get your point. Ok, I see. We're having the same discussion than on the v3 submission then. If I change the dev_err to a dev_warn, but still return 0 on thermal_of_cooling_device_register failure, you would apply the patch. It sounds like a compromise. Completely failing the probe call or partially succeed it results in the same behaviour for thermal cooling use cases. But the dev_warn definitely becomes important though. I will v4 with that if you agree and leave the devm_ thermal register for another day since I agree the series might be delayed too long (or indefinitely). Cheers!
On Thu, Apr 18, 2019 at 05:33:13PM -0400, Jean-Francois Dagenais wrote: > > > On Apr 18, 2019, at 16:50, Guenter Roeck <linux@roeck-us.net> wrote: > > > >> normally without any errors, yet the thermal-zone I defined, which is there to > >> prevent my system from burning out, would remain incomplete (without required > >> cooling device) and yet no errors came out of the kernel. > >> > >> I just tested it and that's exactly what's happening! Yikes. > >> > > You would still get a warning message. Better than failing to load > > the driver completely. On top of that, this used to work before. > > Plus, you said it only happens in severe situations, ie if the kernel > > is out of memory, and in thact case there would be a traceback. > > Sorry, I don't get your point. > > Ok, I see. We're having the same discussion than on the v3 submission then. > > If I change the dev_err to a dev_warn, but still return 0 on > thermal_of_cooling_device_register failure, you would apply the patch. It sounds > like a compromise. > > Completely failing the probe call or partially succeed it results in the same > behaviour for thermal cooling use cases. But the dev_warn definitely becomes > important though. > > I will v4 with that if you agree and leave the devm_ thermal register for > another day since I agree the series might be delayed too long (or > indefinitely). > Ok. Guenter
diff --git a/drivers/hwmon/max6650.c b/drivers/hwmon/max6650.c index 61135a2d0cff..f1cbbaaa1206 100644 --- a/drivers/hwmon/max6650.c +++ b/drivers/hwmon/max6650.c @@ -40,6 +40,7 @@ #include <linux/hwmon-sysfs.h> #include <linux/err.h> #include <linux/of_device.h> +#include <linux/thermal.h> /* * Insmod parameters @@ -113,6 +114,7 @@ module_param(clock, int, 0444); struct max6650_data { struct i2c_client *client; const struct attribute_group *groups[3]; + struct thermal_cooling_device *cooling_dev; struct mutex update_lock; int nr_fans; char valid; /* zero until following fields are valid */ @@ -125,6 +127,7 @@ struct max6650_data { u8 count; u8 dac; u8 alarm; + unsigned long cooling_dev_state; }; static const u8 tach_reg[] = { @@ -694,6 +697,63 @@ static int max6650_init_client(struct max6650_data *data, return 0; } +#if IS_ENABLED(CONFIG_THERMAL) +/* thermal cooling device callbacks */ +static int max6650_get_max_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + *state = 255; + + return 0; +} + +static int max6650_get_cur_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + struct max6650_data *data = cdev->devdata; + + *state = data->cooling_dev_state; + + return 0; +} + +static int +max6650_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state) +{ + struct max6650_data *data = cdev->devdata; + struct i2c_client *client = data->client; + int err; + + state = clamp_val(state, 0, 255); + + mutex_lock(&data->update_lock); + + if (data->config & MAX6650_CFG_V12) + data->dac = 180 - (180 * state)/255; + else + data->dac = 76 - (76 * state)/255; + + err = i2c_smbus_write_byte_data(client, MAX6650_REG_DAC, data->dac); + + if (!err) { + max6650_set_operating_mode(data, state ? + MAX6650_CFG_MODE_OPEN_LOOP : + MAX6650_CFG_MODE_OFF); + data->cooling_dev_state = state; + } + + mutex_unlock(&data->update_lock); + + return err < 0 ? err : 0; +} + +static const struct thermal_cooling_device_ops max6650_cooling_ops = { + .get_max_state = max6650_get_max_state, + .get_cur_state = max6650_get_cur_state, + .set_cur_state = max6650_set_cur_state, +}; +#endif + static int max6650_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -709,6 +769,7 @@ static int max6650_probe(struct i2c_client *client, return -ENOMEM; data->client = client; + i2c_set_clientdata(client, data); mutex_init(&data->update_lock); data->nr_fans = of_id ? (int)(uintptr_t)of_id->data : id->driver_data; @@ -727,7 +788,34 @@ static int max6650_probe(struct i2c_client *client, hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, data, data->groups); - return PTR_ERR_OR_ZERO(hwmon_dev); + err = PTR_ERR_OR_ZERO(hwmon_dev); + if (err) + return err; + +#if IS_ENABLED(CONFIG_THERMAL) + data->cooling_dev = + thermal_of_cooling_device_register(client->dev.of_node, + id->name, data, + &max6650_cooling_ops); + if (IS_ERR(data->cooling_dev)) { + err = PTR_ERR(data->cooling_dev); + dev_err(&client->dev, + "Failed to register as cooling device (%d)\n", err); + return err; + } + + thermal_cdev_update(data->cooling_dev); +#endif + return 0; +} + +static int max6650_remove(struct i2c_client *client) +{ + struct max6650_data *data = i2c_get_clientdata(client); + + thermal_cooling_device_unregister(data->cooling_dev); + + return 0; } static const struct i2c_device_id max6650_id[] = { @@ -743,6 +831,7 @@ static struct i2c_driver max6650_driver = { .of_match_table = of_match_ptr(max6650_dt_match), }, .probe = max6650_probe, + .remove = max6650_remove, .id_table = max6650_id, };
This allows max6650 devices to be referenced in dts as a cooling device. The pwm value seems duplicated in cooling_dev_state but since pwm goes through rounding logic into data->dac, it is modified and messes with the thermal zone state algorithms. It's also better to serve a cache value, thus avoiding periodic actual i2c traffic. Signed-off-by: Jean-Francois Dagenais <jeff.dagenais@gmail.com> --- Changes in v2: Removed left-over debug printk. drivers/hwmon/max6650.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 1 deletion(-)