diff mbox series

[7/8] hwmon: (scmi) Register explicitly with Thermal Framework

Message ID 20221028140833.280091-7-cristian.marussi@arm.com (mailing list archive)
State New, archived
Headers show
Series [1/8] firmware: arm_scmi: Cleanup core driver removal callback | expand

Commit Message

Cristian Marussi Oct. 28, 2022, 2:08 p.m. UTC
Available sensors are enumerated and reported by the SCMI platform server
using a 16bit identification number; not all such sensors are of a type
supported by hwmon subsystem and, among the supported ones, only a subset
could be temperature sensors that have to be registered with the Thermal
Framework.
Potential clashes between hwmon channels indexes and the underlying real
sensors IDs do not play well with the hwmon<-->thermal bridge automatic
registration routines and could need a sensible number of fake dummy
sensors to be made up in order to keep indexes and IDs in sync.

Avoid to use the hwmon<-->thermal bridge dropping the HWMON_C_REGISTER_TZ
attribute and instead explicit register temperature sensors directly with
the Thermal Framework.

Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: linux-hwmon@vger.kernel.org
Signed-off-by: Cristian Marussi <cristian.marussi@arm.com>
---
 drivers/hwmon/scmi-hwmon.c | 115 ++++++++++++++++++++++++++++++++-----
 1 file changed, 102 insertions(+), 13 deletions(-)

Comments

Guenter Roeck Oct. 28, 2022, 3:11 p.m. UTC | #1
On 10/28/22 07:08, Cristian Marussi wrote:
> Available sensors are enumerated and reported by the SCMI platform server
> using a 16bit identification number; not all such sensors are of a type
> supported by hwmon subsystem and, among the supported ones, only a subset
> could be temperature sensors that have to be registered with the Thermal
> Framework.
> Potential clashes between hwmon channels indexes and the underlying real
> sensors IDs do not play well with the hwmon<-->thermal bridge automatic
> registration routines and could need a sensible number of fake dummy
> sensors to be made up in order to keep indexes and IDs in sync.
> 
> Avoid to use the hwmon<-->thermal bridge dropping the HWMON_C_REGISTER_TZ
> attribute and instead explicit register temperature sensors directly with
> the Thermal Framework.
> 


For my reference:

Reviewed-by: Guenter Roeck <linux@roeck-us.net>

$subject says "patch 7/8". Patches 1-6 are firmware patches. Does this patch
depend on the other patches of the series or can I apply it on its own ?

Additional comment inline below.

Thanks,
Guenter

> Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
> Cc: Guenter Roeck <linux@roeck-us.net>
> Cc: linux-hwmon@vger.kernel.org
> Signed-off-by: Cristian Marussi <cristian.marussi@arm.com>
> ---
>   drivers/hwmon/scmi-hwmon.c | 115 ++++++++++++++++++++++++++++++++-----
>   1 file changed, 102 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/hwmon/scmi-hwmon.c b/drivers/hwmon/scmi-hwmon.c
> index b1329a58ce40..124fe8ee1b9b 100644
> --- a/drivers/hwmon/scmi-hwmon.c
> +++ b/drivers/hwmon/scmi-hwmon.c
> @@ -20,6 +20,11 @@ struct scmi_sensors {
>   	const struct scmi_sensor_info **info[hwmon_max];
>   };
>   
> +struct scmi_thermal_sensor {
> +	const struct scmi_protocol_handle *ph;
> +	const struct scmi_sensor_info *info;
> +};
> +
>   static inline u64 __pow10(u8 x)
>   {
>   	u64 r = 1;
> @@ -64,16 +69,14 @@ static int scmi_hwmon_scale(const struct scmi_sensor_info *sensor, u64 *value)
>   	return 0;
>   }
>   
> -static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> -			   u32 attr, int channel, long *val)
> +static int scmi_hwmon_read_scaled_value(const struct scmi_protocol_handle *ph,
> +					const struct scmi_sensor_info *sensor,
> +					long *val)
>   {
>   	int ret;
>   	u64 value;
> -	const struct scmi_sensor_info *sensor;
> -	struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev);
>   
> -	sensor = *(scmi_sensors->info[type] + channel);
> -	ret = sensor_ops->reading_get(scmi_sensors->ph, sensor->id, &value);
> +	ret = sensor_ops->reading_get(ph, sensor->id, &value);
>   	if (ret)
>   		return ret;
>   
> @@ -84,6 +87,17 @@ static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
>   	return ret;
>   }
>   
> +static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> +			   u32 attr, int channel, long *val)
> +{
> +	const struct scmi_sensor_info *sensor;
> +	struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev);
> +
> +	sensor = *(scmi_sensors->info[type] + channel);
> +
> +	return scmi_hwmon_read_scaled_value(scmi_sensors->ph, sensor, val);
> +}
> +
>   static int
>   scmi_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type,
>   		       u32 attr, int channel, const char **str)
> @@ -122,6 +136,25 @@ static struct hwmon_chip_info scmi_chip_info = {
>   	.info = NULL,
>   };
>   
> +static int scmi_hwmon_thermal_get_temp(struct thermal_zone_device *tz,
> +				       int *temp)
> +{
> +	int ret;
> +	long value;
> +	struct scmi_thermal_sensor *th_sensor = tz->devdata;
> +
> +	ret = scmi_hwmon_read_scaled_value(th_sensor->ph, th_sensor->info,
> +					   &value);
> +	if (!ret)
> +		*temp = value;
> +
> +	return ret;
> +}
> +
> +static const struct thermal_zone_device_ops scmi_hwmon_thermal_ops = {
> +	.get_temp = scmi_hwmon_thermal_get_temp,
> +};
> +
>   static int scmi_hwmon_add_chan_info(struct hwmon_channel_info *scmi_hwmon_chan,
>   				    struct device *dev, int num,
>   				    enum hwmon_sensor_types type, u32 config)
> @@ -149,7 +182,6 @@ static enum hwmon_sensor_types scmi_types[] = {
>   };
>   
>   static u32 hwmon_attributes[hwmon_max] = {
> -	[hwmon_chip] = HWMON_C_REGISTER_TZ,
>   	[hwmon_temp] = HWMON_T_INPUT | HWMON_T_LABEL,
>   	[hwmon_in] = HWMON_I_INPUT | HWMON_I_LABEL,
>   	[hwmon_curr] = HWMON_C_INPUT | HWMON_C_LABEL,
> @@ -157,6 +189,43 @@ static u32 hwmon_attributes[hwmon_max] = {
>   	[hwmon_energy] = HWMON_E_INPUT | HWMON_E_LABEL,
>   };
>   
> +static int scmi_thermal_sensor_register(struct device *dev,
> +					const struct scmi_protocol_handle *ph,
> +					const struct scmi_sensor_info *sensor)
> +{
> +	struct scmi_thermal_sensor *th_sensor;
> +	struct thermal_zone_device *tzd;
> +
> +	th_sensor = devm_kzalloc(dev, sizeof(*th_sensor), GFP_KERNEL);
> +	if (!th_sensor)
> +		return -ENOMEM;
> +
> +	th_sensor->ph = ph;
> +	th_sensor->info = sensor;
> +
> +	/*
> +	 * Try to register a temperature sensor with the Thermal Framework:
> +	 * skip sensors not defined as part of any thermal zone (-ENODEV) but
> +	 * report any other errors related to misconfigured zones/sensors.
> +	 */
> +	tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor,
> +					    &scmi_hwmon_thermal_ops);
> +	if (IS_ERR(tzd)) {
> +		devm_kfree(dev, th_sensor);
> +
> +		if (PTR_ERR(tzd) != -ENODEV)
> +			return PTR_ERR(tzd);
> +
> +		dev_info(dev, "Sensor '%s' not attached to any thermal zone.\n",
> +			 sensor->name);

There were complaints about this message as it is noisy. If you send
another version, please drop it unless attaching each sensor to a thermal
zone is strongly expected. If you don't send another version, I'll drop it
while applying.

> +	} else {
> +		dev_dbg(dev, "Sensor '%s' attached to thermal zone ID:%d\n",
> +			sensor->name, tzd->id);
> +	}
> +
> +	return 0;
> +}
> +
>   static int scmi_hwmon_probe(struct scmi_device *sdev)
>   {
>   	int i, idx;
> @@ -164,7 +233,7 @@ static int scmi_hwmon_probe(struct scmi_device *sdev)
>   	enum hwmon_sensor_types type;
>   	struct scmi_sensors *scmi_sensors;
>   	const struct scmi_sensor_info *sensor;
> -	int nr_count[hwmon_max] = {0}, nr_types = 0;
> +	int nr_count[hwmon_max] = {0}, nr_types = 0, nr_count_temp = 0;
>   	const struct hwmon_chip_info *chip_info;
>   	struct device *hwdev, *dev = &sdev->dev;
>   	struct hwmon_channel_info *scmi_hwmon_chan;
> @@ -208,10 +277,8 @@ static int scmi_hwmon_probe(struct scmi_device *sdev)
>   		}
>   	}
>   
> -	if (nr_count[hwmon_temp]) {
> -		nr_count[hwmon_chip]++;
> -		nr_types++;
> -	}
> +	if (nr_count[hwmon_temp])
> +		nr_count_temp = nr_count[hwmon_temp];
>   
>   	scmi_hwmon_chan = devm_kcalloc(dev, nr_types, sizeof(*scmi_hwmon_chan),
>   				       GFP_KERNEL);
> @@ -262,8 +329,30 @@ static int scmi_hwmon_probe(struct scmi_device *sdev)
>   	hwdev = devm_hwmon_device_register_with_info(dev, "scmi_sensors",
>   						     scmi_sensors, chip_info,
>   						     NULL);
> +	if (IS_ERR(hwdev))
> +		return PTR_ERR(hwdev);
> +
> +	for (i = 0; i < nr_count_temp; i++) {
> +		int ret;
>   
> -	return PTR_ERR_OR_ZERO(hwdev);
> +		sensor = *(scmi_sensors->info[hwmon_temp] + i);
> +		if (!sensor)
> +			continue;
> +
> +		/*
> +		 * Warn on any misconfiguration related to thermal zones but
> +		 * bail out of probing only on memory errors.
> +		 */
> +		ret = scmi_thermal_sensor_register(dev, ph, sensor);
> +		if (ret == -ENOMEM)
> +			return ret;
> +		else if (ret)
> +			dev_warn(dev,
> +				 "Thermal zone misconfigured for %s. err=%d\n",
> +				 sensor->name, ret);
> +	}
> +
> +	return 0;
>   }
>   
>   static const struct scmi_device_id scmi_id_table[] = {
Cristian Marussi Oct. 28, 2022, 3:35 p.m. UTC | #2
On Fri, Oct 28, 2022 at 08:11:59AM -0700, Guenter Roeck wrote:
> On 10/28/22 07:08, Cristian Marussi wrote:
> > Available sensors are enumerated and reported by the SCMI platform server
> > using a 16bit identification number; not all such sensors are of a type
> > supported by hwmon subsystem and, among the supported ones, only a subset
> > could be temperature sensors that have to be registered with the Thermal
> > Framework.
> > Potential clashes between hwmon channels indexes and the underlying real
> > sensors IDs do not play well with the hwmon<-->thermal bridge automatic
> > registration routines and could need a sensible number of fake dummy
> > sensors to be made up in order to keep indexes and IDs in sync.
> > 
> > Avoid to use the hwmon<-->thermal bridge dropping the HWMON_C_REGISTER_TZ
> > attribute and instead explicit register temperature sensors directly with
> > the Thermal Framework.
> > 
> 
> 
> For my reference:
> 
> Reviewed-by: Guenter Roeck <linux@roeck-us.net>
> 
> $subject says "patch 7/8". Patches 1-6 are firmware patches. Does this patch
> depend on the other patches of the series or can I apply it on its own ?

Thanks for having a look first of all !

This patch can be applied on its own...it's just that I have bundled
together a bunch of fixes (... this being probably a bit too big really it
should have been on its own, sorry for that...)

> 
> Additional comment inline below.
> 
> Thanks,
> Guenter
> 
> > Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
> > Cc: Guenter Roeck <linux@roeck-us.net>
> > Cc: linux-hwmon@vger.kernel.org
> > Signed-off-by: Cristian Marussi <cristian.marussi@arm.com>
> > ---
> >   drivers/hwmon/scmi-hwmon.c | 115 ++++++++++++++++++++++++++++++++-----
> >   1 file changed, 102 insertions(+), 13 deletions(-)
> > 
> > diff --git a/drivers/hwmon/scmi-hwmon.c b/drivers/hwmon/scmi-hwmon.c
> > index b1329a58ce40..124fe8ee1b9b 100644
> > --- a/drivers/hwmon/scmi-hwmon.c
> > +++ b/drivers/hwmon/scmi-hwmon.c
> > @@ -20,6 +20,11 @@ struct scmi_sensors {
> >   	const struct scmi_sensor_info **info[hwmon_max];
> >   };
> > +struct scmi_thermal_sensor {
> > +	const struct scmi_protocol_handle *ph;
> > +	const struct scmi_sensor_info *info;
> > +};
> > +
> >   static inline u64 __pow10(u8 x)
> >   {
> >   	u64 r = 1;
> > @@ -64,16 +69,14 @@ static int scmi_hwmon_scale(const struct scmi_sensor_info *sensor, u64 *value)
> >   	return 0;
> >   }
> > -static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> > -			   u32 attr, int channel, long *val)
> > +static int scmi_hwmon_read_scaled_value(const struct scmi_protocol_handle *ph,
> > +					const struct scmi_sensor_info *sensor,
> > +					long *val)
> >   {
> >   	int ret;
> >   	u64 value;
> > -	const struct scmi_sensor_info *sensor;
> > -	struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev);
> > -	sensor = *(scmi_sensors->info[type] + channel);
> > -	ret = sensor_ops->reading_get(scmi_sensors->ph, sensor->id, &value);
> > +	ret = sensor_ops->reading_get(ph, sensor->id, &value);
> >   	if (ret)
> >   		return ret;
> > @@ -84,6 +87,17 @@ static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> >   	return ret;
> >   }
> > +static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> > +			   u32 attr, int channel, long *val)
> > +{
> > +	const struct scmi_sensor_info *sensor;
> > +	struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev);
> > +
> > +	sensor = *(scmi_sensors->info[type] + channel);
> > +
> > +	return scmi_hwmon_read_scaled_value(scmi_sensors->ph, sensor, val);
> > +}
> > +
> >   static int
> >   scmi_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type,
> >   		       u32 attr, int channel, const char **str)
> > @@ -122,6 +136,25 @@ static struct hwmon_chip_info scmi_chip_info = {
> >   	.info = NULL,
> >   };
> > +static int scmi_hwmon_thermal_get_temp(struct thermal_zone_device *tz,
> > +				       int *temp)
> > +{
> > +	int ret;
> > +	long value;
> > +	struct scmi_thermal_sensor *th_sensor = tz->devdata;
> > +
> > +	ret = scmi_hwmon_read_scaled_value(th_sensor->ph, th_sensor->info,
> > +					   &value);
> > +	if (!ret)
> > +		*temp = value;
> > +
> > +	return ret;
> > +}
> > +
> > +static const struct thermal_zone_device_ops scmi_hwmon_thermal_ops = {
> > +	.get_temp = scmi_hwmon_thermal_get_temp,
> > +};
> > +
> >   static int scmi_hwmon_add_chan_info(struct hwmon_channel_info *scmi_hwmon_chan,
> >   				    struct device *dev, int num,
> >   				    enum hwmon_sensor_types type, u32 config)
> > @@ -149,7 +182,6 @@ static enum hwmon_sensor_types scmi_types[] = {
> >   };
> >   static u32 hwmon_attributes[hwmon_max] = {
> > -	[hwmon_chip] = HWMON_C_REGISTER_TZ,
> >   	[hwmon_temp] = HWMON_T_INPUT | HWMON_T_LABEL,
> >   	[hwmon_in] = HWMON_I_INPUT | HWMON_I_LABEL,
> >   	[hwmon_curr] = HWMON_C_INPUT | HWMON_C_LABEL,
> > @@ -157,6 +189,43 @@ static u32 hwmon_attributes[hwmon_max] = {
> >   	[hwmon_energy] = HWMON_E_INPUT | HWMON_E_LABEL,
> >   };
> > +static int scmi_thermal_sensor_register(struct device *dev,
> > +					const struct scmi_protocol_handle *ph,
> > +					const struct scmi_sensor_info *sensor)
> > +{
> > +	struct scmi_thermal_sensor *th_sensor;
> > +	struct thermal_zone_device *tzd;
> > +
> > +	th_sensor = devm_kzalloc(dev, sizeof(*th_sensor), GFP_KERNEL);
> > +	if (!th_sensor)
> > +		return -ENOMEM;
> > +
> > +	th_sensor->ph = ph;
> > +	th_sensor->info = sensor;
> > +
> > +	/*
> > +	 * Try to register a temperature sensor with the Thermal Framework:
> > +	 * skip sensors not defined as part of any thermal zone (-ENODEV) but
> > +	 * report any other errors related to misconfigured zones/sensors.
> > +	 */
> > +	tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor,
> > +					    &scmi_hwmon_thermal_ops);
> > +	if (IS_ERR(tzd)) {
> > +		devm_kfree(dev, th_sensor);
> > +
> > +		if (PTR_ERR(tzd) != -ENODEV)
> > +			return PTR_ERR(tzd);
> > +
> > +		dev_info(dev, "Sensor '%s' not attached to any thermal zone.\n",
> > +			 sensor->name);
> 
> There were complaints about this message as it is noisy. If you send
> another version, please drop it unless attaching each sensor to a thermal
> zone is strongly expected. If you don't send another version, I'll drop it
> while applying.
> 

Ok fine for me. I am waiting to have some feedback from Sudeep too, but
I do not have plan for another version as of now.

As a side note, though, I understand the 'noisiness' argument, but,
sincerely this same message in the original HWMON code was the only
reason why I spotted that something was wrong with the SCMI/HWMON
interactions and discovered the indexes/ids mismatch...if not for
that it would have gone un-noticed that a perfectly configured
ThermalZone/Sensor was not working properly...
(un-noticed at least until something would have been burnt to fire
 in my house .. joking :P)

Thanks,
Cristian
Guenter Roeck Oct. 28, 2022, 3:58 p.m. UTC | #3
On 10/28/22 08:35, Cristian Marussi wrote:
[ ... ]
>>> +	/*
>>> +	 * Try to register a temperature sensor with the Thermal Framework:
>>> +	 * skip sensors not defined as part of any thermal zone (-ENODEV) but
>>> +	 * report any other errors related to misconfigured zones/sensors.
>>> +	 */
>>> +	tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor,
>>> +					    &scmi_hwmon_thermal_ops);
>>> +	if (IS_ERR(tzd)) {
>>> +		devm_kfree(dev, th_sensor);
>>> +
>>> +		if (PTR_ERR(tzd) != -ENODEV)
>>> +			return PTR_ERR(tzd);
>>> +
>>> +		dev_info(dev, "Sensor '%s' not attached to any thermal zone.\n",
>>> +			 sensor->name);
>>
>> There were complaints about this message as it is noisy. If you send
>> another version, please drop it unless attaching each sensor to a thermal
>> zone is strongly expected. If you don't send another version, I'll drop it
>> while applying.
>>
> 
> Ok fine for me. I am waiting to have some feedback from Sudeep too, but
> I do not have plan for another version as of now.
> 
> As a side note, though, I understand the 'noisiness' argument, but,
> sincerely this same message in the original HWMON code was the only
> reason why I spotted that something was wrong with the SCMI/HWMON
> interactions and discovered the indexes/ids mismatch...if not for
> that it would have gone un-noticed that a perfectly configured
> ThermalZone/Sensor was not working properly...
> (un-noticed at least until something would have been burnt to fire
>   in my house .. joking :P)
> 

Good point.

Did you ever check the returned error code ? Maybe we could use it to
distinguish "it is not attached to a thermal zone because it is not
associated with one" from "attaching to a thermal zone failed because
its configuration is bad/incomplete".

Thanks,
Guenter
Cristian Marussi Oct. 28, 2022, 4:15 p.m. UTC | #4
On Fri, Oct 28, 2022 at 08:58:58AM -0700, Guenter Roeck wrote:
> On 10/28/22 08:35, Cristian Marussi wrote:
> [ ... ]
> > > > +	/*
> > > > +	 * Try to register a temperature sensor with the Thermal Framework:
> > > > +	 * skip sensors not defined as part of any thermal zone (-ENODEV) but
> > > > +	 * report any other errors related to misconfigured zones/sensors.
> > > > +	 */
> > > > +	tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor,
> > > > +					    &scmi_hwmon_thermal_ops);
> > > > +	if (IS_ERR(tzd)) {
> > > > +		devm_kfree(dev, th_sensor);
> > > > +
> > > > +		if (PTR_ERR(tzd) != -ENODEV)
> > > > +			return PTR_ERR(tzd);
> > > > +
> > > > +		dev_info(dev, "Sensor '%s' not attached to any thermal zone.\n",
> > > > +			 sensor->name);
> > > 
> > > There were complaints about this message as it is noisy. If you send
> > > another version, please drop it unless attaching each sensor to a thermal
> > > zone is strongly expected. If you don't send another version, I'll drop it
> > > while applying.
> > > 
> > 
> > Ok fine for me. I am waiting to have some feedback from Sudeep too, but
> > I do not have plan for another version as of now.
> > 
> > As a side note, though, I understand the 'noisiness' argument, but,
> > sincerely this same message in the original HWMON code was the only
> > reason why I spotted that something was wrong with the SCMI/HWMON
> > interactions and discovered the indexes/ids mismatch...if not for
> > that it would have gone un-noticed that a perfectly configured
> > ThermalZone/Sensor was not working properly...
> > (un-noticed at least until something would have been burnt to fire
> >   in my house .. joking :P)
> > 
> 
> Good point.
> 
> Did you ever check the returned error code ? Maybe we could use it to
> distinguish "it is not attached to a thermal zone because it is not
> associated with one" from "attaching to a thermal zone failed because
> its configuration is bad/incomplete".
> 

Yes, it is what I do already indeed, in this regards I mimicked what
the hwmon-thermal bridge was doing.

In scmi_thermal_sensor_register() this message is printed out only
if Thermal registration returned -ENODEV and no err is reported
(which means teh specified sensor was not found attached to any TZ),
while in the caller of scmi_thermal_sensor_register() for any error
returned but -ENOMEM I print:

	"Thermal zone misconfigured for %s. err=%d\n",

since any error reported by Thermal other than ENODEV and ENOMEM
means the DT parsing unveiled some configuration anomaly.

Thanks,
Cristian
Guenter Roeck Oct. 28, 2022, 4:34 p.m. UTC | #5
On 10/28/22 09:15, Cristian Marussi wrote:
> On Fri, Oct 28, 2022 at 08:58:58AM -0700, Guenter Roeck wrote:
>> On 10/28/22 08:35, Cristian Marussi wrote:
>> [ ... ]
>>>>> +	/*
>>>>> +	 * Try to register a temperature sensor with the Thermal Framework:
>>>>> +	 * skip sensors not defined as part of any thermal zone (-ENODEV) but
>>>>> +	 * report any other errors related to misconfigured zones/sensors.
>>>>> +	 */
>>>>> +	tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor,
>>>>> +					    &scmi_hwmon_thermal_ops);
>>>>> +	if (IS_ERR(tzd)) {
>>>>> +		devm_kfree(dev, th_sensor);
>>>>> +
>>>>> +		if (PTR_ERR(tzd) != -ENODEV)
>>>>> +			return PTR_ERR(tzd);
>>>>> +
>>>>> +		dev_info(dev, "Sensor '%s' not attached to any thermal zone.\n",
>>>>> +			 sensor->name);
>>>>
>>>> There were complaints about this message as it is noisy. If you send
>>>> another version, please drop it unless attaching each sensor to a thermal
>>>> zone is strongly expected. If you don't send another version, I'll drop it
>>>> while applying.
>>>>
>>>
>>> Ok fine for me. I am waiting to have some feedback from Sudeep too, but
>>> I do not have plan for another version as of now.
>>>
>>> As a side note, though, I understand the 'noisiness' argument, but,
>>> sincerely this same message in the original HWMON code was the only
>>> reason why I spotted that something was wrong with the SCMI/HWMON
>>> interactions and discovered the indexes/ids mismatch...if not for
>>> that it would have gone un-noticed that a perfectly configured
>>> ThermalZone/Sensor was not working properly...
>>> (un-noticed at least until something would have been burnt to fire
>>>    in my house .. joking :P)
>>>
>>
>> Good point.
>>
>> Did you ever check the returned error code ? Maybe we could use it to
>> distinguish "it is not attached to a thermal zone because it is not
>> associated with one" from "attaching to a thermal zone failed because
>> its configuration is bad/incomplete".
>>
> 
> Yes, it is what I do already indeed, in this regards I mimicked what
> the hwmon-thermal bridge was doing.
> 
> In scmi_thermal_sensor_register() this message is printed out only
> if Thermal registration returned -ENODEV and no err is reported
> (which means teh specified sensor was not found attached to any TZ),
> while in the caller of scmi_thermal_sensor_register() for any error
> returned but -ENOMEM I print:
> 
> 	"Thermal zone misconfigured for %s. err=%d\n",
> 
> since any error reported by Thermal other than ENODEV and ENOMEM
> means the DT parsing unveiled some configuration anomaly.
> 

Ok, then let's hope that this finds misconfigurations and drop the
info message.

I just noticed another problem in your code:

+		if (ret == -ENOMEM)
+			return ret;
+		else if (ret)
+			dev_warn(dev,
+				 "Thermal zone misconfigured for %s. err=%d\n",
+				 sensor->name, ret);

Static analyzers will rightfully notice that else after return is unnecessary.
Please rewrite and drop the else. I think something like

		if (ret) {
			if (ret == -ENOMEM)
				return ret;
			dev_warn(dev,
				 "Thermal zone misconfigured for %s. err=%d\n",
				 sensor->name, ret);
		}

would be better since ret would only be evaluated once in the no-error case.

Thanks,
Guenter
Cristian Marussi Oct. 28, 2022, 5:12 p.m. UTC | #6
On Fri, Oct 28, 2022 at 09:34:05AM -0700, Guenter Roeck wrote:
> On 10/28/22 09:15, Cristian Marussi wrote:
> > On Fri, Oct 28, 2022 at 08:58:58AM -0700, Guenter Roeck wrote:
> > > On 10/28/22 08:35, Cristian Marussi wrote:
> > > [ ... ]
> > > > > > +	/*
> > > > > > +	 * Try to register a temperature sensor with the Thermal Framework:
> > > > > > +	 * skip sensors not defined as part of any thermal zone (-ENODEV) but
> > > > > > +	 * report any other errors related to misconfigured zones/sensors.
> > > > > > +	 */
> > > > > > +	tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor,
> > > > > > +					    &scmi_hwmon_thermal_ops);
> > > > > > +	if (IS_ERR(tzd)) {
> > > > > > +		devm_kfree(dev, th_sensor);
> > > > > > +
> > > > > > +		if (PTR_ERR(tzd) != -ENODEV)
> > > > > > +			return PTR_ERR(tzd);
> > > > > > +
> > > > > > +		dev_info(dev, "Sensor '%s' not attached to any thermal zone.\n",
> > > > > > +			 sensor->name);
> > > > > 
> > > > > There were complaints about this message as it is noisy. If you send
> > > > > another version, please drop it unless attaching each sensor to a thermal
> > > > > zone is strongly expected. If you don't send another version, I'll drop it
> > > > > while applying.
> > > > > 
> > > > 
> > > > Ok fine for me. I am waiting to have some feedback from Sudeep too, but
> > > > I do not have plan for another version as of now.
> > > > 
> > > > As a side note, though, I understand the 'noisiness' argument, but,
> > > > sincerely this same message in the original HWMON code was the only
> > > > reason why I spotted that something was wrong with the SCMI/HWMON
> > > > interactions and discovered the indexes/ids mismatch...if not for
> > > > that it would have gone un-noticed that a perfectly configured
> > > > ThermalZone/Sensor was not working properly...
> > > > (un-noticed at least until something would have been burnt to fire
> > > >    in my house .. joking :P)
> > > > 
> > > 
> > > Good point.
> > > 
> > > Did you ever check the returned error code ? Maybe we could use it to
> > > distinguish "it is not attached to a thermal zone because it is not
> > > associated with one" from "attaching to a thermal zone failed because
> > > its configuration is bad/incomplete".
> > > 
> > 
> > Yes, it is what I do already indeed, in this regards I mimicked what
> > the hwmon-thermal bridge was doing.
> > 
> > In scmi_thermal_sensor_register() this message is printed out only
> > if Thermal registration returned -ENODEV and no err is reported
> > (which means teh specified sensor was not found attached to any TZ),
> > while in the caller of scmi_thermal_sensor_register() for any error
> > returned but -ENOMEM I print:
> > 
> > 	"Thermal zone misconfigured for %s. err=%d\n",
> > 
> > since any error reported by Thermal other than ENODEV and ENOMEM
> > means the DT parsing unveiled some configuration anomaly.
> > 
> 
> Ok, then let's hope that this finds misconfigurations and drop the
> info message.

The mismatch of indexes at hand won't be catched being reported by
Thermal as misconfig but just as not found ENODEV.

Anyway it is fine for me to drop the message.

> 
> I just noticed another problem in your code:
> 
> +		if (ret == -ENOMEM)
> +			return ret;
> +		else if (ret)
> +			dev_warn(dev,
> +				 "Thermal zone misconfigured for %s. err=%d\n",
> +				 sensor->name, ret);
> 
> Static analyzers will rightfully notice that else after return is unnecessary.
> Please rewrite and drop the else. I think something like
> 

Ah yes...my bad.

> 		if (ret) {
> 			if (ret == -ENOMEM)
> 				return ret;
> 			dev_warn(dev,
> 				 "Thermal zone misconfigured for %s. err=%d\n",
> 				 sensor->name, ret);
> 		}
> 
> would be better since ret would only be evaluated once in the no-error case.
> 

I'll resend this one with the fix and the dropped message.

Thanks,
Cristian
diff mbox series

Patch

diff --git a/drivers/hwmon/scmi-hwmon.c b/drivers/hwmon/scmi-hwmon.c
index b1329a58ce40..124fe8ee1b9b 100644
--- a/drivers/hwmon/scmi-hwmon.c
+++ b/drivers/hwmon/scmi-hwmon.c
@@ -20,6 +20,11 @@  struct scmi_sensors {
 	const struct scmi_sensor_info **info[hwmon_max];
 };
 
+struct scmi_thermal_sensor {
+	const struct scmi_protocol_handle *ph;
+	const struct scmi_sensor_info *info;
+};
+
 static inline u64 __pow10(u8 x)
 {
 	u64 r = 1;
@@ -64,16 +69,14 @@  static int scmi_hwmon_scale(const struct scmi_sensor_info *sensor, u64 *value)
 	return 0;
 }
 
-static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
-			   u32 attr, int channel, long *val)
+static int scmi_hwmon_read_scaled_value(const struct scmi_protocol_handle *ph,
+					const struct scmi_sensor_info *sensor,
+					long *val)
 {
 	int ret;
 	u64 value;
-	const struct scmi_sensor_info *sensor;
-	struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev);
 
-	sensor = *(scmi_sensors->info[type] + channel);
-	ret = sensor_ops->reading_get(scmi_sensors->ph, sensor->id, &value);
+	ret = sensor_ops->reading_get(ph, sensor->id, &value);
 	if (ret)
 		return ret;
 
@@ -84,6 +87,17 @@  static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
 	return ret;
 }
 
+static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+			   u32 attr, int channel, long *val)
+{
+	const struct scmi_sensor_info *sensor;
+	struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev);
+
+	sensor = *(scmi_sensors->info[type] + channel);
+
+	return scmi_hwmon_read_scaled_value(scmi_sensors->ph, sensor, val);
+}
+
 static int
 scmi_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type,
 		       u32 attr, int channel, const char **str)
@@ -122,6 +136,25 @@  static struct hwmon_chip_info scmi_chip_info = {
 	.info = NULL,
 };
 
+static int scmi_hwmon_thermal_get_temp(struct thermal_zone_device *tz,
+				       int *temp)
+{
+	int ret;
+	long value;
+	struct scmi_thermal_sensor *th_sensor = tz->devdata;
+
+	ret = scmi_hwmon_read_scaled_value(th_sensor->ph, th_sensor->info,
+					   &value);
+	if (!ret)
+		*temp = value;
+
+	return ret;
+}
+
+static const struct thermal_zone_device_ops scmi_hwmon_thermal_ops = {
+	.get_temp = scmi_hwmon_thermal_get_temp,
+};
+
 static int scmi_hwmon_add_chan_info(struct hwmon_channel_info *scmi_hwmon_chan,
 				    struct device *dev, int num,
 				    enum hwmon_sensor_types type, u32 config)
@@ -149,7 +182,6 @@  static enum hwmon_sensor_types scmi_types[] = {
 };
 
 static u32 hwmon_attributes[hwmon_max] = {
-	[hwmon_chip] = HWMON_C_REGISTER_TZ,
 	[hwmon_temp] = HWMON_T_INPUT | HWMON_T_LABEL,
 	[hwmon_in] = HWMON_I_INPUT | HWMON_I_LABEL,
 	[hwmon_curr] = HWMON_C_INPUT | HWMON_C_LABEL,
@@ -157,6 +189,43 @@  static u32 hwmon_attributes[hwmon_max] = {
 	[hwmon_energy] = HWMON_E_INPUT | HWMON_E_LABEL,
 };
 
+static int scmi_thermal_sensor_register(struct device *dev,
+					const struct scmi_protocol_handle *ph,
+					const struct scmi_sensor_info *sensor)
+{
+	struct scmi_thermal_sensor *th_sensor;
+	struct thermal_zone_device *tzd;
+
+	th_sensor = devm_kzalloc(dev, sizeof(*th_sensor), GFP_KERNEL);
+	if (!th_sensor)
+		return -ENOMEM;
+
+	th_sensor->ph = ph;
+	th_sensor->info = sensor;
+
+	/*
+	 * Try to register a temperature sensor with the Thermal Framework:
+	 * skip sensors not defined as part of any thermal zone (-ENODEV) but
+	 * report any other errors related to misconfigured zones/sensors.
+	 */
+	tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor,
+					    &scmi_hwmon_thermal_ops);
+	if (IS_ERR(tzd)) {
+		devm_kfree(dev, th_sensor);
+
+		if (PTR_ERR(tzd) != -ENODEV)
+			return PTR_ERR(tzd);
+
+		dev_info(dev, "Sensor '%s' not attached to any thermal zone.\n",
+			 sensor->name);
+	} else {
+		dev_dbg(dev, "Sensor '%s' attached to thermal zone ID:%d\n",
+			sensor->name, tzd->id);
+	}
+
+	return 0;
+}
+
 static int scmi_hwmon_probe(struct scmi_device *sdev)
 {
 	int i, idx;
@@ -164,7 +233,7 @@  static int scmi_hwmon_probe(struct scmi_device *sdev)
 	enum hwmon_sensor_types type;
 	struct scmi_sensors *scmi_sensors;
 	const struct scmi_sensor_info *sensor;
-	int nr_count[hwmon_max] = {0}, nr_types = 0;
+	int nr_count[hwmon_max] = {0}, nr_types = 0, nr_count_temp = 0;
 	const struct hwmon_chip_info *chip_info;
 	struct device *hwdev, *dev = &sdev->dev;
 	struct hwmon_channel_info *scmi_hwmon_chan;
@@ -208,10 +277,8 @@  static int scmi_hwmon_probe(struct scmi_device *sdev)
 		}
 	}
 
-	if (nr_count[hwmon_temp]) {
-		nr_count[hwmon_chip]++;
-		nr_types++;
-	}
+	if (nr_count[hwmon_temp])
+		nr_count_temp = nr_count[hwmon_temp];
 
 	scmi_hwmon_chan = devm_kcalloc(dev, nr_types, sizeof(*scmi_hwmon_chan),
 				       GFP_KERNEL);
@@ -262,8 +329,30 @@  static int scmi_hwmon_probe(struct scmi_device *sdev)
 	hwdev = devm_hwmon_device_register_with_info(dev, "scmi_sensors",
 						     scmi_sensors, chip_info,
 						     NULL);
+	if (IS_ERR(hwdev))
+		return PTR_ERR(hwdev);
+
+	for (i = 0; i < nr_count_temp; i++) {
+		int ret;
 
-	return PTR_ERR_OR_ZERO(hwdev);
+		sensor = *(scmi_sensors->info[hwmon_temp] + i);
+		if (!sensor)
+			continue;
+
+		/*
+		 * Warn on any misconfiguration related to thermal zones but
+		 * bail out of probing only on memory errors.
+		 */
+		ret = scmi_thermal_sensor_register(dev, ph, sensor);
+		if (ret == -ENOMEM)
+			return ret;
+		else if (ret)
+			dev_warn(dev,
+				 "Thermal zone misconfigured for %s. err=%d\n",
+				 sensor->name, ret);
+	}
+
+	return 0;
 }
 
 static const struct scmi_device_id scmi_id_table[] = {