diff mbox series

[v2,4/4] thermal: mediatek: add another get_temp ops for thermal sensors

Message ID 20220720181854.547881-5-aouledameur@baylibre.com (mailing list archive)
State New, archived
Headers show
Series thermal: mediatek: Add support for MT8365 SoC | expand

Commit Message

Amjad Ouled-Ameur July 20, 2022, 6:18 p.m. UTC
Provide thermal zones to read each thermal sensor in the SoC. All the thermal
sensor values in the SoC can then be read through the nodes under
/sys/class/thermal/

In mtk_thermal_bank_temperature, return -EAGAIN instead of -EACCES
on the first read of a sensor, which often yields bogus values.
This avoids the following warning on boot:

  thermal thermal_zone6: failed to read out thermal zone (-13)

Signed-off-by: Michael Kao <michael.kao@mediatek.com>
Signed-off-by: Hsin-Yi Wang <hsinyi@chromium.org>
Signed-off-by: Amjad Ouled-Ameur <aouledameur@baylibre.com>
Tested-by: Amjad Ouled-Ameur <aouledameur@baylibre.com>
---
 drivers/thermal/mtk_thermal.c | 100 ++++++++++++++++++++++++++--------
 1 file changed, 76 insertions(+), 24 deletions(-)

Comments

Christophe JAILLET July 20, 2022, 6:54 p.m. UTC | #1
Le 20/07/2022 à 20:18, Amjad Ouled-Ameur a écrit :
> Provide thermal zone to read thermal sensor in the SoC. We can read all the
> thermal sensors value in the SoC by the node /sys/class/thermal/
> 
> In mtk_thermal_bank_temperature, return -EAGAIN instead of -EACCESS
> on the first read of sensor that often are bogus values.
> This can avoid following warning on boot:
> 
>    thermal thermal_zone6: failed to read out thermal zone (-13)
> 
> Signed-off-by: Michael Kao <michael.kao-NuS5LvNUpcJWk0Htik3J/w@public.gmane.org>
> Signed-off-by: Hsin-Yi Wang <hsinyi-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
> Signed-off-by: Amjad Ouled-Ameur <aouledameur-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
> Tested-by: Amjad Ouled-Ameur <aouledameur-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
> ---
>   drivers/thermal/mtk_thermal.c | 100 ++++++++++++++++++++++++++--------
>   1 file changed, 76 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/thermal/mtk_thermal.c b/drivers/thermal/mtk_thermal.c
> index 1dc276f8c4f1..79b14ce1a08d 100644
> --- a/drivers/thermal/mtk_thermal.c
> +++ b/drivers/thermal/mtk_thermal.c
> @@ -259,6 +259,11 @@ enum mtk_thermal_version {
>   
>   struct mtk_thermal;
>   
> +struct mtk_thermal_zone {
> +	struct mtk_thermal *mt;
> +	int id;
> +};
> +
>   struct thermal_bank_cfg {
>   	unsigned int num_sensors;
>   	const int *sensors;
> @@ -709,6 +714,32 @@ static void mtk_thermal_put_bank(struct mtk_thermal_bank *bank)
>   		mutex_unlock(&mt->lock);
>   }
>   
> +static u32 _get_sensor_temp(struct mtk_thermal *mt, int id)
> +{
> +	u32 raw;
> +	int temp;
> +
> +	const struct mtk_thermal_data *conf = mt->conf;
> +
> +	raw = readl(mt->thermal_base + conf->msr[id]);
> +
> +	if (mt->conf->version == MTK_THERMAL_V1)
> +		temp = raw_to_mcelsius_v1(mt, id, raw);
> +	else
> +		temp = raw_to_mcelsius_v2(mt, id, raw);
> +
> +	/*
> +	 * The first read of a sensor often contains very high bogus
> +	 * temperature value. Filter these out so that the system does
> +	 * not immediately shut down.
> +	 */
> +
> +	if (temp > 200000)
> +		return  -EAGAIN;

This function returns a u32. Is it ok to return -EAGAIN?

There are also 2 spaces here...

> +	else
> +		return	temp;

... and a tab here.

> +}
> +
>   /**
>    * mtk_thermal_bank_temperature - get the temperature of a bank
>    * @bank:	The bank
> @@ -721,26 +752,9 @@ static int mtk_thermal_bank_temperature(struct mtk_thermal_bank *bank)
>   	struct mtk_thermal *mt = bank->mt;
>   	const struct mtk_thermal_data *conf = mt->conf;
>   	int i, temp = INT_MIN, max = INT_MIN;
> -	u32 raw;
>   
>   	for (i = 0; i < conf->bank_data[bank->id].num_sensors; i++) {
> -		raw = readl(mt->thermal_base + conf->msr[i]);
> -
> -		if (mt->conf->version == MTK_THERMAL_V1) {
> -			temp = raw_to_mcelsius_v1(
> -				mt, conf->bank_data[bank->id].sensors[i], raw);
> -		} else {
> -			temp = raw_to_mcelsius_v2(
> -				mt, conf->bank_data[bank->id].sensors[i], raw);
> -		}
> -
> -		/*
> -		 * The first read of a sensor often contains very high bogus
> -		 * temperature value. Filter these out so that the system does
> -		 * not immediately shut down.
> -		 */
> -		if (temp > 200000)
> -			temp = 0;
> +		temp = _get_sensor_temp(mt, i);

Is it ok if _get_sensor_temp() returns -EAGAIN?

>   
>   		if (temp > max)
>   			max = temp;
> @@ -751,7 +765,8 @@ static int mtk_thermal_bank_temperature(struct mtk_thermal_bank *bank)
>   
>   static int mtk_read_temp(void *data, int *temperature)
>   {
> -	struct mtk_thermal *mt = data;
> +	struct mtk_thermal_zone *tz = data;
> +	struct mtk_thermal *mt = tz->mt;
>   	int i;
>   	int tempmax = INT_MIN;
>   
> @@ -770,10 +785,28 @@ static int mtk_read_temp(void *data, int *temperature)
>   	return 0;
>   }
>   
> +static int mtk_read_sensor_temp(void *data, int *temperature)
> +{
> +	struct mtk_thermal_zone *tz = data;
> +	struct mtk_thermal *mt = tz->mt;
> +	int id = tz->id - 1;
> +
> +	if (id < 0)
> +		return  -EACCES;

2 spaces.

> +
> +	*temperature = _get_sensor_temp(mt, id);

If _get_sensor_temp() returns -EAGAIN, should this be propagated to the 
caller?

> +
> +	return 0;
> +}
> +
>   static const struct thermal_zone_of_device_ops mtk_thermal_ops = {
>   	.get_temp = mtk_read_temp,
>   };
>   
> +static const struct thermal_zone_of_device_ops mtk_thermal_sensor_ops = {
> +	.get_temp = mtk_read_sensor_temp,
> +};
> +
>   static void mtk_thermal_init_bank(struct mtk_thermal *mt, int num,
>   				  u32 apmixed_phys_base, u32 auxadc_phys_base,
>   				  int ctrl_id)
> @@ -1072,6 +1105,7 @@ static int mtk_thermal_probe(struct platform_device *pdev)
>   	u64 auxadc_phys_base, apmixed_phys_base;
>   	struct thermal_zone_device *tzdev;
>   	void __iomem *apmixed_base, *auxadc_base;
> +	struct mtk_thermal_zone *tz;
>   
>   	mt = devm_kzalloc(&pdev->dev, sizeof(*mt), GFP_KERNEL);
>   	if (!mt)
> @@ -1161,11 +1195,29 @@ static int mtk_thermal_probe(struct platform_device *pdev)
>   
>   	platform_set_drvdata(pdev, mt);
>   
> -	tzdev = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, mt,
> -						     &mtk_thermal_ops);
> -	if (IS_ERR(tzdev)) {
> -		ret = PTR_ERR(tzdev);
> -		goto err_disable_clk_peri_therm;
> +	for (i = 0; i < mt->conf->num_sensors + 1; i++) {
> +		tz = kmalloc(sizeof(*tz), GFP_KERNEL);

Should this memory allocation be a devm_kmalloc(), or is this memory 
freed at some point by the framework?

(I don't know the thermal_zone API and the patch has no kfree())

CJ

> +		if (!tz)
> +			return -ENOMEM;
> +
> +		tz->mt = mt;
> +		tz->id = i;
> +
> +		tzdev = devm_thermal_zone_of_sensor_register(&pdev->dev, i, tz, (i == 0) ?
> +							     &mtk_thermal_ops :
> +							     &mtk_thermal_sensor_ops);
> +
> +		if (IS_ERR(tzdev)) {
> +			if (PTR_ERR(tzdev) == -ENODEV) {
> +				dev_warn(&pdev->dev,
> +					 "sensor %d not registered in thermal zone in dt\n", i);
> +				continue;
> +			}
> +			if (PTR_ERR(tzdev) == -EACCES) {
> +				ret = PTR_ERR(tzdev);
> +				goto err_disable_clk_peri_therm;
> +			}
> +		}
>   	}
>   
>   	ret = devm_thermal_add_hwmon_sysfs(tzdev);
Amjad Ouled-Ameur Aug. 26, 2022, 2:12 p.m. UTC | #2
Hi Christophe,

On 7/20/22 20:54, Christophe JAILLET wrote:
> Le 20/07/2022 à 20:18, Amjad Ouled-Ameur a écrit :
>> Provide thermal zone to read thermal sensor in the SoC. We can read 
>> all the
>> thermal sensors value in the SoC by the node /sys/class/thermal/
>>
>> In mtk_thermal_bank_temperature, return -EAGAIN instead of -EACCESS
>> on the first read of sensor that often are bogus values.
>> This can avoid following warning on boot:
>>
>>    thermal thermal_zone6: failed to read out thermal zone (-13)
>>
>> Signed-off-by: Michael Kao 
>> <michael.kao-NuS5LvNUpcJWk0Htik3J/w@public.gmane.org>
>> Signed-off-by: Hsin-Yi Wang 
>> <hsinyi-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
>> Signed-off-by: Amjad Ouled-Ameur 
>> <aouledameur-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
>> Tested-by: Amjad Ouled-Ameur 
>> <aouledameur-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
>> ---
>>   drivers/thermal/mtk_thermal.c | 100 ++++++++++++++++++++++++++--------
>>   1 file changed, 76 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/thermal/mtk_thermal.c 
>> b/drivers/thermal/mtk_thermal.c
>> index 1dc276f8c4f1..79b14ce1a08d 100644
>> --- a/drivers/thermal/mtk_thermal.c
>> +++ b/drivers/thermal/mtk_thermal.c
>> @@ -259,6 +259,11 @@ enum mtk_thermal_version {
>>     struct mtk_thermal;
>>   +struct mtk_thermal_zone {
>> +    struct mtk_thermal *mt;
>> +    int id;
>> +};
>> +
>>   struct thermal_bank_cfg {
>>       unsigned int num_sensors;
>>       const int *sensors;
>> @@ -709,6 +714,32 @@ static void mtk_thermal_put_bank(struct 
>> mtk_thermal_bank *bank)
>>           mutex_unlock(&mt->lock);
>>   }
>>   +static u32 _get_sensor_temp(struct mtk_thermal *mt, int id)
>> +{
>> +    u32 raw;
>> +    int temp;
>> +
>> +    const struct mtk_thermal_data *conf = mt->conf;
>> +
>> +    raw = readl(mt->thermal_base + conf->msr[id]);
>> +
>> +    if (mt->conf->version == MTK_THERMAL_V1)
>> +        temp = raw_to_mcelsius_v1(mt, id, raw);
>> +    else
>> +        temp = raw_to_mcelsius_v2(mt, id, raw);
>> +
>> +    /*
>> +     * The first read of a sensor often contains very high bogus
>> +     * temperature value. Filter these out so that the system does
>> +     * not immediately shut down.
>> +     */
>> +
>> +    if (temp > 200000)
>> +        return  -EAGAIN;
>
> This function returns a u32. Is it ok to return -EAGAIN?
>
_get_sensor_temp() should normally return int instead of u32; will fix it
in V3.
> There is also 2 spaces here...
>
>> +    else
>> +        return    temp;
>
> ... and a tab here.
>
will fix them in V3.
>> +}
>> +
>>   /**
>>    * mtk_thermal_bank_temperature - get the temperature of a bank
>>    * @bank:    The bank
>> @@ -721,26 +752,9 @@ static int mtk_thermal_bank_temperature(struct 
>> mtk_thermal_bank *bank)
>>       struct mtk_thermal *mt = bank->mt;
>>       const struct mtk_thermal_data *conf = mt->conf;
>>       int i, temp = INT_MIN, max = INT_MIN;
>> -    u32 raw;
>>         for (i = 0; i < conf->bank_data[bank->id].num_sensors; i++) {
>> -        raw = readl(mt->thermal_base + conf->msr[i]);
>> -
>> -        if (mt->conf->version == MTK_THERMAL_V1) {
>> -            temp = raw_to_mcelsius_v1(
>> -                mt, conf->bank_data[bank->id].sensors[i], raw);
>> -        } else {
>> -            temp = raw_to_mcelsius_v2(
>> -                mt, conf->bank_data[bank->id].sensors[i], raw);
>> -        }
>> -
>> -        /*
>> -         * The first read of a sensor often contains very high bogus
>> -         * temperature value. Filter these out so that the system does
>> -         * not immediately shut down.
>> -         */
>> -        if (temp > 200000)
>> -            temp = 0;
>> +        temp = _get_sensor_temp(mt, i);
>
> Is it ok if _get_sensor_temp() returns -EAGAIN?
>
drivers/thermal/thermal_core.c:update_temperature() checks for -EAGAIN after
thermal_zone_get_temp() is called; thus, I think it's good to return -EAGAIN
in case of failure.

>>             if (temp > max)
>>               max = temp;
>> @@ -751,7 +765,8 @@ static int mtk_thermal_bank_temperature(struct 
>> mtk_thermal_bank *bank)
>>     static int mtk_read_temp(void *data, int *temperature)
>>   {
>> -    struct mtk_thermal *mt = data;
>> +    struct mtk_thermal_zone *tz = data;
>> +    struct mtk_thermal *mt = tz->mt;
>>       int i;
>>       int tempmax = INT_MIN;
>>   @@ -770,10 +785,28 @@ static int mtk_read_temp(void *data, int 
>> *temperature)
>>       return 0;
>>   }
>>   +static int mtk_read_sensor_temp(void *data, int *temperature)
>> +{
>> +    struct mtk_thermal_zone *tz = data;
>> +    struct mtk_thermal *mt = tz->mt;
>> +    int id = tz->id - 1;
>> +
>> +    if (id < 0)
>> +        return  -EACCES;
>
> 2 spaces.
>

will fix it in V3.
>> +
>> +    *temperature = _get_sensor_temp(mt, id);
>
> If _get_sensor_temp() returns -EAGAIN, should this be propagated to 
> the caller?
>
>> +
>> +    return 0;
>> +}
>> +
>>   static const struct thermal_zone_of_device_ops mtk_thermal_ops = {
>>       .get_temp = mtk_read_temp,
>>   };
>>   +static const struct thermal_zone_of_device_ops 
>> mtk_thermal_sensor_ops = {
>> +    .get_temp = mtk_read_sensor_temp,
>> +};
>> +
>>   static void mtk_thermal_init_bank(struct mtk_thermal *mt, int num,
>>                     u32 apmixed_phys_base, u32 auxadc_phys_base,
>>                     int ctrl_id)
>> @@ -1072,6 +1105,7 @@ static int mtk_thermal_probe(struct 
>> platform_device *pdev)
>>       u64 auxadc_phys_base, apmixed_phys_base;
>>       struct thermal_zone_device *tzdev;
>>       void __iomem *apmixed_base, *auxadc_base;
>> +    struct mtk_thermal_zone *tz;
>>         mt = devm_kzalloc(&pdev->dev, sizeof(*mt), GFP_KERNEL);
>>       if (!mt)
>> @@ -1161,11 +1195,29 @@ static int mtk_thermal_probe(struct 
>> platform_device *pdev)
>>         platform_set_drvdata(pdev, mt);
>>   -    tzdev = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, mt,
>> -                             &mtk_thermal_ops);
>> -    if (IS_ERR(tzdev)) {
>> -        ret = PTR_ERR(tzdev);
>> -        goto err_disable_clk_peri_therm;
>> +    for (i = 0; i < mt->conf->num_sensors + 1; i++) {
>> +        tz = kmalloc(sizeof(*tz), GFP_KERNEL);
>
> Should this memory allocation be a devm_kmalloc(), or is this memory 
> freed at some point by the framework?
>
> (I don't know the thermal_zone API and the patch has no kfree())
AFAIK, the thermal API does not free private data; therefore devm_kmalloc()
should be used.
>
> CJ
>
>> +        if (!tz)
>> +            return -ENOMEM;
>> +
>> +        tz->mt = mt;
>> +        tz->id = i;
>> +
>> +        tzdev = devm_thermal_zone_of_sensor_register(&pdev->dev, i, 
>> tz, (i == 0) ?
>> +                                 &mtk_thermal_ops :
>> +                                 &mtk_thermal_sensor_ops);
>> +
>> +        if (IS_ERR(tzdev)) {
>> +            if (PTR_ERR(tzdev) == -ENODEV) {
>> +                dev_warn(&pdev->dev,
>> +                     "sensor %d not registered in thermal zone in 
>> dt\n", i);
>> +                continue;
>> +            }
>> +            if (PTR_ERR(tzdev) == -EACCES) {
>> +                ret = PTR_ERR(tzdev);
>> +                goto err_disable_clk_peri_therm;
>> +            }
>> +        }
>>       }
>>         ret = devm_thermal_add_hwmon_sysfs(tzdev);
>
Thank you Christophe for the review.

Regards,

Amjad
diff mbox series

Patch

diff --git a/drivers/thermal/mtk_thermal.c b/drivers/thermal/mtk_thermal.c
index 1dc276f8c4f1..79b14ce1a08d 100644
--- a/drivers/thermal/mtk_thermal.c
+++ b/drivers/thermal/mtk_thermal.c
@@ -259,6 +259,11 @@  enum mtk_thermal_version {
 
 struct mtk_thermal;
 
+struct mtk_thermal_zone {
+	struct mtk_thermal *mt;
+	int id;
+};
+
 struct thermal_bank_cfg {
 	unsigned int num_sensors;
 	const int *sensors;
@@ -709,6 +714,32 @@  static void mtk_thermal_put_bank(struct mtk_thermal_bank *bank)
 		mutex_unlock(&mt->lock);
 }
 
+static u32 _get_sensor_temp(struct mtk_thermal *mt, int id)
+{
+	u32 raw;
+	int temp;
+
+	const struct mtk_thermal_data *conf = mt->conf;
+
+	raw = readl(mt->thermal_base + conf->msr[id]);
+
+	if (mt->conf->version == MTK_THERMAL_V1)
+		temp = raw_to_mcelsius_v1(mt, id, raw);
+	else
+		temp = raw_to_mcelsius_v2(mt, id, raw);
+
+	/*
+	 * The first read of a sensor often contains very high bogus
+	 * temperature value. Filter these out so that the system does
+	 * not immediately shut down.
+	 */
+
+	if (temp > 200000)
+		return  -EAGAIN;
+	else
+		return	temp;
+}
+
 /**
  * mtk_thermal_bank_temperature - get the temperature of a bank
  * @bank:	The bank
@@ -721,26 +752,9 @@  static int mtk_thermal_bank_temperature(struct mtk_thermal_bank *bank)
 	struct mtk_thermal *mt = bank->mt;
 	const struct mtk_thermal_data *conf = mt->conf;
 	int i, temp = INT_MIN, max = INT_MIN;
-	u32 raw;
 
 	for (i = 0; i < conf->bank_data[bank->id].num_sensors; i++) {
-		raw = readl(mt->thermal_base + conf->msr[i]);
-
-		if (mt->conf->version == MTK_THERMAL_V1) {
-			temp = raw_to_mcelsius_v1(
-				mt, conf->bank_data[bank->id].sensors[i], raw);
-		} else {
-			temp = raw_to_mcelsius_v2(
-				mt, conf->bank_data[bank->id].sensors[i], raw);
-		}
-
-		/*
-		 * The first read of a sensor often contains very high bogus
-		 * temperature value. Filter these out so that the system does
-		 * not immediately shut down.
-		 */
-		if (temp > 200000)
-			temp = 0;
+		temp = _get_sensor_temp(mt, i);
 
 		if (temp > max)
 			max = temp;
@@ -751,7 +765,8 @@  static int mtk_thermal_bank_temperature(struct mtk_thermal_bank *bank)
 
 static int mtk_read_temp(void *data, int *temperature)
 {
-	struct mtk_thermal *mt = data;
+	struct mtk_thermal_zone *tz = data;
+	struct mtk_thermal *mt = tz->mt;
 	int i;
 	int tempmax = INT_MIN;
 
@@ -770,10 +785,28 @@  static int mtk_read_temp(void *data, int *temperature)
 	return 0;
 }
 
+static int mtk_read_sensor_temp(void *data, int *temperature)
+{
+	struct mtk_thermal_zone *tz = data;
+	struct mtk_thermal *mt = tz->mt;
+	int id = tz->id - 1;
+
+	if (id < 0)
+		return  -EACCES;
+
+	*temperature = _get_sensor_temp(mt, id);
+
+	return 0;
+}
+
 static const struct thermal_zone_of_device_ops mtk_thermal_ops = {
 	.get_temp = mtk_read_temp,
 };
 
+static const struct thermal_zone_of_device_ops mtk_thermal_sensor_ops = {
+	.get_temp = mtk_read_sensor_temp,
+};
+
 static void mtk_thermal_init_bank(struct mtk_thermal *mt, int num,
 				  u32 apmixed_phys_base, u32 auxadc_phys_base,
 				  int ctrl_id)
@@ -1072,6 +1105,7 @@  static int mtk_thermal_probe(struct platform_device *pdev)
 	u64 auxadc_phys_base, apmixed_phys_base;
 	struct thermal_zone_device *tzdev;
 	void __iomem *apmixed_base, *auxadc_base;
+	struct mtk_thermal_zone *tz;
 
 	mt = devm_kzalloc(&pdev->dev, sizeof(*mt), GFP_KERNEL);
 	if (!mt)
@@ -1161,11 +1195,29 @@  static int mtk_thermal_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, mt);
 
-	tzdev = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, mt,
-						     &mtk_thermal_ops);
-	if (IS_ERR(tzdev)) {
-		ret = PTR_ERR(tzdev);
-		goto err_disable_clk_peri_therm;
+	for (i = 0; i < mt->conf->num_sensors + 1; i++) {
+		tz = kmalloc(sizeof(*tz), GFP_KERNEL);
+		if (!tz)
+			return -ENOMEM;
+
+		tz->mt = mt;
+		tz->id = i;
+
+		tzdev = devm_thermal_zone_of_sensor_register(&pdev->dev, i, tz, (i == 0) ?
+							     &mtk_thermal_ops :
+							     &mtk_thermal_sensor_ops);
+
+		if (IS_ERR(tzdev)) {
+			if (PTR_ERR(tzdev) == -ENODEV) {
+				dev_warn(&pdev->dev,
+					 "sensor %d not registered in thermal zone in dt\n", i);
+				continue;
+			}
+			if (PTR_ERR(tzdev) == -EACCES) {
+				ret = PTR_ERR(tzdev);
+				goto err_disable_clk_peri_therm;
+			}
+		}
 	}
 
 	ret = devm_thermal_add_hwmon_sysfs(tzdev);