diff mbox series

[2/6] thermal: of: Export non-devres helper to register/unregister thermal zone

Message ID 20250103163805.1775705-3-claudiu.beznea.uj@bp.renesas.com (mailing list archive)
State New
Delegated to: Daniel Lezcano
Headers show
Series thermal: renesas: Add support for RZ/G3S | expand

Commit Message

Claudiu Jan. 3, 2025, 4:38 p.m. UTC
From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>

On the Renesas RZ/G3S (and other Renesas SoCs, e.g., RZ/G2{L, LC, UL}),
clocks are managed through PM domains. These PM domains, registered on
behalf of the clock controller driver, are configured with
GENPD_FLAG_PM_CLK. In most of the Renesas drivers used by RZ SoCs, the
clocks are enabled/disabled using runtime PM APIs.

During probe, devices are attached to the PM domain controlling their
clocks. Similarly, during removal, devices are detached from the PM domain.

The detachment call stack is as follows:

device_driver_detach() ->
  device_release_driver_internal() ->
    __device_release_driver() ->
      device_remove() ->
        platform_remove() ->
	  dev_pm_domain_detach()

In the upcoming Renesas RZ/G3S thermal driver, the
struct thermal_zone_device_ops::change_mode API is implemented to
start/stop the thermal sensor unit. Register settings are updated within
the change_mode API.

In case devres helpers are used for thermal zone register/unregister the
struct thermal_zone_device_ops::change_mode API is invoked when the
driver is unbound. The identified call stack is as follows:

device_driver_detach() ->
  device_release_driver_internal() ->
    device_unbind_cleanup() ->
      devres_release_all() ->
        devm_thermal_of_zone_release() ->
	  thermal_zone_device_disable() ->
	    thermal_zone_device_set_mode() ->
	      rzg3s_thermal_change_mode()

The device_unbind_cleanup() function is called after the thermal device is
detached from the PM domain (via dev_pm_domain_detach()).

The rzg3s_thermal_change_mode() implementation calls
pm_runtime_resume_and_get()/pm_runtime_put_autosuspend() before/after
accessing the registers. However, during the unbind scenario, the
devm_thermal_of_zone_release() is invoked after dev_pm_domain_detach().
Consequently, the clocks are not enabled, as the device is removed from
the PM domain at this time, leading to an Asynchronous SError Interrupt.
The system cannot be used after this.

Add thermal_of_zone_register()/thermal_of_zone_unregister(). These will
be used in the upcomming RZ/G3S thermal driver.

Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
---
 drivers/thermal/thermal_of.c |  8 +++++---
 include/linux/thermal.h      | 14 ++++++++++++++
 2 files changed, 19 insertions(+), 3 deletions(-)

Comments

Daniel Lezcano Jan. 9, 2025, 5:34 p.m. UTC | #1
Ulf,

can you have a look at this particular patch please ?

Perhaps this scenario already happened in the past and there is an 
alternative to fix it instead of this proposed change


On 03/01/2025 17:38, Claudiu wrote:
> From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
> 
> On the Renesas RZ/G3S (and other Renesas SoCs, e.g., RZ/G2{L, LC, UL}),
> clocks are managed through PM domains. These PM domains, registered on
> behalf of the clock controller driver, are configured with
> GENPD_FLAG_PM_CLK. In most of the Renesas drivers used by RZ SoCs, the
> clocks are enabled/disabled using runtime PM APIs.
> 
> During probe, devices are attached to the PM domain controlling their
> clocks. Similarly, during removal, devices are detached from the PM domain.
> 
> The detachment call stack is as follows:
> 
> device_driver_detach() ->
>    device_release_driver_internal() ->
>      __device_release_driver() ->
>        device_remove() ->
>          platform_remove() ->
> 	  dev_pm_domain_detach()
> 
> In the upcoming Renesas RZ/G3S thermal driver, the
> struct thermal_zone_device_ops::change_mode API is implemented to
> start/stop the thermal sensor unit. Register settings are updated within
> the change_mode API.
> 
> In case devres helpers are used for thermal zone register/unregister the
> struct thermal_zone_device_ops::change_mode API is invoked when the
> driver is unbound. The identified call stack is as follows:
> 
> device_driver_detach() ->
>    device_release_driver_internal() ->
>      device_unbind_cleanup() ->
>        devres_release_all() ->
>          devm_thermal_of_zone_release() ->
> 	  thermal_zone_device_disable() ->
> 	    thermal_zone_device_set_mode() ->
> 	      rzg3s_thermal_change_mode()
> 
> The device_unbind_cleanup() function is called after the thermal device is
> detached from the PM domain (via dev_pm_domain_detach()).
> 
> The rzg3s_thermal_change_mode() implementation calls
> pm_runtime_resume_and_get()/pm_runtime_put_autosuspend() before/after
> accessing the registers. However, during the unbind scenario, the
> devm_thermal_of_zone_release() is invoked after dev_pm_domain_detach().
> Consequently, the clocks are not enabled, as the device is removed from
> the PM domain at this time, leading to an Asynchronous SError Interrupt.
> The system cannot be used after this.
> 
> Add thermal_of_zone_register()/thermal_of_zone_unregister(). These will
> be used in the upcomming RZ/G3S thermal driver.
> 
> Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
> ---
>   drivers/thermal/thermal_of.c |  8 +++++---
>   include/linux/thermal.h      | 14 ++++++++++++++
>   2 files changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
> index fab11b98ca49..8fc35d20db60 100644
> --- a/drivers/thermal/thermal_of.c
> +++ b/drivers/thermal/thermal_of.c
> @@ -329,11 +329,12 @@ static bool thermal_of_should_bind(struct thermal_zone_device *tz,
>    *
>    * @tz: a pointer to the thermal zone structure
>    */
> -static void thermal_of_zone_unregister(struct thermal_zone_device *tz)
> +void thermal_of_zone_unregister(struct thermal_zone_device *tz)
>   {
>   	thermal_zone_device_disable(tz);
>   	thermal_zone_device_unregister(tz);
>   }
> +EXPORT_SYMBOL_GPL(thermal_of_zone_unregister);
>   
>   /**
>    * thermal_of_zone_register - Register a thermal zone with device node
> @@ -355,8 +356,8 @@ static void thermal_of_zone_unregister(struct thermal_zone_device *tz)
>    *	- ENOMEM: if one structure can not be allocated
>    *	- Other negative errors are returned by the underlying called functions
>    */
> -static struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
> -							    const struct thermal_zone_device_ops *ops)
> +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
> +						     const struct thermal_zone_device_ops *ops)
>   {
>   	struct thermal_zone_device_ops of_ops = *ops;
>   	struct thermal_zone_device *tz;
> @@ -429,6 +430,7 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
>   
>   	return ERR_PTR(ret);
>   }
> +EXPORT_SYMBOL_GPL(thermal_of_zone_register);
>   
>   static void devm_thermal_of_zone_release(struct device *dev, void *res)
>   {
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index 69f9bedd0ee8..adbb4092a064 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -195,13 +195,23 @@ struct thermal_zone_params {
>   
>   /* Function declarations */
>   #ifdef CONFIG_THERMAL_OF
> +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
> +						     const struct thermal_zone_device_ops *ops);
>   struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data,
>   							  const struct thermal_zone_device_ops *ops);
>   
> +void thermal_of_zone_unregister(struct thermal_zone_device *tz);
>   void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_device *tz);
>   
>   #else
>   
> +static inline
> +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
> +						     const struct thermal_zone_device_ops *ops)
> +{
> +	return ERR_PTR(-ENOTSUPP);
> +}
> +
>   static inline
>   struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data,
>   							  const struct thermal_zone_device_ops *ops)
> @@ -209,6 +219,10 @@ struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, in
>   	return ERR_PTR(-ENOTSUPP);
>   }
>   
> +static inline void thermal_of_zone_unregister(struct thermal_zone_device *tz)
> +{
> +}
> +
>   static inline void devm_thermal_of_zone_unregister(struct device *dev,
>   						   struct thermal_zone_device *tz)
>   {
Ulf Hansson Jan. 15, 2025, 3:42 p.m. UTC | #2
On Thu, 9 Jan 2025 at 18:34, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>
>
> Ulf,
>
> can you have a look at this particular patch please ?
>
> Perhaps this scenario already happened in the past and there is an
> alternative to fix it instead of this proposed change

I think the patch makes sense.

If there is a PM domain that is attached to the device that is
managing the clocks for the thermal zone, the detach procedure
certainly needs to be well controlled/synchronized.

>
>
> On 03/01/2025 17:38, Claudiu wrote:
> > From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
> >
> > On the Renesas RZ/G3S (and other Renesas SoCs, e.g., RZ/G2{L, LC, UL}),
> > clocks are managed through PM domains. These PM domains, registered on
> > behalf of the clock controller driver, are configured with
> > GENPD_FLAG_PM_CLK. In most of the Renesas drivers used by RZ SoCs, the
> > clocks are enabled/disabled using runtime PM APIs.
> >
> > During probe, devices are attached to the PM domain controlling their
> > clocks. Similarly, during removal, devices are detached from the PM domain.
> >
> > The detachment call stack is as follows:
> >
> > device_driver_detach() ->
> >    device_release_driver_internal() ->
> >      __device_release_driver() ->
> >        device_remove() ->
> >          platform_remove() ->
> >         dev_pm_domain_detach()
> >
> > In the upcoming Renesas RZ/G3S thermal driver, the
> > struct thermal_zone_device_ops::change_mode API is implemented to
> > start/stop the thermal sensor unit. Register settings are updated within
> > the change_mode API.
> >
> > In case devres helpers are used for thermal zone register/unregister the
> > struct thermal_zone_device_ops::change_mode API is invoked when the
> > driver is unbound. The identified call stack is as follows:
> >
> > device_driver_detach() ->
> >    device_release_driver_internal() ->
> >      device_unbind_cleanup() ->
> >        devres_release_all() ->
> >          devm_thermal_of_zone_release() ->
> >         thermal_zone_device_disable() ->
> >           thermal_zone_device_set_mode() ->
> >             rzg3s_thermal_change_mode()
> >
> > The device_unbind_cleanup() function is called after the thermal device is
> > detached from the PM domain (via dev_pm_domain_detach()).
> >
> > The rzg3s_thermal_change_mode() implementation calls
> > pm_runtime_resume_and_get()/pm_runtime_put_autosuspend() before/after
> > accessing the registers. However, during the unbind scenario, the
> > devm_thermal_of_zone_release() is invoked after dev_pm_domain_detach().
> > Consequently, the clocks are not enabled, as the device is removed from
> > the PM domain at this time, leading to an Asynchronous SError Interrupt.
> > The system cannot be used after this.
> >
> > Add thermal_of_zone_register()/thermal_of_zone_unregister(). These will
> > be used in the upcomming RZ/G3S thermal driver.
> >
> > Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>

Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>

Kind regards
Uffe

> > ---
> >   drivers/thermal/thermal_of.c |  8 +++++---
> >   include/linux/thermal.h      | 14 ++++++++++++++
> >   2 files changed, 19 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
> > index fab11b98ca49..8fc35d20db60 100644
> > --- a/drivers/thermal/thermal_of.c
> > +++ b/drivers/thermal/thermal_of.c
> > @@ -329,11 +329,12 @@ static bool thermal_of_should_bind(struct thermal_zone_device *tz,
> >    *
> >    * @tz: a pointer to the thermal zone structure
> >    */
> > -static void thermal_of_zone_unregister(struct thermal_zone_device *tz)
> > +void thermal_of_zone_unregister(struct thermal_zone_device *tz)
> >   {
> >       thermal_zone_device_disable(tz);
> >       thermal_zone_device_unregister(tz);
> >   }
> > +EXPORT_SYMBOL_GPL(thermal_of_zone_unregister);
> >
> >   /**
> >    * thermal_of_zone_register - Register a thermal zone with device node
> > @@ -355,8 +356,8 @@ static void thermal_of_zone_unregister(struct thermal_zone_device *tz)
> >    *  - ENOMEM: if one structure can not be allocated
> >    *  - Other negative errors are returned by the underlying called functions
> >    */
> > -static struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
> > -                                                         const struct thermal_zone_device_ops *ops)
> > +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
> > +                                                  const struct thermal_zone_device_ops *ops)
> >   {
> >       struct thermal_zone_device_ops of_ops = *ops;
> >       struct thermal_zone_device *tz;
> > @@ -429,6 +430,7 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
> >
> >       return ERR_PTR(ret);
> >   }
> > +EXPORT_SYMBOL_GPL(thermal_of_zone_register);
> >
> >   static void devm_thermal_of_zone_release(struct device *dev, void *res)
> >   {
> > diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> > index 69f9bedd0ee8..adbb4092a064 100644
> > --- a/include/linux/thermal.h
> > +++ b/include/linux/thermal.h
> > @@ -195,13 +195,23 @@ struct thermal_zone_params {
> >
> >   /* Function declarations */
> >   #ifdef CONFIG_THERMAL_OF
> > +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
> > +                                                  const struct thermal_zone_device_ops *ops);
> >   struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data,
> >                                                         const struct thermal_zone_device_ops *ops);
> >
> > +void thermal_of_zone_unregister(struct thermal_zone_device *tz);
> >   void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_device *tz);
> >
> >   #else
> >
> > +static inline
> > +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
> > +                                                  const struct thermal_zone_device_ops *ops)
> > +{
> > +     return ERR_PTR(-ENOTSUPP);
> > +}
> > +
> >   static inline
> >   struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data,
> >                                                         const struct thermal_zone_device_ops *ops)
> > @@ -209,6 +219,10 @@ struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, in
> >       return ERR_PTR(-ENOTSUPP);
> >   }
> >
> > +static inline void thermal_of_zone_unregister(struct thermal_zone_device *tz)
> > +{
> > +}
> > +
> >   static inline void devm_thermal_of_zone_unregister(struct device *dev,
> >                                                  struct thermal_zone_device *tz)
> >   {
>
>
> --
> <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>
> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
> <http://twitter.com/#!/linaroorg> Twitter |
> <http://www.linaro.org/linaro-blog/> Blog
Daniel Lezcano Jan. 29, 2025, 5:24 p.m. UTC | #3
Hi Claudiu,

On Fri, Jan 03, 2025 at 06:38:01PM +0200, Claudiu wrote:
> From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
> 
> On the Renesas RZ/G3S (and other Renesas SoCs, e.g., RZ/G2{L, LC, UL}),
> clocks are managed through PM domains. These PM domains, registered on
> behalf of the clock controller driver, are configured with
> GENPD_FLAG_PM_CLK. In most of the Renesas drivers used by RZ SoCs, the
> clocks are enabled/disabled using runtime PM APIs.
> 
> During probe, devices are attached to the PM domain controlling their
> clocks. Similarly, during removal, devices are detached from the PM domain.
> 
> The detachment call stack is as follows:
> 
> device_driver_detach() ->
>   device_release_driver_internal() ->
>     __device_release_driver() ->
>       device_remove() ->
>         platform_remove() ->
> 	  dev_pm_domain_detach()
> 
> In the upcoming Renesas RZ/G3S thermal driver, the
> struct thermal_zone_device_ops::change_mode API is implemented to
> start/stop the thermal sensor unit. Register settings are updated within
> the change_mode API.
> 
> In case devres helpers are used for thermal zone register/unregister the
> struct thermal_zone_device_ops::change_mode API is invoked when the
> driver is unbound. The identified call stack is as follows:
> 
> device_driver_detach() ->
>   device_release_driver_internal() ->
>     device_unbind_cleanup() ->
>       devres_release_all() ->
>         devm_thermal_of_zone_release() ->
> 	  thermal_zone_device_disable() ->
> 	    thermal_zone_device_set_mode() ->
> 	      rzg3s_thermal_change_mode()
> 
> The device_unbind_cleanup() function is called after the thermal device is
> detached from the PM domain (via dev_pm_domain_detach()).
> 
> The rzg3s_thermal_change_mode() implementation calls
> pm_runtime_resume_and_get()/pm_runtime_put_autosuspend() before/after
> accessing the registers. However, during the unbind scenario, the
> devm_thermal_of_zone_release() is invoked after dev_pm_domain_detach().
> Consequently, the clocks are not enabled, as the device is removed from
> the PM domain at this time, leading to an Asynchronous SError Interrupt.
> The system cannot be used after this.

I've been through the driver before responding to this change. What is the
benefit of powering down / up (or clock off / on) the thermal sensor when
reading the temperature ?

I can understand for disable / enable but I don't get for the classic usage
where a governor will be reading the temperature regularly.

Would the IP need some cycles to capture the temperature accurately after the
clock is enabled ?

> Add thermal_of_zone_register()/thermal_of_zone_unregister(). These will
> be used in the upcomming RZ/G3S thermal driver.
> 
> Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Claudiu Jan. 30, 2025, 9:08 a.m. UTC | #4
Hi, Daniel,

On 29.01.2025 19:24, Daniel Lezcano wrote:
> Hi Claudiu,
> 
> On Fri, Jan 03, 2025 at 06:38:01PM +0200, Claudiu wrote:
>> From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
>>
>> On the Renesas RZ/G3S (and other Renesas SoCs, e.g., RZ/G2{L, LC, UL}),
>> clocks are managed through PM domains. These PM domains, registered on
>> behalf of the clock controller driver, are configured with
>> GENPD_FLAG_PM_CLK. In most of the Renesas drivers used by RZ SoCs, the
>> clocks are enabled/disabled using runtime PM APIs.
>>
>> During probe, devices are attached to the PM domain controlling their
>> clocks. Similarly, during removal, devices are detached from the PM domain.
>>
>> The detachment call stack is as follows:
>>
>> device_driver_detach() ->
>>   device_release_driver_internal() ->
>>     __device_release_driver() ->
>>       device_remove() ->
>>         platform_remove() ->
>> 	  dev_pm_domain_detach()
>>
>> In the upcoming Renesas RZ/G3S thermal driver, the
>> struct thermal_zone_device_ops::change_mode API is implemented to
>> start/stop the thermal sensor unit. Register settings are updated within
>> the change_mode API.
>>
>> In case devres helpers are used for thermal zone register/unregister the
>> struct thermal_zone_device_ops::change_mode API is invoked when the
>> driver is unbound. The identified call stack is as follows:
>>
>> device_driver_detach() ->
>>   device_release_driver_internal() ->
>>     device_unbind_cleanup() ->
>>       devres_release_all() ->
>>         devm_thermal_of_zone_release() ->
>> 	  thermal_zone_device_disable() ->
>> 	    thermal_zone_device_set_mode() ->
>> 	      rzg3s_thermal_change_mode()
>>
>> The device_unbind_cleanup() function is called after the thermal device is
>> detached from the PM domain (via dev_pm_domain_detach()).
>>
>> The rzg3s_thermal_change_mode() implementation calls
>> pm_runtime_resume_and_get()/pm_runtime_put_autosuspend() before/after
>> accessing the registers. However, during the unbind scenario, the
>> devm_thermal_of_zone_release() is invoked after dev_pm_domain_detach().
>> Consequently, the clocks are not enabled, as the device is removed from
>> the PM domain at this time, leading to an Asynchronous SError Interrupt.
>> The system cannot be used after this.
> 
> I've been through the driver before responding to this change. What is the
> benefit of powering down / up (or clock off / on) the thermal sensor when
> reading the temperature ?
> 
> I can understand for disable / enable but I don't get for the classic usage
> where a governor will be reading the temperature regularly.

I tried to be as power saving as possible both at runtime and after the IP
is not used anymore as the HW manual doesn't mentioned anything about
accuracy or implications of disabling the IP clock at runtime. We use
similar approach (of disabling clocks at runtime) for other IPs in the
RZ/G3S SoC as well.

> 
> Would the IP need some cycles to capture the temperature accurately after the
> clock is enabled ?

There is nothing about this mentioned about this in the HW manual of the
RZ/G3S SoC. The only points mentioned are as described in the driver code:
- wait at least 3us after each IIO channel read
- wait at least 30us after enabling the sensor
- wait at least 50us after setting OE bit in TSU_SM

For this I chose to have it implemented as proposed.

If any, the HW manual is available here
https://www.renesas.com/en/document/mah/rzg3s-group-users-manual-hardware?r=25458591
(an archive is here; the manual is in Deliverables/r01uh1014ej0110-rzg3s.pdf)

Thank you for your review,
Claudiu

> 
>> Add thermal_of_zone_register()/thermal_of_zone_unregister(). These will
>> be used in the upcomming RZ/G3S thermal driver.
>>
>> Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Daniel Lezcano Jan. 30, 2025, 10:07 a.m. UTC | #5
On Thu, Jan 30, 2025 at 11:08:03AM +0200, Claudiu Beznea wrote:
> Hi, Daniel,
> 
> On 29.01.2025 19:24, Daniel Lezcano wrote:
> > Hi Claudiu,
> > 
> > On Fri, Jan 03, 2025 at 06:38:01PM +0200, Claudiu wrote:
> >> From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
> >>
> >> On the Renesas RZ/G3S (and other Renesas SoCs, e.g., RZ/G2{L, LC, UL}),
> >> clocks are managed through PM domains. These PM domains, registered on
> >> behalf of the clock controller driver, are configured with
> >> GENPD_FLAG_PM_CLK. In most of the Renesas drivers used by RZ SoCs, the
> >> clocks are enabled/disabled using runtime PM APIs.
> >>
> >> During probe, devices are attached to the PM domain controlling their
> >> clocks. Similarly, during removal, devices are detached from the PM domain.
> >>
> >> The detachment call stack is as follows:
> >>
> >> device_driver_detach() ->
> >>   device_release_driver_internal() ->
> >>     __device_release_driver() ->
> >>       device_remove() ->
> >>         platform_remove() ->
> >> 	  dev_pm_domain_detach()
> >>
> >> In the upcoming Renesas RZ/G3S thermal driver, the
> >> struct thermal_zone_device_ops::change_mode API is implemented to
> >> start/stop the thermal sensor unit. Register settings are updated within
> >> the change_mode API.
> >>
> >> In case devres helpers are used for thermal zone register/unregister the
> >> struct thermal_zone_device_ops::change_mode API is invoked when the
> >> driver is unbound. The identified call stack is as follows:
> >>
> >> device_driver_detach() ->
> >>   device_release_driver_internal() ->
> >>     device_unbind_cleanup() ->
> >>       devres_release_all() ->
> >>         devm_thermal_of_zone_release() ->
> >> 	  thermal_zone_device_disable() ->
> >> 	    thermal_zone_device_set_mode() ->
> >> 	      rzg3s_thermal_change_mode()
> >>
> >> The device_unbind_cleanup() function is called after the thermal device is
> >> detached from the PM domain (via dev_pm_domain_detach()).
> >>
> >> The rzg3s_thermal_change_mode() implementation calls
> >> pm_runtime_resume_and_get()/pm_runtime_put_autosuspend() before/after
> >> accessing the registers. However, during the unbind scenario, the
> >> devm_thermal_of_zone_release() is invoked after dev_pm_domain_detach().
> >> Consequently, the clocks are not enabled, as the device is removed from
> >> the PM domain at this time, leading to an Asynchronous SError Interrupt.
> >> The system cannot be used after this.
> > 
> > I've been through the driver before responding to this change. What is the
> > benefit of powering down / up (or clock off / on) the thermal sensor when
> > reading the temperature ?
> > 
> > I can understand for disable / enable but I don't get for the classic usage
> > where a governor will be reading the temperature regularly.
> 
> I tried to be as power saving as possible both at runtime and after the IP
> is not used anymore as the HW manual doesn't mentioned anything about
> accuracy or implications of disabling the IP clock at runtime. We use
> similar approach (of disabling clocks at runtime) for other IPs in the
> RZ/G3S SoC as well.
> 
> > 
> > Would the IP need some cycles to capture the temperature accurately after the
> > clock is enabled ?
> 
> There is nothing about this mentioned about this in the HW manual of the
> RZ/G3S SoC. The only points mentioned are as described in the driver code:
> - wait at least 3us after each IIO channel read
> - wait at least 30us after enabling the sensor
> - wait at least 50us after setting OE bit in TSU_SM
> 
> For this I chose to have it implemented as proposed.

IMO, disabling/enabling the clock between two reads through the pm runtime may
not be a good thing, especially if the system enters a thermal situation where
it has to mitigate.

Without any testing capturing the temperatures and compare between the always-on
and on/off, it is hard to say if it is true or not. Up to you to test that or
not. If you think it is fine, then let's go with it.
 
> If any, the HW manual is available here
> https://www.renesas.com/en/document/mah/rzg3s-group-users-manual-hardware?r=25458591
> (an archive is here; the manual is in Deliverables/r01uh1014ej0110-rzg3s.pdf)
> 
> Thank you for your review,
> Claudiu
> 
> > 
> >> Add thermal_of_zone_register()/thermal_of_zone_unregister(). These will
> >> be used in the upcomming RZ/G3S thermal driver.
> >>
> >> Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
>
Claudiu Jan. 30, 2025, 10:30 a.m. UTC | #6
On 30.01.2025 12:07, Daniel Lezcano wrote:
> On Thu, Jan 30, 2025 at 11:08:03AM +0200, Claudiu Beznea wrote:
>> Hi, Daniel,
>>
>> On 29.01.2025 19:24, Daniel Lezcano wrote:
>>> Hi Claudiu,
>>>
>>> On Fri, Jan 03, 2025 at 06:38:01PM +0200, Claudiu wrote:
>>>> From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
>>>>
>>>> On the Renesas RZ/G3S (and other Renesas SoCs, e.g., RZ/G2{L, LC, UL}),
>>>> clocks are managed through PM domains. These PM domains, registered on
>>>> behalf of the clock controller driver, are configured with
>>>> GENPD_FLAG_PM_CLK. In most of the Renesas drivers used by RZ SoCs, the
>>>> clocks are enabled/disabled using runtime PM APIs.
>>>>
>>>> During probe, devices are attached to the PM domain controlling their
>>>> clocks. Similarly, during removal, devices are detached from the PM domain.
>>>>
>>>> The detachment call stack is as follows:
>>>>
>>>> device_driver_detach() ->
>>>>   device_release_driver_internal() ->
>>>>     __device_release_driver() ->
>>>>       device_remove() ->
>>>>         platform_remove() ->
>>>> 	  dev_pm_domain_detach()
>>>>
>>>> In the upcoming Renesas RZ/G3S thermal driver, the
>>>> struct thermal_zone_device_ops::change_mode API is implemented to
>>>> start/stop the thermal sensor unit. Register settings are updated within
>>>> the change_mode API.
>>>>
>>>> In case devres helpers are used for thermal zone register/unregister the
>>>> struct thermal_zone_device_ops::change_mode API is invoked when the
>>>> driver is unbound. The identified call stack is as follows:
>>>>
>>>> device_driver_detach() ->
>>>>   device_release_driver_internal() ->
>>>>     device_unbind_cleanup() ->
>>>>       devres_release_all() ->
>>>>         devm_thermal_of_zone_release() ->
>>>> 	  thermal_zone_device_disable() ->
>>>> 	    thermal_zone_device_set_mode() ->
>>>> 	      rzg3s_thermal_change_mode()
>>>>
>>>> The device_unbind_cleanup() function is called after the thermal device is
>>>> detached from the PM domain (via dev_pm_domain_detach()).
>>>>
>>>> The rzg3s_thermal_change_mode() implementation calls
>>>> pm_runtime_resume_and_get()/pm_runtime_put_autosuspend() before/after
>>>> accessing the registers. However, during the unbind scenario, the
>>>> devm_thermal_of_zone_release() is invoked after dev_pm_domain_detach().
>>>> Consequently, the clocks are not enabled, as the device is removed from
>>>> the PM domain at this time, leading to an Asynchronous SError Interrupt.
>>>> The system cannot be used after this.
>>>
>>> I've been through the driver before responding to this change. What is the
>>> benefit of powering down / up (or clock off / on) the thermal sensor when
>>> reading the temperature ?
>>>
>>> I can understand for disable / enable but I don't get for the classic usage
>>> where a governor will be reading the temperature regularly.
>>
>> I tried to be as power saving as possible both at runtime and after the IP
>> is not used anymore as the HW manual doesn't mentioned anything about
>> accuracy or implications of disabling the IP clock at runtime. We use
>> similar approach (of disabling clocks at runtime) for other IPs in the
>> RZ/G3S SoC as well.
>>
>>>
>>> Would the IP need some cycles to capture the temperature accurately after the
>>> clock is enabled ?
>>
>> There is nothing about this mentioned about this in the HW manual of the
>> RZ/G3S SoC. The only points mentioned are as described in the driver code:
>> - wait at least 3us after each IIO channel read
>> - wait at least 30us after enabling the sensor
>> - wait at least 50us after setting OE bit in TSU_SM
>>
>> For this I chose to have it implemented as proposed.
> 
> IMO, disabling/enabling the clock between two reads through the pm runtime may
> not be a good thing, especially if the system enters a thermal situation where
> it has to mitigate.
> 
> Without any testing capturing the temperatures and compare between the always-on
> and on/off, it is hard to say if it is true or not. Up to you to test that or
> not. If you think it is fine, then let's go with it.

I tested it with and w/o the runtime PM and on/off support (so, everything
ON from the probe) and the reported temperature values were similar.

Thank you,
Claudiu

>  
>> If any, the HW manual is available here
>> https://www.renesas.com/en/document/mah/rzg3s-group-users-manual-hardware?r=25458591
>> (an archive is here; the manual is in Deliverables/r01uh1014ej0110-rzg3s.pdf)
>>
>> Thank you for your review,
>> Claudiu
>>
>>>
>>>> Add thermal_of_zone_register()/thermal_of_zone_unregister(). These will
>>>> be used in the upcomming RZ/G3S thermal driver.
>>>>
>>>> Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
>>
>
Biju Das Jan. 30, 2025, 10:33 a.m. UTC | #7
Hi Daniel Lezcano, 

> -----Original Message-----
> From: Daniel Lezcano <daniel.lezcano@linaro.org>
> Sent: 30 January 2025 10:07
> Subject: Re: [PATCH 2/6] thermal: of: Export non-devres helper to register/unregister thermal zone
> 
> On Thu, Jan 30, 2025 at 11:08:03AM +0200, Claudiu Beznea wrote:
> > Hi, Daniel,
> >
> > On 29.01.2025 19:24, Daniel Lezcano wrote:
> > > Hi Claudiu,
> > >
> > > On Fri, Jan 03, 2025 at 06:38:01PM +0200, Claudiu wrote:
> > >> From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
> > >>
> > >> On the Renesas RZ/G3S (and other Renesas SoCs, e.g., RZ/G2{L, LC,
> > >> UL}), clocks are managed through PM domains. These PM domains,
> > >> registered on behalf of the clock controller driver, are configured
> > >> with GENPD_FLAG_PM_CLK. In most of the Renesas drivers used by RZ
> > >> SoCs, the clocks are enabled/disabled using runtime PM APIs.
> > >>
> > >> During probe, devices are attached to the PM domain controlling
> > >> their clocks. Similarly, during removal, devices are detached from the PM domain.
> > >>
> > >> The detachment call stack is as follows:
> > >>
> > >> device_driver_detach() ->
> > >>   device_release_driver_internal() ->
> > >>     __device_release_driver() ->
> > >>       device_remove() ->
> > >>         platform_remove() ->
> > >> 	  dev_pm_domain_detach()
> > >>
> > >> In the upcoming Renesas RZ/G3S thermal driver, the struct
> > >> thermal_zone_device_ops::change_mode API is implemented to
> > >> start/stop the thermal sensor unit. Register settings are updated
> > >> within the change_mode API.
> > >>
> > >> In case devres helpers are used for thermal zone
> > >> register/unregister the struct thermal_zone_device_ops::change_mode
> > >> API is invoked when the driver is unbound. The identified call stack is as follows:
> > >>
> > >> device_driver_detach() ->
> > >>   device_release_driver_internal() ->
> > >>     device_unbind_cleanup() ->
> > >>       devres_release_all() ->
> > >>         devm_thermal_of_zone_release() ->
> > >> 	  thermal_zone_device_disable() ->
> > >> 	    thermal_zone_device_set_mode() ->
> > >> 	      rzg3s_thermal_change_mode()
> > >>
> > >> The device_unbind_cleanup() function is called after the thermal
> > >> device is detached from the PM domain (via dev_pm_domain_detach()).
> > >>
> > >> The rzg3s_thermal_change_mode() implementation calls
> > >> pm_runtime_resume_and_get()/pm_runtime_put_autosuspend()
> > >> before/after accessing the registers. However, during the unbind
> > >> scenario, the
> > >> devm_thermal_of_zone_release() is invoked after dev_pm_domain_detach().
> > >> Consequently, the clocks are not enabled, as the device is removed
> > >> from the PM domain at this time, leading to an Asynchronous SError Interrupt.
> > >> The system cannot be used after this.
> > >
> > > I've been through the driver before responding to this change. What
> > > is the benefit of powering down / up (or clock off / on) the thermal
> > > sensor when reading the temperature ?
> > >
> > > I can understand for disable / enable but I don't get for the
> > > classic usage where a governor will be reading the temperature regularly.
> >
> > I tried to be as power saving as possible both at runtime and after
> > the IP is not used anymore as the HW manual doesn't mentioned anything
> > about accuracy or implications of disabling the IP clock at runtime.
> > We use similar approach (of disabling clocks at runtime) for other IPs
> > in the RZ/G3S SoC as well.
> >
> > >
> > > Would the IP need some cycles to capture the temperature accurately
> > > after the clock is enabled ?
> >
> > There is nothing about this mentioned about this in the HW manual of
> > the RZ/G3S SoC. The only points mentioned are as described in the driver code:
> > - wait at least 3us after each IIO channel read
> > - wait at least 30us after enabling the sensor
> > - wait at least 50us after setting OE bit in TSU_SM
> >
> > For this I chose to have it implemented as proposed.
> 
> IMO, disabling/enabling the clock between two reads through the pm runtime may not be a good thing,
> especially if the system enters a thermal situation where it has to mitigate.

Just a question, You mean to avoid device destruction due to high temperature?? Assuming disabling the clk happens
when the temp reaches the boundary and re-enabling of the clk after a time(which involves monitoring the CLK ON
bit after enabling it, or a run time enable failure happens), where it exceeds the threshold??

Cheers,
Biju
Claudiu Jan. 30, 2025, 10:50 a.m. UTC | #8
On 30.01.2025 12:30, Claudiu Beznea wrote:
> 
> 
> On 30.01.2025 12:07, Daniel Lezcano wrote:
>> On Thu, Jan 30, 2025 at 11:08:03AM +0200, Claudiu Beznea wrote:
>>> Hi, Daniel,
>>>
>>> On 29.01.2025 19:24, Daniel Lezcano wrote:
>>>> Hi Claudiu,
>>>>
>>>> On Fri, Jan 03, 2025 at 06:38:01PM +0200, Claudiu wrote:
>>>>> From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
>>>>>
>>>>> On the Renesas RZ/G3S (and other Renesas SoCs, e.g., RZ/G2{L, LC, UL}),
>>>>> clocks are managed through PM domains. These PM domains, registered on
>>>>> behalf of the clock controller driver, are configured with
>>>>> GENPD_FLAG_PM_CLK. In most of the Renesas drivers used by RZ SoCs, the
>>>>> clocks are enabled/disabled using runtime PM APIs.
>>>>>
>>>>> During probe, devices are attached to the PM domain controlling their
>>>>> clocks. Similarly, during removal, devices are detached from the PM domain.
>>>>>
>>>>> The detachment call stack is as follows:
>>>>>
>>>>> device_driver_detach() ->
>>>>>   device_release_driver_internal() ->
>>>>>     __device_release_driver() ->
>>>>>       device_remove() ->
>>>>>         platform_remove() ->
>>>>> 	  dev_pm_domain_detach()
>>>>>
>>>>> In the upcoming Renesas RZ/G3S thermal driver, the
>>>>> struct thermal_zone_device_ops::change_mode API is implemented to
>>>>> start/stop the thermal sensor unit. Register settings are updated within
>>>>> the change_mode API.
>>>>>
>>>>> In case devres helpers are used for thermal zone register/unregister the
>>>>> struct thermal_zone_device_ops::change_mode API is invoked when the
>>>>> driver is unbound. The identified call stack is as follows:
>>>>>
>>>>> device_driver_detach() ->
>>>>>   device_release_driver_internal() ->
>>>>>     device_unbind_cleanup() ->
>>>>>       devres_release_all() ->
>>>>>         devm_thermal_of_zone_release() ->
>>>>> 	  thermal_zone_device_disable() ->
>>>>> 	    thermal_zone_device_set_mode() ->
>>>>> 	      rzg3s_thermal_change_mode()
>>>>>
>>>>> The device_unbind_cleanup() function is called after the thermal device is
>>>>> detached from the PM domain (via dev_pm_domain_detach()).
>>>>>
>>>>> The rzg3s_thermal_change_mode() implementation calls
>>>>> pm_runtime_resume_and_get()/pm_runtime_put_autosuspend() before/after
>>>>> accessing the registers. However, during the unbind scenario, the
>>>>> devm_thermal_of_zone_release() is invoked after dev_pm_domain_detach().
>>>>> Consequently, the clocks are not enabled, as the device is removed from
>>>>> the PM domain at this time, leading to an Asynchronous SError Interrupt.
>>>>> The system cannot be used after this.
>>>>
>>>> I've been through the driver before responding to this change. What is the
>>>> benefit of powering down / up (or clock off / on) the thermal sensor when
>>>> reading the temperature ?
>>>>
>>>> I can understand for disable / enable but I don't get for the classic usage
>>>> where a governor will be reading the temperature regularly.
>>>
>>> I tried to be as power saving as possible both at runtime and after the IP
>>> is not used anymore as the HW manual doesn't mentioned anything about
>>> accuracy or implications of disabling the IP clock at runtime. We use
>>> similar approach (of disabling clocks at runtime) for other IPs in the
>>> RZ/G3S SoC as well.
>>>
>>>>
>>>> Would the IP need some cycles to capture the temperature accurately after the
>>>> clock is enabled ?
>>>
>>> There is nothing about this mentioned about this in the HW manual of the
>>> RZ/G3S SoC. The only points mentioned are as described in the driver code:
>>> - wait at least 3us after each IIO channel read
>>> - wait at least 30us after enabling the sensor
>>> - wait at least 50us after setting OE bit in TSU_SM
>>>
>>> For this I chose to have it implemented as proposed.
>>
>> IMO, disabling/enabling the clock between two reads through the pm runtime may
>> not be a good thing, especially if the system enters a thermal situation where
>> it has to mitigate.

If it's not 100% safe I can drop the runtime PM support.

>>
>> Without any testing capturing the temperatures and compare between the always-on
>> and on/off, it is hard to say if it is true or not. Up to you to test that or
>> not. If you think it is fine, then let's go with it.
> 
> I tested it with and w/o the runtime PM and on/off support (so, everything
> ON from the probe) and the reported temperature values were similar.
> 
> Thank you,
> Claudiu
> 
>>  
>>> If any, the HW manual is available here
>>> https://www.renesas.com/en/document/mah/rzg3s-group-users-manual-hardware?r=25458591
>>> (an archive is here; the manual is in Deliverables/r01uh1014ej0110-rzg3s.pdf)
>>>
>>> Thank you for your review,
>>> Claudiu
>>>
>>>>
>>>>> Add thermal_of_zone_register()/thermal_of_zone_unregister(). These will
>>>>> be used in the upcomming RZ/G3S thermal driver.
>>>>>
>>>>> Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
>>>
>>
>
Daniel Lezcano Jan. 30, 2025, 5:24 p.m. UTC | #9
On 30/01/2025 11:30, Claudiu Beznea wrote:
> 
> 
> On 30.01.2025 12:07, Daniel Lezcano wrote:
>> On Thu, Jan 30, 2025 at 11:08:03AM +0200, Claudiu Beznea wrote:
>>> Hi, Daniel,

[ ... ]

>>>> Would the IP need some cycles to capture the temperature accurately after the
>>>> clock is enabled ?
>>>
>>> There is nothing about this mentioned about this in the HW manual of the
>>> RZ/G3S SoC. The only points mentioned are as described in the driver code:
>>> - wait at least 3us after each IIO channel read
>>> - wait at least 30us after enabling the sensor
>>> - wait at least 50us after setting OE bit in TSU_SM
>>>
>>> For this I chose to have it implemented as proposed.
>>
>> IMO, disabling/enabling the clock between two reads through the pm runtime may
>> not be a good thing, especially if the system enters a thermal situation where
>> it has to mitigate.
>>
>> Without any testing capturing the temperatures and compare between the always-on
>> and on/off, it is hard to say if it is true or not. Up to you to test that or
>> not. If you think it is fine, then let's go with it.
> 
> I tested it with and w/o the runtime PM and on/off support (so, everything
> ON from the probe) and the reported temperature values were similar.


Did you remove the roundup to 0.5°C ?
Daniel Lezcano Jan. 30, 2025, 5:31 p.m. UTC | #10
On 30/01/2025 11:33, Biju Das wrote:
> Hi Daniel Lezcano,
> 
>> -----Original Message-----

[ ... ]

>>>> I've been through the driver before responding to this change. What
>>>> is the benefit of powering down / up (or clock off / on) the thermal
>>>> sensor when reading the temperature ?
>>>>
>>>> I can understand for disable / enable but I don't get for the
>>>> classic usage where a governor will be reading the temperature regularly.
>>>
>>> I tried to be as power saving as possible both at runtime and after
>>> the IP is not used anymore as the HW manual doesn't mentioned anything
>>> about accuracy or implications of disabling the IP clock at runtime.
>>> We use similar approach (of disabling clocks at runtime) for other IPs
>>> in the RZ/G3S SoC as well.
>>>
>>>>
>>>> Would the IP need some cycles to capture the temperature accurately
>>>> after the clock is enabled ?
>>>
>>> There is nothing about this mentioned about this in the HW manual of
>>> the RZ/G3S SoC. The only points mentioned are as described in the driver code:
>>> - wait at least 3us after each IIO channel read
>>> - wait at least 30us after enabling the sensor
>>> - wait at least 50us after setting OE bit in TSU_SM
>>>
>>> For this I chose to have it implemented as proposed.
>>
>> IMO, disabling/enabling the clock between two reads through the pm runtime may not be a good thing,
>> especially if the system enters a thermal situation where it has to mitigate.
> 
> Just a question, You mean to avoid device destruction due to high temperature?? Assuming disabling the clk happens
> when the temp reaches the boundary and re-enabling of the clk after a time(which involves monitoring the CLK ON
> bit after enabling it, or a run time enable failure happens), where it exceeds the threshold??


Well, I have some comments with the device tree thermal configuration 
which may answer your question but I'll wait for Claudiu to check the 
temperature read comparison without rounding to 0.5°C

What I meant is if the temperature read is inaccurate, the mitigation 
will be inaccurate too. It may not reach the critical temperature but it 
is possible the performance could be impacted negatively under thermal 
stress.
Claudiu Jan. 30, 2025, 5:32 p.m. UTC | #11
On 30.01.2025 19:24, Daniel Lezcano wrote:
> On 30/01/2025 11:30, Claudiu Beznea wrote:
>>
>>
>> On 30.01.2025 12:07, Daniel Lezcano wrote:
>>> On Thu, Jan 30, 2025 at 11:08:03AM +0200, Claudiu Beznea wrote:
>>>> Hi, Daniel,
> 
> [ ... ]
> 
>>>>> Would the IP need some cycles to capture the temperature accurately
>>>>> after the
>>>>> clock is enabled ?
>>>>
>>>> There is nothing about this mentioned about this in the HW manual of the
>>>> RZ/G3S SoC. The only points mentioned are as described in the driver code:
>>>> - wait at least 3us after each IIO channel read
>>>> - wait at least 30us after enabling the sensor
>>>> - wait at least 50us after setting OE bit in TSU_SM
>>>>
>>>> For this I chose to have it implemented as proposed.
>>>
>>> IMO, disabling/enabling the clock between two reads through the pm
>>> runtime may
>>> not be a good thing, especially if the system enters a thermal situation
>>> where
>>> it has to mitigate.
>>>
>>> Without any testing capturing the temperatures and compare between the
>>> always-on
>>> and on/off, it is hard to say if it is true or not. Up to you to test
>>> that or
>>> not. If you think it is fine, then let's go with it.
>>
>> I tested it with and w/o the runtime PM and on/off support (so, everything
>> ON from the probe) and the reported temperature values were similar.
> 
> 
> Did you remove the roundup to 0.5°C ?

No, the roundup was present in both tested versions.

Thank you,
Claudiu

> 
>
Biju Das Jan. 30, 2025, 5:47 p.m. UTC | #12
Hi Daniel Lezcano,

> -----Original Message-----
> From: Daniel Lezcano <daniel.lezcano@linaro.org>
> Sent: 30 January 2025 17:32
> Subject: Re: [PATCH 2/6] thermal: of: Export non-devres helper to register/unregister thermal zone
> 
> On 30/01/2025 11:33, Biju Das wrote:
> > Hi Daniel Lezcano,
> >
> >> -----Original Message-----
> 
> [ ... ]
> 
> >>>> I've been through the driver before responding to this change. What
> >>>> is the benefit of powering down / up (or clock off / on) the
> >>>> thermal sensor when reading the temperature ?
> >>>>
> >>>> I can understand for disable / enable but I don't get for the
> >>>> classic usage where a governor will be reading the temperature regularly.
> >>>
> >>> I tried to be as power saving as possible both at runtime and after
> >>> the IP is not used anymore as the HW manual doesn't mentioned
> >>> anything about accuracy or implications of disabling the IP clock at runtime.
> >>> We use similar approach (of disabling clocks at runtime) for other
> >>> IPs in the RZ/G3S SoC as well.
> >>>
> >>>>
> >>>> Would the IP need some cycles to capture the temperature accurately
> >>>> after the clock is enabled ?
> >>>
> >>> There is nothing about this mentioned about this in the HW manual of
> >>> the RZ/G3S SoC. The only points mentioned are as described in the driver code:
> >>> - wait at least 3us after each IIO channel read
> >>> - wait at least 30us after enabling the sensor
> >>> - wait at least 50us after setting OE bit in TSU_SM
> >>>
> >>> For this I chose to have it implemented as proposed.
> >>
> >> IMO, disabling/enabling the clock between two reads through the pm
> >> runtime may not be a good thing, especially if the system enters a thermal situation where it has
> to mitigate.
> >
> > Just a question, You mean to avoid device destruction due to high
> > temperature?? Assuming disabling the clk happens when the temp reaches
> > the boundary and re-enabling of the clk after a time(which involves monitoring the CLK ON bit after
> enabling it, or a run time enable failure happens), where it exceeds the threshold??
> 
> 
> Well, I have some comments with the device tree thermal configuration which may answer your question
> but I'll wait for Claudiu to check the temperature read comparison without rounding to 0.5°C
> 
> What I meant is if the temperature read is inaccurate, the mitigation will be inaccurate too. It may
> not reach the critical temperature but it is possible the performance could be impacted negatively
> under thermal stress.

Thanks for the explanation. 

I thought you meant " disabling/enabling the clock between two reads through the pm
runtime may not be a good thing" under stress/hot condition, temperature raises, and
in those corner cases if runtime PM fails, then we cannot read temperature and
we cannot take any corrective action.

Cheers,
Biju
Claudiu Jan. 30, 2025, 8:53 p.m. UTC | #13
Hi, Daniel,

On 30.01.2025 19:24, Daniel Lezcano wrote:
> On 30/01/2025 11:30, Claudiu Beznea wrote:
>>
>>
>> On 30.01.2025 12:07, Daniel Lezcano wrote:
>>> On Thu, Jan 30, 2025 at 11:08:03AM +0200, Claudiu Beznea wrote:
>>>> Hi, Daniel,
> 
> [ ... ]
> 
>>>>> Would the IP need some cycles to capture the temperature accurately
>>>>> after the
>>>>> clock is enabled ?
>>>>
>>>> There is nothing about this mentioned about this in the HW manual of the
>>>> RZ/G3S SoC. The only points mentioned are as described in the driver code:
>>>> - wait at least 3us after each IIO channel read
>>>> - wait at least 30us after enabling the sensor
>>>> - wait at least 50us after setting OE bit in TSU_SM
>>>>
>>>> For this I chose to have it implemented as proposed.
>>>
>>> IMO, disabling/enabling the clock between two reads through the pm
>>> runtime may
>>> not be a good thing, especially if the system enters a thermal situation
>>> where
>>> it has to mitigate.
>>>
>>> Without any testing capturing the temperatures and compare between the
>>> always-on
>>> and on/off, it is hard to say if it is true or not. Up to you to test
>>> that or
>>> not. If you think it is fine, then let's go with it.
>>
>> I tested it with and w/o the runtime PM and on/off support (so, everything
>> ON from the probe) and the reported temperature values were similar.
> 
> 
> Did you remove the roundup to 0.5°C ?

I did the testing as suggested and, this time, collected results and
compared side by side. I read the temperature for 10 minutes, 60 seconds
after the Linux prompt showed up. There is, indeed, a slight difference b/w
the 2 cases.

When the runtime PM doesn't touch the clocks on read the reported
temperature varies b/w 53-54 degrees while when the runtime PM
enables/disables the clocks a single read reported 55 degrees, the rest
reported 54 degrees.

I plotted the results side by side here:
https://i2.paste.pics/f07eaeddc2ccc3c6695fe5056b52f4a2.png?trs=0a0eaab99bb59ebcb10051eb298f437c7cd50c16437a87392aebc16cd9013e18&rand=vWXm2VTrbt

Please let me know how do you consider it.

Thank you,
Claudiu

> 
>
Daniel Lezcano Jan. 30, 2025, 10:33 p.m. UTC | #14
On 30/01/2025 21:53, Claudiu Beznea wrote:
> Hi, Daniel,
> 
> On 30.01.2025 19:24, Daniel Lezcano wrote:
>> On 30/01/2025 11:30, Claudiu Beznea wrote:
>>>
>>>
>>> On 30.01.2025 12:07, Daniel Lezcano wrote:
>>>> On Thu, Jan 30, 2025 at 11:08:03AM +0200, Claudiu Beznea wrote:
>>>>> Hi, Daniel,
>>
>> [ ... ]
>>
>>>>>> Would the IP need some cycles to capture the temperature accurately
>>>>>> after the
>>>>>> clock is enabled ?
>>>>>
>>>>> There is nothing about this mentioned about this in the HW manual of the
>>>>> RZ/G3S SoC. The only points mentioned are as described in the driver code:
>>>>> - wait at least 3us after each IIO channel read
>>>>> - wait at least 30us after enabling the sensor
>>>>> - wait at least 50us after setting OE bit in TSU_SM
>>>>>
>>>>> For this I chose to have it implemented as proposed.
>>>>
>>>> IMO, disabling/enabling the clock between two reads through the pm
>>>> runtime may
>>>> not be a good thing, especially if the system enters a thermal situation
>>>> where
>>>> it has to mitigate.
>>>>
>>>> Without any testing capturing the temperatures and compare between the
>>>> always-on
>>>> and on/off, it is hard to say if it is true or not. Up to you to test
>>>> that or
>>>> not. If you think it is fine, then let's go with it.
>>>
>>> I tested it with and w/o the runtime PM and on/off support (so, everything
>>> ON from the probe) and the reported temperature values were similar.
>>
>>
>> Did you remove the roundup to 0.5°C ?
> 
> I did the testing as suggested and, this time, collected results and
> compared side by side. I read the temperature for 10 minutes, 60 seconds
> after the Linux prompt showed up. There is, indeed, a slight difference b/w
> the 2 cases.
> 
> When the runtime PM doesn't touch the clocks on read the reported
> temperature varies b/w 53-54 degrees while when the runtime PM
> enables/disables the clocks a single read reported 55 degrees, the rest
> reported 54 degrees.
> 
> I plotted the results side by side here:
> https://i2.paste.pics/f07eaeddc2ccc3c6695fe5056b52f4a2.png?trs=0a0eaab99bb59ebcb10051eb298f437c7cd50c16437a87392aebc16cd9013e18&rand=vWXm2VTrbt
> 
> Please let me know how do you consider it.

Thanks for taking the time to provide a figure

Testing thermal can be painful because it should be done under certain 
conditions.

I guess there was no particular work load on the system when running the 
tests.

At the first glance, it seems, without the pm runtime, the measurement 
is more precise as it catches more thermal changes. But the test does 
not give information about the thermal behavior under stress. And one 
second sampling is too long to really figure it out.

In the kernel source tree, there is a tool to read the temperature in an 
optimized manner, you may want to use it to read the temperature at a 
higher rate. It is located in tools/thermal/thermometer

Compiling is a bit fuzzy ATM, so until it is fixed, here are the steps:

(you should install libconfig-dev and libnl-3-dev packages).

cd $LINUX_DIR/tools/thermal/lib
make
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LINUX_DIR/tools/thermal/lib

cd $LINUX_DIR/tools
make thermometer



Then change directory:

cd $LINUX_DIR/tools/thermal/thermometer


Run the tool:

./thermometer -o out -c t.conf -l DEBUG -- <my_command>


The content of the configuration file t.conf is:

thermal-zones = (
	      {	name = "cpu[0_9].*-thermal";
		polling = 100; }
       )

All the captured data will be in the 'out' directory

For 'my_command', I suggest to use a script containing:

sleep 10; dhrystone -t 1 -r 120; sleep 10

If you need the dhrystone binary, let me know.

The thermal zone device tree configuration should be changed to use a 
65°C passive trip point instead of 100°C (and the kernel setup with the 
step wise governor as default).

The resulting figure from the temperature should show a flat temperature 
figure during 10 seconds, then the temperature increasing until reaching 
the temperature threshold of 65°C, the temperature stabilizing around 
it, then followed by a temperature decreasing when the test finishes.

If the temperature does not reach the limit, decrease the trip point 
temperature or increase the dhrystone duration (the -r 120 option)

At this point, you should the test with and without pm runtime but in 
order to have consistent results, you should wait ~20 minutes between 
two tests.

The shape of the figures will give the immediate information about how 
the mitigation vs thermal sensor vs cooling device behave.

Additionally, you can enable the thermal DEBUGFS option and add the 
collected information statistics from /sys/kernel/debug/thermal/*** in 
the results.


Hope that helps
Claudiu Jan. 30, 2025, 11:16 p.m. UTC | #15
Hi, Daniel,

On 31.01.2025 00:33, Daniel Lezcano wrote:
> On 30/01/2025 21:53, Claudiu Beznea wrote:
>> Hi, Daniel,
>>
>> On 30.01.2025 19:24, Daniel Lezcano wrote:
>>> On 30/01/2025 11:30, Claudiu Beznea wrote:
>>>>
>>>>
>>>> On 30.01.2025 12:07, Daniel Lezcano wrote:
>>>>> On Thu, Jan 30, 2025 at 11:08:03AM +0200, Claudiu Beznea wrote:
>>>>>> Hi, Daniel,
>>>
>>> [ ... ]
>>>
>>>>>>> Would the IP need some cycles to capture the temperature accurately
>>>>>>> after the
>>>>>>> clock is enabled ?
>>>>>>
>>>>>> There is nothing about this mentioned about this in the HW manual of the
>>>>>> RZ/G3S SoC. The only points mentioned are as described in the driver
>>>>>> code:
>>>>>> - wait at least 3us after each IIO channel read
>>>>>> - wait at least 30us after enabling the sensor
>>>>>> - wait at least 50us after setting OE bit in TSU_SM
>>>>>>
>>>>>> For this I chose to have it implemented as proposed.
>>>>>
>>>>> IMO, disabling/enabling the clock between two reads through the pm
>>>>> runtime may
>>>>> not be a good thing, especially if the system enters a thermal situation
>>>>> where
>>>>> it has to mitigate.
>>>>>
>>>>> Without any testing capturing the temperatures and compare between the
>>>>> always-on
>>>>> and on/off, it is hard to say if it is true or not. Up to you to test
>>>>> that or
>>>>> not. If you think it is fine, then let's go with it.
>>>>
>>>> I tested it with and w/o the runtime PM and on/off support (so, everything
>>>> ON from the probe) and the reported temperature values were similar.
>>>
>>>
>>> Did you remove the roundup to 0.5°C ?
>>
>> I did the testing as suggested and, this time, collected results and
>> compared side by side. I read the temperature for 10 minutes, 60 seconds
>> after the Linux prompt showed up. There is, indeed, a slight difference b/w
>> the 2 cases.
>>
>> When the runtime PM doesn't touch the clocks on read the reported
>> temperature varies b/w 53-54 degrees while when the runtime PM
>> enables/disables the clocks a single read reported 55 degrees, the rest
>> reported 54 degrees.
>>
>> I plotted the results side by side here:
>> https://i2.paste.pics/f07eaeddc2ccc3c6695fe5056b52f4a2.png?
>> trs=0a0eaab99bb59ebcb10051eb298f437c7cd50c16437a87392aebc16cd9013e18&rand=vWXm2VTrbt
>>
>> Please let me know how do you consider it.
> 

After sending this to you I figured it out that precision is lost somewhere
so I re-tested it with the following diff (multiplied parts of the equation
with 1000):

diff --git a/drivers/thermal/renesas/rzg3s_thermal.c
b/drivers/thermal/renesas/rzg3s_thermal.c
index 6719f9ca05eb..84e18ff69d7c 100644
--- a/drivers/thermal/renesas/rzg3s_thermal.c
+++ b/drivers/thermal/renesas/rzg3s_thermal.c
@@ -83,7 +83,7 @@ static int rzg3s_thermal_get_temp(struct
thermal_zone_device *tz, int *temp)
        }

        ret = 0;
-       ts_code_ave = DIV_ROUND_CLOSEST(ts_code_ave, TSU_READ_STEPS);
+       ts_code_ave = DIV_ROUND_CLOSEST(MCELSIUS(ts_code_ave), TSU_READ_STEPS);

        /*
         * According to the HW manual (section 40.4.4 Procedure for
Measuring the Temperature)
@@ -91,11 +91,8 @@ static int rzg3s_thermal_get_temp(struct
thermal_zone_device *tz, int *temp)
         *
         * Tj = (ts_code_ave - priv->calib0) * 165 / (priv->calib0 -
priv->calib1) - 40
         */
-       *temp = DIV_ROUND_CLOSEST((ts_code_ave - priv->calib1) * 165,
-                                 (priv->calib0 - priv->calib1)) - 40;
-
-       /* Report it in mili degrees Celsius and round it up to 0.5 degrees
Celsius. */
-       *temp = roundup(MCELSIUS(*temp), 500);
+       *temp = DIV_ROUND_CLOSEST((u64)(ts_code_ave -
MCELSIUS(priv->calib1)) * MCELSIUS(165),
+                                 MCELSIUS(priv->calib0 - priv->calib1)) -
MCELSIUS(40);

 rpm_put:
        pm_runtime_mark_last_busy(dev);

With this, the results seems similar b/w runtime PM and no runtime PM cases.

The tests were executed after the board was off for few hours. The
first test was with runtime PM suspend/resume on each read. Then the board
was rebooted and re-run the test w/o runtime PM suspend/resume on reads.

Figure with results is here:
https://i2.paste.pics/5f353a4f04b07b4bead3086624aba23f.png?trs=0a0eaab99bb59ebcb10051eb298f437c7cd50c16437a87392aebc16cd9013e18&rand=5n34QNjWID


> Thanks for taking the time to provide a figure
> 
> Testing thermal can be painful because it should be done under certain
> conditions.
> 
> I guess there was no particular work load on the system when running the
> tests.

No load, indeed.

> 
> At the first glance, it seems, without the pm runtime, the measurement is
> more precise as it catches more thermal changes. But the test does not give
> information about the thermal behavior under stress. And one second
> sampling is too long to really figure it out.
> 
> In the kernel source tree, there is a tool to read the temperature in an
> optimized manner, you may want to use it to read the temperature at a
> higher rate. It is located in tools/thermal/thermometer
> 
> Compiling is a bit fuzzy ATM, so until it is fixed, here are the steps:
> 
> (you should install libconfig-dev and libnl-3-dev packages).
> 
> cd $LINUX_DIR/tools/thermal/lib
> make
> LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LINUX_DIR/tools/thermal/lib
> 
> cd $LINUX_DIR/tools
> make thermometer
> 
> 
> 
> Then change directory:
> 
> cd $LINUX_DIR/tools/thermal/thermometer
> 
> 
> Run the tool:
> 
> ./thermometer -o out -c t.conf -l DEBUG -- <my_command>
> 
> 
> The content of the configuration file t.conf is:
> 
> thermal-zones = (
>           {    name = "cpu[0_9].*-thermal";
>         polling = 100; }
>       )
> 
> All the captured data will be in the 'out' directory
> 
> For 'my_command', I suggest to use a script containing:
> 
> sleep 10; dhrystone -t 1 -r 120; sleep 10
> 
> If you need the dhrystone binary, let me know.
> 
> The thermal zone device tree configuration should be changed to use a 65°C
> passive trip point instead of 100°C (and the kernel setup with the step
> wise governor as default).
> 
> The resulting figure from the temperature should show a flat temperature
> figure during 10 seconds, then the temperature increasing until reaching
> the temperature threshold of 65°C, the temperature stabilizing around it,
> then followed by a temperature decreasing when the test finishes.
> 
> If the temperature does not reach the limit, decrease the trip point
> temperature or increase the dhrystone duration (the -r 120 option)
> 
> At this point, you should the test with and without pm runtime but in order
> to have consistent results, you should wait ~20 minutes between two tests.
> 
> The shape of the figures will give the immediate information about how the
> mitigation vs thermal sensor vs cooling device behave.
> 
> Additionally, you can enable the thermal DEBUGFS option and add the
> collected information statistics from /sys/kernel/debug/thermal/*** in the
> results.
> 
> 
> Hope that helps

Thank you for all these details. I'll have a look on it but starting with
Monday as I won't have access to setup in the following days.

Thank you,
Claudiu

> 
> 
> 
> 
> 
>
Claudiu Feb. 3, 2025, 4:30 p.m. UTC | #16
Hi, Daniel,

On 31.01.2025 01:16, Claudiu Beznea wrote:
> Hi, Daniel,
> 
> On 31.01.2025 00:33, Daniel Lezcano wrote:
>> On 30/01/2025 21:53, Claudiu Beznea wrote:
>>> Hi, Daniel,
>>>
>>> On 30.01.2025 19:24, Daniel Lezcano wrote:
>>>> On 30/01/2025 11:30, Claudiu Beznea wrote:
>>>>>
>>>>>
>>>>> On 30.01.2025 12:07, Daniel Lezcano wrote:
>>>>>> On Thu, Jan 30, 2025 at 11:08:03AM +0200, Claudiu Beznea wrote:
>>>>>>> Hi, Daniel,
>>>>
>>>> [ ... ]
>>>>
>>>>>>>> Would the IP need some cycles to capture the temperature accurately
>>>>>>>> after the
>>>>>>>> clock is enabled ?
>>>>>>>
>>>>>>> There is nothing about this mentioned about this in the HW manual of the
>>>>>>> RZ/G3S SoC. The only points mentioned are as described in the driver
>>>>>>> code:
>>>>>>> - wait at least 3us after each IIO channel read
>>>>>>> - wait at least 30us after enabling the sensor
>>>>>>> - wait at least 50us after setting OE bit in TSU_SM
>>>>>>>
>>>>>>> For this I chose to have it implemented as proposed.
>>>>>>
>>>>>> IMO, disabling/enabling the clock between two reads through the pm
>>>>>> runtime may
>>>>>> not be a good thing, especially if the system enters a thermal situation
>>>>>> where
>>>>>> it has to mitigate.
>>>>>>
>>>>>> Without any testing capturing the temperatures and compare between the
>>>>>> always-on
>>>>>> and on/off, it is hard to say if it is true or not. Up to you to test
>>>>>> that or
>>>>>> not. If you think it is fine, then let's go with it.
>>>>>
>>>>> I tested it with and w/o the runtime PM and on/off support (so, everything
>>>>> ON from the probe) and the reported temperature values were similar.
>>>>
>>>>
>>>> Did you remove the roundup to 0.5°C ?
>>>
>>> I did the testing as suggested and, this time, collected results and
>>> compared side by side. I read the temperature for 10 minutes, 60 seconds
>>> after the Linux prompt showed up. There is, indeed, a slight difference b/w
>>> the 2 cases.
>>>
>>> When the runtime PM doesn't touch the clocks on read the reported
>>> temperature varies b/w 53-54 degrees while when the runtime PM
>>> enables/disables the clocks a single read reported 55 degrees, the rest
>>> reported 54 degrees.
>>>
>>> I plotted the results side by side here:
>>> https://i2.paste.pics/f07eaeddc2ccc3c6695fe5056b52f4a2.png?
>>> trs=0a0eaab99bb59ebcb10051eb298f437c7cd50c16437a87392aebc16cd9013e18&rand=vWXm2VTrbt
>>>
>>> Please let me know how do you consider it.
>>
> 
> After sending this to you I figured it out that precision is lost somewhere
> so I re-tested it with the following diff (multiplied parts of the equation
> with 1000):
> 
> diff --git a/drivers/thermal/renesas/rzg3s_thermal.c
> b/drivers/thermal/renesas/rzg3s_thermal.c
> index 6719f9ca05eb..84e18ff69d7c 100644
> --- a/drivers/thermal/renesas/rzg3s_thermal.c
> +++ b/drivers/thermal/renesas/rzg3s_thermal.c
> @@ -83,7 +83,7 @@ static int rzg3s_thermal_get_temp(struct
> thermal_zone_device *tz, int *temp)
>         }
> 
>         ret = 0;
> -       ts_code_ave = DIV_ROUND_CLOSEST(ts_code_ave, TSU_READ_STEPS);
> +       ts_code_ave = DIV_ROUND_CLOSEST(MCELSIUS(ts_code_ave), TSU_READ_STEPS);
> 
>         /*
>          * According to the HW manual (section 40.4.4 Procedure for
> Measuring the Temperature)
> @@ -91,11 +91,8 @@ static int rzg3s_thermal_get_temp(struct
> thermal_zone_device *tz, int *temp)
>          *
>          * Tj = (ts_code_ave - priv->calib0) * 165 / (priv->calib0 -
> priv->calib1) - 40
>          */
> -       *temp = DIV_ROUND_CLOSEST((ts_code_ave - priv->calib1) * 165,
> -                                 (priv->calib0 - priv->calib1)) - 40;
> -
> -       /* Report it in mili degrees Celsius and round it up to 0.5 degrees
> Celsius. */
> -       *temp = roundup(MCELSIUS(*temp), 500);
> +       *temp = DIV_ROUND_CLOSEST((u64)(ts_code_ave -
> MCELSIUS(priv->calib1)) * MCELSIUS(165),
> +                                 MCELSIUS(priv->calib0 - priv->calib1)) -
> MCELSIUS(40);
> 
>  rpm_put:
>         pm_runtime_mark_last_busy(dev);
> 
> With this, the results seems similar b/w runtime PM and no runtime PM cases.
> 
> The tests were executed after the board was off for few hours. The
> first test was with runtime PM suspend/resume on each read. Then the board
> was rebooted and re-run the test w/o runtime PM suspend/resume on reads.
> 
> Figure with results is here:
> https://i2.paste.pics/5f353a4f04b07b4bead3086624aba23f.png?trs=0a0eaab99bb59ebcb10051eb298f437c7cd50c16437a87392aebc16cd9013e18&rand=5n34QNjWID
> 
> 
>> Thanks for taking the time to provide a figure
>>
>> Testing thermal can be painful because it should be done under certain
>> conditions.
>>
>> I guess there was no particular work load on the system when running the
>> tests.
> 
> No load, indeed.
> 
>>
>> At the first glance, it seems, without the pm runtime, the measurement is
>> more precise as it catches more thermal changes. But the test does not give
>> information about the thermal behavior under stress. And one second
>> sampling is too long to really figure it out.
>>
>> In the kernel source tree, there is a tool to read the temperature in an
>> optimized manner, you may want to use it to read the temperature at a
>> higher rate. It is located in tools/thermal/thermometer
>>
>> Compiling is a bit fuzzy ATM, so until it is fixed, here are the steps:
>>
>> (you should install libconfig-dev and libnl-3-dev packages).
>>
>> cd $LINUX_DIR/tools/thermal/lib
>> make
>> LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LINUX_DIR/tools/thermal/lib
>>
>> cd $LINUX_DIR/tools
>> make thermometer
>>
>>
>>
>> Then change directory:
>>
>> cd $LINUX_DIR/tools/thermal/thermometer
>>
>>
>> Run the tool:
>>
>> ./thermometer -o out -c t.conf -l DEBUG -- <my_command>
>>
>>
>> The content of the configuration file t.conf is:
>>
>> thermal-zones = (
>>           {    name = "cpu[0_9].*-thermal";
>>         polling = 100; }
>>       )
>>
>> All the captured data will be in the 'out' directory
>>
>> For 'my_command', I suggest to use a script containing:
>>
>> sleep 10; dhrystone -t 1 -r 120; sleep 10
>>
>> If you need the dhrystone binary, let me know.
>>
>> The thermal zone device tree configuration should be changed to use a 65°C
>> passive trip point instead of 100°C (and the kernel setup with the step
>> wise governor as default).
>>>> The resulting figure from the temperature should show a flat temperature
>> figure during 10 seconds, then the temperature increasing until reaching
>> the temperature threshold of 65°C, the temperature stabilizing around it,
>> then followed by a temperature decreasing when the test finishes.
>>
>> If the temperature does not reach the limit, decrease the trip point
>> temperature or increase the dhrystone duration (the -r 120 option)
>>
>> At this point, you should the test with and without pm runtime but in order
>> to have consistent results, you should wait ~20 minutes between two tests.
>>
>> The shape of the figures will give the immediate information about how the
>> mitigation vs thermal sensor vs cooling device behave.
>>
>> Additionally, you can enable the thermal DEBUGFS option and add the
>> collected information statistics from /sys/kernel/debug/thermal/*** in the
>> results.
>>
>>
>> Hope that helps
> 
> Thank you for all these details. I'll have a look on it but starting with
> Monday as I won't have access to setup in the following days.

I re-run the tests with the thermometer application that you indicated.

This is the conf I used:

thermal-zones = (
          {    name = "cpu-thermal";
        polling = 100; }
      )

The used device tree is as follows:

	thermal-zones {
		cpu_thermal: cpu-thermal {
			polling-delay-passive = <250>;
			polling-delay = <1000>;
			thermal-sensors = <&tsu>;
			sustainable-power = <423>;

			cooling-maps {
				map0 {
					trip = <&target>;
					cooling-device = <&cpu0 0 2>;
					contribution = <1024>;
				};
			};

			trips {
				sensor_crit: sensor-crit {
					temperature = <125000>;
					hysteresis = <1000>;
					type = "critical";
				};

				target: trip-point {
					temperature = <56000>;
					hysteresis = <1000>;
					type = "passive";
				};
			};
		};
	};

I executed with:

time ./thermometer -o out -l DEBUG -c t.conf -- ./test.sh

where test.sh is:

sleep 10; time echo 100000000 | dhry; sleep 10

My dhry has no -t or -r option so I passed the number of runs checking that
the test executes for 120 seconds.

I executed first the thermometer application with runtime PM suspend/resume
on temperature read, then wait for ~25 minutes then executed the tests w/o
runtime PM suspend/resume on temperature read.

The output of the thermometer application is as follows:

- runtime PM suspend/resume when reading: https://p.fr33tux.org/5bbb4d
- no runtime PM suspend/resumes when reading: https://p.fr33tux.org/c9a7cf
- full console log while testing: https://p.fr33tux.org/ace3a6

I also plotted the results for visual comparison as follows:

1/ RPM + no-RPM (continuous time base):
https://i2.paste.pics/c3956d15a7a889a9e1ee5b60529b42f6.png?rand=axUi4IsA1C

2/ RPM + no-RPM (first samples, for side by side comparison):
https://i2.paste.pics/e2a30af590e28a091415e3afb74eb0ac.png?rand=XQhoxUe1EM

3/ RPM only:
https://i2.paste.pics/977d4de070b8e2a19694ae2b4ba3c5fc.png?rand=IfZOkonRd9

4/ no-RPM only:
https://i2.paste.pics/5d6e3d0a124e5e4b3b8b397d7b5b057c.png?rand=UaigrMRNvy

Please let me know how your input.

Thank you,
Claudiu


> 
> Thank you,
> Claudiu
> 
>>
>>
>>
>>
>>
>>
>
Jonathan Cameron Feb. 4, 2025, 2:33 p.m. UTC | #17
On Wed, 15 Jan 2025 16:42:37 +0100
Ulf Hansson <ulf.hansson@linaro.org> wrote:

> On Thu, 9 Jan 2025 at 18:34, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
> >
> >
> > Ulf,
> >
> > can you have a look at this particular patch please ?
> >
> > Perhaps this scenario already happened in the past and there is an
> > alternative to fix it instead of this proposed change  
> 
> I think the patch makes sense.
> 
> If there is a PM domain that is attached to the device that is
> managing the clocks for the thermal zone, the detach procedure
> certainly needs to be well controlled/synchronized.
> 
Does this boil down to the same issue as
https://lore.kernel.org/linux-iio/20250128105908.0000353b@huawei.com/
?

Just to point out there is another way like is done in i2c:
https://elixir.bootlin.com/linux/v6.12.6/source/drivers/i2c/i2c-core-base.c#L630

Register a devres_release_group() in bus probe() and release it before
the dev_pm_domain_detach() call.  That keeps the detach procedure well
controlled and synchronized as it is entirely in control of the driver.

That IIO thread has kind of died out for now though with no resolution
so far.

Jonathan


> >
> >
> > On 03/01/2025 17:38, Claudiu wrote:  
> > > From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
> > >
> > > On the Renesas RZ/G3S (and other Renesas SoCs, e.g., RZ/G2{L, LC, UL}),
> > > clocks are managed through PM domains. These PM domains, registered on
> > > behalf of the clock controller driver, are configured with
> > > GENPD_FLAG_PM_CLK. In most of the Renesas drivers used by RZ SoCs, the
> > > clocks are enabled/disabled using runtime PM APIs.
> > >
> > > During probe, devices are attached to the PM domain controlling their
> > > clocks. Similarly, during removal, devices are detached from the PM domain.
> > >
> > > The detachment call stack is as follows:
> > >
> > > device_driver_detach() ->
> > >    device_release_driver_internal() ->
> > >      __device_release_driver() ->
> > >        device_remove() ->
> > >          platform_remove() ->
> > >         dev_pm_domain_detach()
> > >
> > > In the upcoming Renesas RZ/G3S thermal driver, the
> > > struct thermal_zone_device_ops::change_mode API is implemented to
> > > start/stop the thermal sensor unit. Register settings are updated within
> > > the change_mode API.
> > >
> > > In case devres helpers are used for thermal zone register/unregister the
> > > struct thermal_zone_device_ops::change_mode API is invoked when the
> > > driver is unbound. The identified call stack is as follows:
> > >
> > > device_driver_detach() ->
> > >    device_release_driver_internal() ->
> > >      device_unbind_cleanup() ->
> > >        devres_release_all() ->
> > >          devm_thermal_of_zone_release() ->
> > >         thermal_zone_device_disable() ->
> > >           thermal_zone_device_set_mode() ->
> > >             rzg3s_thermal_change_mode()
> > >
> > > The device_unbind_cleanup() function is called after the thermal device is
> > > detached from the PM domain (via dev_pm_domain_detach()).
> > >
> > > The rzg3s_thermal_change_mode() implementation calls
> > > pm_runtime_resume_and_get()/pm_runtime_put_autosuspend() before/after
> > > accessing the registers. However, during the unbind scenario, the
> > > devm_thermal_of_zone_release() is invoked after dev_pm_domain_detach().
> > > Consequently, the clocks are not enabled, as the device is removed from
> > > the PM domain at this time, leading to an Asynchronous SError Interrupt.
> > > The system cannot be used after this.
> > >
> > > Add thermal_of_zone_register()/thermal_of_zone_unregister(). These will
> > > be used in the upcomming RZ/G3S thermal driver.
> > >
> > > Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>  
> 
> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
> 
> Kind regards
> Uffe
> 
> > > ---
> > >   drivers/thermal/thermal_of.c |  8 +++++---
> > >   include/linux/thermal.h      | 14 ++++++++++++++
> > >   2 files changed, 19 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
> > > index fab11b98ca49..8fc35d20db60 100644
> > > --- a/drivers/thermal/thermal_of.c
> > > +++ b/drivers/thermal/thermal_of.c
> > > @@ -329,11 +329,12 @@ static bool thermal_of_should_bind(struct thermal_zone_device *tz,
> > >    *
> > >    * @tz: a pointer to the thermal zone structure
> > >    */
> > > -static void thermal_of_zone_unregister(struct thermal_zone_device *tz)
> > > +void thermal_of_zone_unregister(struct thermal_zone_device *tz)
> > >   {
> > >       thermal_zone_device_disable(tz);
> > >       thermal_zone_device_unregister(tz);
> > >   }
> > > +EXPORT_SYMBOL_GPL(thermal_of_zone_unregister);
> > >
> > >   /**
> > >    * thermal_of_zone_register - Register a thermal zone with device node
> > > @@ -355,8 +356,8 @@ static void thermal_of_zone_unregister(struct thermal_zone_device *tz)
> > >    *  - ENOMEM: if one structure can not be allocated
> > >    *  - Other negative errors are returned by the underlying called functions
> > >    */
> > > -static struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
> > > -                                                         const struct thermal_zone_device_ops *ops)
> > > +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
> > > +                                                  const struct thermal_zone_device_ops *ops)
> > >   {
> > >       struct thermal_zone_device_ops of_ops = *ops;
> > >       struct thermal_zone_device *tz;
> > > @@ -429,6 +430,7 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
> > >
> > >       return ERR_PTR(ret);
> > >   }
> > > +EXPORT_SYMBOL_GPL(thermal_of_zone_register);
> > >
> > >   static void devm_thermal_of_zone_release(struct device *dev, void *res)
> > >   {
> > > diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> > > index 69f9bedd0ee8..adbb4092a064 100644
> > > --- a/include/linux/thermal.h
> > > +++ b/include/linux/thermal.h
> > > @@ -195,13 +195,23 @@ struct thermal_zone_params {
> > >
> > >   /* Function declarations */
> > >   #ifdef CONFIG_THERMAL_OF
> > > +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
> > > +                                                  const struct thermal_zone_device_ops *ops);
> > >   struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data,
> > >                                                         const struct thermal_zone_device_ops *ops);
> > >
> > > +void thermal_of_zone_unregister(struct thermal_zone_device *tz);
> > >   void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_device *tz);
> > >
> > >   #else
> > >
> > > +static inline
> > > +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
> > > +                                                  const struct thermal_zone_device_ops *ops)
> > > +{
> > > +     return ERR_PTR(-ENOTSUPP);
> > > +}
> > > +
> > >   static inline
> > >   struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data,
> > >                                                         const struct thermal_zone_device_ops *ops)
> > > @@ -209,6 +219,10 @@ struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, in
> > >       return ERR_PTR(-ENOTSUPP);
> > >   }
> > >
> > > +static inline void thermal_of_zone_unregister(struct thermal_zone_device *tz)
> > > +{
> > > +}
> > > +
> > >   static inline void devm_thermal_of_zone_unregister(struct device *dev,
> > >                                                  struct thermal_zone_device *tz)
> > >   {  
> >
> >
> > --
> > <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
> >
> > Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
> > <http://twitter.com/#!/linaroorg> Twitter |
> > <http://www.linaro.org/linaro-blog/> Blog  
>
Claudiu Feb. 5, 2025, 8:33 a.m. UTC | #18
Hi, Jonathan,

On 04.02.2025 16:33, Jonathan Cameron wrote:
> On Wed, 15 Jan 2025 16:42:37 +0100
> Ulf Hansson <ulf.hansson@linaro.org> wrote:
> 
>> On Thu, 9 Jan 2025 at 18:34, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:
>>>
>>>
>>> Ulf,
>>>
>>> can you have a look at this particular patch please ?
>>>
>>> Perhaps this scenario already happened in the past and there is an
>>> alternative to fix it instead of this proposed change  
>>
>> I think the patch makes sense.
>>
>> If there is a PM domain that is attached to the device that is
>> managing the clocks for the thermal zone, the detach procedure
>> certainly needs to be well controlled/synchronized.
>>
> Does this boil down to the same issue as
> https://lore.kernel.org/linux-iio/20250128105908.0000353b@huawei.com/
> ?

Yes, as described in the cover letter.

> 
> Just to point out there is another way like is done in i2c:
> https://elixir.bootlin.com/linux/v6.12.6/source/drivers/i2c/i2c-core-base.c#L630
> 
> Register a devres_release_group() in bus probe() and release it before
> the dev_pm_domain_detach() call.  That keeps the detach procedure well
> controlled and synchronized as it is entirely in control of the driver.

From the IIO thread I got that Ulf doesn't consider it a good approach for
all the cases.

Thank you,
Claudiu

> 
> That IIO thread has kind of died out for now though with no resolution
> so far.
> 
> Jonathan
> 
> 
>>>
>>>
>>> On 03/01/2025 17:38, Claudiu wrote:  
>>>> From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
>>>>
>>>> On the Renesas RZ/G3S (and other Renesas SoCs, e.g., RZ/G2{L, LC, UL}),
>>>> clocks are managed through PM domains. These PM domains, registered on
>>>> behalf of the clock controller driver, are configured with
>>>> GENPD_FLAG_PM_CLK. In most of the Renesas drivers used by RZ SoCs, the
>>>> clocks are enabled/disabled using runtime PM APIs.
>>>>
>>>> During probe, devices are attached to the PM domain controlling their
>>>> clocks. Similarly, during removal, devices are detached from the PM domain.
>>>>
>>>> The detachment call stack is as follows:
>>>>
>>>> device_driver_detach() ->
>>>>    device_release_driver_internal() ->
>>>>      __device_release_driver() ->
>>>>        device_remove() ->
>>>>          platform_remove() ->
>>>>         dev_pm_domain_detach()
>>>>
>>>> In the upcoming Renesas RZ/G3S thermal driver, the
>>>> struct thermal_zone_device_ops::change_mode API is implemented to
>>>> start/stop the thermal sensor unit. Register settings are updated within
>>>> the change_mode API.
>>>>
>>>> In case devres helpers are used for thermal zone register/unregister the
>>>> struct thermal_zone_device_ops::change_mode API is invoked when the
>>>> driver is unbound. The identified call stack is as follows:
>>>>
>>>> device_driver_detach() ->
>>>>    device_release_driver_internal() ->
>>>>      device_unbind_cleanup() ->
>>>>        devres_release_all() ->
>>>>          devm_thermal_of_zone_release() ->
>>>>         thermal_zone_device_disable() ->
>>>>           thermal_zone_device_set_mode() ->
>>>>             rzg3s_thermal_change_mode()
>>>>
>>>> The device_unbind_cleanup() function is called after the thermal device is
>>>> detached from the PM domain (via dev_pm_domain_detach()).
>>>>
>>>> The rzg3s_thermal_change_mode() implementation calls
>>>> pm_runtime_resume_and_get()/pm_runtime_put_autosuspend() before/after
>>>> accessing the registers. However, during the unbind scenario, the
>>>> devm_thermal_of_zone_release() is invoked after dev_pm_domain_detach().
>>>> Consequently, the clocks are not enabled, as the device is removed from
>>>> the PM domain at this time, leading to an Asynchronous SError Interrupt.
>>>> The system cannot be used after this.
>>>>
>>>> Add thermal_of_zone_register()/thermal_of_zone_unregister(). These will
>>>> be used in the upcomming RZ/G3S thermal driver.
>>>>
>>>> Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>  
>>
>> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
>>
>> Kind regards
>> Uffe
>>
>>>> ---
>>>>   drivers/thermal/thermal_of.c |  8 +++++---
>>>>   include/linux/thermal.h      | 14 ++++++++++++++
>>>>   2 files changed, 19 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
>>>> index fab11b98ca49..8fc35d20db60 100644
>>>> --- a/drivers/thermal/thermal_of.c
>>>> +++ b/drivers/thermal/thermal_of.c
>>>> @@ -329,11 +329,12 @@ static bool thermal_of_should_bind(struct thermal_zone_device *tz,
>>>>    *
>>>>    * @tz: a pointer to the thermal zone structure
>>>>    */
>>>> -static void thermal_of_zone_unregister(struct thermal_zone_device *tz)
>>>> +void thermal_of_zone_unregister(struct thermal_zone_device *tz)
>>>>   {
>>>>       thermal_zone_device_disable(tz);
>>>>       thermal_zone_device_unregister(tz);
>>>>   }
>>>> +EXPORT_SYMBOL_GPL(thermal_of_zone_unregister);
>>>>
>>>>   /**
>>>>    * thermal_of_zone_register - Register a thermal zone with device node
>>>> @@ -355,8 +356,8 @@ static void thermal_of_zone_unregister(struct thermal_zone_device *tz)
>>>>    *  - ENOMEM: if one structure can not be allocated
>>>>    *  - Other negative errors are returned by the underlying called functions
>>>>    */
>>>> -static struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
>>>> -                                                         const struct thermal_zone_device_ops *ops)
>>>> +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
>>>> +                                                  const struct thermal_zone_device_ops *ops)
>>>>   {
>>>>       struct thermal_zone_device_ops of_ops = *ops;
>>>>       struct thermal_zone_device *tz;
>>>> @@ -429,6 +430,7 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
>>>>
>>>>       return ERR_PTR(ret);
>>>>   }
>>>> +EXPORT_SYMBOL_GPL(thermal_of_zone_register);
>>>>
>>>>   static void devm_thermal_of_zone_release(struct device *dev, void *res)
>>>>   {
>>>> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
>>>> index 69f9bedd0ee8..adbb4092a064 100644
>>>> --- a/include/linux/thermal.h
>>>> +++ b/include/linux/thermal.h
>>>> @@ -195,13 +195,23 @@ struct thermal_zone_params {
>>>>
>>>>   /* Function declarations */
>>>>   #ifdef CONFIG_THERMAL_OF
>>>> +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
>>>> +                                                  const struct thermal_zone_device_ops *ops);
>>>>   struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data,
>>>>                                                         const struct thermal_zone_device_ops *ops);
>>>>
>>>> +void thermal_of_zone_unregister(struct thermal_zone_device *tz);
>>>>   void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_device *tz);
>>>>
>>>>   #else
>>>>
>>>> +static inline
>>>> +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
>>>> +                                                  const struct thermal_zone_device_ops *ops)
>>>> +{
>>>> +     return ERR_PTR(-ENOTSUPP);
>>>> +}
>>>> +
>>>>   static inline
>>>>   struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data,
>>>>                                                         const struct thermal_zone_device_ops *ops)
>>>> @@ -209,6 +219,10 @@ struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, in
>>>>       return ERR_PTR(-ENOTSUPP);
>>>>   }
>>>>
>>>> +static inline void thermal_of_zone_unregister(struct thermal_zone_device *tz)
>>>> +{
>>>> +}
>>>> +
>>>>   static inline void devm_thermal_of_zone_unregister(struct device *dev,
>>>>                                                  struct thermal_zone_device *tz)
>>>>   {  
>>>
>>>
>>> --
>>> <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
>>>
>>> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
>>> <http://twitter.com/#!/linaroorg> Twitter |
>>> <http://www.linaro.org/linaro-blog/> Blog  
>>
>
Jonathan Cameron Feb. 5, 2025, 11:39 a.m. UTC | #19
On Wed, 5 Feb 2025 10:33:39 +0200
Claudiu Beznea <claudiu.beznea@tuxon.dev> wrote:

> Hi, Jonathan,
> 
> On 04.02.2025 16:33, Jonathan Cameron wrote:
> > On Wed, 15 Jan 2025 16:42:37 +0100
> > Ulf Hansson <ulf.hansson@linaro.org> wrote:
> >   
> >> On Thu, 9 Jan 2025 at 18:34, Daniel Lezcano <daniel.lezcano@linaro.org> wrote:  
> >>>
> >>>
> >>> Ulf,
> >>>
> >>> can you have a look at this particular patch please ?
> >>>
> >>> Perhaps this scenario already happened in the past and there is an
> >>> alternative to fix it instead of this proposed change    
> >>
> >> I think the patch makes sense.
> >>
> >> If there is a PM domain that is attached to the device that is
> >> managing the clocks for the thermal zone, the detach procedure
> >> certainly needs to be well controlled/synchronized.
> >>  
> > Does this boil down to the same issue as
> > https://lore.kernel.org/linux-iio/20250128105908.0000353b@huawei.com/
> > ?  
> 
> Yes, as described in the cover letter.
> 
> > 
> > Just to point out there is another way like is done in i2c:
> > https://elixir.bootlin.com/linux/v6.12.6/source/drivers/i2c/i2c-core-base.c#L630
> > 
> > Register a devres_release_group() in bus probe() and release it before
> > the dev_pm_domain_detach() call.  That keeps the detach procedure well
> > controlled and synchronized as it is entirely in control of the driver.  
> 
> From the IIO thread I got that Ulf doesn't consider it a good approach for
> all the cases.
> 

Maybe true (I'll let Ulf comment!) and I think the solution proposed here is
not great because it is putting the cost on every driver rather than solving
the basic problem in one place (and there is clear precedence in other
bus subsystems). Ideally I'd like more people to get involved in that discussion.

Jonathan



> Thank you,
> Claudiu
> 
> > 
> > That IIO thread has kind of died out for now though with no resolution
> > so far.
> > 
> > Jonathan
> > 
> >   
> >>>
> >>>
> >>> On 03/01/2025 17:38, Claudiu wrote:    
> >>>> From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
> >>>>
> >>>> On the Renesas RZ/G3S (and other Renesas SoCs, e.g., RZ/G2{L, LC, UL}),
> >>>> clocks are managed through PM domains. These PM domains, registered on
> >>>> behalf of the clock controller driver, are configured with
> >>>> GENPD_FLAG_PM_CLK. In most of the Renesas drivers used by RZ SoCs, the
> >>>> clocks are enabled/disabled using runtime PM APIs.
> >>>>
> >>>> During probe, devices are attached to the PM domain controlling their
> >>>> clocks. Similarly, during removal, devices are detached from the PM domain.
> >>>>
> >>>> The detachment call stack is as follows:
> >>>>
> >>>> device_driver_detach() ->
> >>>>    device_release_driver_internal() ->
> >>>>      __device_release_driver() ->
> >>>>        device_remove() ->
> >>>>          platform_remove() ->
> >>>>         dev_pm_domain_detach()
> >>>>
> >>>> In the upcoming Renesas RZ/G3S thermal driver, the
> >>>> struct thermal_zone_device_ops::change_mode API is implemented to
> >>>> start/stop the thermal sensor unit. Register settings are updated within
> >>>> the change_mode API.
> >>>>
> >>>> In case devres helpers are used for thermal zone register/unregister the
> >>>> struct thermal_zone_device_ops::change_mode API is invoked when the
> >>>> driver is unbound. The identified call stack is as follows:
> >>>>
> >>>> device_driver_detach() ->
> >>>>    device_release_driver_internal() ->
> >>>>      device_unbind_cleanup() ->
> >>>>        devres_release_all() ->
> >>>>          devm_thermal_of_zone_release() ->
> >>>>         thermal_zone_device_disable() ->
> >>>>           thermal_zone_device_set_mode() ->
> >>>>             rzg3s_thermal_change_mode()
> >>>>
> >>>> The device_unbind_cleanup() function is called after the thermal device is
> >>>> detached from the PM domain (via dev_pm_domain_detach()).
> >>>>
> >>>> The rzg3s_thermal_change_mode() implementation calls
> >>>> pm_runtime_resume_and_get()/pm_runtime_put_autosuspend() before/after
> >>>> accessing the registers. However, during the unbind scenario, the
> >>>> devm_thermal_of_zone_release() is invoked after dev_pm_domain_detach().
> >>>> Consequently, the clocks are not enabled, as the device is removed from
> >>>> the PM domain at this time, leading to an Asynchronous SError Interrupt.
> >>>> The system cannot be used after this.
> >>>>
> >>>> Add thermal_of_zone_register()/thermal_of_zone_unregister(). These will
> >>>> be used in the upcomming RZ/G3S thermal driver.
> >>>>
> >>>> Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>    
> >>
> >> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
> >>
> >> Kind regards
> >> Uffe
> >>  
> >>>> ---
> >>>>   drivers/thermal/thermal_of.c |  8 +++++---
> >>>>   include/linux/thermal.h      | 14 ++++++++++++++
> >>>>   2 files changed, 19 insertions(+), 3 deletions(-)
> >>>>
> >>>> diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
> >>>> index fab11b98ca49..8fc35d20db60 100644
> >>>> --- a/drivers/thermal/thermal_of.c
> >>>> +++ b/drivers/thermal/thermal_of.c
> >>>> @@ -329,11 +329,12 @@ static bool thermal_of_should_bind(struct thermal_zone_device *tz,
> >>>>    *
> >>>>    * @tz: a pointer to the thermal zone structure
> >>>>    */
> >>>> -static void thermal_of_zone_unregister(struct thermal_zone_device *tz)
> >>>> +void thermal_of_zone_unregister(struct thermal_zone_device *tz)
> >>>>   {
> >>>>       thermal_zone_device_disable(tz);
> >>>>       thermal_zone_device_unregister(tz);
> >>>>   }
> >>>> +EXPORT_SYMBOL_GPL(thermal_of_zone_unregister);
> >>>>
> >>>>   /**
> >>>>    * thermal_of_zone_register - Register a thermal zone with device node
> >>>> @@ -355,8 +356,8 @@ static void thermal_of_zone_unregister(struct thermal_zone_device *tz)
> >>>>    *  - ENOMEM: if one structure can not be allocated
> >>>>    *  - Other negative errors are returned by the underlying called functions
> >>>>    */
> >>>> -static struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
> >>>> -                                                         const struct thermal_zone_device_ops *ops)
> >>>> +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
> >>>> +                                                  const struct thermal_zone_device_ops *ops)
> >>>>   {
> >>>>       struct thermal_zone_device_ops of_ops = *ops;
> >>>>       struct thermal_zone_device *tz;
> >>>> @@ -429,6 +430,7 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
> >>>>
> >>>>       return ERR_PTR(ret);
> >>>>   }
> >>>> +EXPORT_SYMBOL_GPL(thermal_of_zone_register);
> >>>>
> >>>>   static void devm_thermal_of_zone_release(struct device *dev, void *res)
> >>>>   {
> >>>> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> >>>> index 69f9bedd0ee8..adbb4092a064 100644
> >>>> --- a/include/linux/thermal.h
> >>>> +++ b/include/linux/thermal.h
> >>>> @@ -195,13 +195,23 @@ struct thermal_zone_params {
> >>>>
> >>>>   /* Function declarations */
> >>>>   #ifdef CONFIG_THERMAL_OF
> >>>> +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
> >>>> +                                                  const struct thermal_zone_device_ops *ops);
> >>>>   struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data,
> >>>>                                                         const struct thermal_zone_device_ops *ops);
> >>>>
> >>>> +void thermal_of_zone_unregister(struct thermal_zone_device *tz);
> >>>>   void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_device *tz);
> >>>>
> >>>>   #else
> >>>>
> >>>> +static inline
> >>>> +struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
> >>>> +                                                  const struct thermal_zone_device_ops *ops)
> >>>> +{
> >>>> +     return ERR_PTR(-ENOTSUPP);
> >>>> +}
> >>>> +
> >>>>   static inline
> >>>>   struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data,
> >>>>                                                         const struct thermal_zone_device_ops *ops)
> >>>> @@ -209,6 +219,10 @@ struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, in
> >>>>       return ERR_PTR(-ENOTSUPP);
> >>>>   }
> >>>>
> >>>> +static inline void thermal_of_zone_unregister(struct thermal_zone_device *tz)
> >>>> +{
> >>>> +}
> >>>> +
> >>>>   static inline void devm_thermal_of_zone_unregister(struct device *dev,
> >>>>                                                  struct thermal_zone_device *tz)
> >>>>   {    
> >>>
> >>>
> >>> --
> >>> <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
> >>>
> >>> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
> >>> <http://twitter.com/#!/linaroorg> Twitter |
> >>> <http://www.linaro.org/linaro-blog/> Blog    
> >>  
> >   
>
diff mbox series

Patch

diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
index fab11b98ca49..8fc35d20db60 100644
--- a/drivers/thermal/thermal_of.c
+++ b/drivers/thermal/thermal_of.c
@@ -329,11 +329,12 @@  static bool thermal_of_should_bind(struct thermal_zone_device *tz,
  *
  * @tz: a pointer to the thermal zone structure
  */
-static void thermal_of_zone_unregister(struct thermal_zone_device *tz)
+void thermal_of_zone_unregister(struct thermal_zone_device *tz)
 {
 	thermal_zone_device_disable(tz);
 	thermal_zone_device_unregister(tz);
 }
+EXPORT_SYMBOL_GPL(thermal_of_zone_unregister);
 
 /**
  * thermal_of_zone_register - Register a thermal zone with device node
@@ -355,8 +356,8 @@  static void thermal_of_zone_unregister(struct thermal_zone_device *tz)
  *	- ENOMEM: if one structure can not be allocated
  *	- Other negative errors are returned by the underlying called functions
  */
-static struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
-							    const struct thermal_zone_device_ops *ops)
+struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
+						     const struct thermal_zone_device_ops *ops)
 {
 	struct thermal_zone_device_ops of_ops = *ops;
 	struct thermal_zone_device *tz;
@@ -429,6 +430,7 @@  static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
 
 	return ERR_PTR(ret);
 }
+EXPORT_SYMBOL_GPL(thermal_of_zone_register);
 
 static void devm_thermal_of_zone_release(struct device *dev, void *res)
 {
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 69f9bedd0ee8..adbb4092a064 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -195,13 +195,23 @@  struct thermal_zone_params {
 
 /* Function declarations */
 #ifdef CONFIG_THERMAL_OF
+struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
+						     const struct thermal_zone_device_ops *ops);
 struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data,
 							  const struct thermal_zone_device_ops *ops);
 
+void thermal_of_zone_unregister(struct thermal_zone_device *tz);
 void devm_thermal_of_zone_unregister(struct device *dev, struct thermal_zone_device *tz);
 
 #else
 
+static inline
+struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, int id, void *data,
+						     const struct thermal_zone_device_ops *ops)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
+
 static inline
 struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, int id, void *data,
 							  const struct thermal_zone_device_ops *ops)
@@ -209,6 +219,10 @@  struct thermal_zone_device *devm_thermal_of_zone_register(struct device *dev, in
 	return ERR_PTR(-ENOTSUPP);
 }
 
+static inline void thermal_of_zone_unregister(struct thermal_zone_device *tz)
+{
+}
+
 static inline void devm_thermal_of_zone_unregister(struct device *dev,
 						   struct thermal_zone_device *tz)
 {