diff mbox series

[v4,1/6] arm64: dts: rockchip: add thermal zones information on RK3588

Message ID 20240506-rk-dts-additions-v4-1-271023ddfd40@gmail.com (mailing list archive)
State New
Headers show
Series RK3588 and Rock 5B dts additions: thermal, OPP and fan | expand

Commit Message

Alexey Charkov May 6, 2024, 9:36 a.m. UTC
This includes the necessary device tree data to allow thermal
monitoring on RK3588(s) using the on-chip TSADC device, along with
trip points for automatic thermal management.

Each of the CPU clusters (one for the little cores and two for
the big cores) get a passive cooling trip point at 85C, which
will trigger DVFS throttling of the respective cluster upon
reaching a high temperature condition.

All zones also have a critical trip point at 115C, which will
trigger a reset.

Signed-off-by: Alexey Charkov <alchark@gmail.com>
---
 arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 147 ++++++++++++++++++++++++++++++
 1 file changed, 147 insertions(+)

Comments

Dragan Simic May 6, 2024, 9:52 a.m. UTC | #1
Hello Alexey,

Thanks for submitting the v4 of this series!  Please, see a couple
of my comments below.

On 2024-05-06 11:36, Alexey Charkov wrote:
> This includes the necessary device tree data to allow thermal
> monitoring on RK3588(s) using the on-chip TSADC device, along with
> trip points for automatic thermal management.
> 
> Each of the CPU clusters (one for the little cores and two for
> the big cores) get a passive cooling trip point at 85C, which
> will trigger DVFS throttling of the respective cluster upon
> reaching a high temperature condition.
> 
> All zones also have a critical trip point at 115C, which will
> trigger a reset.
> 
> Signed-off-by: Alexey Charkov <alchark@gmail.com>
> ---
>  arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 147 
> ++++++++++++++++++++++++++++++
>  1 file changed, 147 insertions(+)
> 
> diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> index 6ac5ac8b48ab..ef06c1f742e8 100644
> --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> @@ -10,6 +10,7 @@
>  #include <dt-bindings/reset/rockchip,rk3588-cru.h>
>  #include <dt-bindings/phy/phy.h>
>  #include <dt-bindings/ata/ahci.h>
> +#include <dt-bindings/thermal/thermal.h>
> 
>  / {
>  	compatible = "rockchip,rk3588";
> @@ -2368,6 +2369,152 @@ pwm15: pwm@febf0030 {
>  		status = "disabled";
>  	};
> 
> +	thermal_zones: thermal-zones {
> +		/* sensor near the center of the SoC */
> +		package_thermal: package-thermal {
> +			polling-delay-passive = <0>;
> +			polling-delay = <0>;
> +			thermal-sensors = <&tsadc 0>;
> +
> +			trips {
> +				package_crit: package-crit {
> +					temperature = <115000>;
> +					hysteresis = <0>;
> +					type = "critical";
> +				};
> +			};
> +		};
> +
> +		/* sensor between A76 cores 0 and 1 */
> +		bigcore0_thermal: bigcore0-thermal {
> +			polling-delay-passive = <100>;
> +			polling-delay = <0>;
> +			thermal-sensors = <&tsadc 1>;
> +
> +			trips {
> +				bigcore0_alert: bigcore0-alert {
> +					temperature = <85000>;
> +					hysteresis = <2000>;
> +					type = "passive";
> +				};

Doesn't removing the second passive trip, which was present in the v3,
result in confusing the IPA governor?

> +				bigcore0_crit: bigcore0-crit {
> +					temperature = <115000>;
> +					hysteresis = <0>;
> +					type = "critical";
> +				};
> +			};
> +			cooling-maps {
> +				map0 {
> +					trip = <&bigcore0_alert>;
> +					cooling-device =
> +						<&cpu_b0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_b1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> +				};
> +			};
> +		};
> +
> +		/* sensor between A76 cores 2 and 3 */
> +		bigcore2_thermal: bigcore2-thermal {
> +			polling-delay-passive = <100>;
> +			polling-delay = <0>;
> +			thermal-sensors = <&tsadc 2>;
> +
> +			trips {
> +				bigcore2_alert: bigcore2-alert {
> +					temperature = <85000>;
> +					hysteresis = <2000>;
> +					type = "passive";
> +				};

The same question about the second passive trip applies here, and to
all similar cases below.

> +				bigcore2_crit: bigcore2-crit {
> +					temperature = <115000>;
> +					hysteresis = <0>;
> +					type = "critical";
> +				};
> +			};
> +			cooling-maps {
> +				map0 {
> +					trip = <&bigcore2_alert>;
> +					cooling-device =
> +						<&cpu_b2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_b3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> +				};
> +			};
> +		};
> +
> +		/* sensor between the four A55 cores */
> +		little_core_thermal: littlecore-thermal {
> +			polling-delay-passive = <100>;
> +			polling-delay = <0>;
> +			thermal-sensors = <&tsadc 3>;
> +
> +			trips {
> +				littlecore_alert: littlecore-alert {
> +					temperature = <85000>;
> +					hysteresis = <2000>;
> +					type = "passive";
> +				};
> +				littlecore_crit: littlecore-crit {
> +					temperature = <115000>;
> +					hysteresis = <0>;
> +					type = "critical";
> +				};
> +			};
> +			cooling-maps {
> +				map0 {
> +					trip = <&littlecore_alert>;
> +					cooling-device =
> +						<&cpu_l0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_l1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_l2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_l3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> +				};
> +			};
> +		};
> +
> +		/* sensor near the PD_CENTER power domain */
> +		center_thermal: center-thermal {
> +			polling-delay-passive = <0>;
> +			polling-delay = <0>;
> +			thermal-sensors = <&tsadc 4>;
> +
> +			trips {
> +				center_crit: center-crit {
> +					temperature = <115000>;
> +					hysteresis = <0>;
> +					type = "critical";
> +				};
> +			};
> +		};
> +
> +		gpu_thermal: gpu-thermal {
> +			polling-delay-passive = <0>;
> +			polling-delay = <0>;
> +			thermal-sensors = <&tsadc 5>;
> +
> +			trips {
> +				gpu_crit: gpu-crit {
> +					temperature = <115000>;
> +					hysteresis = <0>;
> +					type = "critical";
> +				};
> +			};
> +		};
> +
> +		npu_thermal: npu-thermal {
> +			polling-delay-passive = <0>;
> +			polling-delay = <0>;
> +			thermal-sensors = <&tsadc 6>;
> +
> +			trips {
> +				npu_crit: npu-crit {
> +					temperature = <115000>;
> +					hysteresis = <0>;
> +					type = "critical";
> +				};
> +			};
> +		};
> +	};
> +
>  	tsadc: tsadc@fec00000 {
>  		compatible = "rockchip,rk3588-tsadc";
>  		reg = <0x0 0xfec00000 0x0 0x400>;
Alexey Charkov May 6, 2024, 10:29 a.m. UTC | #2
Hello Dragan,

On Mon, May 6, 2024 at 1:52 PM Dragan Simic <dsimic@manjaro.org> wrote:
>
> Hello Alexey,
>
> Thanks for submitting the v4 of this series!  Please, see a couple
> of my comments below.
>
> On 2024-05-06 11:36, Alexey Charkov wrote:
> > This includes the necessary device tree data to allow thermal
> > monitoring on RK3588(s) using the on-chip TSADC device, along with
> > trip points for automatic thermal management.
> >
> > Each of the CPU clusters (one for the little cores and two for
> > the big cores) get a passive cooling trip point at 85C, which
> > will trigger DVFS throttling of the respective cluster upon
> > reaching a high temperature condition.
> >
> > All zones also have a critical trip point at 115C, which will
> > trigger a reset.
> >
> > Signed-off-by: Alexey Charkov <alchark@gmail.com>
> > ---
> >  arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 147
> > ++++++++++++++++++++++++++++++
> >  1 file changed, 147 insertions(+)
> >
> > diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> > b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> > index 6ac5ac8b48ab..ef06c1f742e8 100644
> > --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> > +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> > @@ -10,6 +10,7 @@
> >  #include <dt-bindings/reset/rockchip,rk3588-cru.h>
> >  #include <dt-bindings/phy/phy.h>
> >  #include <dt-bindings/ata/ahci.h>
> > +#include <dt-bindings/thermal/thermal.h>
> >
> >  / {
> >       compatible = "rockchip,rk3588";
> > @@ -2368,6 +2369,152 @@ pwm15: pwm@febf0030 {
> >               status = "disabled";
> >       };
> >
> > +     thermal_zones: thermal-zones {
> > +             /* sensor near the center of the SoC */
> > +             package_thermal: package-thermal {
> > +                     polling-delay-passive = <0>;
> > +                     polling-delay = <0>;
> > +                     thermal-sensors = <&tsadc 0>;
> > +
> > +                     trips {
> > +                             package_crit: package-crit {
> > +                                     temperature = <115000>;
> > +                                     hysteresis = <0>;
> > +                                     type = "critical";
> > +                             };
> > +                     };
> > +             };
> > +
> > +             /* sensor between A76 cores 0 and 1 */
> > +             bigcore0_thermal: bigcore0-thermal {
> > +                     polling-delay-passive = <100>;
> > +                     polling-delay = <0>;
> > +                     thermal-sensors = <&tsadc 1>;
> > +
> > +                     trips {
> > +                             bigcore0_alert: bigcore0-alert {
> > +                                     temperature = <85000>;
> > +                                     hysteresis = <2000>;
> > +                                     type = "passive";
> > +                             };
>
> Doesn't removing the second passive trip, which was present in the v3,
> result in confusing the IPA governor?

Not really - it will just treat the missing trip as 0C for its initial
PID calculations [1], and will continually run the governor as opposed
to putting it to rest when the temperature is below the "switch on"
value [2].

Getting the power allocation governor to work optimally (i.e. to
provide tangible benefits over, say, stepwise) is much more involved
than defining an arbitrary switch-on trip point, as it requires an
accurate estimate of sustainable power per thermal zone (which we
don't have for RK3588 in general, and furthermore it must depend a lot
on a particular cooling setup), and ideally some userspace
power/thermal model capable of tuning the PID coefficients and
updating them via sysfs based on how a particular system accumulates
and dissipates heat under different load.

So after thinking over it for a while I decided that those extra
passive trips were rather self-deceiving, as they are only useful in
the context of a power allocation governor but we do not have any of
the other pieces in place for the power allocation governor to work.
Better not to clutter the device tree IMO.

Best regards,
Alexey

[1] https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/drivers/thermal/gov_power_allocator.c#n156
[2] https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/drivers/thermal/gov_power_allocator.c#n487
Dragan Simic May 6, 2024, 12:04 p.m. UTC | #3
On 2024-05-06 12:29, Alexey Charkov wrote:
> On Mon, May 6, 2024 at 1:52 PM Dragan Simic <dsimic@manjaro.org> wrote:
>> Thanks for submitting the v4 of this series!  Please, see a couple
>> of my comments below.
>> 
>> On 2024-05-06 11:36, Alexey Charkov wrote:
>> > This includes the necessary device tree data to allow thermal
>> > monitoring on RK3588(s) using the on-chip TSADC device, along with
>> > trip points for automatic thermal management.
>> >
>> > Each of the CPU clusters (one for the little cores and two for
>> > the big cores) get a passive cooling trip point at 85C, which
>> > will trigger DVFS throttling of the respective cluster upon
>> > reaching a high temperature condition.
>> >
>> > All zones also have a critical trip point at 115C, which will
>> > trigger a reset.
>> >
>> > Signed-off-by: Alexey Charkov <alchark@gmail.com>
>> > ---
>> >  arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 147
>> > ++++++++++++++++++++++++++++++
>> >  1 file changed, 147 insertions(+)
>> >
>> > diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
>> > b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
>> > index 6ac5ac8b48ab..ef06c1f742e8 100644
>> > --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
>> > +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
>> > @@ -10,6 +10,7 @@
>> >  #include <dt-bindings/reset/rockchip,rk3588-cru.h>
>> >  #include <dt-bindings/phy/phy.h>
>> >  #include <dt-bindings/ata/ahci.h>
>> > +#include <dt-bindings/thermal/thermal.h>
>> >
>> >  / {
>> >       compatible = "rockchip,rk3588";
>> > @@ -2368,6 +2369,152 @@ pwm15: pwm@febf0030 {
>> >               status = "disabled";
>> >       };
>> >
>> > +     thermal_zones: thermal-zones {
>> > +             /* sensor near the center of the SoC */
>> > +             package_thermal: package-thermal {
>> > +                     polling-delay-passive = <0>;
>> > +                     polling-delay = <0>;
>> > +                     thermal-sensors = <&tsadc 0>;
>> > +
>> > +                     trips {
>> > +                             package_crit: package-crit {
>> > +                                     temperature = <115000>;
>> > +                                     hysteresis = <0>;
>> > +                                     type = "critical";
>> > +                             };
>> > +                     };
>> > +             };
>> > +
>> > +             /* sensor between A76 cores 0 and 1 */
>> > +             bigcore0_thermal: bigcore0-thermal {
>> > +                     polling-delay-passive = <100>;
>> > +                     polling-delay = <0>;
>> > +                     thermal-sensors = <&tsadc 1>;
>> > +
>> > +                     trips {
>> > +                             bigcore0_alert: bigcore0-alert {
>> > +                                     temperature = <85000>;
>> > +                                     hysteresis = <2000>;
>> > +                                     type = "passive";
>> > +                             };
>> 
>> Doesn't removing the second passive trip, which was present in the v3,
>> result in confusing the IPA governor?
> 
> Not really - it will just treat the missing trip as 0C for its initial
> PID calculations [1], and will continually run the governor as opposed
> to putting it to rest when the temperature is below the "switch on"
> value [2].
> 
> Getting the power allocation governor to work optimally (i.e. to
> provide tangible benefits over, say, stepwise) is much more involved
> than defining an arbitrary switch-on trip point, as it requires an
> accurate estimate of sustainable power per thermal zone (which we
> don't have for RK3588 in general, and furthermore it must depend a lot
> on a particular cooling setup), and ideally some userspace
> power/thermal model capable of tuning the PID coefficients and
> updating them via sysfs based on how a particular system accumulates
> and dissipates heat under different load.
> 
> So after thinking over it for a while I decided that those extra
> passive trips were rather self-deceiving, as they are only useful in
> the context of a power allocation governor but we do not have any of
> the other pieces in place for the power allocation governor to work.
> Better not to clutter the device tree IMO.

I see, thanks for the clarification.  Please, give me some time
to thoroughly test your patches, which I'll hopefully be able to
do in the next few days.

> [1] 
> https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/drivers/thermal/gov_power_allocator.c#n156
> [2] 
> https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/drivers/thermal/gov_power_allocator.c#n487
diff mbox series

Patch

diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 6ac5ac8b48ab..ef06c1f742e8 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -10,6 +10,7 @@ 
 #include <dt-bindings/reset/rockchip,rk3588-cru.h>
 #include <dt-bindings/phy/phy.h>
 #include <dt-bindings/ata/ahci.h>
+#include <dt-bindings/thermal/thermal.h>
 
 / {
 	compatible = "rockchip,rk3588";
@@ -2368,6 +2369,152 @@  pwm15: pwm@febf0030 {
 		status = "disabled";
 	};
 
+	thermal_zones: thermal-zones {
+		/* sensor near the center of the SoC */
+		package_thermal: package-thermal {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+			thermal-sensors = <&tsadc 0>;
+
+			trips {
+				package_crit: package-crit {
+					temperature = <115000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+		};
+
+		/* sensor between A76 cores 0 and 1 */
+		bigcore0_thermal: bigcore0-thermal {
+			polling-delay-passive = <100>;
+			polling-delay = <0>;
+			thermal-sensors = <&tsadc 1>;
+
+			trips {
+				bigcore0_alert: bigcore0-alert {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				bigcore0_crit: bigcore0-crit {
+					temperature = <115000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+			cooling-maps {
+				map0 {
+					trip = <&bigcore0_alert>;
+					cooling-device =
+						<&cpu_b0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu_b1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+				};
+			};
+		};
+
+		/* sensor between A76 cores 2 and 3 */
+		bigcore2_thermal: bigcore2-thermal {
+			polling-delay-passive = <100>;
+			polling-delay = <0>;
+			thermal-sensors = <&tsadc 2>;
+
+			trips {
+				bigcore2_alert: bigcore2-alert {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				bigcore2_crit: bigcore2-crit {
+					temperature = <115000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+			cooling-maps {
+				map0 {
+					trip = <&bigcore2_alert>;
+					cooling-device =
+						<&cpu_b2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu_b3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+				};
+			};
+		};
+
+		/* sensor between the four A55 cores */
+		little_core_thermal: littlecore-thermal {
+			polling-delay-passive = <100>;
+			polling-delay = <0>;
+			thermal-sensors = <&tsadc 3>;
+
+			trips {
+				littlecore_alert: littlecore-alert {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				littlecore_crit: littlecore-crit {
+					temperature = <115000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+			cooling-maps {
+				map0 {
+					trip = <&littlecore_alert>;
+					cooling-device =
+						<&cpu_l0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu_l1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu_l2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu_l3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+				};
+			};
+		};
+
+		/* sensor near the PD_CENTER power domain */
+		center_thermal: center-thermal {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+			thermal-sensors = <&tsadc 4>;
+
+			trips {
+				center_crit: center-crit {
+					temperature = <115000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+		};
+
+		gpu_thermal: gpu-thermal {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+			thermal-sensors = <&tsadc 5>;
+
+			trips {
+				gpu_crit: gpu-crit {
+					temperature = <115000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+		};
+
+		npu_thermal: npu-thermal {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+			thermal-sensors = <&tsadc 6>;
+
+			trips {
+				npu_crit: npu-crit {
+					temperature = <115000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+		};
+	};
+
 	tsadc: tsadc@fec00000 {
 		compatible = "rockchip,rk3588-tsadc";
 		reg = <0x0 0xfec00000 0x0 0x400>;