diff mbox series

[v2,1/5] arch_topology: Introduce thermal pressure update function

Message ID 20211015144550.23719-2-lukasz.luba@arm.com (mailing list archive)
State Superseded
Headers show
Series Refactor thermal pressure update to avoid code duplication | expand

Commit Message

Lukasz Luba Oct. 15, 2021, 2:45 p.m. UTC
The thermal pressure is a mechanism which is used for providing
information about reduced CPU performance to the scheduler. Usually code
has to convert the value from frequency units into capacity units,
which are understandable by the scheduler. Create a common conversion code
which can be just used via a handy API.

Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
---
 arch/arm/include/asm/topology.h   |  1 +
 arch/arm64/include/asm/topology.h |  1 +
 drivers/base/arch_topology.c      | 36 ++++++++++++++++++++++++++++++-
 include/linux/arch_topology.h     |  3 +++
 include/linux/sched/topology.h    |  7 ++++++
 5 files changed, 47 insertions(+), 1 deletion(-)

Comments

Dietmar Eggemann Oct. 26, 2021, 4:51 p.m. UTC | #1
On 15/10/2021 16:45, Lukasz Luba wrote:
> The thermal pressure is a mechanism which is used for providing
> information about reduced CPU performance to the scheduler. Usually code
> has to convert the value from frequency units into capacity units,
> which are understandable by the scheduler. Create a common conversion code
> which can be just used via a handy API.
> 
> Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
> ---
>  arch/arm/include/asm/topology.h   |  1 +
>  arch/arm64/include/asm/topology.h |  1 +
>  drivers/base/arch_topology.c      | 36 ++++++++++++++++++++++++++++++-
>  include/linux/arch_topology.h     |  3 +++
>  include/linux/sched/topology.h    |  7 ++++++
>  5 files changed, 47 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
> index 470299ee2fba..aee6c456c085 100644
> --- a/arch/arm/include/asm/topology.h
> +++ b/arch/arm/include/asm/topology.h
> @@ -24,6 +24,7 @@
>  /* Replace task scheduler's default thermal pressure API */
>  #define arch_scale_thermal_pressure topology_get_thermal_pressure
>  #define arch_set_thermal_pressure   topology_set_thermal_pressure
> +#define arch_thermal_pressure_update	topology_thermal_pressure_update
>  
>  #else
>  
> diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
> index ec2db3419c41..c997015402bc 100644
> --- a/arch/arm64/include/asm/topology.h
> +++ b/arch/arm64/include/asm/topology.h
> @@ -33,6 +33,7 @@ void update_freq_counters_refs(void);
>  /* Replace task scheduler's default thermal pressure API */
>  #define arch_scale_thermal_pressure topology_get_thermal_pressure
>  #define arch_set_thermal_pressure   topology_set_thermal_pressure
> +#define arch_thermal_pressure_update	topology_thermal_pressure_update

s/thermal_pressure_update/update_thermal_pressure ?

The scheme seems to be {arch|topology}_*foo*_thermal_pressure

But ...

>  
>  #include <asm-generic/topology.h>
>  
> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> index 43407665918f..1fa28b5afdb2 100644
> --- a/drivers/base/arch_topology.c
> +++ b/drivers/base/arch_topology.c
> @@ -25,6 +25,7 @@
>  static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
>  static struct cpumask scale_freq_counters_mask;
>  static bool scale_freq_invariant;
> +static DEFINE_PER_CPU(u32, freq_factor) = 1;
>  
>  static bool supports_scale_freq_counters(const struct cpumask *cpus)
>  {
> @@ -168,6 +169,40 @@ void topology_set_thermal_pressure(const struct cpumask *cpus,
>  }
>  EXPORT_SYMBOL_GPL(topology_set_thermal_pressure);
>  
> +/**
> + * topology_thermal_pressure_update() - Update thermal pressure for CPUs
> + * @cpus	: The related CPUs for which capacity has been reduced
> + * @capped_freq	: The maximum allowed frequency that CPUs can run at
> + *
> + * Update the value of thermal pressure for all @cpus in the mask. The
> + * cpumask should include all (online+offline) affected CPUs, to avoid
> + * operating on stale data when hot-plug is used for some CPUs. The
> + * @capped_freq must be less or equal to the max possible frequency and
> + * reflects the currently allowed max CPUs frequency due to thermal capping.
> + * The @capped_freq must be provided in kHz.
> + */
> +void topology_thermal_pressure_update(const struct cpumask *cpus,
> +				      unsigned long capped_freq)
> +{

... why not just s/unsigned long th_pressure/unsigned long capped_freq
in existing topology_set_thermal_pressure() and move code the
frequency/capacity conversion in there? The patch set will become
considerably smaller.

 void topology_set_thermal_pressure(const struct cpumask *cpus,
-                              unsigned long th_pressure)
+                              unsigned long capped_freq)
 {
+       unsigned long max_capacity, capacity;
        int cpu;

-       for_each_cpu(cpu, cpus)
-               WRITE_ONCE(per_cpu(thermal_pressure, cpu), th_pressure);
+       if (!cpus)
+               return;
+
+       cpu = cpumask_first(cpus);
+       max_capacity = arch_scale_cpu_capacity(cpu);
+
+       /* Convert to MHz scale which is used in 'freq_factor' */
+       capped_freq /= 1000;
+
+       capacity = mult_frac(capped_freq, max_capacity,
+                            per_cpu(freq_factor, cpu));
+
+       for_each_cpu(cpu, cpus) {
+               WRITE_ONCE(per_cpu(thermal_pressure, cpu),
+                          max_capacity - capacity);
+       }
 }
 EXPORT_SYMBOL_GPL(topology_set_thermal_pressure);

And a user like [drivers/thermal/cpufreq_cooling.c] can call
arch_set_thermal_pressure(cpus, frequency).

[...]
Lukasz Luba Oct. 27, 2021, 8:56 a.m. UTC | #2
Hi Dietmar,

Thank you for having a look at this.

On 10/26/21 5:51 PM, Dietmar Eggemann wrote:
> On 15/10/2021 16:45, Lukasz Luba wrote:

[snip]

>> +#define arch_thermal_pressure_update	topology_thermal_pressure_update
> 
> s/thermal_pressure_update/update_thermal_pressure ?

I can reorder that naming.

> 
> The scheme seems to be {arch|topology}_*foo*_thermal_pressure
> 
> But ...
> 
>>   

[snip]

>> +void topology_thermal_pressure_update(const struct cpumask *cpus,
>> +				      unsigned long capped_freq)
>> +{
> 
> ... why not just s/unsigned long th_pressure/unsigned long capped_freq
> in existing topology_set_thermal_pressure() and move code the
> frequency/capacity conversion in there? The patch set will become
> considerably smaller.

I've been trying to avoid confusion when changing actually behavior
of the API function. Thus, introducing new would IMO opinion
make sure the old 'set' function was expecting proper pressure
value, while the new 'update' expects frequency.

I agree that the patch set would be smaller in that case, but I'm
not sure if that would not hide some issues. This one would
definitely break compilation of some vendor modules (or drivers
queuing or under review), not silently passing them through (with wrong
argument).

> 
>   void topology_set_thermal_pressure(const struct cpumask *cpus,
> -                              unsigned long th_pressure)
> +                              unsigned long capped_freq)

[snip]

>   EXPORT_SYMBOL_GPL(topology_set_thermal_pressure);
> 
> And a user like [drivers/thermal/cpufreq_cooling.c] can call
> arch_set_thermal_pressure(cpus, frequency).
> 
> [...]
> 

I'm not sure if that is a safe way.

Regards,
Lukasz
Dietmar Eggemann Oct. 27, 2021, 1:35 p.m. UTC | #3
On 27/10/2021 10:56, Lukasz Luba wrote:
> Hi Dietmar,
> 
> Thank you for having a look at this.
> 
> On 10/26/21 5:51 PM, Dietmar Eggemann wrote:
>> On 15/10/2021 16:45, Lukasz Luba wrote:

[...]

>>> +void topology_thermal_pressure_update(const struct cpumask *cpus,
>>> +                      unsigned long capped_freq)
>>> +{
>>
>> ... why not just s/unsigned long th_pressure/unsigned long capped_freq
>> in existing topology_set_thermal_pressure() and move code the
>> frequency/capacity conversion in there? The patch set will become
>> considerably smaller.
> 
> I've been trying to avoid confusion when changing actually behavior
> of the API function. Thus, introducing new would IMO opinion
> make sure the old 'set' function was expecting proper pressure
> value, while the new 'update' expects frequency.
> 
> I agree that the patch set would be smaller in that case, but I'm
> not sure if that would not hide some issues. This one would
> definitely break compilation of some vendor modules (or drivers
> queuing or under review), not silently passing them through (with wrong
> argument).

I see, since the parameter type list would stay the same, this could
potentially happen.

[...]
Bjorn Andersson Oct. 27, 2021, 6:43 p.m. UTC | #4
On Fri 15 Oct 07:45 PDT 2021, Lukasz Luba wrote:
> diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
[..]
> +/**
> + * topology_thermal_pressure_update() - Update thermal pressure for CPUs
> + * @cpus	: The related CPUs for which capacity has been reduced
> + * @capped_freq	: The maximum allowed frequency that CPUs can run at

I know this matches what I see in e.g. the Qualcomm cpufreq hw driver,
but in what cases will @capped_freq differ from
cpufreq_get_hw_max_freq(cpumask_first(cpus))?

Regards,
Bjorn

> + *
> + * Update the value of thermal pressure for all @cpus in the mask. The
> + * cpumask should include all (online+offline) affected CPUs, to avoid
> + * operating on stale data when hot-plug is used for some CPUs. The
> + * @capped_freq must be less or equal to the max possible frequency and
> + * reflects the currently allowed max CPUs frequency due to thermal capping.
> + * The @capped_freq must be provided in kHz.
> + */
> +void topology_thermal_pressure_update(const struct cpumask *cpus,
> +				      unsigned long capped_freq)
> +{
> +	unsigned long max_capacity, capacity;
> +	int cpu;
> +
> +	if (!cpus)
> +		return;
> +
> +	cpu = cpumask_first(cpus);
> +	max_capacity = arch_scale_cpu_capacity(cpu);
> +
> +	/* Convert to MHz scale which is used in 'freq_factor' */
> +	capped_freq /= 1000;
> +
> +	capacity = mult_frac(capped_freq, max_capacity,
> +			     per_cpu(freq_factor, cpu));
> +
> +	arch_set_thermal_pressure(cpus, max_capacity - capacity);
> +}
> +EXPORT_SYMBOL_GPL(topology_thermal_pressure_update);
> +
>  static ssize_t cpu_capacity_show(struct device *dev,
>  				 struct device_attribute *attr,
>  				 char *buf)
> @@ -220,7 +255,6 @@ static void update_topology_flags_workfn(struct work_struct *work)
>  	update_topology = 0;
>  }
>  
> -static DEFINE_PER_CPU(u32, freq_factor) = 1;
>  static u32 *raw_capacity;
>  
>  static int free_raw_capacity(void)
Viresh Kumar Oct. 28, 2021, 5:44 a.m. UTC | #5
On 15-10-21, 15:45, Lukasz Luba wrote:
> +/**
> + * topology_thermal_pressure_update() - Update thermal pressure for CPUs
> + * @cpus	: The related CPUs for which capacity has been reduced
> + * @capped_freq	: The maximum allowed frequency that CPUs can run at

Maybe replace tabs with spaces here ?

> + *
> + * Update the value of thermal pressure for all @cpus in the mask. The
> + * cpumask should include all (online+offline) affected CPUs, to avoid
> + * operating on stale data when hot-plug is used for some CPUs. The
> + * @capped_freq must be less or equal to the max possible frequency and
> + * reflects the currently allowed max CPUs frequency due to thermal capping.
> + * The @capped_freq must be provided in kHz.
> + */
> +void topology_thermal_pressure_update(const struct cpumask *cpus,
> +				      unsigned long capped_freq)
> +{
> +	unsigned long max_capacity, capacity;
> +	int cpu;
> +
> +	if (!cpus)

I will drop this and let the kernel crash :)

> +		return;
> +
> +	cpu = cpumask_first(cpus);
> +	max_capacity = arch_scale_cpu_capacity(cpu);
> +
> +	/* Convert to MHz scale which is used in 'freq_factor' */
> +	capped_freq /= 1000;

We should make sure capped_freq > freq_factor and WARN if not. This will also
get rid of similar checks at the users.

> +
> +	capacity = mult_frac(capped_freq, max_capacity,
> +			     per_cpu(freq_factor, cpu));
> +
> +	arch_set_thermal_pressure(cpus, max_capacity - capacity);
> +}
> +EXPORT_SYMBOL_GPL(topology_thermal_pressure_update);
Lukasz Luba Oct. 28, 2021, 7:16 a.m. UTC | #6
On 10/27/21 7:43 PM, Bjorn Andersson wrote:
> On Fri 15 Oct 07:45 PDT 2021, Lukasz Luba wrote:
>> diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
> [..]
>> +/**
>> + * topology_thermal_pressure_update() - Update thermal pressure for CPUs
>> + * @cpus	: The related CPUs for which capacity has been reduced
>> + * @capped_freq	: The maximum allowed frequency that CPUs can run at
> 
> I know this matches what I see in e.g. the Qualcomm cpufreq hw driver,
> but in what cases will @capped_freq differ from
> cpufreq_get_hw_max_freq(cpumask_first(cpus))?

The @capped_freq is the maximum allowed frequency value due to
thermal reasons, which will always be lower or equal to the value
returned by cpufreq_get_hw_max_freq()
(effectively: 'policy->cpuinfo.max_freq').

We limit the frequency (and voltage) of CPU to reduce power (and heat)
in the passive cooling system. That information is important to us,
because scheduler needs to know how fast the CPU can go. It cannot
assume that the speed is always 'policy->cpuinfo.max_freq'. Often
it's less then that at heavy load or GPU heavy load (the same SoC).

Regards,
Lukasz
Lukasz Luba Oct. 28, 2021, 7:19 a.m. UTC | #7
On 10/28/21 6:44 AM, Viresh Kumar wrote:
> On 15-10-21, 15:45, Lukasz Luba wrote:
>> +/**
>> + * topology_thermal_pressure_update() - Update thermal pressure for CPUs
>> + * @cpus	: The related CPUs for which capacity has been reduced
>> + * @capped_freq	: The maximum allowed frequency that CPUs can run at
> 
> Maybe replace tabs with spaces here ?

Sure

> 
>> + *
>> + * Update the value of thermal pressure for all @cpus in the mask. The
>> + * cpumask should include all (online+offline) affected CPUs, to avoid
>> + * operating on stale data when hot-plug is used for some CPUs. The
>> + * @capped_freq must be less or equal to the max possible frequency and
>> + * reflects the currently allowed max CPUs frequency due to thermal capping.
>> + * The @capped_freq must be provided in kHz.
>> + */
>> +void topology_thermal_pressure_update(const struct cpumask *cpus,
>> +				      unsigned long capped_freq)
>> +{
>> +	unsigned long max_capacity, capacity;
>> +	int cpu;
>> +
>> +	if (!cpus)
> 
> I will drop this and let the kernel crash :)

OK :)

> 
>> +		return;
>> +
>> +	cpu = cpumask_first(cpus);
>> +	max_capacity = arch_scale_cpu_capacity(cpu);
>> +
>> +	/* Convert to MHz scale which is used in 'freq_factor' */
>> +	capped_freq /= 1000;
> 
> We should make sure capped_freq > freq_factor and WARN if not. This will also
> get rid of similar checks at the users.

OK, I'll change that.

Thank you for the review.

Regards,
Lukasz
Bjorn Andersson Oct. 28, 2021, 11:12 p.m. UTC | #8
On Thu 28 Oct 00:16 PDT 2021, Lukasz Luba wrote:

> 
> 
> On 10/27/21 7:43 PM, Bjorn Andersson wrote:
> > On Fri 15 Oct 07:45 PDT 2021, Lukasz Luba wrote:
> > > diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
> > [..]
> > > +/**
> > > + * topology_thermal_pressure_update() - Update thermal pressure for CPUs
> > > + * @cpus	: The related CPUs for which capacity has been reduced
> > > + * @capped_freq	: The maximum allowed frequency that CPUs can run at
> > 
> > I know this matches what I see in e.g. the Qualcomm cpufreq hw driver,
> > but in what cases will @capped_freq differ from
> > cpufreq_get_hw_max_freq(cpumask_first(cpus))?
> 
> The @capped_freq is the maximum allowed frequency value due to
> thermal reasons, which will always be lower or equal to the value
> returned by cpufreq_get_hw_max_freq()
> (effectively: 'policy->cpuinfo.max_freq').
> 

Read patch 3 and 4 again and now this makes sense to me.

Thanks,
Bjorn

> We limit the frequency (and voltage) of CPU to reduce power (and heat)
> in the passive cooling system. That information is important to us,
> because scheduler needs to know how fast the CPU can go. It cannot
> assume that the speed is always 'policy->cpuinfo.max_freq'. Often
> it's less then that at heavy load or GPU heavy load (the same SoC).
> 
> Regards,
> Lukasz
diff mbox series

Patch

diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 470299ee2fba..aee6c456c085 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -24,6 +24,7 @@ 
 /* Replace task scheduler's default thermal pressure API */
 #define arch_scale_thermal_pressure topology_get_thermal_pressure
 #define arch_set_thermal_pressure   topology_set_thermal_pressure
+#define arch_thermal_pressure_update	topology_thermal_pressure_update
 
 #else
 
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index ec2db3419c41..c997015402bc 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -33,6 +33,7 @@  void update_freq_counters_refs(void);
 /* Replace task scheduler's default thermal pressure API */
 #define arch_scale_thermal_pressure topology_get_thermal_pressure
 #define arch_set_thermal_pressure   topology_set_thermal_pressure
+#define arch_thermal_pressure_update	topology_thermal_pressure_update
 
 #include <asm-generic/topology.h>
 
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 43407665918f..1fa28b5afdb2 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -25,6 +25,7 @@ 
 static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
 static struct cpumask scale_freq_counters_mask;
 static bool scale_freq_invariant;
+static DEFINE_PER_CPU(u32, freq_factor) = 1;
 
 static bool supports_scale_freq_counters(const struct cpumask *cpus)
 {
@@ -168,6 +169,40 @@  void topology_set_thermal_pressure(const struct cpumask *cpus,
 }
 EXPORT_SYMBOL_GPL(topology_set_thermal_pressure);
 
+/**
+ * topology_thermal_pressure_update() - Update thermal pressure for CPUs
+ * @cpus	: The related CPUs for which capacity has been reduced
+ * @capped_freq	: The maximum allowed frequency that CPUs can run at
+ *
+ * Update the value of thermal pressure for all @cpus in the mask. The
+ * cpumask should include all (online+offline) affected CPUs, to avoid
+ * operating on stale data when hot-plug is used for some CPUs. The
+ * @capped_freq must be less or equal to the max possible frequency and
+ * reflects the currently allowed max CPUs frequency due to thermal capping.
+ * The @capped_freq must be provided in kHz.
+ */
+void topology_thermal_pressure_update(const struct cpumask *cpus,
+				      unsigned long capped_freq)
+{
+	unsigned long max_capacity, capacity;
+	int cpu;
+
+	if (!cpus)
+		return;
+
+	cpu = cpumask_first(cpus);
+	max_capacity = arch_scale_cpu_capacity(cpu);
+
+	/* Convert to MHz scale which is used in 'freq_factor' */
+	capped_freq /= 1000;
+
+	capacity = mult_frac(capped_freq, max_capacity,
+			     per_cpu(freq_factor, cpu));
+
+	arch_set_thermal_pressure(cpus, max_capacity - capacity);
+}
+EXPORT_SYMBOL_GPL(topology_thermal_pressure_update);
+
 static ssize_t cpu_capacity_show(struct device *dev,
 				 struct device_attribute *attr,
 				 char *buf)
@@ -220,7 +255,6 @@  static void update_topology_flags_workfn(struct work_struct *work)
 	update_topology = 0;
 }
 
-static DEFINE_PER_CPU(u32, freq_factor) = 1;
 static u32 *raw_capacity;
 
 static int free_raw_capacity(void)
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index f180240dc95f..9e183621a59b 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -59,6 +59,9 @@  static inline unsigned long topology_get_thermal_pressure(int cpu)
 void topology_set_thermal_pressure(const struct cpumask *cpus,
 				   unsigned long th_pressure);
 
+void topology_thermal_pressure_update(const struct cpumask *cpus,
+				      unsigned long capped_freq);
+
 struct cpu_topology {
 	int thread_id;
 	int core_id;
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 8f0f778b7c91..990d14814427 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -266,6 +266,13 @@  void arch_set_thermal_pressure(const struct cpumask *cpus,
 { }
 #endif
 
+#ifndef arch_thermal_pressure_update
+static __always_inline
+void arch_thermal_pressure_update(const struct cpumask *cpus,
+				      unsigned long capped_frequency)
+{ }
+#endif
+
 static inline int task_node(const struct task_struct *p)
 {
 	return cpu_to_node(task_cpu(p));