diff mbox series

[v2,3/5] cpufreq: intel_pstate: Add ->offline and ->online callbacks

Message ID 2786976.RHVxHup3hB@kreacher (mailing list archive)
State Superseded, archived
Headers show
Series cpufreq: intel_pstate: Address some HWP-related oddities | expand

Commit Message

Rafael J. Wysocki Aug. 24, 2020, 5:43 p.m. UTC
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>

Add ->offline and ->online driver callbacks to prepare for taking a
CPU offline and to restore its working configuration when it goes
back online, respectively, to avoid invoking the ->init callback on
every CPU online which is quite a bit of unnecessary overhead.

Define ->offline and ->online so that they can be used in the
passive mode as well as in the active mode and because ->offline
will do the majority of ->stop_cpu work, the passive mode does
not need that callback any more, so drop it.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

-> v2: Typo fixes and changelog edits (Doug).

---
 drivers/cpufreq/intel_pstate.c | 38 ++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

Comments

Jinjie Ruan Nov. 3, 2023, 10:56 a.m. UTC | #1
On 2020/8/25 1:43, Rafael J. Wysocki wrote:
> From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
> 
> Add ->offline and ->online driver callbacks to prepare for taking a
> CPU offline and to restore its working configuration when it goes
> back online, respectively, to avoid invoking the ->init callback on
> every CPU online which is quite a bit of unnecessary overhead.
> 
> Define ->offline and ->online so that they can be used in the
> passive mode as well as in the active mode and because ->offline
> will do the majority of ->stop_cpu work, the passive mode does
> not need that callback any more, so drop it.
> 
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---
> 
> -> v2: Typo fixes and changelog edits (Doug).
> 
> ---
>  drivers/cpufreq/intel_pstate.c | 38 ++++++++++++++++++++++++++++------
>  1 file changed, 32 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index 3d18934fa975..98836ac299db 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -2297,28 +2297,51 @@ static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
>  	return 0;
>  }
>  
> -static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
> +static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
>  {
> +	pr_debug("CPU %d going offline\n", policy->cpu);
> +
> +	intel_pstate_exit_perf_limits(policy);
> +
> +	/*
> +	 * If the CPU is an SMT thread and it goes offline with the performance
> +	 * settings different from the minimum, it will prevent its sibling
> +	 * from getting to lower performance levels, so force the minimum
> +	 * performance on CPU offline to prevent that from happening.
> +	 */
>  	if (hwp_active)
>  		intel_pstate_hwp_force_min_perf(policy->cpu);
>  	else
>  		intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
> +
> +	return 0;
> +}
> +
> +static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
> +{
> +	pr_debug("CPU %d going online\n", policy->cpu);
> +
> +	intel_pstate_init_acpi_perf_limits(policy);
> +
> +	if (hwp_active)
> +		wrmsrl_on_cpu(policy->cpu, MSR_HWP_REQUEST,
> +			      all_cpu_data[policy->cpu]->hwp_req_cached);
> +
> +	return 0;
>  }

On Ice Lake server, there seems a bug when CONFIG_X86_INTEL_PSTATE=y and
not configure intel_pstate=xxx in command line.

Although the Performance tuner is used, the CPU have the lowest
frequency in scaling_cur_freq after the CPU goes offline and then goes
online, running the same infinite loop load.

How to produce:

echo performance > /sys/devices/system/cpu/cpu12/cpufreq/scaling_governor

cat while_true.c
#include <stdio.h>
void main(void)
{
        while(1);
}


[root@localhost freq_test]# cat test.sh
#!/bin/bash

cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_governor
taskset -c ${1} ./while_true &
sleep 1s

cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq

echo 0 > /sys/devices/system/cpu/cpu${1}/online

sleep 1s
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq

sleep 1s

echo 1 > /sys/devices/system/cpu/cpu${1}/online
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq

taskset -c ${1} ./while_true &

sleep 1s
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq

sleep 1s
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq

sleep 1s
cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq


[root@localhost freq_test]# sh test.sh 40
2300000
performance
2299977
cat: /sys/devices/system/cpu/cpu40/cpufreq/scaling_cur_freq: Device or
resource busy
2300000
2300022
2300000
2299953
[root@localhost freq_test]# sh test.sh 50
2300000
performance
2300000
cat: /sys/devices/system/cpu/cpu50/cpufreq/scaling_cur_freq: Device or
resource busy
2300000
2299977
2300022
2299977
[root@localhost freq_test]# sh test.sh 20
2300000
performance
2299977
cat: /sys/devices/system/cpu/cpu20/cpufreq/scaling_cur_freq: Device or
resource busy
800000
800000
800000
799992
[root@localhost freq_test]# sh test.sh 21
2300000
performance
2300000
cat: /sys/devices/system/cpu/cpu21/cpufreq/scaling_cur_freq: Device or
resource busy
800000
800000
800000
800000

[root@localhost freq_test]# cat
/sys/devices/system/cpu/cpu21/cpufreq/scaling_max_freq
2300000
[root@localhost freq_test]# cat
/sys/devices/system/cpu/cpu21/cpufreq/scaling_min_freq
800000

>  
>  static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
>  {
> -	pr_debug("CPU %d exiting\n", policy->cpu);
> +	pr_debug("CPU %d stopping\n", policy->cpu);
>  
>  	intel_pstate_clear_update_util_hook(policy->cpu);
>  	if (hwp_active)
>  		intel_pstate_hwp_save_state(policy);
> -
> -	intel_cpufreq_stop_cpu(policy);
>  }
>  
>  static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
>  {
> -	intel_pstate_exit_perf_limits(policy);
> +	pr_debug("CPU %d exiting\n", policy->cpu);
>  
>  	policy->fast_switch_possible = false;
>  
> @@ -2398,6 +2421,8 @@ static struct cpufreq_driver intel_pstate = {
>  	.init		= intel_pstate_cpu_init,
>  	.exit		= intel_pstate_cpu_exit,
>  	.stop_cpu	= intel_pstate_stop_cpu,
> +	.offline	= intel_pstate_cpu_offline,
> +	.online		= intel_pstate_cpu_online,
>  	.update_limits	= intel_pstate_update_limits,
>  	.name		= "intel_pstate",
>  };
> @@ -2652,7 +2677,8 @@ static struct cpufreq_driver intel_cpufreq = {
>  	.fast_switch	= intel_cpufreq_fast_switch,
>  	.init		= intel_cpufreq_cpu_init,
>  	.exit		= intel_cpufreq_cpu_exit,
> -	.stop_cpu	= intel_cpufreq_stop_cpu,
> +	.offline	= intel_pstate_cpu_offline,
> +	.online		= intel_pstate_cpu_online,
>  	.update_limits	= intel_pstate_update_limits,
>  	.name		= "intel_cpufreq",
>  };
Doug Smythies Nov. 3, 2023, 3:56 p.m. UTC | #2
On Fri, Nov 3, 2023 at 3:57 AM Jinjie Ruan <ruanjinjie@huawei.com> wrote:
> On 2020/8/25 1:43, Rafael J. Wysocki wrote:
> > From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
> >
> > Add ->offline and ->online driver callbacks to prepare for taking a
> > CPU offline and to restore its working configuration when it goes
> > back online, respectively, to avoid invoking the ->init callback on
> > every CPU online which is quite a bit of unnecessary overhead.
> >
> > Define ->offline and ->online so that they can be used in the
> > passive mode as well as in the active mode and because ->offline
> > will do the majority of ->stop_cpu work, the passive mode does
> > not need that callback any more, so drop it.
> >
> > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > ---
> >
> > -> v2: Typo fixes and changelog edits (Doug).
> >
> > ---
> >  drivers/cpufreq/intel_pstate.c | 38 ++++++++++++++++++++++++++++------
> >  1 file changed, 32 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> > index 3d18934fa975..98836ac299db 100644
> > --- a/drivers/cpufreq/intel_pstate.c
> > +++ b/drivers/cpufreq/intel_pstate.c
> > @@ -2297,28 +2297,51 @@ static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
> >       return 0;
> >  }
> >
> > -static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
> > +static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
> >  {
> > +     pr_debug("CPU %d going offline\n", policy->cpu);
> > +
> > +     intel_pstate_exit_perf_limits(policy);
> > +
> > +     /*
> > +      * If the CPU is an SMT thread and it goes offline with the performance
> > +      * settings different from the minimum, it will prevent its sibling
> > +      * from getting to lower performance levels, so force the minimum
> > +      * performance on CPU offline to prevent that from happening.
> > +      */
> >       if (hwp_active)
> >               intel_pstate_hwp_force_min_perf(policy->cpu);
> >       else
> >               intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
> > +
> > +     return 0;
> > +}
> > +
> > +static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
> > +{
> > +     pr_debug("CPU %d going online\n", policy->cpu);
> > +
> > +     intel_pstate_init_acpi_perf_limits(policy);
> > +
> > +     if (hwp_active)
> > +             wrmsrl_on_cpu(policy->cpu, MSR_HWP_REQUEST,
> > +                           all_cpu_data[policy->cpu]->hwp_req_cached);
> > +
> > +     return 0;
> >  }
>
> On Ice Lake server, there seems a bug when CONFIG_X86_INTEL_PSTATE=y and
> not configure intel_pstate=xxx in command line.
>
> Although the Performance tuner is used, the CPU have the lowest
> frequency in scaling_cur_freq after the CPU goes offline and then goes
> online, running the same infinite loop load.
>
> How to produce:
>
> echo performance > /sys/devices/system/cpu/cpu12/cpufreq/scaling_governor
>
> cat while_true.c
> #include <stdio.h>
> void main(void)
> {
>         while(1);
> }
>
>
> [root@localhost freq_test]# cat test.sh
> #!/bin/bash
>
> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_governor
> taskset -c ${1} ./while_true &
> sleep 1s
>
> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>
> echo 0 > /sys/devices/system/cpu/cpu${1}/online
>
> sleep 1s
> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>
> sleep 1s
>
> echo 1 > /sys/devices/system/cpu/cpu${1}/online
> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>
> taskset -c ${1} ./while_true &
>
> sleep 1s
> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>
> sleep 1s
> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>
> sleep 1s
> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>
>
> [root@localhost freq_test]# sh test.sh 40
> 2300000
> performance
> 2299977
> cat: /sys/devices/system/cpu/cpu40/cpufreq/scaling_cur_freq: Device or
> resource busy
> 2300000
> 2300022
> 2300000
> 2299953
> [root@localhost freq_test]# sh test.sh 50
> 2300000
> performance
> 2300000
> cat: /sys/devices/system/cpu/cpu50/cpufreq/scaling_cur_freq: Device or
> resource busy
> 2300000
> 2299977
> 2300022
> 2299977
> [root@localhost freq_test]# sh test.sh 20
> 2300000
> performance
> 2299977
> cat: /sys/devices/system/cpu/cpu20/cpufreq/scaling_cur_freq: Device or
> resource busy
> 800000
> 800000
> 800000
> 799992
> [root@localhost freq_test]# sh test.sh 21
> 2300000
> performance
> 2300000
> cat: /sys/devices/system/cpu/cpu21/cpufreq/scaling_cur_freq: Device or
> resource busy
> 800000
> 800000
> 800000
> 800000
>
> [root@localhost freq_test]# cat
> /sys/devices/system/cpu/cpu21/cpufreq/scaling_max_freq
> 2300000
> [root@localhost freq_test]# cat
> /sys/devices/system/cpu/cpu21/cpufreq/scaling_min_freq
> 800000

Hi,

I followed your "how to reproduce" notes exactly.
So far, I have been unable to reproduce your issue.

I am using kernel 6.6.
My processor is:
Intel(R) Core(TM) i5-10600K CPU @ 4.10GHz

Results:
root@s19:/home/doug/pstate# ./test.sh 8
800000
performance
4799994
cat: /sys/devices/system/cpu/cpu8/cpufreq/scaling_cur_freq: Device or
resource busy
4799999
4800000
4800001
4799996
root@s19:/home/doug/pstate# ./test.sh 7
800000
performance
4800001
cat: /sys/devices/system/cpu/cpu7/cpufreq/scaling_cur_freq: Device or
resource busy
4799967
4800028
4800006
4799997
root@s19:/home/doug/pstate# ./test.sh 6
800000
performance
4800001
cat: /sys/devices/system/cpu/cpu6/cpufreq/scaling_cur_freq: Device or
resource busy
4799983
4800001
4799993
4800002
root@s19:/home/doug/pstate# ./test.sh 5
800000
performance
4799990
cat: /sys/devices/system/cpu/cpu5/cpufreq/scaling_cur_freq: Device or
resource busy
4800006
4800002
4800011
4799980
root@s19:/home/doug/pstate# ./test.sh 4
4799940
performance
4799985
cat: /sys/devices/system/cpu/cpu4/cpufreq/scaling_cur_freq: Device or
resource busy
4799975
4799994
4799984
4799996
root@s19:/home/doug/pstate# ./test.sh 3
4799986
performance
4799990
cat: /sys/devices/system/cpu/cpu3/cpufreq/scaling_cur_freq: Device or
resource busy
4799976
4800015
4800000
4799995

... Doug
Jinjie Ruan Nov. 6, 2023, 3:03 a.m. UTC | #3
On 2023/11/3 23:56, Doug Smythies wrote:
> On Fri, Nov 3, 2023 at 3:57 AM Jinjie Ruan <ruanjinjie@huawei.com> wrote:
>> On 2020/8/25 1:43, Rafael J. Wysocki wrote:
>>> From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
>>>
>>> Add ->offline and ->online driver callbacks to prepare for taking a
>>> CPU offline and to restore its working configuration when it goes
>>> back online, respectively, to avoid invoking the ->init callback on
>>> every CPU online which is quite a bit of unnecessary overhead.
>>>
>>> Define ->offline and ->online so that they can be used in the
>>> passive mode as well as in the active mode and because ->offline
>>> will do the majority of ->stop_cpu work, the passive mode does
>>> not need that callback any more, so drop it.
>>>
>>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>> ---
>>>
>>> -> v2: Typo fixes and changelog edits (Doug).
>>>
>>> ---
>>>  drivers/cpufreq/intel_pstate.c | 38 ++++++++++++++++++++++++++++------
>>>  1 file changed, 32 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
>>> index 3d18934fa975..98836ac299db 100644
>>> --- a/drivers/cpufreq/intel_pstate.c
>>> +++ b/drivers/cpufreq/intel_pstate.c
>>> @@ -2297,28 +2297,51 @@ static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
>>>       return 0;
>>>  }
>>>
>>> -static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
>>> +static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
>>>  {
>>> +     pr_debug("CPU %d going offline\n", policy->cpu);
>>> +
>>> +     intel_pstate_exit_perf_limits(policy);
>>> +
>>> +     /*
>>> +      * If the CPU is an SMT thread and it goes offline with the performance
>>> +      * settings different from the minimum, it will prevent its sibling
>>> +      * from getting to lower performance levels, so force the minimum
>>> +      * performance on CPU offline to prevent that from happening.
>>> +      */
>>>       if (hwp_active)
>>>               intel_pstate_hwp_force_min_perf(policy->cpu);
>>>       else
>>>               intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
>>> +
>>> +     return 0;
>>> +}
>>> +
>>> +static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
>>> +{
>>> +     pr_debug("CPU %d going online\n", policy->cpu);
>>> +
>>> +     intel_pstate_init_acpi_perf_limits(policy);
>>> +
>>> +     if (hwp_active)
>>> +             wrmsrl_on_cpu(policy->cpu, MSR_HWP_REQUEST,
>>> +                           all_cpu_data[policy->cpu]->hwp_req_cached);
>>> +
>>> +     return 0;
>>>  }
>>
>> On Ice Lake server, there seems a bug when CONFIG_X86_INTEL_PSTATE=y and
>> not configure intel_pstate=xxx in command line.
>>
>> Although the Performance tuner is used, the CPU have the lowest
>> frequency in scaling_cur_freq after the CPU goes offline and then goes
>> online, running the same infinite loop load.
>>
>> How to produce:
>>
>> echo performance > /sys/devices/system/cpu/cpu12/cpufreq/scaling_governor
>>
>> cat while_true.c
>> #include <stdio.h>
>> void main(void)
>> {
>>         while(1);
>> }
>>
>>
>> [root@localhost freq_test]# cat test.sh
>> #!/bin/bash
>>
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_governor
>> taskset -c ${1} ./while_true &
>> sleep 1s
>>
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>> echo 0 > /sys/devices/system/cpu/cpu${1}/online
>>
>> sleep 1s
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>> sleep 1s
>>
>> echo 1 > /sys/devices/system/cpu/cpu${1}/online
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>> taskset -c ${1} ./while_true &
>>
>> sleep 1s
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>> sleep 1s
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>> sleep 1s
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>>
>> [root@localhost freq_test]# sh test.sh 40
>> 2300000
>> performance
>> 2299977
>> cat: /sys/devices/system/cpu/cpu40/cpufreq/scaling_cur_freq: Device or
>> resource busy
>> 2300000
>> 2300022
>> 2300000
>> 2299953
>> [root@localhost freq_test]# sh test.sh 50
>> 2300000
>> performance
>> 2300000
>> cat: /sys/devices/system/cpu/cpu50/cpufreq/scaling_cur_freq: Device or
>> resource busy
>> 2300000
>> 2299977
>> 2300022
>> 2299977
>> [root@localhost freq_test]# sh test.sh 20
>> 2300000
>> performance
>> 2299977
>> cat: /sys/devices/system/cpu/cpu20/cpufreq/scaling_cur_freq: Device or
>> resource busy
>> 800000
>> 800000
>> 800000
>> 799992
>> [root@localhost freq_test]# sh test.sh 21
>> 2300000
>> performance
>> 2300000
>> cat: /sys/devices/system/cpu/cpu21/cpufreq/scaling_cur_freq: Device or
>> resource busy
>> 800000
>> 800000
>> 800000
>> 800000
>>
>> [root@localhost freq_test]# cat
>> /sys/devices/system/cpu/cpu21/cpufreq/scaling_max_freq
>> 2300000
>> [root@localhost freq_test]# cat
>> /sys/devices/system/cpu/cpu21/cpufreq/scaling_min_freq
>> 800000
> 
> Hi,
> 
> I followed your "how to reproduce" notes exactly.
> So far, I have been unable to reproduce your issue.

It seems that this issue is platform-specific.

The following CPU family has the issue:

1、Products formerly Haswell

2、Model name:            Intel(R) Xeon(R) Platinum 8380 CPU @
2.30GHz(Ice Lake server)

But the following CPU family do not have the issue:

1、Model name:                         Intel(R) Xeon(R) CPU E5-2620 v2 @
2.10GHz

2、Model name:     Intel(R) Xeon(R) CPU E5-2698 v3 @ 2.30GHz

> 
> I am using kernel 6.6.
> My processor is:
> Intel(R) Core(TM) i5-10600K CPU @ 4.10GHz
> 
> Results:
> root@s19:/home/doug/pstate# ./test.sh 8
> 800000
> performance
> 4799994
> cat: /sys/devices/system/cpu/cpu8/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4799999
> 4800000
> 4800001
> 4799996
> root@s19:/home/doug/pstate# ./test.sh 7
> 800000
> performance
> 4800001
> cat: /sys/devices/system/cpu/cpu7/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4799967
> 4800028
> 4800006
> 4799997
> root@s19:/home/doug/pstate# ./test.sh 6
> 800000
> performance
> 4800001
> cat: /sys/devices/system/cpu/cpu6/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4799983
> 4800001
> 4799993
> 4800002
> root@s19:/home/doug/pstate# ./test.sh 5
> 800000
> performance
> 4799990
> cat: /sys/devices/system/cpu/cpu5/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4800006
> 4800002
> 4800011
> 4799980
> root@s19:/home/doug/pstate# ./test.sh 4
> 4799940
> performance
> 4799985
> cat: /sys/devices/system/cpu/cpu4/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4799975
> 4799994
> 4799984
> 4799996
> root@s19:/home/doug/pstate# ./test.sh 3
> 4799986
> performance
> 4799990
> cat: /sys/devices/system/cpu/cpu3/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4799976
> 4800015
> 4800000
> 4799995
> 
> ... Doug
>
diff mbox series

Patch

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 3d18934fa975..98836ac299db 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2297,28 +2297,51 @@  static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
 	return 0;
 }
 
-static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
+static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
 {
+	pr_debug("CPU %d going offline\n", policy->cpu);
+
+	intel_pstate_exit_perf_limits(policy);
+
+	/*
+	 * If the CPU is an SMT thread and it goes offline with the performance
+	 * settings different from the minimum, it will prevent its sibling
+	 * from getting to lower performance levels, so force the minimum
+	 * performance on CPU offline to prevent that from happening.
+	 */
 	if (hwp_active)
 		intel_pstate_hwp_force_min_perf(policy->cpu);
 	else
 		intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
+
+	return 0;
+}
+
+static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
+{
+	pr_debug("CPU %d going online\n", policy->cpu);
+
+	intel_pstate_init_acpi_perf_limits(policy);
+
+	if (hwp_active)
+		wrmsrl_on_cpu(policy->cpu, MSR_HWP_REQUEST,
+			      all_cpu_data[policy->cpu]->hwp_req_cached);
+
+	return 0;
 }
 
 static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
 {
-	pr_debug("CPU %d exiting\n", policy->cpu);
+	pr_debug("CPU %d stopping\n", policy->cpu);
 
 	intel_pstate_clear_update_util_hook(policy->cpu);
 	if (hwp_active)
 		intel_pstate_hwp_save_state(policy);
-
-	intel_cpufreq_stop_cpu(policy);
 }
 
 static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
 {
-	intel_pstate_exit_perf_limits(policy);
+	pr_debug("CPU %d exiting\n", policy->cpu);
 
 	policy->fast_switch_possible = false;
 
@@ -2398,6 +2421,8 @@  static struct cpufreq_driver intel_pstate = {
 	.init		= intel_pstate_cpu_init,
 	.exit		= intel_pstate_cpu_exit,
 	.stop_cpu	= intel_pstate_stop_cpu,
+	.offline	= intel_pstate_cpu_offline,
+	.online		= intel_pstate_cpu_online,
 	.update_limits	= intel_pstate_update_limits,
 	.name		= "intel_pstate",
 };
@@ -2652,7 +2677,8 @@  static struct cpufreq_driver intel_cpufreq = {
 	.fast_switch	= intel_cpufreq_fast_switch,
 	.init		= intel_cpufreq_cpu_init,
 	.exit		= intel_cpufreq_cpu_exit,
-	.stop_cpu	= intel_cpufreq_stop_cpu,
+	.offline	= intel_pstate_cpu_offline,
+	.online		= intel_pstate_cpu_online,
 	.update_limits	= intel_pstate_update_limits,
 	.name		= "intel_cpufreq",
 };