diff mbox series

cpufreq: intel_pstate: Fix CPU lowest Frequency bug when offline/online for passive

Message ID 20231107025838.2806500-1-ruanjinjie@huawei.com (mailing list archive)
State New
Delegated to: Rafael Wysocki
Headers show
Series cpufreq: intel_pstate: Fix CPU lowest Frequency bug when offline/online for passive | expand

Commit Message

Jinjie Ruan Nov. 7, 2023, 2:58 a.m. UTC
There is a bug in passive mode for intel pstate when
CONFIG_X86_INTEL_PSTATE = y and configure intel_pstate = passive in command
line. On Ice Lake server, although the performance tuner is used, the CPU
have the lowest frequency in scaling_cur_freq after the CPU goes offline and
then goes online, running the same infinite loop load.

How to reproduce:

cat while_true.c
	#include <stdio.h>
	void main(void)
	{
	        while(1);
	}

[root@localhost freq_test]# cat test.sh
	#!/bin/bash

	cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
	cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_governor
	taskset -c ${1} ./while_true &
	sleep 1s

	cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq

	echo 0 > /sys/devices/system/cpu/cpu${1}/online

	sleep 1s
	cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq

	sleep 1s

	echo 1 > /sys/devices/system/cpu/cpu${1}/online
	cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq

	taskset -c ${1} ./while_true &

	sleep 1s
	cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq

	sleep 1s
	cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq

	sleep 1s
	cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq

The CPU frequency is adjusted to the minimum after offline and online:

[root@localhost freq_test]# sh test.sh 20
	2300000
	performance
	2299977
	cat: /sys/devices/system/cpu/cpu20/cpufreq/scaling_cur_freq: Device or
	resource busy
	800000
	800000
	800000
	799992
[root@localhost freq_test]# sh test.sh 21
	2300000
	performance
	2300000
	cat: /sys/devices/system/cpu/cpu21/cpufreq/scaling_cur_freq: Device or
	resource busy
	800000
	800000
	800000
	800000

As in __cpufreq_driver_target(), the cpufreq core will not call intel
cpufreq's target() callback if the target freq is equal with policy->cur
and do not set CPUFREQ_NEED_UPDATE_LIMITS flag, but the hardware also not
proactively keep CPU with the policy->cur frequency. So also set
CPUFREQ_NEED_UPDATE_LIMITS for passive mode. After applying this patch,
the CPU frequency is consistent as what performance tuner expected after
CPU offline and online as below:

[root@localhost freq_test]# sh test.sh 20
	2300000
	performance
	2300000
	cat: /sys/devices/system/cpu/cpu20/cpufreq/scaling_cur_freq: Device or resource busy
	2300000
	2300000
	2299977
	2299977
[root@localhost freq_test]# sh test.sh 21
	2300000
	performance
	2300000
	cat: /sys/devices/system/cpu/cpu21/cpufreq/scaling_cur_freq: Device or resource busy
	2300000
	2300000
	2300000
	2300000
[root@localhost freq_test]# cat /sys/devices/system/cpu/intel_pstate/status
	passive

Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 drivers/cpufreq/intel_pstate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Comments

Jinjie Ruan Nov. 13, 2023, 6:18 a.m. UTC | #1
Ping.

On 2023/11/7 10:58, Jinjie Ruan wrote:
> There is a bug in passive mode for intel pstate when
> CONFIG_X86_INTEL_PSTATE = y and configure intel_pstate = passive in command
> line. On Ice Lake server, although the performance tuner is used, the CPU
> have the lowest frequency in scaling_cur_freq after the CPU goes offline and
> then goes online, running the same infinite loop load.
> 
> How to reproduce:
> 
> cat while_true.c
> 	#include <stdio.h>
> 	void main(void)
> 	{
> 	        while(1);
> 	}
> 
> [root@localhost freq_test]# cat test.sh
> 	#!/bin/bash
> 
> 	cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
> 	cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_governor
> 	taskset -c ${1} ./while_true &
> 	sleep 1s
> 
> 	cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
> 
> 	echo 0 > /sys/devices/system/cpu/cpu${1}/online
> 
> 	sleep 1s
> 	cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
> 
> 	sleep 1s
> 
> 	echo 1 > /sys/devices/system/cpu/cpu${1}/online
> 	cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
> 
> 	taskset -c ${1} ./while_true &
> 
> 	sleep 1s
> 	cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
> 
> 	sleep 1s
> 	cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
> 
> 	sleep 1s
> 	cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
> 
> The CPU frequency is adjusted to the minimum after offline and online:
> 
> [root@localhost freq_test]# sh test.sh 20
> 	2300000
> 	performance
> 	2299977
> 	cat: /sys/devices/system/cpu/cpu20/cpufreq/scaling_cur_freq: Device or
> 	resource busy
> 	800000
> 	800000
> 	800000
> 	799992
> [root@localhost freq_test]# sh test.sh 21
> 	2300000
> 	performance
> 	2300000
> 	cat: /sys/devices/system/cpu/cpu21/cpufreq/scaling_cur_freq: Device or
> 	resource busy
> 	800000
> 	800000
> 	800000
> 	800000
> 
> As in __cpufreq_driver_target(), the cpufreq core will not call intel
> cpufreq's target() callback if the target freq is equal with policy->cur
> and do not set CPUFREQ_NEED_UPDATE_LIMITS flag, but the hardware also not
> proactively keep CPU with the policy->cur frequency. So also set
> CPUFREQ_NEED_UPDATE_LIMITS for passive mode. After applying this patch,
> the CPU frequency is consistent as what performance tuner expected after
> CPU offline and online as below:
> 
> [root@localhost freq_test]# sh test.sh 20
> 	2300000
> 	performance
> 	2300000
> 	cat: /sys/devices/system/cpu/cpu20/cpufreq/scaling_cur_freq: Device or resource busy
> 	2300000
> 	2300000
> 	2299977
> 	2299977
> [root@localhost freq_test]# sh test.sh 21
> 	2300000
> 	performance
> 	2300000
> 	cat: /sys/devices/system/cpu/cpu21/cpufreq/scaling_cur_freq: Device or resource busy
> 	2300000
> 	2300000
> 	2300000
> 	2300000
> [root@localhost freq_test]# cat /sys/devices/system/cpu/intel_pstate/status
> 	passive
> 
> Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
> ---
>  drivers/cpufreq/intel_pstate.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index a534a1f7f1ee..73403f1292b0 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -3091,7 +3091,7 @@ static int intel_cpufreq_suspend(struct cpufreq_policy *policy)
>  }
>  
>  static struct cpufreq_driver intel_cpufreq = {
> -	.flags		= CPUFREQ_CONST_LOOPS,
> +	.flags		= CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
>  	.verify		= intel_cpufreq_verify_policy,
>  	.target		= intel_cpufreq_target,
>  	.fast_switch	= intel_cpufreq_fast_switch,
Rafael J. Wysocki Nov. 20, 2023, 5:16 p.m. UTC | #2
On Mon, Nov 13, 2023 at 7:18 AM Jinjie Ruan <ruanjinjie@huawei.com> wrote:
>
> Ping.

I see the problem, but I'm not sure if the approach taken in the patch
is the best one, as it has side effects.

> On 2023/11/7 10:58, Jinjie Ruan wrote:
> > There is a bug in passive mode for intel pstate when
> > CONFIG_X86_INTEL_PSTATE = y and configure intel_pstate = passive in command
> > line. On Ice Lake server, although the performance tuner is used, the CPU
> > have the lowest frequency in scaling_cur_freq after the CPU goes offline and
> > then goes online, running the same infinite loop load.
> >
> > How to reproduce:
> >
> > cat while_true.c
> >       #include <stdio.h>
> >       void main(void)
> >       {
> >               while(1);
> >       }
> >
> > [root@localhost freq_test]# cat test.sh
> >       #!/bin/bash
> >
> >       cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
> >       cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_governor
> >       taskset -c ${1} ./while_true &
> >       sleep 1s
> >
> >       cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
> >
> >       echo 0 > /sys/devices/system/cpu/cpu${1}/online
> >
> >       sleep 1s
> >       cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
> >
> >       sleep 1s
> >
> >       echo 1 > /sys/devices/system/cpu/cpu${1}/online
> >       cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
> >
> >       taskset -c ${1} ./while_true &
> >
> >       sleep 1s
> >       cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
> >
> >       sleep 1s
> >       cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
> >
> >       sleep 1s
> >       cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
> >
> > The CPU frequency is adjusted to the minimum after offline and online:
> >
> > [root@localhost freq_test]# sh test.sh 20
> >       2300000
> >       performance
> >       2299977
> >       cat: /sys/devices/system/cpu/cpu20/cpufreq/scaling_cur_freq: Device or
> >       resource busy
> >       800000
> >       800000
> >       800000
> >       799992
> > [root@localhost freq_test]# sh test.sh 21
> >       2300000
> >       performance
> >       2300000
> >       cat: /sys/devices/system/cpu/cpu21/cpufreq/scaling_cur_freq: Device or
> >       resource busy
> >       800000
> >       800000
> >       800000
> >       800000
> >
> > As in __cpufreq_driver_target(), the cpufreq core will not call intel
> > cpufreq's target() callback if the target freq is equal with policy->cur
> > and do not set CPUFREQ_NEED_UPDATE_LIMITS flag, but the hardware also not
> > proactively keep CPU with the policy->cur frequency. So also set
> > CPUFREQ_NEED_UPDATE_LIMITS for passive mode. After applying this patch,
> > the CPU frequency is consistent as what performance tuner expected after
> > CPU offline and online as below:
> >
> > [root@localhost freq_test]# sh test.sh 20
> >       2300000
> >       performance
> >       2300000
> >       cat: /sys/devices/system/cpu/cpu20/cpufreq/scaling_cur_freq: Device or resource busy
> >       2300000
> >       2300000
> >       2299977
> >       2299977
> > [root@localhost freq_test]# sh test.sh 21
> >       2300000
> >       performance
> >       2300000
> >       cat: /sys/devices/system/cpu/cpu21/cpufreq/scaling_cur_freq: Device or resource busy
> >       2300000
> >       2300000
> >       2300000
> >       2300000
> > [root@localhost freq_test]# cat /sys/devices/system/cpu/intel_pstate/status
> >       passive
> >
> > Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
> > ---
> >  drivers/cpufreq/intel_pstate.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> > index a534a1f7f1ee..73403f1292b0 100644
> > --- a/drivers/cpufreq/intel_pstate.c
> > +++ b/drivers/cpufreq/intel_pstate.c
> > @@ -3091,7 +3091,7 @@ static int intel_cpufreq_suspend(struct cpufreq_policy *policy)
> >  }
> >
> >  static struct cpufreq_driver intel_cpufreq = {
> > -     .flags          = CPUFREQ_CONST_LOOPS,
> > +     .flags          = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
> >       .verify         = intel_cpufreq_verify_policy,
> >       .target         = intel_cpufreq_target,
> >       .fast_switch    = intel_cpufreq_fast_switch,
Jinjie Ruan Nov. 21, 2023, 1:41 a.m. UTC | #3
On 2023/11/21 1:16, Rafael J. Wysocki wrote:
> On Mon, Nov 13, 2023 at 7:18 AM Jinjie Ruan <ruanjinjie@huawei.com> wrote:
>>
>> Ping.
> 
> I see the problem, but I'm not sure if the approach taken in the patch
> is the best one, as it has side effects.

Could you please make it more clear, what are the side effects? I'm not
sure about the possible negative effects of the patch.

> 
>> On 2023/11/7 10:58, Jinjie Ruan wrote:
>>> There is a bug in passive mode for intel pstate when
>>> CONFIG_X86_INTEL_PSTATE = y and configure intel_pstate = passive in command
>>> line. On Ice Lake server, although the performance tuner is used, the CPU
>>> have the lowest frequency in scaling_cur_freq after the CPU goes offline and
>>> then goes online, running the same infinite loop load.
>>>
>>> How to reproduce:
>>>
>>> cat while_true.c
>>>       #include <stdio.h>
>>>       void main(void)
>>>       {
>>>               while(1);
>>>       }
>>>
>>> [root@localhost freq_test]# cat test.sh
>>>       #!/bin/bash
>>>
>>>       cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>>       cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_governor
>>>       taskset -c ${1} ./while_true &
>>>       sleep 1s
>>>
>>>       cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>>
>>>       echo 0 > /sys/devices/system/cpu/cpu${1}/online
>>>
>>>       sleep 1s
>>>       cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>>
>>>       sleep 1s
>>>
>>>       echo 1 > /sys/devices/system/cpu/cpu${1}/online
>>>       cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>>
>>>       taskset -c ${1} ./while_true &
>>>
>>>       sleep 1s
>>>       cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>>
>>>       sleep 1s
>>>       cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>>
>>>       sleep 1s
>>>       cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>>
>>> The CPU frequency is adjusted to the minimum after offline and online:
>>>
>>> [root@localhost freq_test]# sh test.sh 20
>>>       2300000
>>>       performance
>>>       2299977
>>>       cat: /sys/devices/system/cpu/cpu20/cpufreq/scaling_cur_freq: Device or
>>>       resource busy
>>>       800000
>>>       800000
>>>       800000
>>>       799992
>>> [root@localhost freq_test]# sh test.sh 21
>>>       2300000
>>>       performance
>>>       2300000
>>>       cat: /sys/devices/system/cpu/cpu21/cpufreq/scaling_cur_freq: Device or
>>>       resource busy
>>>       800000
>>>       800000
>>>       800000
>>>       800000
>>>
>>> As in __cpufreq_driver_target(), the cpufreq core will not call intel
>>> cpufreq's target() callback if the target freq is equal with policy->cur
>>> and do not set CPUFREQ_NEED_UPDATE_LIMITS flag, but the hardware also not
>>> proactively keep CPU with the policy->cur frequency. So also set
>>> CPUFREQ_NEED_UPDATE_LIMITS for passive mode. After applying this patch,
>>> the CPU frequency is consistent as what performance tuner expected after
>>> CPU offline and online as below:
>>>
>>> [root@localhost freq_test]# sh test.sh 20
>>>       2300000
>>>       performance
>>>       2300000
>>>       cat: /sys/devices/system/cpu/cpu20/cpufreq/scaling_cur_freq: Device or resource busy
>>>       2300000
>>>       2300000
>>>       2299977
>>>       2299977
>>> [root@localhost freq_test]# sh test.sh 21
>>>       2300000
>>>       performance
>>>       2300000
>>>       cat: /sys/devices/system/cpu/cpu21/cpufreq/scaling_cur_freq: Device or resource busy
>>>       2300000
>>>       2300000
>>>       2300000
>>>       2300000
>>> [root@localhost freq_test]# cat /sys/devices/system/cpu/intel_pstate/status
>>>       passive
>>>
>>> Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
>>> ---
>>>  drivers/cpufreq/intel_pstate.c | 2 +-
>>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
>>> index a534a1f7f1ee..73403f1292b0 100644
>>> --- a/drivers/cpufreq/intel_pstate.c
>>> +++ b/drivers/cpufreq/intel_pstate.c
>>> @@ -3091,7 +3091,7 @@ static int intel_cpufreq_suspend(struct cpufreq_policy *policy)
>>>  }
>>>
>>>  static struct cpufreq_driver intel_cpufreq = {
>>> -     .flags          = CPUFREQ_CONST_LOOPS,
>>> +     .flags          = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
>>>       .verify         = intel_cpufreq_verify_policy,
>>>       .target         = intel_cpufreq_target,
>>>       .fast_switch    = intel_cpufreq_fast_switch,
>
diff mbox series

Patch

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index a534a1f7f1ee..73403f1292b0 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -3091,7 +3091,7 @@  static int intel_cpufreq_suspend(struct cpufreq_policy *policy)
 }
 
 static struct cpufreq_driver intel_cpufreq = {
-	.flags		= CPUFREQ_CONST_LOOPS,
+	.flags		= CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
 	.verify		= intel_cpufreq_verify_policy,
 	.target		= intel_cpufreq_target,
 	.fast_switch	= intel_cpufreq_fast_switch,