diff mbox series

[v2,3/5] cpufreq/amd-pstate: Add support for platform profile class

Message ID 20250304152327.1561017-4-superm1@kernel.org (mailing list archive)
State New
Delegated to: Mario Limonciello
Headers show
Series amd-pstate Dynamic EPP and raw EPP | expand

Commit Message

Mario Limonciello March 4, 2025, 3:23 p.m. UTC
From: Mario Limonciello <mario.limonciello@amd.com>

The platform profile core allows multiple drivers and devices to
register platform profile support.

When the legacy platform profile interface is used all drivers will
adjust the platform profile as well.

Add support for registering every CPU with the platform profile handler
when dynamic EPP is enabled.

The end result will be that changing the platform profile will modify
EPP accordingly.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 Documentation/admin-guide/pm/amd-pstate.rst |   4 +-
 drivers/cpufreq/Kconfig.x86                 |   1 +
 drivers/cpufreq/amd-pstate.c                | 142 +++++++++++++++++---
 drivers/cpufreq/amd-pstate.h                |  10 ++
 4 files changed, 140 insertions(+), 17 deletions(-)

Comments

Gautham R. Shenoy March 7, 2025, 4:22 p.m. UTC | #1
On Tue, Mar 04, 2025 at 09:23:25AM -0600, Mario Limonciello wrote:
> From: Mario Limonciello <mario.limonciello@amd.com>
> 
> The platform profile core allows multiple drivers and devices to
> register platform profile support.
> 
> When the legacy platform profile interface is used all drivers will
> adjust the platform profile as well.
> 
> Add support for registering every CPU with the platform profile handler
> when dynamic EPP is enabled.
> 
> The end result will be that changing the platform profile will modify
> EPP accordingly.
> 
> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
> ---
>  Documentation/admin-guide/pm/amd-pstate.rst |   4 +-
>  drivers/cpufreq/Kconfig.x86                 |   1 +
>  drivers/cpufreq/amd-pstate.c                | 142 +++++++++++++++++---
>  drivers/cpufreq/amd-pstate.h                |  10 ++
>  4 files changed, 140 insertions(+), 17 deletions(-)
> 
> diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
> index 8424e7119dd7e..36950fb6568c0 100644
> --- a/Documentation/admin-guide/pm/amd-pstate.rst
> +++ b/Documentation/admin-guide/pm/amd-pstate.rst
> @@ -321,7 +321,9 @@ Whether this behavior is enabled by default with the kernel config option
>  at runtime by the sysfs file ``/sys/devices/system/cpu/cpufreq/policyX/dynamic_epp``.
>  
>  When set to enabled, the driver will select a different energy performance
> -profile when the machine is running on battery or AC power.
> +profile when the machine is running on battery or AC power. The driver will
> +also register with the platform profile handler to receive notifications of
> +user desired power state and react to those.
>  When set to disabled, the driver will not change the energy performance profile
>  based on the power source and will not react to user desired power state.
>  
> diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
> index c5ef92634ddf4..905eab998b836 100644
> --- a/drivers/cpufreq/Kconfig.x86
> +++ b/drivers/cpufreq/Kconfig.x86
> @@ -40,6 +40,7 @@ config X86_AMD_PSTATE
>  	select ACPI_PROCESSOR
>  	select ACPI_CPPC_LIB if X86_64
>  	select CPU_FREQ_GOV_SCHEDUTIL if SMP
> +	select ACPI_PLATFORM_PROFILE
>  	help
>  	  This driver adds a CPUFreq driver which utilizes a fine grain
>  	  processor performance frequency control range instead of legacy
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 9911808fe0bcf..28c02edf6e40b 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -105,6 +105,7 @@ static struct quirk_entry *quirks;
>   *	2		balance_performance
>   *	3		balance_power
>   *	4		power
> + *	5		custom (for raw EPP values)
>   */
>  enum energy_perf_value_index {
>  	EPP_INDEX_DEFAULT = 0,
> @@ -112,6 +113,7 @@ enum energy_perf_value_index {
>  	EPP_INDEX_BALANCE_PERFORMANCE,
>  	EPP_INDEX_BALANCE_POWERSAVE,
>  	EPP_INDEX_POWERSAVE,
> +	EPP_INDEX_CUSTOM,
>  };
>  
>  static const char * const energy_perf_strings[] = {
> @@ -120,6 +122,7 @@ static const char * const energy_perf_strings[] = {
>  	[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
>  	[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
>  	[EPP_INDEX_POWERSAVE] = "power",
> +	[EPP_INDEX_CUSTOM] = "custom",
>  	NULL
>  };
>  
> @@ -1073,6 +1076,10 @@ static int amd_pstate_power_supply_notifier(struct notifier_block *nb,
>  	if (event != PSY_EVENT_PROP_CHANGED)
>  		return NOTIFY_OK;
>  
> +	/* dynamic actions are only applied while platform profile is in balanced */
> +	if (cpudata->current_profile != PLATFORM_PROFILE_BALANCED)
> +		return 0;
> +
>  	epp = amd_pstate_get_balanced_epp(policy);
>  
>  	ret = amd_pstate_set_epp(policy, epp);
> @@ -1081,14 +1088,84 @@ static int amd_pstate_power_supply_notifier(struct notifier_block *nb,
>  
>  	return NOTIFY_OK;
>  }
> -static void amd_pstate_clear_dynamic_epp(struct cpufreq_policy *policy)
> +
> +static int amd_pstate_profile_probe(void *drvdata, unsigned long *choices)
> +{
> +	set_bit(PLATFORM_PROFILE_LOW_POWER, choices);
> +	set_bit(PLATFORM_PROFILE_BALANCED, choices);
> +	set_bit(PLATFORM_PROFILE_PERFORMANCE, choices);
> +
> +	return 0;
> +}
> +
> +static int amd_pstate_profile_get(struct device *dev,
> +				  enum platform_profile_option *profile)
> +{
> +	struct amd_cpudata *cpudata = dev_get_drvdata(dev);
> +
> +	*profile = cpudata->current_profile;
> +
> +	return 0;
> +}
> +
> +static int amd_pstate_profile_set(struct device *dev,
> +				  enum platform_profile_option profile)
> +{
> +	struct amd_cpudata *cpudata = dev_get_drvdata(dev);
> +	struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu);
> +	int ret;
> +
> +	switch (profile) {
> +	case PLATFORM_PROFILE_LOW_POWER:
> +		if (cpudata->policy != CPUFREQ_POLICY_POWERSAVE)
> +			cpudata->policy = CPUFREQ_POLICY_POWERSAVE;

So prior to the patch, cpudata->policy is supposed to mirror
policy->policy.  With this patch, this assumption is no longer
true. So it is possible for the user to again override the choice of
EPP set via platform profile by changing the cpufreq governor ?

Is this the expected behaviour?

The bigger concern is, if the governor was previously "performance"
and then the platform profile requested "low power", "cat
/sys/devices/system/cpu/cpuX/cpufreq/scaling_governor" would still
show "performance", which is inconsistent with the behaviour.
Mario Limonciello March 7, 2025, 4:55 p.m. UTC | #2
On 3/7/2025 10:22, Gautham R. Shenoy wrote:
> On Tue, Mar 04, 2025 at 09:23:25AM -0600, Mario Limonciello wrote:
>> From: Mario Limonciello <mario.limonciello@amd.com>
>>
>> The platform profile core allows multiple drivers and devices to
>> register platform profile support.
>>
>> When the legacy platform profile interface is used all drivers will
>> adjust the platform profile as well.
>>
>> Add support for registering every CPU with the platform profile handler
>> when dynamic EPP is enabled.
>>
>> The end result will be that changing the platform profile will modify
>> EPP accordingly.
>>
>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
>> ---
>>   Documentation/admin-guide/pm/amd-pstate.rst |   4 +-
>>   drivers/cpufreq/Kconfig.x86                 |   1 +
>>   drivers/cpufreq/amd-pstate.c                | 142 +++++++++++++++++---
>>   drivers/cpufreq/amd-pstate.h                |  10 ++
>>   4 files changed, 140 insertions(+), 17 deletions(-)
>>
>> diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
>> index 8424e7119dd7e..36950fb6568c0 100644
>> --- a/Documentation/admin-guide/pm/amd-pstate.rst
>> +++ b/Documentation/admin-guide/pm/amd-pstate.rst
>> @@ -321,7 +321,9 @@ Whether this behavior is enabled by default with the kernel config option
>>   at runtime by the sysfs file ``/sys/devices/system/cpu/cpufreq/policyX/dynamic_epp``.
>>   
>>   When set to enabled, the driver will select a different energy performance
>> -profile when the machine is running on battery or AC power.
>> +profile when the machine is running on battery or AC power. The driver will
>> +also register with the platform profile handler to receive notifications of
>> +user desired power state and react to those.
>>   When set to disabled, the driver will not change the energy performance profile
>>   based on the power source and will not react to user desired power state.
>>   
>> diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
>> index c5ef92634ddf4..905eab998b836 100644
>> --- a/drivers/cpufreq/Kconfig.x86
>> +++ b/drivers/cpufreq/Kconfig.x86
>> @@ -40,6 +40,7 @@ config X86_AMD_PSTATE
>>   	select ACPI_PROCESSOR
>>   	select ACPI_CPPC_LIB if X86_64
>>   	select CPU_FREQ_GOV_SCHEDUTIL if SMP
>> +	select ACPI_PLATFORM_PROFILE
>>   	help
>>   	  This driver adds a CPUFreq driver which utilizes a fine grain
>>   	  processor performance frequency control range instead of legacy
>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>> index 9911808fe0bcf..28c02edf6e40b 100644
>> --- a/drivers/cpufreq/amd-pstate.c
>> +++ b/drivers/cpufreq/amd-pstate.c
>> @@ -105,6 +105,7 @@ static struct quirk_entry *quirks;
>>    *	2		balance_performance
>>    *	3		balance_power
>>    *	4		power
>> + *	5		custom (for raw EPP values)
>>    */
>>   enum energy_perf_value_index {
>>   	EPP_INDEX_DEFAULT = 0,
>> @@ -112,6 +113,7 @@ enum energy_perf_value_index {
>>   	EPP_INDEX_BALANCE_PERFORMANCE,
>>   	EPP_INDEX_BALANCE_POWERSAVE,
>>   	EPP_INDEX_POWERSAVE,
>> +	EPP_INDEX_CUSTOM,
>>   };
>>   
>>   static const char * const energy_perf_strings[] = {
>> @@ -120,6 +122,7 @@ static const char * const energy_perf_strings[] = {
>>   	[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
>>   	[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
>>   	[EPP_INDEX_POWERSAVE] = "power",
>> +	[EPP_INDEX_CUSTOM] = "custom",
>>   	NULL
>>   };
>>   
>> @@ -1073,6 +1076,10 @@ static int amd_pstate_power_supply_notifier(struct notifier_block *nb,
>>   	if (event != PSY_EVENT_PROP_CHANGED)
>>   		return NOTIFY_OK;
>>   
>> +	/* dynamic actions are only applied while platform profile is in balanced */
>> +	if (cpudata->current_profile != PLATFORM_PROFILE_BALANCED)
>> +		return 0;
>> +
>>   	epp = amd_pstate_get_balanced_epp(policy);
>>   
>>   	ret = amd_pstate_set_epp(policy, epp);
>> @@ -1081,14 +1088,84 @@ static int amd_pstate_power_supply_notifier(struct notifier_block *nb,
>>   
>>   	return NOTIFY_OK;
>>   }
>> -static void amd_pstate_clear_dynamic_epp(struct cpufreq_policy *policy)
>> +
>> +static int amd_pstate_profile_probe(void *drvdata, unsigned long *choices)
>> +{
>> +	set_bit(PLATFORM_PROFILE_LOW_POWER, choices);
>> +	set_bit(PLATFORM_PROFILE_BALANCED, choices);
>> +	set_bit(PLATFORM_PROFILE_PERFORMANCE, choices);
>> +
>> +	return 0;
>> +}
>> +
>> +static int amd_pstate_profile_get(struct device *dev,
>> +				  enum platform_profile_option *profile)
>> +{
>> +	struct amd_cpudata *cpudata = dev_get_drvdata(dev);
>> +
>> +	*profile = cpudata->current_profile;
>> +
>> +	return 0;
>> +}
>> +
>> +static int amd_pstate_profile_set(struct device *dev,
>> +				  enum platform_profile_option profile)
>> +{
>> +	struct amd_cpudata *cpudata = dev_get_drvdata(dev);
>> +	struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu);
>> +	int ret;
>> +
>> +	switch (profile) {
>> +	case PLATFORM_PROFILE_LOW_POWER:
>> +		if (cpudata->policy != CPUFREQ_POLICY_POWERSAVE)
>> +			cpudata->policy = CPUFREQ_POLICY_POWERSAVE;
> 
> So prior to the patch, cpudata->policy is supposed to mirror
> policy->policy.  With this patch, this assumption is no longer
> true. So it is possible for the user to again override the choice of
> EPP set via platform profile by changing the cpufreq governor ?
> 
> Is this the expected behaviour?
> 
> The bigger concern is, if the governor was previously "performance"
> and then the platform profile requested "low power", "cat
> /sys/devices/system/cpu/cpuX/cpufreq/scaling_governor" would still
> show "performance", which is inconsistent with the behaviour.
> 
> 

This ties back to the previous patches for dynamic EPP.  My expectation 
was that when dynamic EPP is enabled that users can't manually set the 
EPP anymore (it will return -EBUSY) and likewise turning on dynamic EPP
should keep the governor as powersave.

I'll double check all those are properly enforced; but that's at least 
the intent.

IMO this "should" all work because turning on Dynamic EPP sysfs file 
forces the driver to go through a state transition that it will tear 
everything down and back up.  The policy will come back up in 
"powersave" even if it was previously in "performance" when the dynamic 
EPP sysfs file was turned on.

Longer term; I also envision the scheduler influencing EPP values when 
dynamic_epp is turned on.  The "platform profile" would be an "input" to 
that decision making process (maybe giving a weighting?), not the only 
lever.

I haven't given any serious look at how to do this with the scheduler, I 
wanted to lay the foundation first being dynamic EPP and raw EPP.

So even if dynamic_epp isn't interesting "right now" for server because 
the focus is around behavior for AC/DC, don't write it off just yet.
Mario Limonciello March 8, 2025, 4:30 a.m. UTC | #3
On 3/7/2025 10:55, Mario Limonciello wrote:
> On 3/7/2025 10:22, Gautham R. Shenoy wrote:
>> On Tue, Mar 04, 2025 at 09:23:25AM -0600, Mario Limonciello wrote:
>>> From: Mario Limonciello <mario.limonciello@amd.com>
>>>
>>> The platform profile core allows multiple drivers and devices to
>>> register platform profile support.
>>>
>>> When the legacy platform profile interface is used all drivers will
>>> adjust the platform profile as well.
>>>
>>> Add support for registering every CPU with the platform profile handler
>>> when dynamic EPP is enabled.
>>>
>>> The end result will be that changing the platform profile will modify
>>> EPP accordingly.
>>>
>>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
>>> ---
>>>   Documentation/admin-guide/pm/amd-pstate.rst |   4 +-
>>>   drivers/cpufreq/Kconfig.x86                 |   1 +
>>>   drivers/cpufreq/amd-pstate.c                | 142 +++++++++++++++++---
>>>   drivers/cpufreq/amd-pstate.h                |  10 ++
>>>   4 files changed, 140 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/ 
>>> Documentation/admin-guide/pm/amd-pstate.rst
>>> index 8424e7119dd7e..36950fb6568c0 100644
>>> --- a/Documentation/admin-guide/pm/amd-pstate.rst
>>> +++ b/Documentation/admin-guide/pm/amd-pstate.rst
>>> @@ -321,7 +321,9 @@ Whether this behavior is enabled by default with 
>>> the kernel config option
>>>   at runtime by the sysfs file ``/sys/devices/system/cpu/cpufreq/ 
>>> policyX/dynamic_epp``.
>>>   When set to enabled, the driver will select a different energy 
>>> performance
>>> -profile when the machine is running on battery or AC power.
>>> +profile when the machine is running on battery or AC power. The 
>>> driver will
>>> +also register with the platform profile handler to receive 
>>> notifications of
>>> +user desired power state and react to those.
>>>   When set to disabled, the driver will not change the energy 
>>> performance profile
>>>   based on the power source and will not react to user desired power 
>>> state.
>>> diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
>>> index c5ef92634ddf4..905eab998b836 100644
>>> --- a/drivers/cpufreq/Kconfig.x86
>>> +++ b/drivers/cpufreq/Kconfig.x86
>>> @@ -40,6 +40,7 @@ config X86_AMD_PSTATE
>>>       select ACPI_PROCESSOR
>>>       select ACPI_CPPC_LIB if X86_64
>>>       select CPU_FREQ_GOV_SCHEDUTIL if SMP
>>> +    select ACPI_PLATFORM_PROFILE
>>>       help
>>>         This driver adds a CPUFreq driver which utilizes a fine grain
>>>         processor performance frequency control range instead of legacy
>>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>>> index 9911808fe0bcf..28c02edf6e40b 100644
>>> --- a/drivers/cpufreq/amd-pstate.c
>>> +++ b/drivers/cpufreq/amd-pstate.c
>>> @@ -105,6 +105,7 @@ static struct quirk_entry *quirks;
>>>    *    2        balance_performance
>>>    *    3        balance_power
>>>    *    4        power
>>> + *    5        custom (for raw EPP values)
>>>    */
>>>   enum energy_perf_value_index {
>>>       EPP_INDEX_DEFAULT = 0,
>>> @@ -112,6 +113,7 @@ enum energy_perf_value_index {
>>>       EPP_INDEX_BALANCE_PERFORMANCE,
>>>       EPP_INDEX_BALANCE_POWERSAVE,
>>>       EPP_INDEX_POWERSAVE,
>>> +    EPP_INDEX_CUSTOM,
>>>   };
>>>   static const char * const energy_perf_strings[] = {
>>> @@ -120,6 +122,7 @@ static const char * const energy_perf_strings[] = {
>>>       [EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
>>>       [EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
>>>       [EPP_INDEX_POWERSAVE] = "power",
>>> +    [EPP_INDEX_CUSTOM] = "custom",
>>>       NULL
>>>   };
>>> @@ -1073,6 +1076,10 @@ static int 
>>> amd_pstate_power_supply_notifier(struct notifier_block *nb,
>>>       if (event != PSY_EVENT_PROP_CHANGED)
>>>           return NOTIFY_OK;
>>> +    /* dynamic actions are only applied while platform profile is in 
>>> balanced */
>>> +    if (cpudata->current_profile != PLATFORM_PROFILE_BALANCED)
>>> +        return 0;
>>> +
>>>       epp = amd_pstate_get_balanced_epp(policy);
>>>       ret = amd_pstate_set_epp(policy, epp);
>>> @@ -1081,14 +1088,84 @@ static int 
>>> amd_pstate_power_supply_notifier(struct notifier_block *nb,
>>>       return NOTIFY_OK;
>>>   }
>>> -static void amd_pstate_clear_dynamic_epp(struct cpufreq_policy *policy)
>>> +
>>> +static int amd_pstate_profile_probe(void *drvdata, unsigned long 
>>> *choices)
>>> +{
>>> +    set_bit(PLATFORM_PROFILE_LOW_POWER, choices);
>>> +    set_bit(PLATFORM_PROFILE_BALANCED, choices);
>>> +    set_bit(PLATFORM_PROFILE_PERFORMANCE, choices);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static int amd_pstate_profile_get(struct device *dev,
>>> +                  enum platform_profile_option *profile)
>>> +{
>>> +    struct amd_cpudata *cpudata = dev_get_drvdata(dev);
>>> +
>>> +    *profile = cpudata->current_profile;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static int amd_pstate_profile_set(struct device *dev,
>>> +                  enum platform_profile_option profile)
>>> +{
>>> +    struct amd_cpudata *cpudata = dev_get_drvdata(dev);
>>> +    struct cpufreq_policy *policy __free(put_cpufreq_policy) = 
>>> cpufreq_cpu_get(cpudata->cpu);
>>> +    int ret;
>>> +
>>> +    switch (profile) {
>>> +    case PLATFORM_PROFILE_LOW_POWER:
>>> +        if (cpudata->policy != CPUFREQ_POLICY_POWERSAVE)
>>> +            cpudata->policy = CPUFREQ_POLICY_POWERSAVE;
>>
>> So prior to the patch, cpudata->policy is supposed to mirror
>> policy->policy.  With this patch, this assumption is no longer
>> true. So it is possible for the user to again override the choice of
>> EPP set via platform profile by changing the cpufreq governor ?
>>
>> Is this the expected behaviour?
>>
>> The bigger concern is, if the governor was previously "performance"
>> and then the platform profile requested "low power", "cat
>> /sys/devices/system/cpu/cpuX/cpufreq/scaling_governor" would still
>> show "performance", which is inconsistent with the behaviour.
>>
>>
> 
> This ties back to the previous patches for dynamic EPP.  My expectation 
> was that when dynamic EPP is enabled that users can't manually set the 
> EPP anymore (it will return -EBUSY) and likewise turning on dynamic EPP
> should keep the governor as powersave.
> 
> I'll double check all those are properly enforced; but that's at least 
> the intent.

FWIW - I double checked and confirmed that this is working as intended.
* I couldn't change from powersave to performance when dynamic_epp was 
enabled (-EBUSY)
* I couldn't change energy_performance_preference when dynamic_epp was 
enabled (-EBUSY)

> 
> IMO this "should" all work because turning on Dynamic EPP sysfs file 
> forces the driver to go through a state transition that it will tear 
> everything down and back up.  The policy will come back up in 
> "powersave" even if it was previously in "performance" when the dynamic 
> EPP sysfs file was turned on.
> 
> Longer term; I also envision the scheduler influencing EPP values when 
> dynamic_epp is turned on.  The "platform profile" would be an "input" to 
> that decision making process (maybe giving a weighting?), not the only 
> lever.
> 
> I haven't given any serious look at how to do this with the scheduler, I 
> wanted to lay the foundation first being dynamic EPP and raw EPP.
> 
> So even if dynamic_epp isn't interesting "right now" for server because 
> the focus is around behavior for AC/DC, don't write it off just yet.
Gautham R. Shenoy March 10, 2025, 5:09 a.m. UTC | #4
[...snip...]

On Fri, Mar 07, 2025 at 10:30:25PM -0600, Mario Limonciello wrote:
> On 3/7/2025 10:55, Mario Limonciello wrote:
> > On 3/7/2025 10:22, Gautham R. Shenoy wrote:
> > > > +static int amd_pstate_profile_set(struct device *dev,
> > > > +                  enum platform_profile_option profile)
> > > > +{
> > > > +    struct amd_cpudata *cpudata = dev_get_drvdata(dev);
> > > > +    struct cpufreq_policy *policy __free(put_cpufreq_policy) =
> > > > cpufreq_cpu_get(cpudata->cpu);
> > > > +    int ret;
> > > > +
> > > > +    switch (profile) {
> > > > +    case PLATFORM_PROFILE_LOW_POWER:
> > > > +        if (cpudata->policy != CPUFREQ_POLICY_POWERSAVE)
> > > > +            cpudata->policy = CPUFREQ_POLICY_POWERSAVE;
> > > 
> > > So prior to the patch, cpudata->policy is supposed to mirror
> > > policy->policy.  With this patch, this assumption is no longer
> > > true. So it is possible for the user to again override the choice of
> > > EPP set via platform profile by changing the cpufreq governor ?
> > > 
> > > Is this the expected behaviour?
> > > 
> > > The bigger concern is, if the governor was previously "performance"
> > > and then the platform profile requested "low power", "cat
> > > /sys/devices/system/cpu/cpuX/cpufreq/scaling_governor" would still
> > > show "performance", which is inconsistent with the behaviour.
> > > 
> > > 
> > 
> > This ties back to the previous patches for dynamic EPP.  My expectation
> > was that when dynamic EPP is enabled that users can't manually set the
> > EPP anymore (it will return -EBUSY) and likewise turning on dynamic EPP
> > should keep the governor as powersave.
> > 
> > I'll double check all those are properly enforced; but that's at least
> > the intent.
> 
> FWIW - I double checked and confirmed that this is working as intended.
> * I couldn't change from powersave to performance when dynamic_epp was
> enabled (-EBUSY)
> * I couldn't change energy_performance_preference when dynamic_epp was
> enabled (-EBUSY)

Thanks for double checking this. 


> 
> > 
> > IMO this "should" all work because turning on Dynamic EPP sysfs file
> > forces the driver to go through a state transition that it will tear
> > everything down and back up.  The policy will come back up in
> > "powersave" even if it was previously in "performance" when the dynamic
> > EPP sysfs file was turned on.
> > 
> > Longer term; I also envision the scheduler influencing EPP values when
> > dynamic_epp is turned on.  The "platform profile" would be an "input" to
> > that decision making process (maybe giving a weighting?), not the only
> > lever.

Yes, the scheduler influencing the EPP values is something that I have
been wanting to explore as well. My idea was to use the nature of the
task + the load on the rq to determine the EPP value.

> > 
> > I haven't given any serious look at how to do this with the scheduler, I
> > wanted to lay the foundation first being dynamic EPP and raw EPP.
> >
> > So even if dynamic_epp isn't interesting "right now" for server because
> > the focus is around behavior for AC/DC, don't write it off just yet.
>

Fair enough.
diff mbox series

Patch

diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
index 8424e7119dd7e..36950fb6568c0 100644
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -321,7 +321,9 @@  Whether this behavior is enabled by default with the kernel config option
 at runtime by the sysfs file ``/sys/devices/system/cpu/cpufreq/policyX/dynamic_epp``.
 
 When set to enabled, the driver will select a different energy performance
-profile when the machine is running on battery or AC power.
+profile when the machine is running on battery or AC power. The driver will
+also register with the platform profile handler to receive notifications of
+user desired power state and react to those.
 When set to disabled, the driver will not change the energy performance profile
 based on the power source and will not react to user desired power state.
 
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index c5ef92634ddf4..905eab998b836 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -40,6 +40,7 @@  config X86_AMD_PSTATE
 	select ACPI_PROCESSOR
 	select ACPI_CPPC_LIB if X86_64
 	select CPU_FREQ_GOV_SCHEDUTIL if SMP
+	select ACPI_PLATFORM_PROFILE
 	help
 	  This driver adds a CPUFreq driver which utilizes a fine grain
 	  processor performance frequency control range instead of legacy
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 9911808fe0bcf..28c02edf6e40b 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -105,6 +105,7 @@  static struct quirk_entry *quirks;
  *	2		balance_performance
  *	3		balance_power
  *	4		power
+ *	5		custom (for raw EPP values)
  */
 enum energy_perf_value_index {
 	EPP_INDEX_DEFAULT = 0,
@@ -112,6 +113,7 @@  enum energy_perf_value_index {
 	EPP_INDEX_BALANCE_PERFORMANCE,
 	EPP_INDEX_BALANCE_POWERSAVE,
 	EPP_INDEX_POWERSAVE,
+	EPP_INDEX_CUSTOM,
 };
 
 static const char * const energy_perf_strings[] = {
@@ -120,6 +122,7 @@  static const char * const energy_perf_strings[] = {
 	[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
 	[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
 	[EPP_INDEX_POWERSAVE] = "power",
+	[EPP_INDEX_CUSTOM] = "custom",
 	NULL
 };
 
@@ -1073,6 +1076,10 @@  static int amd_pstate_power_supply_notifier(struct notifier_block *nb,
 	if (event != PSY_EVENT_PROP_CHANGED)
 		return NOTIFY_OK;
 
+	/* dynamic actions are only applied while platform profile is in balanced */
+	if (cpudata->current_profile != PLATFORM_PROFILE_BALANCED)
+		return 0;
+
 	epp = amd_pstate_get_balanced_epp(policy);
 
 	ret = amd_pstate_set_epp(policy, epp);
@@ -1081,14 +1088,84 @@  static int amd_pstate_power_supply_notifier(struct notifier_block *nb,
 
 	return NOTIFY_OK;
 }
-static void amd_pstate_clear_dynamic_epp(struct cpufreq_policy *policy)
+
+static int amd_pstate_profile_probe(void *drvdata, unsigned long *choices)
+{
+	set_bit(PLATFORM_PROFILE_LOW_POWER, choices);
+	set_bit(PLATFORM_PROFILE_BALANCED, choices);
+	set_bit(PLATFORM_PROFILE_PERFORMANCE, choices);
+
+	return 0;
+}
+
+static int amd_pstate_profile_get(struct device *dev,
+				  enum platform_profile_option *profile)
+{
+	struct amd_cpudata *cpudata = dev_get_drvdata(dev);
+
+	*profile = cpudata->current_profile;
+
+	return 0;
+}
+
+static int amd_pstate_profile_set(struct device *dev,
+				  enum platform_profile_option profile)
+{
+	struct amd_cpudata *cpudata = dev_get_drvdata(dev);
+	struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu);
+	int ret;
+
+	switch (profile) {
+	case PLATFORM_PROFILE_LOW_POWER:
+		if (cpudata->policy != CPUFREQ_POLICY_POWERSAVE)
+			cpudata->policy = CPUFREQ_POLICY_POWERSAVE;
+		ret = amd_pstate_set_epp(policy, AMD_CPPC_EPP_POWERSAVE);
+		if (ret)
+			return ret;
+		break;
+	case PLATFORM_PROFILE_BALANCED:
+		if (cpudata->policy != CPUFREQ_POLICY_POWERSAVE)
+			cpudata->policy = CPUFREQ_POLICY_POWERSAVE;
+		ret = amd_pstate_set_epp(policy,
+					 amd_pstate_get_balanced_epp(policy));
+		if (ret)
+			return ret;
+		break;
+	case PLATFORM_PROFILE_PERFORMANCE:
+		ret = amd_pstate_set_epp(policy, AMD_CPPC_EPP_PERFORMANCE);
+		if (ret)
+			return ret;
+		break;
+	default:
+		pr_err("Unknown Platform Profile %d\n", profile);
+		return -EOPNOTSUPP;
+	}
+
+	cpudata->current_profile = profile;
+
+	return 0;
+}
+
+static const struct platform_profile_ops amd_pstate_profile_ops = {
+	.probe = amd_pstate_profile_probe,
+	.profile_set = amd_pstate_profile_set,
+	.profile_get = amd_pstate_profile_get,
+};
+
+void amd_pstate_clear_dynamic_epp(struct cpufreq_policy *policy)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
 
 	if (cpudata->power_nb.notifier_call)
 		power_supply_unreg_notifier(&cpudata->power_nb);
+	if (cpudata->ppdev) {
+		platform_profile_remove(cpudata->ppdev);
+		cpudata->ppdev = NULL;
+	}
+	kfree(cpudata->profile_name);
 	cpudata->dynamic_epp = false;
 }
+EXPORT_SYMBOL_GPL(amd_pstate_clear_dynamic_epp);
 
 static int amd_pstate_set_dynamic_epp(struct cpufreq_policy *policy)
 {
@@ -1096,11 +1173,35 @@  static int amd_pstate_set_dynamic_epp(struct cpufreq_policy *policy)
 	int ret;
 	u8 epp;
 
-	epp = amd_pstate_get_balanced_epp(policy);
+	switch (cpudata->current_profile) {
+	case PLATFORM_PROFILE_PERFORMANCE:
+		epp = AMD_CPPC_EPP_PERFORMANCE;
+		break;
+	case PLATFORM_PROFILE_LOW_POWER:
+		epp = AMD_CPPC_EPP_POWERSAVE;
+		break;
+	case PLATFORM_PROFILE_BALANCED:
+		epp = amd_pstate_get_balanced_epp(policy);
+		break;
+	default:
+		pr_err("Unknown Platform Profile %d\n", cpudata->current_profile);
+		return -EOPNOTSUPP;
+	}
 	ret = amd_pstate_set_epp(policy, epp);
 	if (ret)
 		return ret;
 
+	cpudata->profile_name = kasprintf(GFP_KERNEL, "amd-pstate-epp-cpu%d", cpudata->cpu);
+
+	cpudata->ppdev = platform_profile_register(get_cpu_device(policy->cpu),
+						   cpudata->profile_name,
+						   policy->driver_data,
+						   &amd_pstate_profile_ops);
+	if (IS_ERR(cpudata->ppdev)) {
+		ret = PTR_ERR(cpudata->ppdev);
+		goto cleanup;
+	}
+
 	/* only enable notifier if things will actually change */
 	if (cpudata->epp_default_ac != cpudata->epp_default_dc) {
 		ret = power_supply_reg_notifier(&cpudata->power_nb);
@@ -1207,8 +1308,8 @@  static ssize_t show_energy_performance_available_preferences(
 	return offset;
 }
 
-static ssize_t store_energy_performance_preference(
-		struct cpufreq_policy *policy, const char *buf, size_t count)
+ssize_t store_energy_performance_preference(struct cpufreq_policy *policy,
+					    const char *buf, size_t count)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
 	char str_preference[21];
@@ -1224,16 +1325,22 @@  static ssize_t store_energy_performance_preference(
 	if (ret != 1)
 		return -EINVAL;
 
-	ret = match_string(energy_perf_strings, -1, str_preference);
-	if (ret < 0)
-		return -EINVAL;
-
-	if (ret)
-		epp = epp_values[ret];
-	else
-		epp = amd_pstate_get_balanced_epp(policy);
+	/*
+	 * if the value matches a number, use that, otherwise see if
+	 * matches an index in the energy_perf_strings array
+	 */
+	ret = kstrtou8(str_preference, 0, &epp);
+	if (ret) {
+		ret = match_string(energy_perf_strings, -1, str_preference);
+		if (ret < 0 || ret == EPP_INDEX_CUSTOM)
+			return -EINVAL;
+		if (ret)
+			epp = epp_values[ret];
+		else
+			epp = amd_pstate_get_balanced_epp(policy);
+	}
 
-	if (epp > 0 && policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
+	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
 		pr_debug("EPP cannot be set under performance policy\n");
 		return -EBUSY;
 	}
@@ -1244,9 +1351,9 @@  static ssize_t store_energy_performance_preference(
 
 	return ret ? ret : count;
 }
+EXPORT_SYMBOL_GPL(store_energy_performance_preference);
 
-static ssize_t show_energy_performance_preference(
-				struct cpufreq_policy *policy, char *buf)
+ssize_t show_energy_performance_preference(struct cpufreq_policy *policy, char *buf)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
 	u8 preference, epp;
@@ -1267,11 +1374,12 @@  static ssize_t show_energy_performance_preference(
 		preference = EPP_INDEX_POWERSAVE;
 		break;
 	default:
-		return -EINVAL;
+		return sysfs_emit(buf, "%u\n", epp);
 	}
 
 	return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
 }
+EXPORT_SYMBOL_GPL(show_energy_performance_preference);
 
 static void amd_pstate_driver_cleanup(void)
 {
@@ -1595,10 +1703,12 @@  static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
 	    amd_pstate_acpi_pm_profile_undefined()) {
 		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
 		cpudata->epp_default_ac = cpudata->epp_default_dc = amd_pstate_get_epp(cpudata);
+		cpudata->current_profile = PLATFORM_PROFILE_PERFORMANCE;
 	} else {
 		policy->policy = CPUFREQ_POLICY_POWERSAVE;
 		cpudata->epp_default_ac = AMD_CPPC_EPP_PERFORMANCE;
 		cpudata->epp_default_dc = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
+		cpudata->current_profile = PLATFORM_PROFILE_BALANCED;
 	}
 
 	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
index 6882876f895de..b4c5374762110 100644
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -9,6 +9,7 @@ 
 #define _LINUX_AMD_PSTATE_H
 
 #include <linux/pm_qos.h>
+#include <linux/platform_profile.h>
 
 /*********************************************************************
  *                        AMD P-state INTERFACE                       *
@@ -108,6 +109,11 @@  struct amd_cpudata {
 	u8	epp_default_dc;
 	bool	dynamic_epp;
 	struct notifier_block power_nb;
+
+	/* platform profile */
+	enum platform_profile_option current_profile;
+	struct device *ppdev;
+	char *profile_name;
 };
 
 /*
@@ -123,5 +129,9 @@  enum amd_pstate_mode {
 };
 const char *amd_pstate_get_mode_string(enum amd_pstate_mode mode);
 int amd_pstate_update_status(const char *buf, size_t size);
+ssize_t store_energy_performance_preference(struct cpufreq_policy *policy,
+					    const char *buf, size_t count);
+ssize_t show_energy_performance_preference(struct cpufreq_policy *policy, char *buf);
+void amd_pstate_clear_dynamic_epp(struct cpufreq_policy *policy);
 
 #endif /* _LINUX_AMD_PSTATE_H */