diff mbox

[RFC] intel_pstate: play well with frequency limits set by acpi

Message ID 20150716181706.6500.64386.stgit@buzz (mailing list archive)
State RFC
Delegated to: Rafael Wysocki
Headers show

Commit Message

Konstantin Khlebnikov July 16, 2015, 6:17 p.m. UTC
IPMI can control CPU P-states remotely: configuration is reported via
common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
support in intel_pstate to receive and use these P-state limits.

* ignore limit of top state in _PPC: it lower than turbo boost frequency
* register intel_pstate in acpi-processor to get states from _PSS
* link acpi_processor_get_bios_limit: this adds attribute "bios_limit"

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
---
 drivers/acpi/processor_perflib.c |    3 +-
 drivers/cpufreq/intel_pstate.c   |   57 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 1 deletion(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Srinivas Pandruvada July 16, 2015, 10:08 p.m. UTC | #1
On Thu, 2015-07-16 at 21:17 +0300, Konstantin Khlebnikov wrote:
> IPMI can control CPU P-states remotely: configuration is reported via
> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
> support in intel_pstate to receive and use these P-state limits.
> 
> * ignore limit of top state in _PPC: it lower than turbo boost frequency
> * register intel_pstate in acpi-processor to get states from _PSS
> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
> 
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> ---
>  drivers/acpi/processor_perflib.c |    3 +-
>  drivers/cpufreq/intel_pstate.c   |   57 ++++++++++++++++++++++++++++++++++++++
>  2 files changed, 59 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
> index cfc8aba72f86..781e328c9d5f 100644
> --- a/drivers/acpi/processor_perflib.c
> +++ b/drivers/acpi/processor_perflib.c
> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
>  
>  	ppc = (unsigned int)pr->performance_platform_limit;
>  
> -	if (ppc >= pr->performance->state_count)
> +	/* Ignore limit of top state: it lower than turbo boost frequency */
> +	if (!ppc || ppc >= pr->performance->state_count)
Why? Isn't the previous check enough?
>  		goto out;
>  
>  	cpufreq_verify_within_limits(policy, 0,
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index 15ada47bb720..4a34ddf4fa73 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -26,6 +26,7 @@
>  #include <linux/fs.h>
>  #include <linux/debugfs.h>
>  #include <linux/acpi.h>
> +#include <acpi/processor.h>
>  #include <linux/vmalloc.h>
>  #include <trace/events/power.h>
>  
> @@ -113,6 +114,9 @@ struct cpudata {
>  	u64	prev_mperf;
>  	u64	prev_tsc;
>  	struct sample sample;
> +#ifdef CONFIG_ACPI_PROCESSOR
> +	struct acpi_processor_performance acpi_data;
> +#endif
>  };
>  
>  static struct cpudata **all_cpu_data;
> @@ -145,6 +149,7 @@ static int hwp_active;
>  
>  struct perf_limits {
>  	int no_turbo;
> +	int no_acpi;
>  	int turbo_disabled;
>  	int max_perf_pct;
>  	int min_perf_pct;
> @@ -158,6 +163,7 @@ struct perf_limits {
>  
>  static struct perf_limits limits = {
>  	.no_turbo = 0,
> +	.no_acpi = !IS_ENABLED(CONFIG_ACPI_PROCESSOR),
>  	.turbo_disabled = 0,
>  	.max_perf_pct = 100,
>  	.max_perf = int_tofp(1),
> @@ -449,6 +455,18 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
>  	return count;
>  }
>  
> +static ssize_t store_no_acpi(struct kobject *a, struct attribute *b,
> +			     const char *buf, size_t count)
> +{
> +#ifdef CONFIG_ACPI_PROCESSOR
> +	return kstrtouint(buf, 0, &limits.no_acpi) ?: count;
> +#else
> +	return -ENODEV;
> +#endif
> +}
> +show_one(no_acpi, no_acpi);
> +define_one_global_rw(no_acpi);
> +
>  show_one(max_perf_pct, max_perf_pct);
>  show_one(min_perf_pct, min_perf_pct);
>  
> @@ -460,6 +478,7 @@ define_one_global_ro(num_pstates);
>  
>  static struct attribute *intel_pstate_attributes[] = {
>  	&no_turbo.attr,
> +	&no_acpi.attr,
>  	&max_perf_pct.attr,
>  	&min_perf_pct.attr,
>  	&turbo_pct.attr,
> @@ -1049,6 +1068,38 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
>  	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
>  	cpumask_set_cpu(policy->cpu, policy->cpus);
>  
> +#ifdef CONFIG_ACPI_PROCESSOR
> +	if (!limits.no_acpi) {
> +		/*
> +		 * Minimum necessary to get acpi_processor_ppc_notifier() and
> +		 * acpi_processor_get_bios_limit() working.
> +		 */
> +		if (!zalloc_cpumask_var(&cpu->acpi_data.shared_cpu_map,
> +					GFP_KERNEL))
> +			rc = -ENOMEM;
> +		else
> +			rc = acpi_processor_register_performance(
> +					&cpu->acpi_data, policy->cpu);
> +		if (rc) {
> +			pr_err("intel_pstate: acpi init failed: %d\n", rc);
> +			free_cpumask_var(cpu->acpi_data.shared_cpu_map);
> +			limits.no_acpi = 1;
> +		}
> +	}
> +#endif
> +	return 0;
> +}
> +
> +static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
> +{
> +#ifdef CONFIG_ACPI_PROCESSOR
> +	struct cpudata *cpu = all_cpu_data[policy->cpu];
> +
> +	if (cpu->acpi_data.state_count)
> +		acpi_processor_unregister_performance(&cpu->acpi_data,
> +						      policy->cpu);
> +	free_cpumask_var(cpu->acpi_data.shared_cpu_map);
> +#endif
>  	return 0;
>  }
>  
> @@ -1057,7 +1108,11 @@ static struct cpufreq_driver intel_pstate_driver = {
>  	.verify		= intel_pstate_verify_policy,
>  	.setpolicy	= intel_pstate_set_policy,
>  	.get		= intel_pstate_get,
> +#ifdef CONFIG_ACPI_PROCESSOR
> +	.bios_limit	= acpi_processor_get_bios_limit,
> +#endif
>  	.init		= intel_pstate_cpu_init,
> +	.exit		= intel_pstate_cpu_exit,
>  	.stop_cpu	= intel_pstate_stop_cpu,
>  	.name		= "intel_pstate",
>  };
> @@ -1286,6 +1341,8 @@ static int __init intel_pstate_setup(char *str)
>  		force_load = 1;
>  	if (!strcmp(str, "hwp_only"))
>  		hwp_only = 1;
> +	if (!strcmp(str, "no_acpi"))
> +		limits.no_acpi = 1;
>  	return 0;
>  }
>  early_param("intel_pstate", intel_pstate_setup);
> 
_PPC is index into _PSS. Since intel P state doesn't follow _PSS, the
states may not be 1:1 matching. So we have to harmonize them.


> --
> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Konstantin Khlebnikov July 17, 2015, 4:36 a.m. UTC | #2
On Fri, Jul 17, 2015 at 1:08 AM, Srinivas Pandruvada
<srinivas.pandruvada@linux.intel.com> wrote:
> On Thu, 2015-07-16 at 21:17 +0300, Konstantin Khlebnikov wrote:
>> IPMI can control CPU P-states remotely: configuration is reported via
>> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
>> support in intel_pstate to receive and use these P-state limits.
>>
>> * ignore limit of top state in _PPC: it lower than turbo boost frequency
>> * register intel_pstate in acpi-processor to get states from _PSS
>> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
>>
>> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
>> ---
>>  drivers/acpi/processor_perflib.c |    3 +-
>>  drivers/cpufreq/intel_pstate.c   |   57 ++++++++++++++++++++++++++++++++++++++
>>  2 files changed, 59 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
>> index cfc8aba72f86..781e328c9d5f 100644
>> --- a/drivers/acpi/processor_perflib.c
>> +++ b/drivers/acpi/processor_perflib.c
>> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
>>
>>       ppc = (unsigned int)pr->performance_platform_limit;
>>
>> -     if (ppc >= pr->performance->state_count)
>> +     /* Ignore limit of top state: it lower than turbo boost frequency */
>> +     if (!ppc || ppc >= pr->performance->state_count)
> Why? Isn't the previous check enough?

Zero _PPC state must be top performance state but as I see frequency in
_PSS is lower than maximum possible turbo frequency. So, in this case
intel_pstate cannnot get "100%"  for max bound even it there is no limit set.

For example: I saw _PSS[0] = 2601 Mhz, PSS[1] = 2600 Mhz while turbo
state is 3400 Mhz.

>>               goto out;
>>
>>       cpufreq_verify_within_limits(policy, 0,
>> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
>> index 15ada47bb720..4a34ddf4fa73 100644
>> --- a/drivers/cpufreq/intel_pstate.c
>> +++ b/drivers/cpufreq/intel_pstate.c
>> @@ -26,6 +26,7 @@
>>  #include <linux/fs.h>
>>  #include <linux/debugfs.h>
>>  #include <linux/acpi.h>
>> +#include <acpi/processor.h>
>>  #include <linux/vmalloc.h>
>>  #include <trace/events/power.h>
>>
>> @@ -113,6 +114,9 @@ struct cpudata {
>>       u64     prev_mperf;
>>       u64     prev_tsc;
>>       struct sample sample;
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> +     struct acpi_processor_performance acpi_data;
>> +#endif
>>  };
>>
>>  static struct cpudata **all_cpu_data;
>> @@ -145,6 +149,7 @@ static int hwp_active;
>>
>>  struct perf_limits {
>>       int no_turbo;
>> +     int no_acpi;
>>       int turbo_disabled;
>>       int max_perf_pct;
>>       int min_perf_pct;
>> @@ -158,6 +163,7 @@ struct perf_limits {
>>
>>  static struct perf_limits limits = {
>>       .no_turbo = 0,
>> +     .no_acpi = !IS_ENABLED(CONFIG_ACPI_PROCESSOR),
>>       .turbo_disabled = 0,
>>       .max_perf_pct = 100,
>>       .max_perf = int_tofp(1),
>> @@ -449,6 +455,18 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
>>       return count;
>>  }
>>
>> +static ssize_t store_no_acpi(struct kobject *a, struct attribute *b,
>> +                          const char *buf, size_t count)
>> +{
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> +     return kstrtouint(buf, 0, &limits.no_acpi) ?: count;
>> +#else
>> +     return -ENODEV;
>> +#endif
>> +}
>> +show_one(no_acpi, no_acpi);
>> +define_one_global_rw(no_acpi);
>> +
>>  show_one(max_perf_pct, max_perf_pct);
>>  show_one(min_perf_pct, min_perf_pct);
>>
>> @@ -460,6 +478,7 @@ define_one_global_ro(num_pstates);
>>
>>  static struct attribute *intel_pstate_attributes[] = {
>>       &no_turbo.attr,
>> +     &no_acpi.attr,
>>       &max_perf_pct.attr,
>>       &min_perf_pct.attr,
>>       &turbo_pct.attr,
>> @@ -1049,6 +1068,38 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
>>       policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
>>       cpumask_set_cpu(policy->cpu, policy->cpus);
>>
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> +     if (!limits.no_acpi) {
>> +             /*
>> +              * Minimum necessary to get acpi_processor_ppc_notifier() and
>> +              * acpi_processor_get_bios_limit() working.
>> +              */
>> +             if (!zalloc_cpumask_var(&cpu->acpi_data.shared_cpu_map,
>> +                                     GFP_KERNEL))
>> +                     rc = -ENOMEM;
>> +             else
>> +                     rc = acpi_processor_register_performance(
>> +                                     &cpu->acpi_data, policy->cpu);
>> +             if (rc) {
>> +                     pr_err("intel_pstate: acpi init failed: %d\n", rc);
>> +                     free_cpumask_var(cpu->acpi_data.shared_cpu_map);
>> +                     limits.no_acpi = 1;
>> +             }
>> +     }
>> +#endif
>> +     return 0;
>> +}
>> +
>> +static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
>> +{
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> +     struct cpudata *cpu = all_cpu_data[policy->cpu];
>> +
>> +     if (cpu->acpi_data.state_count)
>> +             acpi_processor_unregister_performance(&cpu->acpi_data,
>> +                                                   policy->cpu);
>> +     free_cpumask_var(cpu->acpi_data.shared_cpu_map);
>> +#endif
>>       return 0;
>>  }
>>
>> @@ -1057,7 +1108,11 @@ static struct cpufreq_driver intel_pstate_driver = {
>>       .verify         = intel_pstate_verify_policy,
>>       .setpolicy      = intel_pstate_set_policy,
>>       .get            = intel_pstate_get,
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> +     .bios_limit     = acpi_processor_get_bios_limit,
>> +#endif
>>       .init           = intel_pstate_cpu_init,
>> +     .exit           = intel_pstate_cpu_exit,
>>       .stop_cpu       = intel_pstate_stop_cpu,
>>       .name           = "intel_pstate",
>>  };
>> @@ -1286,6 +1341,8 @@ static int __init intel_pstate_setup(char *str)
>>               force_load = 1;
>>       if (!strcmp(str, "hwp_only"))
>>               hwp_only = 1;
>> +     if (!strcmp(str, "no_acpi"))
>> +             limits.no_acpi = 1;
>>       return 0;
>>  }
>>  early_param("intel_pstate", intel_pstate_setup);
>>
> _PPC is index into _PSS. Since intel P state doesn't follow _PSS, the
> states may not be 1:1 matching. So we have to harmonize them.
>
>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
ethan zhao July 17, 2015, 6 a.m. UTC | #3
On 2015/7/17 2:17, Konstantin Khlebnikov wrote:
> IPMI can control CPU P-states remotely: configuration is reported via
> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
> support in intel_pstate to receive and use these P-state limits.
>
> * ignore limit of top state in _PPC: it lower than turbo boost frequency
> * register intel_pstate in acpi-processor to get states from _PSS
> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
>
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> ---
>   drivers/acpi/processor_perflib.c |    3 +-
>   drivers/cpufreq/intel_pstate.c   |   57 ++++++++++++++++++++++++++++++++++++++
>   2 files changed, 59 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
> index cfc8aba72f86..781e328c9d5f 100644
> --- a/drivers/acpi/processor_perflib.c
> +++ b/drivers/acpi/processor_perflib.c
> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
>   
>   	ppc = (unsigned int)pr->performance_platform_limit;
>   
> -	if (ppc >= pr->performance->state_count)
> +	/* Ignore limit of top state: it lower than turbo boost frequency */
> +	if (!ppc || ppc >= pr->performance->state_count)
  Perhaps the !ppc is wrong if we check it against ACPI spec.
  Zero value of ppc means:

  "0 – States 0 through Nth state are available (all states available)"

>   		goto out;
>   
>   	cpufreq_verify_within_limits(policy, 0,
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index 15ada47bb720..4a34ddf4fa73 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -26,6 +26,7 @@
>   #include <linux/fs.h>
>   #include <linux/debugfs.h>
>   #include <linux/acpi.h>
> +#include <acpi/processor.h>
>   #include <linux/vmalloc.h>
>   #include <trace/events/power.h>
>   
> @@ -113,6 +114,9 @@ struct cpudata {
>   	u64	prev_mperf;
>   	u64	prev_tsc;
>   	struct sample sample;
> +#ifdef CONFIG_ACPI_PROCESSOR
> +	struct acpi_processor_performance acpi_data;
> +#endif
>   };
>   
>   static struct cpudata **all_cpu_data;
> @@ -145,6 +149,7 @@ static int hwp_active;
>   
>   struct perf_limits {
>   	int no_turbo;
> +	int no_acpi;
>   	int turbo_disabled;
>   	int max_perf_pct;
>   	int min_perf_pct;
> @@ -158,6 +163,7 @@ struct perf_limits {
>   
>   static struct perf_limits limits = {
>   	.no_turbo = 0,
> +	.no_acpi = !IS_ENABLED(CONFIG_ACPI_PROCESSOR),
>   	.turbo_disabled = 0,
>   	.max_perf_pct = 100,
>   	.max_perf = int_tofp(1),
> @@ -449,6 +455,18 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
>   	return count;
>   }
>   
> +static ssize_t store_no_acpi(struct kobject *a, struct attribute *b,
> +			     const char *buf, size_t count)
> +{
> +#ifdef CONFIG_ACPI_PROCESSOR
> +	return kstrtouint(buf, 0, &limits.no_acpi) ?: count;
> +#else
> +	return -ENODEV;
> +#endif
> +}
> +show_one(no_acpi, no_acpi);
> +define_one_global_rw(no_acpi);
> +
>   show_one(max_perf_pct, max_perf_pct);
>   show_one(min_perf_pct, min_perf_pct);
>   
> @@ -460,6 +478,7 @@ define_one_global_ro(num_pstates);
>   
>   static struct attribute *intel_pstate_attributes[] = {
>   	&no_turbo.attr,
> +	&no_acpi.attr,
>   	&max_perf_pct.attr,
>   	&min_perf_pct.attr,
>   	&turbo_pct.attr,
> @@ -1049,6 +1068,38 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
>   	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
>   	cpumask_set_cpu(policy->cpu, policy->cpus);
>   
> +#ifdef CONFIG_ACPI_PROCESSOR
> +	if (!limits.no_acpi) {
> +		/*
> +		 * Minimum necessary to get acpi_processor_ppc_notifier() and
> +		 * acpi_processor_get_bios_limit() working.
> +		 */
> +		if (!zalloc_cpumask_var(&cpu->acpi_data.shared_cpu_map,
> +					GFP_KERNEL))
> +			rc = -ENOMEM;
> +		else
> +			rc = acpi_processor_register_performance(
> +					&cpu->acpi_data, policy->cpu);
> +		if (rc) {
> +			pr_err("intel_pstate: acpi init failed: %d\n", rc);
> +			free_cpumask_var(cpu->acpi_data.shared_cpu_map);
> +			limits.no_acpi = 1;
> +		}
> +	}
> +#endif
> +	return 0;
> +}
> +
> +static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
> +{
> +#ifdef CONFIG_ACPI_PROCESSOR
> +	struct cpudata *cpu = all_cpu_data[policy->cpu];
> +
> +	if (cpu->acpi_data.state_count)
> +		acpi_processor_unregister_performance(&cpu->acpi_data,
> +						      policy->cpu);
> +	free_cpumask_var(cpu->acpi_data.shared_cpu_map);
> +#endif
>   	return 0;
>   }
>   
> @@ -1057,7 +1108,11 @@ static struct cpufreq_driver intel_pstate_driver = {
>   	.verify		= intel_pstate_verify_policy,
>   	.setpolicy	= intel_pstate_set_policy,
>   	.get		= intel_pstate_get,
> +#ifdef CONFIG_ACPI_PROCESSOR
> +	.bios_limit	= acpi_processor_get_bios_limit,
> +#endif
>   	.init		= intel_pstate_cpu_init,
> +	.exit		= intel_pstate_cpu_exit,
>   	.stop_cpu	= intel_pstate_stop_cpu,
>   	.name		= "intel_pstate",
>   };
> @@ -1286,6 +1341,8 @@ static int __init intel_pstate_setup(char *str)
>   		force_load = 1;
>   	if (!strcmp(str, "hwp_only"))
>   		hwp_only = 1;
> +	if (!strcmp(str, "no_acpi"))
> +		limits.no_acpi = 1;
>   	return 0;
>   }
>   early_param("intel_pstate", intel_pstate_setup);
>


  Thanks,
  Ethan
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Konstantin Khlebnikov July 17, 2015, 7:10 a.m. UTC | #4
On Fri, Jul 17, 2015 at 9:00 AM, ethan zhao <ethan.zhao@oracle.com> wrote:
>
> On 2015/7/17 2:17, Konstantin Khlebnikov wrote:
>>
>> IPMI can control CPU P-states remotely: configuration is reported via
>> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
>> support in intel_pstate to receive and use these P-state limits.
>>
>> * ignore limit of top state in _PPC: it lower than turbo boost frequency
>> * register intel_pstate in acpi-processor to get states from _PSS
>> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
>>
>> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
>> ---
>>   drivers/acpi/processor_perflib.c |    3 +-
>>   drivers/cpufreq/intel_pstate.c   |   57
>> ++++++++++++++++++++++++++++++++++++++
>>   2 files changed, 59 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/acpi/processor_perflib.c
>> b/drivers/acpi/processor_perflib.c
>> index cfc8aba72f86..781e328c9d5f 100644
>> --- a/drivers/acpi/processor_perflib.c
>> +++ b/drivers/acpi/processor_perflib.c
>> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct
>> notifier_block *nb,
>>         ppc = (unsigned int)pr->performance_platform_limit;
>>   -     if (ppc >= pr->performance->state_count)
>> +       /* Ignore limit of top state: it lower than turbo boost frequency
>> */
>> +       if (!ppc || ppc >= pr->performance->state_count)
>
>  Perhaps the !ppc is wrong if we check it against ACPI spec.
>  Zero value of ppc means:
>
>  "0 – States 0 through Nth state are available (all states available)"

Yep, also states are ordered by power - state0 must must have highest
performance.
This code below clamps range of available frequences to 0.._PSS[_PPC].frequency.
So if _PPC = 0 then there should be no limit but then turbo is enabled frequency
of state0 is actually could be lower than effective turbo frequency:
for hardware I have it's _PSS[0] = 2601 Mhz, PSS[1] = 2600 Mhz while turbo
state is 3400 Mhz.

>
>
>>                 goto out;
>>         cpufreq_verify_within_limits(policy, 0,
>> diff --git a/drivers/cpufreq/intel_pstate.c
>> b/drivers/cpufreq/intel_pstate.c
>> index 15ada47bb720..4a34ddf4fa73 100644
>> --- a/drivers/cpufreq/intel_pstate.c
>> +++ b/drivers/cpufreq/intel_pstate.c
>> @@ -26,6 +26,7 @@
>>   #include <linux/fs.h>
>>   #include <linux/debugfs.h>
>>   #include <linux/acpi.h>
>> +#include <acpi/processor.h>
>>   #include <linux/vmalloc.h>
>>   #include <trace/events/power.h>
>>   @@ -113,6 +114,9 @@ struct cpudata {
>>         u64     prev_mperf;
>>         u64     prev_tsc;
>>         struct sample sample;
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> +       struct acpi_processor_performance acpi_data;
>> +#endif
>>   };
>>     static struct cpudata **all_cpu_data;
>> @@ -145,6 +149,7 @@ static int hwp_active;
>>     struct perf_limits {
>>         int no_turbo;
>> +       int no_acpi;
>>         int turbo_disabled;
>>         int max_perf_pct;
>>         int min_perf_pct;
>> @@ -158,6 +163,7 @@ struct perf_limits {
>>     static struct perf_limits limits = {
>>         .no_turbo = 0,
>> +       .no_acpi = !IS_ENABLED(CONFIG_ACPI_PROCESSOR),
>>         .turbo_disabled = 0,
>>         .max_perf_pct = 100,
>>         .max_perf = int_tofp(1),
>> @@ -449,6 +455,18 @@ static ssize_t store_min_perf_pct(struct kobject *a,
>> struct attribute *b,
>>         return count;
>>   }
>>   +static ssize_t store_no_acpi(struct kobject *a, struct attribute *b,
>> +                            const char *buf, size_t count)
>> +{
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> +       return kstrtouint(buf, 0, &limits.no_acpi) ?: count;
>> +#else
>> +       return -ENODEV;
>> +#endif
>> +}
>> +show_one(no_acpi, no_acpi);
>> +define_one_global_rw(no_acpi);
>> +
>>   show_one(max_perf_pct, max_perf_pct);
>>   show_one(min_perf_pct, min_perf_pct);
>>   @@ -460,6 +478,7 @@ define_one_global_ro(num_pstates);
>>     static struct attribute *intel_pstate_attributes[] = {
>>         &no_turbo.attr,
>> +       &no_acpi.attr,
>>         &max_perf_pct.attr,
>>         &min_perf_pct.attr,
>>         &turbo_pct.attr,
>> @@ -1049,6 +1068,38 @@ static int intel_pstate_cpu_init(struct
>> cpufreq_policy *policy)
>>         policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
>>         cpumask_set_cpu(policy->cpu, policy->cpus);
>>   +#ifdef CONFIG_ACPI_PROCESSOR
>> +       if (!limits.no_acpi) {
>> +               /*
>> +                * Minimum necessary to get acpi_processor_ppc_notifier()
>> and
>> +                * acpi_processor_get_bios_limit() working.
>> +                */
>> +               if (!zalloc_cpumask_var(&cpu->acpi_data.shared_cpu_map,
>> +                                       GFP_KERNEL))
>> +                       rc = -ENOMEM;
>> +               else
>> +                       rc = acpi_processor_register_performance(
>> +                                       &cpu->acpi_data, policy->cpu);
>> +               if (rc) {
>> +                       pr_err("intel_pstate: acpi init failed: %d\n",
>> rc);
>> +                       free_cpumask_var(cpu->acpi_data.shared_cpu_map);
>> +                       limits.no_acpi = 1;
>> +               }
>> +       }
>> +#endif
>> +       return 0;
>> +}
>> +
>> +static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
>> +{
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> +       struct cpudata *cpu = all_cpu_data[policy->cpu];
>> +
>> +       if (cpu->acpi_data.state_count)
>> +               acpi_processor_unregister_performance(&cpu->acpi_data,
>> +                                                     policy->cpu);
>> +       free_cpumask_var(cpu->acpi_data.shared_cpu_map);
>> +#endif
>>         return 0;
>>   }
>>   @@ -1057,7 +1108,11 @@ static struct cpufreq_driver intel_pstate_driver
>> = {
>>         .verify         = intel_pstate_verify_policy,
>>         .setpolicy      = intel_pstate_set_policy,
>>         .get            = intel_pstate_get,
>> +#ifdef CONFIG_ACPI_PROCESSOR
>> +       .bios_limit     = acpi_processor_get_bios_limit,
>> +#endif
>>         .init           = intel_pstate_cpu_init,
>> +       .exit           = intel_pstate_cpu_exit,
>>         .stop_cpu       = intel_pstate_stop_cpu,
>>         .name           = "intel_pstate",
>>   };
>> @@ -1286,6 +1341,8 @@ static int __init intel_pstate_setup(char *str)
>>                 force_load = 1;
>>         if (!strcmp(str, "hwp_only"))
>>                 hwp_only = 1;
>> +       if (!strcmp(str, "no_acpi"))
>> +               limits.no_acpi = 1;
>>         return 0;
>>   }
>>   early_param("intel_pstate", intel_pstate_setup);
>>
>
>
>  Thanks,
>  Ethan
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Srinivas Pandruvada July 20, 2015, 9:08 p.m. UTC | #5
On Fri, 2015-07-17 at 07:36 +0300, Konstantin Khlebnikov wrote:
> On Fri, Jul 17, 2015 at 1:08 AM, Srinivas Pandruvada
> <srinivas.pandruvada@linux.intel.com> wrote:
> > On Thu, 2015-07-16 at 21:17 +0300, Konstantin Khlebnikov wrote:
> >> IPMI can control CPU P-states remotely: configuration is reported via
> >> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
> >> support in intel_pstate to receive and use these P-state limits.
> >>
> >> * ignore limit of top state in _PPC: it lower than turbo boost frequency
> >> * register intel_pstate in acpi-processor to get states from _PSS
> >> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
> >>
> >> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> >> ---
> >>  drivers/acpi/processor_perflib.c |    3 +-
> >>  drivers/cpufreq/intel_pstate.c   |   57 ++++++++++++++++++++++++++++++++++++++
> >>  2 files changed, 59 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
> >> index cfc8aba72f86..781e328c9d5f 100644
> >> --- a/drivers/acpi/processor_perflib.c
> >> +++ b/drivers/acpi/processor_perflib.c
> >> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
> >>
> >>       ppc = (unsigned int)pr->performance_platform_limit;
> >>
> >> -     if (ppc >= pr->performance->state_count)
> >> +     /* Ignore limit of top state: it lower than turbo boost frequency */
> >> +     if (!ppc || ppc >= pr->performance->state_count)
> > Why? Isn't the previous check enough?
> 
> Zero _PPC state must be top performance state but as I see frequency in
> _PSS is lower than maximum possible turbo frequency. So, in this case
> intel_pstate cannnot get "100%"  for max bound even it there is no limit set.
> 
> For example: I saw _PSS[0] = 2601 Mhz, PSS[1] = 2600 Mhz while turbo
> state is 3400 Mhz.
> 
Have you tested dynamic _PPC modification with acpi cpufreq with this
change (after boot)? Suppose _PPC is changed from 3 to 0, then
cpufreq_verify_within_limits will not be called to change to new max
turbo performance state.

Thanks,
Srinivas
> >>               goto out;
> >>
> >>       cpufreq_verify_within_limits(policy, 0,
> >> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> >> index 15ada47bb720..4a34ddf4fa73 100644
> >> --- a/drivers/cpufreq/intel_pstate.c
> >> +++ b/drivers/cpufreq/intel_pstate.c
> >> @@ -26,6 +26,7 @@
> >>  #include <linux/fs.h>
> >>  #include <linux/debugfs.h>
> >>  #include <linux/acpi.h>
> >> +#include <acpi/processor.h>
> >>  #include <linux/vmalloc.h>
> >>  #include <trace/events/power.h>
> >>
> >> @@ -113,6 +114,9 @@ struct cpudata {
> >>       u64     prev_mperf;
> >>       u64     prev_tsc;
> >>       struct sample sample;
> >> +#ifdef CONFIG_ACPI_PROCESSOR
> >> +     struct acpi_processor_performance acpi_data;
> >> +#endif
> >>  };
> >>
> >>  static struct cpudata **all_cpu_data;
> >> @@ -145,6 +149,7 @@ static int hwp_active;
> >>
> >>  struct perf_limits {
> >>       int no_turbo;
> >> +     int no_acpi;
> >>       int turbo_disabled;
> >>       int max_perf_pct;
> >>       int min_perf_pct;
> >> @@ -158,6 +163,7 @@ struct perf_limits {
> >>
> >>  static struct perf_limits limits = {
> >>       .no_turbo = 0,
> >> +     .no_acpi = !IS_ENABLED(CONFIG_ACPI_PROCESSOR),
> >>       .turbo_disabled = 0,
> >>       .max_perf_pct = 100,
> >>       .max_perf = int_tofp(1),
> >> @@ -449,6 +455,18 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
> >>       return count;
> >>  }
> >>
> >> +static ssize_t store_no_acpi(struct kobject *a, struct attribute *b,
> >> +                          const char *buf, size_t count)
> >> +{
> >> +#ifdef CONFIG_ACPI_PROCESSOR
> >> +     return kstrtouint(buf, 0, &limits.no_acpi) ?: count;
> >> +#else
> >> +     return -ENODEV;
> >> +#endif
> >> +}
> >> +show_one(no_acpi, no_acpi);
> >> +define_one_global_rw(no_acpi);
> >> +
> >>  show_one(max_perf_pct, max_perf_pct);
> >>  show_one(min_perf_pct, min_perf_pct);
> >>
> >> @@ -460,6 +478,7 @@ define_one_global_ro(num_pstates);
> >>
> >>  static struct attribute *intel_pstate_attributes[] = {
> >>       &no_turbo.attr,
> >> +     &no_acpi.attr,
> >>       &max_perf_pct.attr,
> >>       &min_perf_pct.attr,
> >>       &turbo_pct.attr,
> >> @@ -1049,6 +1068,38 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
> >>       policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
> >>       cpumask_set_cpu(policy->cpu, policy->cpus);
> >>
> >> +#ifdef CONFIG_ACPI_PROCESSOR
> >> +     if (!limits.no_acpi) {
> >> +             /*
> >> +              * Minimum necessary to get acpi_processor_ppc_notifier() and
> >> +              * acpi_processor_get_bios_limit() working.
> >> +              */
> >> +             if (!zalloc_cpumask_var(&cpu->acpi_data.shared_cpu_map,
> >> +                                     GFP_KERNEL))
> >> +                     rc = -ENOMEM;
> >> +             else
> >> +                     rc = acpi_processor_register_performance(
> >> +                                     &cpu->acpi_data, policy->cpu);
> >> +             if (rc) {
> >> +                     pr_err("intel_pstate: acpi init failed: %d\n", rc);
> >> +                     free_cpumask_var(cpu->acpi_data.shared_cpu_map);
> >> +                     limits.no_acpi = 1;
> >> +             }
> >> +     }
> >> +#endif
> >> +     return 0;
> >> +}
> >> +
> >> +static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
> >> +{
> >> +#ifdef CONFIG_ACPI_PROCESSOR
> >> +     struct cpudata *cpu = all_cpu_data[policy->cpu];
> >> +
> >> +     if (cpu->acpi_data.state_count)
> >> +             acpi_processor_unregister_performance(&cpu->acpi_data,
> >> +                                                   policy->cpu);
> >> +     free_cpumask_var(cpu->acpi_data.shared_cpu_map);
> >> +#endif
> >>       return 0;
> >>  }
> >>
> >> @@ -1057,7 +1108,11 @@ static struct cpufreq_driver intel_pstate_driver = {
> >>       .verify         = intel_pstate_verify_policy,
> >>       .setpolicy      = intel_pstate_set_policy,
> >>       .get            = intel_pstate_get,
> >> +#ifdef CONFIG_ACPI_PROCESSOR
> >> +     .bios_limit     = acpi_processor_get_bios_limit,
> >> +#endif
> >>       .init           = intel_pstate_cpu_init,
> >> +     .exit           = intel_pstate_cpu_exit,
> >>       .stop_cpu       = intel_pstate_stop_cpu,
> >>       .name           = "intel_pstate",
> >>  };
> >> @@ -1286,6 +1341,8 @@ static int __init intel_pstate_setup(char *str)
> >>               force_load = 1;
> >>       if (!strcmp(str, "hwp_only"))
> >>               hwp_only = 1;
> >> +     if (!strcmp(str, "no_acpi"))
> >> +             limits.no_acpi = 1;
> >>       return 0;
> >>  }
> >>  early_param("intel_pstate", intel_pstate_setup);
> >>
> > _PPC is index into _PSS. Since intel P state doesn't follow _PSS, the
> > states may not be 1:1 matching. So we have to harmonize them.
> >
> >
> >> --
> >> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> >> the body of a message to majordomo@vger.kernel.org
> >> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Konstantin Khlebnikov July 21, 2015, 10:25 a.m. UTC | #6
On 21.07.2015 00:08, Srinivas Pandruvada wrote:
> On Fri, 2015-07-17 at 07:36 +0300, Konstantin Khlebnikov wrote:
>> On Fri, Jul 17, 2015 at 1:08 AM, Srinivas Pandruvada
>> <srinivas.pandruvada@linux.intel.com> wrote:
>>> On Thu, 2015-07-16 at 21:17 +0300, Konstantin Khlebnikov wrote:
>>>> IPMI can control CPU P-states remotely: configuration is reported via
>>>> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
>>>> support in intel_pstate to receive and use these P-state limits.
>>>>
>>>> * ignore limit of top state in _PPC: it lower than turbo boost frequency
>>>> * register intel_pstate in acpi-processor to get states from _PSS
>>>> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
>>>>
>>>> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
>>>> ---
>>>>   drivers/acpi/processor_perflib.c |    3 +-
>>>>   drivers/cpufreq/intel_pstate.c   |   57 ++++++++++++++++++++++++++++++++++++++
>>>>   2 files changed, 59 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
>>>> index cfc8aba72f86..781e328c9d5f 100644
>>>> --- a/drivers/acpi/processor_perflib.c
>>>> +++ b/drivers/acpi/processor_perflib.c
>>>> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
>>>>
>>>>        ppc = (unsigned int)pr->performance_platform_limit;
>>>>
>>>> -     if (ppc >= pr->performance->state_count)
>>>> +     /* Ignore limit of top state: it lower than turbo boost frequency */
>>>> +     if (!ppc || ppc >= pr->performance->state_count)
>>> Why? Isn't the previous check enough?
>>
>> Zero _PPC state must be top performance state but as I see frequency in
>> _PSS is lower than maximum possible turbo frequency. So, in this case
>> intel_pstate cannnot get "100%"  for max bound even it there is no limit set.
>>
>> For example: I saw _PSS[0] = 2601 Mhz, PSS[1] = 2600 Mhz while turbo
>> state is 3400 Mhz.
>>
> Have you tested dynamic _PPC modification with acpi cpufreq with this
> change (after boot)? Suppose _PPC is changed from 3 to 0, then
> cpufreq_verify_within_limits will not be called to change to new max
> turbo performance state.
>

I haven't checked that but as I see acpi_processor_ppc_notifier()
can only reduce maximum frequency. So, there should be no problem
in this case.
Srinivas Pandruvada July 21, 2015, 3:37 p.m. UTC | #7
On Tue, 2015-07-21 at 13:25 +0300, Konstantin Khlebnikov wrote:
> On 21.07.2015 00:08, Srinivas Pandruvada wrote:
> > On Fri, 2015-07-17 at 07:36 +0300, Konstantin Khlebnikov wrote:
> >> On Fri, Jul 17, 2015 at 1:08 AM, Srinivas Pandruvada
> >> <srinivas.pandruvada@linux.intel.com> wrote:
> >>> On Thu, 2015-07-16 at 21:17 +0300, Konstantin Khlebnikov wrote:
> >>>> IPMI can control CPU P-states remotely: configuration is reported via
> >>>> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
> >>>> support in intel_pstate to receive and use these P-state limits.
> >>>>
> >>>> * ignore limit of top state in _PPC: it lower than turbo boost frequency
> >>>> * register intel_pstate in acpi-processor to get states from _PSS
> >>>> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
> >>>>
> >>>> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> >>>> ---
> >>>>   drivers/acpi/processor_perflib.c |    3 +-
> >>>>   drivers/cpufreq/intel_pstate.c   |   57 ++++++++++++++++++++++++++++++++++++++
> >>>>   2 files changed, 59 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
> >>>> index cfc8aba72f86..781e328c9d5f 100644
> >>>> --- a/drivers/acpi/processor_perflib.c
> >>>> +++ b/drivers/acpi/processor_perflib.c
> >>>> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
> >>>>
> >>>>        ppc = (unsigned int)pr->performance_platform_limit;
> >>>>
> >>>> -     if (ppc >= pr->performance->state_count)
> >>>> +     /* Ignore limit of top state: it lower than turbo boost frequency */
> >>>> +     if (!ppc || ppc >= pr->performance->state_count)
> >>> Why? Isn't the previous check enough?
> >>
> >> Zero _PPC state must be top performance state but as I see frequency in
> >> _PSS is lower than maximum possible turbo frequency. So, in this case
> >> intel_pstate cannnot get "100%"  for max bound even it there is no limit set.
> >>
> >> For example: I saw _PSS[0] = 2601 Mhz, PSS[1] = 2600 Mhz while turbo
> >> state is 3400 Mhz.
> >>
> > Have you tested dynamic _PPC modification with acpi cpufreq with this
> > change (after boot)? Suppose _PPC is changed from 3 to 0, then
> > cpufreq_verify_within_limits will not be called to change to new max
> > turbo performance state.
> >
> 
> I haven't checked that but as I see acpi_processor_ppc_notifier()
> can only reduce maximum frequency. So, there should be no problem
> in this case.
No, it can also be used in both ways. Once reduced, it can increase as
well. _PPC can be dynamically modified by BIOS to reduce and also to
increase.



--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Konstantin Khlebnikov July 21, 2015, 4:37 p.m. UTC | #8
On 21.07.2015 18:37, Srinivas Pandruvada wrote:
> On Tue, 2015-07-21 at 13:25 +0300, Konstantin Khlebnikov wrote:
>> On 21.07.2015 00:08, Srinivas Pandruvada wrote:
>>> On Fri, 2015-07-17 at 07:36 +0300, Konstantin Khlebnikov wrote:
>>>> On Fri, Jul 17, 2015 at 1:08 AM, Srinivas Pandruvada
>>>> <srinivas.pandruvada@linux.intel.com> wrote:
>>>>> On Thu, 2015-07-16 at 21:17 +0300, Konstantin Khlebnikov wrote:
>>>>>> IPMI can control CPU P-states remotely: configuration is reported via
>>>>>> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
>>>>>> support in intel_pstate to receive and use these P-state limits.
>>>>>>
>>>>>> * ignore limit of top state in _PPC: it lower than turbo boost frequency
>>>>>> * register intel_pstate in acpi-processor to get states from _PSS
>>>>>> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
>>>>>>
>>>>>> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
>>>>>> ---
>>>>>>    drivers/acpi/processor_perflib.c |    3 +-
>>>>>>    drivers/cpufreq/intel_pstate.c   |   57 ++++++++++++++++++++++++++++++++++++++
>>>>>>    2 files changed, 59 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
>>>>>> index cfc8aba72f86..781e328c9d5f 100644
>>>>>> --- a/drivers/acpi/processor_perflib.c
>>>>>> +++ b/drivers/acpi/processor_perflib.c
>>>>>> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
>>>>>>
>>>>>>         ppc = (unsigned int)pr->performance_platform_limit;
>>>>>>
>>>>>> -     if (ppc >= pr->performance->state_count)
>>>>>> +     /* Ignore limit of top state: it lower than turbo boost frequency */
>>>>>> +     if (!ppc || ppc >= pr->performance->state_count)
>>>>> Why? Isn't the previous check enough?
>>>>
>>>> Zero _PPC state must be top performance state but as I see frequency in
>>>> _PSS is lower than maximum possible turbo frequency. So, in this case
>>>> intel_pstate cannnot get "100%"  for max bound even it there is no limit set.
>>>>
>>>> For example: I saw _PSS[0] = 2601 Mhz, PSS[1] = 2600 Mhz while turbo
>>>> state is 3400 Mhz.
>>>>
>>> Have you tested dynamic _PPC modification with acpi cpufreq with this
>>> change (after boot)? Suppose _PPC is changed from 3 to 0, then
>>> cpufreq_verify_within_limits will not be called to change to new max
>>> turbo performance state.
>>>
>>
>> I haven't checked that but as I see acpi_processor_ppc_notifier()
>> can only reduce maximum frequency. So, there should be no problem
>> in this case.
> No, it can also be used in both ways. Once reduced, it can increase as
> well. _PPC can be dynamically modified by BIOS to reduce and also to
> increase.

Well, in this case BIOS will trigger ACPI_PROCESSOR_NOTIFY_PERFORMANCE:
kernel evaluate new _PPC and call cpufreq_update_policy()
which set initial frequency min/max range according to user setup and
apply all limits after that. Initial policy->user_policy.min/max stay
unchanged. So, that dynamic modification works in both ways.
Srinivas Pandruvada July 21, 2015, 6:53 p.m. UTC | #9
On Tue, 2015-07-21 at 19:37 +0300, Konstantin Khlebnikov wrote:
> On 21.07.2015 18:37, Srinivas Pandruvada wrote:
> > On Tue, 2015-07-21 at 13:25 +0300, Konstantin Khlebnikov wrote:
> >> On 21.07.2015 00:08, Srinivas Pandruvada wrote:
> >>> On Fri, 2015-07-17 at 07:36 +0300, Konstantin Khlebnikov wrote:
> >>>> On Fri, Jul 17, 2015 at 1:08 AM, Srinivas Pandruvada
> >>>> <srinivas.pandruvada@linux.intel.com> wrote:
> >>>>> On Thu, 2015-07-16 at 21:17 +0300, Konstantin Khlebnikov wrote:
> >>>>>> IPMI can control CPU P-states remotely: configuration is reported via
> >>>>>> common ACPI interface (_PPC/_PSS/etc). This patch adds required minimal
> >>>>>> support in intel_pstate to receive and use these P-state limits.
> >>>>>>
> >>>>>> * ignore limit of top state in _PPC: it lower than turbo boost frequency
> >>>>>> * register intel_pstate in acpi-processor to get states from _PSS
> >>>>>> * link acpi_processor_get_bios_limit: this adds attribute "bios_limit"
> >>>>>>
> >>>>>> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> >>>>>> ---
> >>>>>>    drivers/acpi/processor_perflib.c |    3 +-
> >>>>>>    drivers/cpufreq/intel_pstate.c   |   57 ++++++++++++++++++++++++++++++++++++++
> >>>>>>    2 files changed, 59 insertions(+), 1 deletion(-)
> >>>>>>
> >>>>>> diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
> >>>>>> index cfc8aba72f86..781e328c9d5f 100644
> >>>>>> --- a/drivers/acpi/processor_perflib.c
> >>>>>> +++ b/drivers/acpi/processor_perflib.c
> >>>>>> @@ -98,7 +98,8 @@ static int acpi_processor_ppc_notifier(struct notifier_block *nb,
> >>>>>>
> >>>>>>         ppc = (unsigned int)pr->performance_platform_limit;
> >>>>>>
> >>>>>> -     if (ppc >= pr->performance->state_count)
> >>>>>> +     /* Ignore limit of top state: it lower than turbo boost frequency */
> >>>>>> +     if (!ppc || ppc >= pr->performance->state_count)
> >>>>> Why? Isn't the previous check enough?
> >>>>
> >>>> Zero _PPC state must be top performance state but as I see frequency in
> >>>> _PSS is lower than maximum possible turbo frequency. So, in this case
> >>>> intel_pstate cannnot get "100%"  for max bound even it there is no limit set.
> >>>>
> >>>> For example: I saw _PSS[0] = 2601 Mhz, PSS[1] = 2600 Mhz while turbo
> >>>> state is 3400 Mhz.
> >>>>
> >>> Have you tested dynamic _PPC modification with acpi cpufreq with this
> >>> change (after boot)? Suppose _PPC is changed from 3 to 0, then
> >>> cpufreq_verify_within_limits will not be called to change to new max
> >>> turbo performance state.
> >>>
> >>
> >> I haven't checked that but as I see acpi_processor_ppc_notifier()
> >> can only reduce maximum frequency. So, there should be no problem
> >> in this case.
> > No, it can also be used in both ways. Once reduced, it can increase as
> > well. _PPC can be dynamically modified by BIOS to reduce and also to
> > increase.
> 
> Well, in this case BIOS will trigger ACPI_PROCESSOR_NOTIFY_PERFORMANCE:
> kernel evaluate new _PPC and call cpufreq_update_policy()
> which set initial frequency min/max range according to user setup and
> apply all limits after that. Initial policy->user_policy.min/max stay
> unchanged. So, that dynamic modification works in both ways.
> 
Fair enough. We need to take account for _PSS. We have some changes for
this, but not gone through test cycle. I will post them as RFC, please
check. Thanks for your patience.

- Srinivas

--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index cfc8aba72f86..781e328c9d5f 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -98,7 +98,8 @@  static int acpi_processor_ppc_notifier(struct notifier_block *nb,
 
 	ppc = (unsigned int)pr->performance_platform_limit;
 
-	if (ppc >= pr->performance->state_count)
+	/* Ignore limit of top state: it lower than turbo boost frequency */
+	if (!ppc || ppc >= pr->performance->state_count)
 		goto out;
 
 	cpufreq_verify_within_limits(policy, 0,
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 15ada47bb720..4a34ddf4fa73 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -26,6 +26,7 @@ 
 #include <linux/fs.h>
 #include <linux/debugfs.h>
 #include <linux/acpi.h>
+#include <acpi/processor.h>
 #include <linux/vmalloc.h>
 #include <trace/events/power.h>
 
@@ -113,6 +114,9 @@  struct cpudata {
 	u64	prev_mperf;
 	u64	prev_tsc;
 	struct sample sample;
+#ifdef CONFIG_ACPI_PROCESSOR
+	struct acpi_processor_performance acpi_data;
+#endif
 };
 
 static struct cpudata **all_cpu_data;
@@ -145,6 +149,7 @@  static int hwp_active;
 
 struct perf_limits {
 	int no_turbo;
+	int no_acpi;
 	int turbo_disabled;
 	int max_perf_pct;
 	int min_perf_pct;
@@ -158,6 +163,7 @@  struct perf_limits {
 
 static struct perf_limits limits = {
 	.no_turbo = 0,
+	.no_acpi = !IS_ENABLED(CONFIG_ACPI_PROCESSOR),
 	.turbo_disabled = 0,
 	.max_perf_pct = 100,
 	.max_perf = int_tofp(1),
@@ -449,6 +455,18 @@  static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 	return count;
 }
 
+static ssize_t store_no_acpi(struct kobject *a, struct attribute *b,
+			     const char *buf, size_t count)
+{
+#ifdef CONFIG_ACPI_PROCESSOR
+	return kstrtouint(buf, 0, &limits.no_acpi) ?: count;
+#else
+	return -ENODEV;
+#endif
+}
+show_one(no_acpi, no_acpi);
+define_one_global_rw(no_acpi);
+
 show_one(max_perf_pct, max_perf_pct);
 show_one(min_perf_pct, min_perf_pct);
 
@@ -460,6 +478,7 @@  define_one_global_ro(num_pstates);
 
 static struct attribute *intel_pstate_attributes[] = {
 	&no_turbo.attr,
+	&no_acpi.attr,
 	&max_perf_pct.attr,
 	&min_perf_pct.attr,
 	&turbo_pct.attr,
@@ -1049,6 +1068,38 @@  static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
 	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
 	cpumask_set_cpu(policy->cpu, policy->cpus);
 
+#ifdef CONFIG_ACPI_PROCESSOR
+	if (!limits.no_acpi) {
+		/*
+		 * Minimum necessary to get acpi_processor_ppc_notifier() and
+		 * acpi_processor_get_bios_limit() working.
+		 */
+		if (!zalloc_cpumask_var(&cpu->acpi_data.shared_cpu_map,
+					GFP_KERNEL))
+			rc = -ENOMEM;
+		else
+			rc = acpi_processor_register_performance(
+					&cpu->acpi_data, policy->cpu);
+		if (rc) {
+			pr_err("intel_pstate: acpi init failed: %d\n", rc);
+			free_cpumask_var(cpu->acpi_data.shared_cpu_map);
+			limits.no_acpi = 1;
+		}
+	}
+#endif
+	return 0;
+}
+
+static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
+{
+#ifdef CONFIG_ACPI_PROCESSOR
+	struct cpudata *cpu = all_cpu_data[policy->cpu];
+
+	if (cpu->acpi_data.state_count)
+		acpi_processor_unregister_performance(&cpu->acpi_data,
+						      policy->cpu);
+	free_cpumask_var(cpu->acpi_data.shared_cpu_map);
+#endif
 	return 0;
 }
 
@@ -1057,7 +1108,11 @@  static struct cpufreq_driver intel_pstate_driver = {
 	.verify		= intel_pstate_verify_policy,
 	.setpolicy	= intel_pstate_set_policy,
 	.get		= intel_pstate_get,
+#ifdef CONFIG_ACPI_PROCESSOR
+	.bios_limit	= acpi_processor_get_bios_limit,
+#endif
 	.init		= intel_pstate_cpu_init,
+	.exit		= intel_pstate_cpu_exit,
 	.stop_cpu	= intel_pstate_stop_cpu,
 	.name		= "intel_pstate",
 };
@@ -1286,6 +1341,8 @@  static int __init intel_pstate_setup(char *str)
 		force_load = 1;
 	if (!strcmp(str, "hwp_only"))
 		hwp_only = 1;
+	if (!strcmp(str, "no_acpi"))
+		limits.no_acpi = 1;
 	return 0;
 }
 early_param("intel_pstate", intel_pstate_setup);