diff mbox

cpufreq: intel_pstate: fix inconsistency in setting policy limits

Message ID 1459713972-5654-1-git-send-email-srinivas.pandruvada@linux.intel.com (mailing list archive)
State Superseded, archived
Headers show

Commit Message

srinivas pandruvada April 3, 2016, 8:06 p.m. UTC
When a user sets the performance policy using the cpufreq interface, it is
possible that, because of the policy->max limit, the actual performance is
still limited. But the current implementation will silently switch the
policy to powersave and start using the powersave limits. If the user then
modifies any limits using the intel_pstate sysfs interface, this actually
changes the powersave limits.

The current implementation tracks the limits for the powersave and
performance policies using two different variables. When policy->max is less
than policy->cpuinfo.max_freq, only the powersave limits variable is used.

This fix always uses the performance limits variable when the policy
is performance.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 drivers/cpufreq/intel_pstate.c | 40 ++++++++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 8 deletions(-)

Comments

Rafael J. Wysocki April 4, 2016, 1:16 a.m. UTC | #1
On Sun, Apr 3, 2016 at 10:06 PM, Srinivas Pandruvada
<srinivas.pandruvada@linux.intel.com> wrote:
> When user sets performance policy using cpufreq interface, it is possible
> that because of policy->max limits, the actual performance is still
> limited. But the current implementation will silently switch the
> policy to powersave and start using powersave limits. If user modifies
> any limits using intel_pstate sysfs, this is actually changing powersave
> limits.
>
> The current implementation tracks limits under powersave and performance
> policy using two different variables. When policy->max is less than
> policy->cpuinfo.max_freq, only powersave limit variable is used.
>
> This fix involves uses performance limits variable always when policy
> is performance.
>
> Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> ---
>  drivers/cpufreq/intel_pstate.c | 40 ++++++++++++++++++++++++++++++++--------
>  1 file changed, 32 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index 4b64452..776cea7 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -1122,22 +1122,46 @@ static unsigned int intel_pstate_get(unsigned int cpu_num)
>         return get_avg_frequency(cpu);
>  }
>
> +static void intel_pstate_set_performance_limits(struct perf_limits *limits)
> +{
> +       limits->no_turbo = 0;
> +       limits->turbo_disabled = 0;
> +       limits->max_perf_pct = 100;
> +       limits->max_perf = int_tofp(1);
> +       limits->min_perf_pct = 100;
> +       limits->min_perf = int_tofp(1);
> +       limits->max_policy_pct = 100;
> +       limits->max_sysfs_pct = 100;
> +       limits->min_policy_pct = 0;
> +       limits->min_sysfs_pct = 0;
> +}
> +
>  static int intel_pstate_set_policy(struct cpufreq_policy *policy)
>  {
>         if (!policy->cpuinfo.max_freq)
>                 return -ENODEV;
>
> -       if (policy->policy == CPUFREQ_POLICY_PERFORMANCE &&
> -           policy->max >= policy->cpuinfo.max_freq) {
> -               pr_debug("intel_pstate: set performance\n");
> +       if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
>                 limits = &performance_limits;
> -               if (hwp_active)
> -                       intel_pstate_hwp_set(policy->cpus);

Can you please rebase this on top of my linux-next branch?  This
change in particular will conflict with one commit already in there.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 4b64452..776cea7 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1122,22 +1122,46 @@  static unsigned int intel_pstate_get(unsigned int cpu_num)
 	return get_avg_frequency(cpu);
 }
 
+static void intel_pstate_set_performance_limits(struct perf_limits *limits)
+{
+	limits->no_turbo = 0;
+	limits->turbo_disabled = 0;
+	limits->max_perf_pct = 100;
+	limits->max_perf = int_tofp(1);
+	limits->min_perf_pct = 100;
+	limits->min_perf = int_tofp(1);
+	limits->max_policy_pct = 100;
+	limits->max_sysfs_pct = 100;
+	limits->min_policy_pct = 0;
+	limits->min_sysfs_pct = 0;
+}
+
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 {
 	if (!policy->cpuinfo.max_freq)
 		return -ENODEV;
 
-	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE &&
-	    policy->max >= policy->cpuinfo.max_freq) {
-		pr_debug("intel_pstate: set performance\n");
+	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
 		limits = &performance_limits;
-		if (hwp_active)
-			intel_pstate_hwp_set(policy->cpus);
-		return 0;
+		/*
+		 * policy->cpuinfo.max_freq is the max frequency supported,
+		 * which is set during cpufreq init() callback.
+		 * policy->max is the current max frequency, which can less
+		 * than policy->cpuinfo.max_freq, because of limits placed
+		 * by cpufreq thermal interface.
+		 */
+		if (policy->max >= policy->cpuinfo.max_freq) {
+			pr_debug("intel_pstate: set performance\n");
+			intel_pstate_set_performance_limits(limits);
+			if (hwp_active)
+				intel_pstate_hwp_set(policy->cpus);
+			return 0;
+		}
+	} else {
+		pr_debug("intel_pstate: set powersave\n");
+		limits = &powersave_limits;
 	}
 
-	pr_debug("intel_pstate: set powersave\n");
-	limits = &powersave_limits;
 	limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
 	limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100);
 	limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,