diff mbox

[2/2] x86: Manage ENERGY_PERF_BIAS based on cpufreq governor

Message ID 20100303000849.278509000@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

venkip March 3, 2010, 12:06 a.m. UTC
None
diff mbox

Patch

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 8c666d8..4945add 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -749,6 +749,10 @@  and is between 256 and 4096 characters. It is defined in the file
 			Default value is 0.
 			Value can be changed at runtime via /selinux/enforce.
 
+	epb=		[X86] Control IA32_ENERGY_PERF_BIAS setting
+			"disable" - Kernel will not modify this MSR
+			<0..15> - Kernel will set this MSR to the given static value
+
 	ether=		[HW,NET] Ethernet cards parameters
 			This option is obsoleted by the "netdev=" option, which
 			has equivalent usage. See its documentation for details.
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index f138c6c..1addc05 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -26,6 +26,12 @@  config X86_ACPI_CPUFREQ
 
 	  If in doubt, say N.
 
+config X86_ENERGY_PERF_BIAS
+	def_bool y
+	depends on X86_ACPI_CPUFREQ
+	help
+	  Support for x86 Intel ENERGY_PERF_BIAS MSR
+
 config ELAN_CPUFREQ
 	tristate "AMD Elan SC400 and SC410"
 	select CPU_FREQ_TABLE
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
index 509296d..5290428 100644
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ b/arch/x86/kernel/cpu/cpufreq/Makefile
@@ -18,3 +18,4 @@  obj-$(CONFIG_X86_SPEEDSTEP_SMI)		+= speedstep-smi.o
 obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO)	+= speedstep-centrino.o
 obj-$(CONFIG_X86_P4_CLOCKMOD)		+= p4-clockmod.o
 obj-$(CONFIG_X86_CPUFREQ_NFORCE2)	+= cpufreq-nforce2.o
+obj-$(CONFIG_X86_ENERGY_PERF_BIAS)	+= energy_perf_bias.o
diff --git a/arch/x86/kernel/cpu/cpufreq/energy_perf_bias.c b/arch/x86/kernel/cpu/cpufreq/energy_perf_bias.c
new file mode 100644
index 0000000..2bd4e74
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/energy_perf_bias.c
@@ -0,0 +1,186 @@ 
+/*
+ * x86 IA32_ENERGY_PERF_BIAS MSR driver
+ * This MSR lets software set an Energy Performance Preference, which
+ * can then be used by hardware to make Energy Performance tradeoffs.
+ */
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/ctype.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/cpufreq.h>
+
+#include <asm/msr.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+
+#define ENERGY_PERF_BIAS_BITS		0x0f	/* hint field is bits 3:0 */
+
+#define ENERGY_PERF_BIAS_INVALID	(-1)	/* no "epb=<n>" override given */
+#define ENERGY_PERF_BIAS_PERF		0	/* used for "performance" */
+#define ENERGY_PERF_BIAS_ONDEMAND	7	/* used for any other governor */
+#define ENERGY_PERF_BIAS_POWER		15	/* used for "powersave" */
+
+static int epb_override = ENERGY_PERF_BIAS_INVALID; /* User bias override */
+static int epb_disable; /* User disable option */
+
+#define is_epb_override_set() (epb_override != ENERGY_PERF_BIAS_INVALID)
+
+/*
+ * Parse the "epb=" kernel command line option.
+ *
+ * epb=disable: kernel will not touch ENERGY_PERF_BIAS.
+ * epb=<0..15>: kernel will leave ENERGY_PERF_BIAS at the user specified
+ * value, independent of cpufreq policy.  Other numbers are ignored.
+ *
+ * Default is to change ENERGY_PERF_BIAS based on cpufreq governor.
+ * Returns 1 so the option is reported as handled and not passed on to init.
+ */
+static int __init epb_setup(char *str)
+{
+	unsigned long val;
+
+	if (!str)
+		return 1;
+
+	if (!strncmp("disable", str, 7)) {
+		epb_disable = 1;
+	} else if (isdigit(*str)) {
+		val = simple_strtoul(str, NULL, 0);
+		if (val <= ENERGY_PERF_BIAS_POWER)
+			epb_override = val;
+	}
+	return 1;
+}
+__setup("epb=", epb_setup);
+
+static void set_epb_on_cpu(int val, int cpu)
+{
+	u32 bias = val & ENERGY_PERF_BIAS_BITS;	/* bias field only */
+	wrmsr_safe_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, bias, 0);
+}
+
+/*
+ * cpufreq policy notifier: on CPUFREQ_NOTIFY, pick a bias from the name of
+ * the policy's governor and program it on policy->cpu.
+ *
+ * "performance" and "powersave" are matched as name prefixes; every other
+ * governor gets ENERGY_PERF_BIAS_ONDEMAND.  Always returns 0.
+ */
+static int epb_policy_notifier(struct notifier_block *nb, unsigned long val,
+				void *data)
+{
+	struct cpufreq_policy *pol = data;
+	const char *name;
+	int bias = ENERGY_PERF_BIAS_ONDEMAND;
+
+	/* Ignore other notifications and policies without a governor. */
+	if (val != CPUFREQ_NOTIFY || !pol || !pol->governor)
+		return 0;
+
+	name = pol->governor->name;
+
+	if (!strncmp(name, "performance", strlen("performance")))
+		bias = ENERGY_PERF_BIAS_PERF;
+	else if (!strncmp(name, "powersave", strlen("powersave")))
+		bias = ENERGY_PERF_BIAS_POWER;
+
+	set_epb_on_cpu(bias, pol->cpu);
+	return 0;
+}
+
+static struct notifier_block policy_nb = {
+	.notifier_call	= epb_policy_notifier,	/* cpufreq policy events */
+};
+
+static void epb_cpu_online(int cpu)
+{
+	set_epb_on_cpu(epb_override, cpu);	/* apply the "epb=" override */
+}
+
+/*
+ * sysdev resume hook: re-apply the override on the boot CPU (id 0) only;
+ * every other device id is ignored here.  Always returns 0.
+ */
+static int epb_resume(struct sys_device *sys_dev)
+{
+	if (sys_dev->id == 0)
+		epb_cpu_online(0);
+
+	return 0;
+}
+
+static struct sysdev_driver epb_sysdev_driver = {
+	.resume		= epb_resume,	/* restore the MSR on the boot CPU */
+};
+
+/* Hotplug notifier: set the MSR on non-boot CPUs on resume and online */
+static int __cpuinit epb_cpu_notifier(struct notifier_block *nfb,
+					unsigned long action, void *hcpu)
+{
+	bool online = action == CPU_ONLINE || action == CPU_ONLINE_FROZEN;
+
+	if (online)
+		epb_cpu_online((long)hcpu);
+
+	return 0;
+}
+
+static struct notifier_block cpu_nb = {
+	.notifier_call = epb_cpu_notifier,	/* CPU hotplug events */
+};
+
+
+/*
+ * Driver init.  Without an "epb=<n>" override, follow the cpufreq governor
+ * via the policy notifier; otherwise hook resume and CPU online and write
+ * the override to all online CPUs.  -ENODEV if unsupported or disabled.
+ */
+static int __init epb_init(void)
+{
+	int cpu;
+	int err;
+
+	if (!boot_cpu_has(X86_FEATURE_EPB) || epb_disable)
+		return -ENODEV;
+
+	if (!is_epb_override_set())
+		return cpufreq_register_notifier(&policy_nb,
+						 CPUFREQ_POLICY_NOTIFIER);
+
+	err = sysdev_driver_register(&cpu_sysdev_class, &epb_sysdev_driver);
+	if (err)
+		return err;
+
+	err = register_cpu_notifier(&cpu_nb);
+	if (err) {
+		/* Undo the sysdev registration made above. */
+		sysdev_driver_unregister(&cpu_sysdev_class,
+					 &epb_sysdev_driver);
+		return err;
+	}
+
+	for_each_online_cpu(cpu)
+		set_epb_on_cpu(epb_override, cpu);
+
+	return 0;
+}
+
+/* Undo epb_init(): drop whichever hooks the chosen mode registered. */
+static void __exit epb_exit(void)
+{
+	if (!boot_cpu_has(X86_FEATURE_EPB) || epb_disable)
+		return;		/* epb_init() registered nothing */
+	if (is_epb_override_set()) {
+		sysdev_driver_unregister(&cpu_sysdev_class, &epb_sysdev_driver);
+		unregister_cpu_notifier(&cpu_nb);
+		return;
+	}
+
+	cpufreq_unregister_notifier(&policy_nb, CPUFREQ_POLICY_NOTIFIER);
+}
+
+__initcall(epb_init);
+__exitcall(epb_exit);