Message ID | 3600864.XZTfEJ2ljK@skinner (mailing list archive) |
---|---|
State | Not Applicable, archived |
Headers | show |
On Tue, 2015-12-08 at 16:27 +0100, Thomas Renninger wrote: > Hi, > > On Friday, December 04, 2015 05:40:30 PM Philippe Longepe wrote: > > From: Philippe Longepe <philippe.longepe@intel.com> > > > > Target systems using different cpus have different power and performance > > requirements. They may use different algorithms to get the next P-state > > based on their power or performance preference. > > But this does not necessarily have to do with the CPU model. Yes. In addition we are planning to work on ACPI based selection as you suggested below (With due credit to you). The first patch only differentiate BYT and CHT ATOM models which are not ATOM servers. They are used in non ACPI IoT systems also. Don't want to make command line overly complicated. One "Vanilla" and "server" is not enough. > > > For example, power-constrained systems may not want to use > > high-performance P-states as aggressively as a full-size desktop or a > > server platform. A server platform may want to run close to the max to > > achieve better performance, while laptop-like systems may prefer > > sacrificing performance for longer battery lifes. > > Here you name it. It is about the platform type. > > There may be atoms clustered in server platforms and there may be > performance oriented CPU models built into laptops. You mean BYT/CHT ATOM used as servers not Avaton, which has different id. Thanks, Srinivas > > > For the above reasons, modify intel_pstate to allow the target P-state > > selection algorithm to be depend on the CPU ID. > > This is wrong in 2 ways: > 1. it should depend on the platform type, not the CPU id > 2. it should still be overridable from userspace (this is what Len > came up with when I recently sent a patch. We recently added a patch > (pasted at the very end of the mail) to adjust > pstate knobs, to be more performance oriented, depending on the platform > type you are running on to SLES 12). > > Below patch is based on an older kernel. > IMO 3 things should get combined: > 1. This patch based on ACPI pm_profile instead of CPU id > 2. Your algorithm changes > 3. A way to adjust ACPI pm_profile from userspace and boot parameter. > e.g. acpi.pm_profile= > intel_pstate could register a callback: > void preferred_pm_profile(u8 pm_profile); > and adjust it's algorithm and tunables according to what user writes to: > /sys/firmware/acpi/pm_profile (which could be made overridable) > > > --------------------------------------------- > > From: Thomas Renninger <trenn@suse.com> > Subject: intel_pstate: Adjust performance knobs on servers for SLES > Patch-mainline: never > References: bnc#945201 > > Depending on ACPI FADT table's preferred PM profile > (compare with ACPI spec chapter 5.2.9.1 Preferred PM Profile System Types) > the intel_pstate performance tunables will be set to a more performance > oriented policy. > > intel_pstate=vanilla_policy > boot parameter will disable this functionality again. > > intel_pstate=server_policy > will apply the performance optimized values also on laptops, desktops > or where the ACPI preferred PM profile value is not set. > > --- > drivers/cpufreq/intel_pstate.c | 43 +++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 43 insertions(+) > > --- a/drivers/cpufreq/intel_pstate.c > +++ b/drivers/cpufreq/intel_pstate.c > @@ -532,6 +532,14 @@ static struct cpu_defaults byt_params = > }, > }; > > +static struct pstate_adjust_policy adj_server_policy = { > + .sample_rate_ms = 10, > + .deadband = 0, > + .setpoint = 30, > + .p_gain_pct = 10, > + .d_gain_pct = 0, > + .i_gain_pct = 0, > +}; > > static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) > { > @@ -862,6 +870,8 @@ static struct cpufreq_driver intel_pstat > }; > > static int __initdata no_load; > +static int __initdata vanilla_policy; > +static int __initdata server_policy; > > static int intel_pstate_msrs_not_valid(void) > { > @@ -978,6 +988,9 @@ static int __init intel_pstate_init(void > int cpu, rc = 0; > const struct x86_cpu_id *id; > struct cpu_defaults *cpu_info; > +#if IS_ENABLED(CONFIG_ACPI) > + const char *profile = NULL; > +#endif > > if (no_load) > return -ENODEV; > @@ -1003,6 +1016,32 @@ static int __init intel_pstate_init(void > > pr_info("Intel P-state driver initializing.\n"); > > +#if IS_ENABLED(CONFIG_ACPI) > + if (!vanilla_policy) { > + switch (acpi_gbl_FADT.preferred_profile) { > + case PM_WORKSTATION: > + profile = "Workstation"; > + break; > + case PM_ENTERPRISE_SERVER: > + profile = "Enterprise Server"; > + break; > + case PM_SOHO_SERVER: > + profile = "SOHO Server"; > + break; > + case PM_PERFORMANCE_SERVER: > + profile = "Performance Server"; > + break; > + default: > + if (server_policy) > + profile = "Server"; > + }; > + > + if (profile) { > + pr_info("Intel P-state setting %s policy\n", profile); > + copy_pid_params(&adj_server_policy); > + } > + } > +#endif > all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); > if (!all_cpu_data) > return -ENOMEM; > @@ -1036,6 +1075,10 @@ static int __init intel_pstate_setup(cha > > if (!strcmp(str, "disable")) > no_load = 1; > + if (!strcmp(str, "vanilla_policy")) > + vanilla_policy = 1; > + if (!strcmp(str, "server_policy")) > + server_policy = 1; > return 0; > } > early_param("intel_pstate", intel_pstate_setup); > > -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tuesday, December 08, 2015 10:02:23 AM Srinivas Pandruvada wrote: > On Tue, 2015-12-08 at 16:27 +0100, Thomas Renninger wrote: > > Hi, > > There may be atoms clustered in server platforms and there may be > > performance oriented CPU models built into laptops. > > You mean BYT/CHT ATOM used as servers not Avaton, which has different > id. I have no idea. And since Intel does not build mainboards anymore afaik or most are built by OEMs, you also have no idea which CPUs end up on which kind of system, right? Default policy to go for performance, IO, workload based switching or whatever tunables are modified should be, as you said, platform oriented. So instead of calling the params: silvermont, airmont, knl_params (knights landing, right?), whatever CPU... Better call them: laptop, io_optimized, performance params, whatever ... And later set them according to pm_profile, not CPU id and still provide something to override via userspace (I hope this is what you want to do anyway, right?). Even someone buys a tablet, there are Linux guys around who use them as their homeserver in 2 years and they want to override... Hm, in fact what you are doing is you implement your own governors/policies inside intel_pstate driver now, right? If not done already, you should name it in dmesg what kind of algorithm is used (cmp with my patch). This makes it easier to find powersave or performance regressions that will come up with the new default settings and pin point them to intel_pstate. Even better would be if intel_pstate could set it's own governor string. Then there wouldn't be any mixture with ondemand or whatever. But again.., those should not be named "airmont" policy/governor. I had a quick look, but providing whole governors seem to be a lot overhead. But with some luck you do not have to fill up whole structures you have to pass to cpufreq_register_governor(..). Thomas -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 2015-12-09 at 15:34 +0100, Thomas Renninger wrote: > On Tuesday, December 08, 2015 10:02:23 AM Srinivas Pandruvada wrote: > > On Tue, 2015-12-08 at 16:27 +0100, Thomas Renninger wrote: > > > Hi, > > > > There may be atoms clustered in server platforms and there may be > > > performance oriented CPU models built into laptops. > > > > You mean BYT/CHT ATOM used as servers not Avaton, which has different > > id. > > I have no idea. > And since Intel does not build mainboards anymore afaik or most > are built by OEMs, you also have no idea which CPUs end up on which kind > of system, right? > > Default policy to go for performance, IO, workload based switching or > whatever tunables are modified should be, as you said, platform oriented. > > So instead of calling the params: > silvermont, airmont, knl_params (knights landing, right?), whatever CPU... > Better call them: These param structure is much more than just policy specification. They have callback to get limits. For example, silvermont and airmont has different scaling methods. So they can't be common to all. The function to get next state can be common. There is no airmont/silvermont name attached to this (get_target_pstate_use_cpu_load or get_target_pstate_use_performance). So the default callback ".get_target_pstate" can be changed based on preference. This is the order I am thinking of in the order of priority high to low : - User policy (either command line or via cpu-freq scaling_governor) - ACPI - Pickup defaults based on CPU ID. The current patch-set only addresses the cpu id based selection. BTW changing a pid set point can be done from user space without changing code. > laptop, io_optimized, performance params, whatever ... > And later set them according to pm_profile, not CPU id and still > provide something to override via userspace (I hope this is > what you want to do anyway, right?). > Even someone buys a tablet, there are Linux guys around who use them as > their homeserver in 2 years and they want to override... > > Hm, in fact what you are doing is you implement your own governors/policies > inside intel_pstate driver now, right? > If not done already, you should name it in dmesg what kind of algorithm is > used (cmp with my patch). This makes it easier to find powersave > or performance regressions that will come up with the new default settings > and pin point them to intel_pstate. Can be done. > > Even better would be if intel_pstate could set it's own governor string. > Then there wouldn't be any mixture with ondemand or whatever. > But again.., those should not be named "airmont" policy/governor. > The No airmont or silvermont policies. But more specific to use case based string will be better. > I had a quick look, but providing whole governors seem to be a lot overhead. > But with some luck you do not have to fill up whole structures you have to > pass to cpufreq_register_governor(..). Adding a external governor is easy (Your RFC patch already does that). But inside cpufreq core logic there is different processing based on whether there are internal governor or cpufreq governor. So that needs to be addressed first. Thanks, Srinivas > > Thomas > -- > To unsubscribe from this list: send the line "unsubscribe linux-pm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wednesday, December 09, 2015 12:21:53 PM Srinivas Pandruvada wrote: > On Wed, 2015-12-09 at 15:34 +0100, Thomas Renninger wrote: > > On Tuesday, December 08, 2015 10:02:23 AM Srinivas Pandruvada wrote: > > > On Tue, 2015-12-08 at 16:27 +0100, Thomas Renninger wrote: > > > > Hi, > > > > > > > > There may be atoms clustered in server platforms and there may be > > > > performance oriented CPU models built into laptops. > > > > > > You mean BYT/CHT ATOM used as servers not Avaton, which has different > > > id. > > > > I have no idea. > > And since Intel does not build mainboards anymore afaik or most > > are built by OEMs, you also have no idea which CPUs end up on which kind > > of system, right? > > > > Default policy to go for performance, IO, workload based switching or > > whatever tunables are modified should be, as you said, platform oriented. > > > > So instead of calling the params: > > silvermont, airmont, knl_params (knights landing, right?), whatever CPU... > > > Better call them: > These param structure is much more than just policy specification. They > have callback to get limits. For example, silvermont and airmont has > different scaling methods. So they can't be common to all. Sure this is HW specific. > The function > to get next state can be common. There is no airmont/silvermont name > attached to this (get_target_pstate_use_cpu_load or > get_target_pstate_use_performance). So the default callback > ".get_target_pstate" can be changed based on preference. > > This is the order I am thinking of in the order of priority high to > low : > - User policy (either command line or via cpu-freq scaling_governor) > - ACPI > - Pickup defaults based on CPU ID. Why by CPU ID? This doesn't make sense to me and unnecessarily complicates things. But, please do not submit anything half baken: a new pstate ondemand governor or different workload optizmized algorithms inside this governor, or later possibly as separate governors, defaults based on CPU ID or platform type and then later userspace tunable... We should not only think, but implement a final concept and submit altogether. Otherwise there are several kernels flying around which do implement half of the implementation and the confusion is perfect. I would also be interested in some figures. Are there any measurements how many power is saved, how big the performance overhead is, etc? I formerly used the tools/power/cpupower/bench/ tool to find out worst case workload scenarios between ondemand and performance governor with different tuning knobs. It could even do some nice plots showing performance issues when increasing polling time, etc., IIRC. Are these new RAPL registers fine grained enough to be used to measure power consumption on different workloads with different dynamic cpufreq algorithms/tunables? On a platform which exposes itself as "Enterprise Server" or "Performance Server" (ACPI PM Profile, cmp with chapter 5.2.9 of ACPI spec) I expect default will be the current performance governor, which means dynamic CPU frequency off and run on high frequency statically, right? > The current patch-set only addresses the cpu id based selection. > BTW changing a pid set point can be done from user space without > changing code. You mean: /sys/kernel/debug/pstate_snb/p_gain_pct /sys/kernel/debug/pstate_snb/setpoint /sys/kernel/debug/pstate_snb/deadband /sys/kernel/debug/pstate_snb/i_gain_pct /sys/kernel/debug/pstate_snb/d_gain_pct /sys/kernel/debug/pstate_snb/sample_rate_ms These should better not be picked as API by userspace tools... > > I had a quick look, but providing whole governors seem to be a lot > > overhead. But with some luck you do not have to fill up whole structures > > you have to pass to cpufreq_register_governor(..). > > Adding a external governor is easy (Your RFC patch already does that). > But inside cpufreq core logic there is different processing based on > whether there are internal governor or cpufreq governor. So that needs > to be addressed first. Yep, not sure that is perfect. I also try to have a closer look. Quite some open questions... Thomas -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2015-12-10 at 14:04 +0100, Thomas Renninger wrote: > On Wednesday, December 09, 2015 12:21:53 PM Srinivas Pandruvada wrote: > > On Wed, 2015-12-09 at 15:34 +0100, Thomas Renninger wrote: > > > On Tuesday, December 08, 2015 10:02:23 AM Srinivas Pandruvada wrote: > > > > On Tue, 2015-12-08 at 16:27 +0100, Thomas Renninger wrote: > > > > > Hi, > > > > > > > > > > There may be atoms clustered in server platforms and there may be > > > > > performance oriented CPU models built into laptops. > > > > > > > > You mean BYT/CHT ATOM used as servers not Avaton, which has different > > > > id. > > > > > > I have no idea. > > > And since Intel does not build mainboards anymore afaik or most > > > are built by OEMs, you also have no idea which CPUs end up on which kind > > > of system, right? > > > > > > Default policy to go for performance, IO, workload based switching or > > > whatever tunables are modified should be, as you said, platform oriented. > > > > > > So instead of calling the params: > > > silvermont, airmont, knl_params (knights landing, right?), whatever CPU... > > > > > Better call them: > > These param structure is much more than just policy specification. They > > have callback to get limits. For example, silvermont and airmont has > > different scaling methods. So they can't be common to all. > Sure this is HW specific. > > > The function > > to get next state can be common. There is no airmont/silvermont name > > attached to this (get_target_pstate_use_cpu_load or > > get_target_pstate_use_performance). So the default callback > > ".get_target_pstate" can be changed based on preference. > > > > This is the order I am thinking of in the order of priority high to > > low : > > - User policy (either command line or via cpu-freq scaling_governor) > > - ACPI > > - Pickup defaults based on CPU ID. > > Why by CPU ID? This doesn't make sense to me and unnecessarily complicates > things. Some CPU are designed for a specific functions. > > But, please do not submit anything half baken: I don't know what makes you think that. The two CPU ids used to have a different algorithm are specifically designed for power constrained devices. Lots of testing was done to prove power vs performance benefits before pushing those. Philippe, can you share test results? There is no change for other cpus because there is not enough convincing data. Also changing based on ACPI is also not perfect. We have folks complaining that the server setpoint is already aggressive enough and consume power. So before we have a good answer, will not submit changes for that. > a new pstate ondemand governor or different > workload optizmized algorithms inside this governor, or later possibly as > separate governors, defaults based on CPU ID or platform type and > then later userspace tunable... > > We should not only think, but implement a final concept and submit altogether. > Otherwise there are several kernels flying around which do implement half > of the implementation and the confusion is perfect. > > I would also be interested in some figures. > Are there any measurements how many power is saved, how big the performance > overhead is, etc? > > I formerly used the tools/power/cpupower/bench/ tool to find out worst case > workload scenarios between ondemand and performance governor with different > tuning knobs. > It could even do some nice plots showing performance issues when increasing > polling time, etc., IIRC. We use mweb, specpower for server workloads and phoronix test suite for clients. Do you have some specific server workloads? > Are these new RAPL registers fine grained enough to be used to measure > power consumption on different workloads with different dynamic cpufreq > algorithms/tunables? The package power part of RAPL is good enough. But we use external power meter or instrumented system. > > On a platform which exposes itself as "Enterprise Server" or "Performance > Server" (ACPI PM Profile, cmp with chapter 5.2.9 of ACPI spec) I expect > default will be the current performance governor, which means dynamic CPU > frequency off and run on high frequency statically, right? Yes, no change. > > > The current patch-set only addresses the cpu id based selection. > > BTW changing a pid set point can be done from user space without > > changing code. > > You mean: > /sys/kernel/debug/pstate_snb/p_gain_pct > /sys/kernel/debug/pstate_snb/setpoint > /sys/kernel/debug/pstate_snb/deadband > /sys/kernel/debug/pstate_snb/i_gain_pct > /sys/kernel/debug/pstate_snb/d_gain_pct > /sys/kernel/debug/pstate_snb/sample_rate_ms > > These should better not be picked as API by userspace tools... > > > > I had a quick look, but providing whole governors seem to be a lot > > > overhead. But with some luck you do not have to fill up whole structures > > > you have to pass to cpufreq_register_governor(..). > > > > Adding a external governor is easy (Your RFC patch already does that). > > But inside cpufreq core logic there is different processing based on > > whether there are internal governor or cpufreq governor. So that needs > > to be addressed first. > > Yep, not sure that is perfect. > I also try to have a closer look. > Quite some open questions... > Thanks, Srinivas > Thomas > -- > To unsubscribe from this list: send the line "unsubscribe linux-pm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thursday, December 10, 2015 02:04:46 PM Thomas Renninger wrote: > On Wednesday, December 09, 2015 12:21:53 PM Srinivas Pandruvada wrote: > > On Wed, 2015-12-09 at 15:34 +0100, Thomas Renninger wrote: > > > On Tuesday, December 08, 2015 10:02:23 AM Srinivas Pandruvada wrote: > > > > On Tue, 2015-12-08 at 16:27 +0100, Thomas Renninger wrote: [cut] > > > > This is the order I am thinking of in the order of priority high to > > low : > > - User policy (either command line or via cpu-freq scaling_governor) > > - ACPI > > - Pickup defaults based on CPU ID. > > Why by CPU ID? For a couple of reasons. First of all, processors are designed in a specific way and some ways of P-states management may not lead to good results on them no matter what, while others may match them a lot better. The processors in question here are designed with energy efficiency in mind. For this reason, an approach skewed towards performance (which the original algorithm in intel_pstate is) is really suboptimal there as the performance is not there in the first place, quite fundamentally. Even if anyone used any chip based on those cores in a server, that would be a "low-power server" so to speak, so using an algorithm more oriented towards energy efficiency would still make sense for it. Second, the CPU ID is the most reliable piece of information about the type of the system we can possibly get. The BIOS may always lie to us and we can't entirely rely on it for figuring out the system profile, but as I said, if a CPU designed for energy-efficient systems is used in the given one, that is a strong indication on what the system is or it would have used a different CPU otherwise. > This doesn't make sense to me and unnecessarily complicates > things. > > But, please do not submit anything half baken: > a new pstate ondemand governor or different > workload optizmized algorithms inside this governor, or later possibly as > separate governors, defaults based on CPU ID or platform type and > then later userspace tunable... > > We should not only think, but implement a final concept and submit altogether. > Otherwise there are several kernels flying around which do implement half > of the implementation and the confusion is perfect. On the other hand, making changes in smaller steps allows us to use a more evolutionary approach in which changes that turn out to cause problems to happen may be easily reverted and replaced with something better. > I would also be interested in some figures. > Are there any measurements how many power is saved, how big the performance > overhead is, etc? Philippe will provide the data. Thanks, Rafael -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thursday, December 10, 2015 09:28:40 AM Srinivas Pandruvada wrote: > On Thu, 2015-12-10 at 14:04 +0100, Thomas Renninger wrote: > > On Wednesday, December 09, 2015 12:21:53 PM Srinivas Pandruvada wrote: > > > On Wed, 2015-12-09 at 15:34 +0100, Thomas Renninger wrote: > > > > On Tuesday, December 08, 2015 10:02:23 AM Srinivas Pandruvada wrote: ... > > > The function > > > to get next state can be common. There is no airmont/silvermont name > > > attached to this (get_target_pstate_use_cpu_load or > > > get_target_pstate_use_performance). So the default callback > > > ".get_target_pstate" can be changed based on preference. > > > > > > This is the order I am thinking of in the order of priority high to > > > low : > > > - User policy (either command line or via cpu-freq scaling_governor) > > > - ACPI > > > - Pickup defaults based on CPU ID. That would mean if ACPI pm profile is unknown and only then, assign algorithm (or other tunables) based on CPU ID? > > Why by CPU ID? This doesn't make sense to me and unnecessarily complicates > > things. > > Some CPU are designed for a specific functions. Ok, I nearly bought this... Then I started reading (only a very bit). Silvermont is simply a CPU model. It's not a bound to specific functions like different ARM processors with possibly very specific chipset. It's just a general X86 processor, right? And here: https://en.wikipedia.org/wiki/Silvermont I get: List of Silvermont processors: Desktop processors (Bay Trail-D) Server processors (Avoton) Communications processors (Rangeley) Embedded/automotive processors (Bay Trail-I) Mobile processors (Bay Trail-M) Tablet processors (Bay Trail-T) Smartphone processors (Merrifield and Moorefield) List of Airmont processors Mobile processors (Braswell) Smartphone and Tablet processors (Cherry Trail) Not sure what specific functions you mean... Can you name them? I cut the rest of the mail which was very helpful info and explained quite some open details. Thanks! Thomas -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
We are currently re-doing the measurement on our latest android release. Here are the results we have on a android release of WW50 Note that as of today Android is using Interactive Governor. Atom: Intel PState Intel PState Power Performance CPU Load Improvment Circular Progressbar (animation) 136 mW 32 mW -76% 50% Load 1 thread 260 mW 25 mW -90% T-Rex (gaming) 500 mW 302 mW -40% Application Install 799 mW 383 mW -52% Music Playback (AAC) 87 mW 33 mW -62% iozone 819 mW 104 mW -87% CandyCrush 415 mW 130 mW -69% FruitNinja 1007 mW 853 mW -15% Plants vs zombie2 356 mW 101 mW -71% AngryBirds 344 mW 100 mW -70% VideoPlayBack 549 mW 124 mW -77% Browsermark 759 mW 560 mW -26% IcyRocks 589 mW 486 mW -17% Atom: Intel PState Intel PState Performance Performance CPU Load Improvment AndEBench-Native 21585 21737 0.70% AndEBench-Java 1630 1632 0.12% SmartBench-Gaming 4312 4292 -0.46% SmartBench-Productivity 15484 15441 -0.28% Dhrystone 37506 37444 -0.16% CoreMark 35259 35231 -0.08% Quadrant-2D 1000 1000 0.00% Quadrant-3D 2478 2465 -0.52% Quadrant-IO 9997 9917 -0.80% Quadrant-Mem 14839 15159 2.16% 0xBenchLinpack4T 193 192 -0.70% CaffeineMark 51955 51746 -0.40% GeekBench-Integer 4514 4507 -0.16% GeekBench-Score 3403 3378 -0.73% GeekBench-FloatingPoint 3247 3245 -0.06% GeekBench-Memory 1540 1538 -0.13% CFBench-Native 38585 39003 1.08% AnTuTu-DB 770 745 -3.25% AnTuTu-2D 1657 1654 -0.18% AnTuTu-3D 20133 19961 -0.85% AnTuTu-Dalvik 4336 4352 0.37% AnTuTu-Int-single 2244 2244 0.00% AnTuTu-Multitask 8693 8164 -6.09% AnTuTu-Float-single 2228 2226 -0.09% AnTuTu-Ram 3425 3447 0.64% AnTuTu-IO 2467 2442 -1.01% AnTuTu-Mem 2532 2528 -0.16% AnTuTu-Int 4882 4901 0.39% AnTuTu-Total 58198 57631 -0.97% AnTuTu-Float 4831 4904 1.51% CompuBenchRS-Gauss 45 43 -4.84% CompuBenchRS-Particule 57 42 -26.89% CompuBenchRS-Fractal 157 149 -5.46% CompuBenchRS-Raytrace 10 10 -1.21% CompuBenchRS-Histogram 85 77 -9.20% CompuBenchRS-Gaussintr 47 44 -7.28% CompuBenchRS-Facedetect 5 5 -0.64% Kraken-Score 4125 4136 0.27% Octane-Score 8026 8098 0.90% Sunspider-Score 660 673 1.98% AnTuTu-Dalvik 2605 2642 1.42% AnTuTu-Int-single 2343 2388 1.92% AnTuTu-Multitask 2082 2135 2.55% Stream-Copy 6704 6705 0.01% Stream-Scale 6530 6527 -0.06% IOZone RR 4656 4278 -8.12% Intel P-State performance, is relatively imune to I/O Wait IOZone RW 1256 836 -33.44% Intel P-State performance, is relatively imune to I/O Wait BrowserMark 2071 2086 0.72% jArt 689 ops/sec 685 ops/sec -0.54% -- Steph > On Dec 10, 2015, at 11:01 PM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote: > > On Thursday, December 10, 2015 02:04:46 PM Thomas Renninger wrote: >> On Wednesday, December 09, 2015 12:21:53 PM Srinivas Pandruvada wrote: >>> On Wed, 2015-12-09 at 15:34 +0100, Thomas Renninger wrote: >>>> On Tuesday, December 08, 2015 10:02:23 AM Srinivas Pandruvada wrote: >>>>> On Tue, 2015-12-08 at 16:27 +0100, Thomas Renninger wrote: > > [cut] > >>> >>> This is the order I am thinking of in the order of priority high to >>> low : >>> - User policy (either command line or via cpu-freq scaling_governor) >>> - ACPI >>> - Pickup defaults based on CPU ID. >> >> Why by CPU ID? > > For a couple of reasons. > > First of all, processors are designed in a specific way and some ways of > P-states management may not lead to good results on them no matter what, > while others may match them a lot better. > > The processors in question here are designed with energy efficiency in mind. > For this reason, an approach skewed towards performance (which the original > algorithm in intel_pstate is) is really suboptimal there as the performance > is not there in the first place, quite fundamentally. Even if anyone used > any chip based on those cores in a server, that would be a "low-power server" > so to speak, so using an algorithm more oriented towards energy efficiency > would still make sense for it. > > Second, the CPU ID is the most reliable piece of information about the > type of the system we can possibly get. The BIOS may always lie to us and > we can't entirely rely on it for figuring out the system profile, but as I > said, if a CPU designed for energy-efficient systems is used in the given > one, that is a strong indication on what the system is or it would have > used a different CPU otherwise. > >> This doesn't make sense to me and unnecessarily complicates >> things. >> >> But, please do not submit anything half baken: >> a new pstate ondemand governor or different >> workload optizmized algorithms inside this governor, or later possibly as >> separate governors, defaults based on CPU ID or platform type and >> then later userspace tunable... >> >> We should not only think, but implement a final concept and submit altogether. >> Otherwise there are several kernels flying around which do implement half >> of the implementation and the confusion is perfect. > > On the other hand, making changes in smaller steps allows us to use a more > evolutionary approach in which changes that turn out to cause problems to > happen may be easily reverted and replaced with something better. > >> I would also be interested in some figures. >> Are there any measurements how many power is saved, how big the performance >> overhead is, etc? > > Philippe will provide the data. > > Thanks, > Rafael > > -- > To unsubscribe from this list: send the line "unsubscribe linux-pm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thursday, December 10, 2015 11:01:18 PM Rafael J. Wysocki wrote: > On Thursday, December 10, 2015 02:04:46 PM Thomas Renninger wrote: > > On Wednesday, December 09, 2015 12:21:53 PM Srinivas Pandruvada wrote: > > > On Wed, 2015-12-09 at 15:34 +0100, Thomas Renninger wrote: > > > > On Tuesday, December 08, 2015 10:02:23 AM Srinivas Pandruvada wrote: > > > > > On Tue, 2015-12-08 at 16:27 +0100, Thomas Renninger wrote: > [cut] > > > > This is the order I am thinking of in the order of priority high to > > > low : > > > - User policy (either command line or via cpu-freq scaling_governor) > > > - ACPI > > > - Pickup defaults based on CPU ID. > > > > Why by CPU ID? > > For a couple of reasons. > > First of all, processors are designed in a specific way and some ways of > P-states management may not lead to good results on them no matter what, > while others may match them a lot better. > > The processors in question here are designed with energy efficiency in mind. > For this reason, an approach skewed towards performance (which the original > algorithm in intel_pstate is) is really suboptimal there as the performance > is not there in the first place, quite fundamentally. Even if anyone used > any chip based on those cores in a server, that would be a "low-power > server" so to speak, so using an algorithm more oriented towards energy > efficiency would still make sense for it. Ok, so we have these: Desktop processors (Bay Trail-D) Server processors (Avoton) Are you sure that your partners may not sell these CPUs clustered on servers due to other features on these CPUs (than powersavings), or possibly due to simple marketing reasons, because Atom or ARM servers are trendy nowadays. And in the end it will show up as an "Enterprise Server" platform where your partners like HP, SAP, Dell,... have to explicitly state to disable specific powersaving features on OS level, because the CPU driver keeps ignoring any BIOS provided information. > Second, the CPU ID is the most reliable piece of information about the > type of the system we can possibly get. This is wrong. Differing by battery or not is more reliable than any CPU id matching. And for the last at least 3 years, I have not seen a system where pm profile was set wrong. In our server room I can show you a hundred servers that all match the "Enterprise Sever" ACPI profile. Laptops as well. > The BIOS may always lie to us and we can't entirely rely on it for figuring > out the system profile, We can. Because otherwise the ACPI pm profile is set "unknown". BTW: There were rumours that Intel's microcode had some sever bugs as well recently. So what, we have to rely on this piece of software as well... IMO we see a bit too much CPU ID matching and it's getting more and more. Perfect would be no CPU ID matching at all. We had this with the acpi-cpufreq driver which in the end worked out perfectly for 99% of all machines out there. Hm, apropos id matching... I thought intel_pstate got introduced because the CPUs it supports can switch to any frequency between min and max. And this is not reflected by ACPI which does export a maximum amount of X (10?) frequency states. Seeing this: static int silvermont_freq_table[] = { 83300, 100000, 133300, 116700, 80000}; static int airmont_freq_table[] = { 83300, 100000, 133300, 116700, 80000, 93300, 90000, 88900, 87500}; makes me think, whether these shouldn't simply use the acpi_cpufreq driver. Or in the near future we may have tons of such defines? And you are back at "real" governors... > but as I said, if a CPU designed for energy-efficient systems is used in the > given one, that is a strong indication on what the system is or it would > have used a different CPU otherwise. Are you sure? And is this statement in line with your sales and product managers. These guys often tend to think differently than the developers ;) Thomas -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Sorry there is a typo, the result are from WW40. — Steph > On Dec 14, 2015, at 5:14 PM, Stephane Gasparini <stephane.gasparini@linux.intel.com> wrote: > > We are currently re-doing the measurement on our latest android release. > > Here are the results we have on a android release of WW50 > Note that as of today Android is using Interactive Governor. > > Atom: Intel PState Intel PState Power > Performance CPU Load Improvment > > Circular Progressbar (animation) 136 mW 32 mW -76% > 50% Load 1 thread 260 mW 25 mW -90% > T-Rex (gaming) 500 mW 302 mW -40% > Application Install 799 mW 383 mW -52% > Music Playback (AAC) 87 mW 33 mW -62% > iozone 819 mW 104 mW -87% > CandyCrush 415 mW 130 mW -69% > FruitNinja 1007 mW 853 mW -15% > Plants vs zombie2 356 mW 101 mW -71% > AngryBirds 344 mW 100 mW -70% > VideoPlayBack 549 mW 124 mW -77% > Browsermark 759 mW 560 mW -26% > IcyRocks 589 mW 486 mW -17% > > > > Atom: Intel PState Intel PState Performance > Performance CPU Load Improvment > > AndEBench-Native 21585 21737 0.70% > AndEBench-Java 1630 1632 0.12% > SmartBench-Gaming 4312 4292 -0.46% > SmartBench-Productivity 15484 15441 -0.28% > Dhrystone 37506 37444 -0.16% > CoreMark 35259 35231 -0.08% > Quadrant-2D 1000 1000 0.00% > Quadrant-3D 2478 2465 -0.52% > Quadrant-IO 9997 9917 -0.80% > Quadrant-Mem 14839 15159 2.16% > 0xBenchLinpack4T 193 192 -0.70% > CaffeineMark 51955 51746 -0.40% > GeekBench-Integer 4514 4507 -0.16% > GeekBench-Score 3403 3378 -0.73% > GeekBench-FloatingPoint 3247 3245 -0.06% > GeekBench-Memory 1540 1538 -0.13% > CFBench-Native 38585 39003 1.08% > AnTuTu-DB 770 745 -3.25% > AnTuTu-2D 1657 1654 -0.18% > AnTuTu-3D 20133 19961 -0.85% > AnTuTu-Dalvik 4336 4352 0.37% > AnTuTu-Int-single 2244 2244 0.00% > AnTuTu-Multitask 8693 8164 -6.09% > AnTuTu-Float-single 2228 2226 -0.09% > AnTuTu-Ram 3425 3447 0.64% > AnTuTu-IO 2467 2442 -1.01% > AnTuTu-Mem 2532 2528 -0.16% > AnTuTu-Int 4882 4901 0.39% > AnTuTu-Total 58198 57631 -0.97% > AnTuTu-Float 4831 4904 1.51% > CompuBenchRS-Gauss 45 43 -4.84% > CompuBenchRS-Particule 57 42 -26.89% > CompuBenchRS-Fractal 157 149 -5.46% > CompuBenchRS-Raytrace 10 10 -1.21% > CompuBenchRS-Histogram 85 77 -9.20% > CompuBenchRS-Gaussintr 47 44 -7.28% > CompuBenchRS-Facedetect 5 5 -0.64% > Kraken-Score 4125 4136 0.27% > Octane-Score 8026 8098 0.90% > Sunspider-Score 660 673 1.98% > AnTuTu-Dalvik 2605 2642 1.42% > AnTuTu-Int-single 2343 2388 1.92% > AnTuTu-Multitask 2082 2135 2.55% > Stream-Copy 6704 6705 0.01% > Stream-Scale 6530 6527 -0.06% > IOZone RR 4656 4278 -8.12% Intel P-State performance, is relatively imune to I/O Wait > IOZone RW 1256 836 -33.44% Intel P-State performance, is relatively imune to I/O Wait > BrowserMark 2071 2086 0.72% > jArt 689 ops/sec 685 ops/sec -0.54% > > > -- > Steph > > > > >> On Dec 10, 2015, at 11:01 PM, Rafael J. Wysocki <rjw@rjwysocki.net> wrote: >> >> On Thursday, December 10, 2015 02:04:46 PM Thomas Renninger wrote: >>> On Wednesday, December 09, 2015 12:21:53 PM Srinivas Pandruvada wrote: >>>> On Wed, 2015-12-09 at 15:34 +0100, Thomas Renninger wrote: >>>>> On Tuesday, December 08, 2015 10:02:23 AM Srinivas Pandruvada wrote: >>>>>> On Tue, 2015-12-08 at 16:27 +0100, Thomas Renninger wrote: >> >> [cut] >> >>>> >>>> This is the order I am thinking of in the order of priority high to >>>> low : >>>> - User policy (either command line or via cpu-freq scaling_governor) >>>> - ACPI >>>> - Pickup defaults based on CPU ID. >>> >>> Why by CPU ID? >> >> For a couple of reasons. >> >> First of all, processors are designed in a specific way and some ways of >> P-states management may not lead to good results on them no matter what, >> while others may match them a lot better. >> >> The processors in question here are designed with energy efficiency in mind. >> For this reason, an approach skewed towards performance (which the original >> algorithm in intel_pstate is) is really suboptimal there as the performance >> is not there in the first place, quite fundamentally. Even if anyone used >> any chip based on those cores in a server, that would be a "low-power server" >> so to speak, so using an algorithm more oriented towards energy efficiency >> would still make sense for it. >> >> Second, the CPU ID is the most reliable piece of information about the >> type of the system we can possibly get. The BIOS may always lie to us and >> we can't entirely rely on it for figuring out the system profile, but as I >> said, if a CPU designed for energy-efficient systems is used in the given >> one, that is a strong indication on what the system is or it would have >> used a different CPU otherwise. >> >>> This doesn't make sense to me and unnecessarily complicates >>> things. >>> >>> But, please do not submit anything half baken: >>> a new pstate ondemand governor or different >>> workload optizmized algorithms inside this governor, or later possibly as >>> separate governors, defaults based on CPU ID or platform type and >>> then later userspace tunable... >>> >>> We should not only think, but implement a final concept and submit altogether. >>> Otherwise there are several kernels flying around which do implement half >>> of the implementation and the confusion is perfect. >> >> On the other hand, making changes in smaller steps allows us to use a more >> evolutionary approach in which changes that turn out to cause problems to >> happen may be easily reverted and replaced with something better. >> >>> I would also be interested in some figures. >>> Are there any measurements how many power is saved, how big the performance >>> overhead is, etc? >> >> Philippe will provide the data. >> >> Thanks, >> Rafael >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-pm" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > > -- > To unsubscribe from this list: send the line "unsubscribe linux-pm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
— Steph > On Dec 14, 2015, at 5:22 PM, Thomas Renninger <trenn@suse.de> wrote: > > On Thursday, December 10, 2015 11:01:18 PM Rafael J. Wysocki wrote: >> On Thursday, December 10, 2015 02:04:46 PM Thomas Renninger wrote: >>> On Wednesday, December 09, 2015 12:21:53 PM Srinivas Pandruvada wrote: >>>> On Wed, 2015-12-09 at 15:34 +0100, Thomas Renninger wrote: >>>>> On Tuesday, December 08, 2015 10:02:23 AM Srinivas Pandruvada wrote: >>>>>> On Tue, 2015-12-08 at 16:27 +0100, Thomas Renninger wrote: >> [cut] >> >>>> This is the order I am thinking of in the order of priority high to >>>> low : >>>> - User policy (either command line or via cpu-freq scaling_governor) >>>> - ACPI >>>> - Pickup defaults based on CPU ID. >>> >>> Why by CPU ID? >> >> For a couple of reasons. >> >> First of all, processors are designed in a specific way and some ways of >> P-states management may not lead to good results on them no matter what, >> while others may match them a lot better. >> >> The processors in question here are designed with energy efficiency in mind. >> For this reason, an approach skewed towards performance (which the original >> algorithm in intel_pstate is) is really suboptimal there as the performance >> is not there in the first place, quite fundamentally. Even if anyone used >> any chip based on those cores in a server, that would be a "low-power >> server" so to speak, so using an algorithm more oriented towards energy >> efficiency would still make sense for it. > > Ok, so we have these: > Desktop processors (Bay Trail-D) > Server processors (Avoton) > > Are you sure that your partners may not sell these CPUs clustered on servers > due to other features on these CPUs (than powersavings), or possibly due to > simple marketing reasons, because Atom or ARM servers are trendy nowadays. > > And in the end it will show up as an "Enterprise Server" platform where > your partners like HP, SAP, Dell,... have to explicitly state to disable > specific powersaving features on OS level, because the CPU driver keeps > ignoring any BIOS provided information. > >> Second, the CPU ID is the most reliable piece of information about the >> type of the system we can possibly get. > > This is wrong. > Differing by battery or not is more reliable than any CPU id matching. > > And for the last at least 3 years, I have not seen a system where pm profile > was set wrong. In our server room I can show you a hundred servers that > all match the "Enterprise Sever" ACPI profile. Laptops as well. > >> The BIOS may always lie to us and we can't entirely rely on it for figuring >> out the system profile, > > We can. Because otherwise the ACPI pm profile is set "unknown". > BTW: There were rumours that Intel's microcode had some sever bugs as well > recently. So what, we have to rely on this piece of software as well... > > IMO we see a bit too much CPU ID matching and it's getting more and more. > Perfect would be no CPU ID matching at all. > We had this with the acpi-cpufreq driver which in the end worked out perfectly > for 99% of all machines out there. > > Hm, apropos id matching... I thought intel_pstate got introduced because the > CPUs it supports can switch to any frequency between min and max. And this > is not reflected by ACPI which does export a maximum amount of X (10?) > frequency states. > Seeing this: > static int silvermont_freq_table[] = { > 83300, 100000, 133300, 116700, 80000}; > > static int airmont_freq_table[] = { > 83300, 100000, 133300, 116700, 80000, > 93300, 90000, 88900, 87500}; > > makes me think, whether these shouldn't simply use the acpi_cpufreq driver. > Or in the near future we may have tons of such defines? > And you are back at "real" governors… Those tables are for the bus frequency, not the core frequency. On ATOM so far there is an constraint because the CPU are not having a separate PLL, thus they is a dependency on the RAM bus speed that cause the FSB not to be 100000 compare to core. The ACPI table usually does not expose all the p-state above max freq (turbo). So relying on ACPI table is not perf/power efficient. > >> but as I said, if a CPU designed for energy-efficient systems is used in the >> given one, that is a strong indication on what the system is or it would >> have used a different CPU otherwise. > > Are you sure? And is this statement in line with your sales and product > managers. These guys often tend to think differently than the developers ;) > > Thomas > > -- > To unsubscribe from this list: send the line "unsubscribe linux-pm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 2015-12-14 at 16:13 +0100, Thomas Renninger wrote: > On Thursday, December 10, 2015 09:28:40 AM Srinivas Pandruvada wrote: > > On Thu, 2015-12-10 at 14:04 +0100, Thomas Renninger wrote: > > > On Wednesday, December 09, 2015 12:21:53 PM Srinivas Pandruvada wrote: > > > > On Wed, 2015-12-09 at 15:34 +0100, Thomas Renninger wrote: > > > > > On Tuesday, December 08, 2015 10:02:23 AM Srinivas Pandruvada wrote: > > ... > > > > > The function > > > > to get next state can be common. There is no airmont/silvermont name > > > > attached to this (get_target_pstate_use_cpu_load or > > > > get_target_pstate_use_performance). So the default callback > > > > ".get_target_pstate" can be changed based on preference. > > > > > > > > This is the order I am thinking of in the order of priority high to > > > > low : > > > > - User policy (either command line or via cpu-freq scaling_governor) > > > > - ACPI > > > > - Pickup defaults based on CPU ID. > > That would mean if ACPI pm profile is unknown and only then, assign > algorithm (or other tunables) based on CPU ID? Yes. > > > > Why by CPU ID? This doesn't make sense to me and unnecessarily complicates > > > things. > > > > Some CPU are designed for a specific functions. > > Ok, I nearly bought this... > Then I started reading (only a very bit). Silvermont is simply a CPU model. > It's not a bound to specific functions like different ARM processors with > possibly very specific chipset. It's just a general X86 processor, right? > These are micro architectures. The methods to get some parameters are different. Currently our callbacks are also including pid_policy. If this core is used in ATOM servers, then it will have a different cpu id. We have not added any ATOM server cpuids. We have to separate the micro-architecture callbacks from pid_policy. > And here: > https://en.wikipedia.org/wiki/Silvermont > > I get: > List of Silvermont processors: > Desktop processors (Bay Trail-D) > Server processors (Avoton) > Communications processors (Rangeley) > Embedded/automotive processors (Bay Trail-I) > Mobile processors (Bay Trail-M) > Tablet processors (Bay Trail-T) > Smartphone processors (Merrifield and Moorefield) > > List of Airmont processors > Mobile processors (Braswell) > Smartphone and Tablet processors (Cherry Trail) > > Not sure what specific functions you mean... > Can you name them? You have them above for two micro-architectures. But they have different cpu id when the use case calls for totally different use case. For example server processor (Avaton above in your list) has a cpuid of 0x4D, which we don't support for Intel Pstate. Thanks, Srinivas > > I cut the rest of the mail which was very helpful info and explained > quite some open details. > > Thanks! > > Thomas > -- > To unsubscribe from this list: send the line "unsubscribe linux-pm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Monday, December 14, 2015 05:22:12 PM Thomas Renninger wrote: > On Thursday, December 10, 2015 11:01:18 PM Rafael J. Wysocki wrote: > > On Thursday, December 10, 2015 02:04:46 PM Thomas Renninger wrote: > > > On Wednesday, December 09, 2015 12:21:53 PM Srinivas Pandruvada wrote: > > > > On Wed, 2015-12-09 at 15:34 +0100, Thomas Renninger wrote: > > > > > On Tuesday, December 08, 2015 10:02:23 AM Srinivas Pandruvada wrote: > > > > > > On Tue, 2015-12-08 at 16:27 +0100, Thomas Renninger wrote: > > [cut] > > > > > > This is the order I am thinking of in the order of priority high to > > > > low : > > > > - User policy (either command line or via cpu-freq scaling_governor) > > > > - ACPI > > > > - Pickup defaults based on CPU ID. > > > > > > Why by CPU ID? > > > > For a couple of reasons. > > > > First of all, processors are designed in a specific way and some ways of > > P-states management may not lead to good results on them no matter what, > > while others may match them a lot better. > > > > The processors in question here are designed with energy efficiency in mind. > > For this reason, an approach skewed towards performance (which the original > > algorithm in intel_pstate is) is really suboptimal there as the performance > > is not there in the first place, quite fundamentally. Even if anyone used > > any chip based on those cores in a server, that would be a "low-power > > server" so to speak, so using an algorithm more oriented towards energy > > efficiency would still make sense for it. > > Ok, so we have these: > Desktop processors (Bay Trail-D) > Server processors (Avoton) > > Are you sure that your partners may not sell these CPUs clustered on servers > due to other features on these CPUs (than powersavings), or possibly due to > simple marketing reasons, because Atom or ARM servers are trendy nowadays. No I don't, and I've never said I do. The point here is that even if a bunch of Baytrails is clustered in a server (which I don't think is extremely likely, as in that case it would make much more sense to cluster Avotons - or use just one of them for that matter - and they have a different CPU ID), they still are designed with energy efficiency rather than with performance in mind. This means that, from intel_pstate perspective and from the point of view of finding a useful balance between energy efficiency and performace, it doesn't make sense to use a performace-oriented algorithm on them anyway, because energy efficiency and performance are not balanced then. > And in the end it will show up as an "Enterprise Server" platform where > your partners like HP, SAP, Dell,... have to explicitly state to disable > specific powersaving features on OS level, because the CPU driver keeps > ignoring any BIOS provided information. Well, so let's consider the current state of affairs for a while. Where do we take the BIOS-provided information in question into account today? The best approach IMO is to use all of the sources of information available and make a choice on the basis of all that information combined, but we need to start somewhere. If your concern is that we are going to discard all of the BIOS-provided information going forward, then this is not our current plan. > > Second, the CPU ID is the most reliable piece of information about the > > type of the system we can possibly get. > > This is wrong. > Differing by battery or not is more reliable than any CPU id matching. Well, not really, because it doesn't tell you, for example, whether or not race-to-idle is an efficient strategy for the given CPU. If it is not, skweing towards performance is likely a bad choice. > And for the last at least 3 years, I have not seen a system where pm profile > was set wrong. In our server room I can show you a hundred servers that > all match the "Enterprise Sever" ACPI profile. Laptops as well. Which doesn't prove that it can't be wrong and I've seen enough systems where BIOS-provided information is simply incorrect to base any decisions on that information alone if I have any other choice. > > The BIOS may always lie to us and we can't entirely rely on it for figuring > > out the system profile, > > We can. Because otherwise the ACPI pm profile is set "unknown". Well, it should be. It will be if everything is done correctly, but is it really guaraneed in any way? > BTW: There were rumours that Intel's microcode had some sever bugs as well > recently. So what, we have to rely on this piece of software as well... > > IMO we see a bit too much CPU ID matching and it's getting more and more. > Perfect would be no CPU ID matching at all. > We had this with the acpi-cpufreq driver which in the end worked out perfectly > for 99% of all machines out there. Had it worked perfectly, intel_pstate wouldn't have been introduced. Simple as that. > Hm, apropos id matching... I thought intel_pstate got introduced because the > CPUs it supports can switch to any frequency between min and max. And this > is not reflected by ACPI which does export a maximum amount of X (10?) > frequency states. That was one of the factors taken into consideration, but not the only one. > Seeing this: > static int silvermont_freq_table[] = { > 83300, 100000, 133300, 116700, 80000}; > > static int airmont_freq_table[] = { > 83300, 100000, 133300, 116700, 80000, > 93300, 90000, 88900, 87500}; > > makes me think, whether these shouldn't simply use the acpi_cpufreq driver. > Or in the near future we may have tons of such defines? > And you are back at "real" governors... I'm not sure what you mean here. These are used for computing a single parameter of the algorithm at the initialization time AFAICS. > > > but as I said, if a CPU designed for energy-efficient systems is used in the > > given one, that is a strong indication on what the system is or it would > > have used a different CPU otherwise. > > Are you sure? And is this statement in line with your sales and product > managers. These guys often tend to think differently than the developers ;) Well, maybe, but I'm talking from the technical viewpoint only. And, again, what intel_pstate in its default mode tries to do is to balance energy efficiency with performance. In other words, we try to maximize the performance/power ratio here and quite frankly I find it rather hard to understand why the same strategy of doing that should be suitable for all of the processors in the market. Especially given the known differences between them. Thanks, Rafael
On 2015.12.14 08:15 Stephane Gasparini wrote: > Here are the results we have on a android release of WW50 > Note that as of today Android is using Interactive Governor. Thanks very much for your test results. By "Intel PState CPU Load" I assume you mean using that V6 3 patch "cpufreq: intel_pstate: account non C0 time" from Dec 4th, which I also tested and sent an off-list reply to Philippe. Summary: I like the patch set. Myself, I would like to see tests comparing the current "powersave" governor to your "CPU Load" method, although I do always like the reference "performance" test. However, I suspect that in your case, there wouldn't be much difference. After you moved the setpoint to 60 from 97, the response becomes pretty much like performance mode.[1] Also, you provide only power information, and no performance / energy trade-off information. Do any of those tests reveal good power use, but unbearable performance? What I am saying is that power by itself is not a sufficient evaluation criteria, otherwise just lock in the minimum pstate and be done with it, which we know isn't the right solution. > Atom: Intel PState Intel PState Power > Performance CPU Load Improvment > 50% Load 1 thread 260 mW 25 mW -90% If I understand correctly, the CPU load is 50% regardless of CPU frequency. If yes, then this particular test is grossly unfair and misleading. Why? Because using your default setpoint of 60, the CPU load method will hold the pstate at minimum, whereas performance mode will ask for the maximum. The result will be drastic differences in the actual amount of work done per unit time. I think that a more comparable test would be a 50% (or whatever) load calibrated to a nominal CPU frequency (I use the max non-turbo CPU frequency, but it can be anything.) Meaning that the once the fixed packet of work is done, the CPU can go idle sooner or later, depending on the CPU frequency. Note also, that the work/sleep frequency used to attain the 50% load can be relevant, particularly at lower sleep/work frequencies where the intel_pstate driver response can have higher and higher magnitude oscillations. By the way, in my tests, your "CPU Load" method lower sleep/work frequency results were phenomenally good. Here are some results from my test computer, albeit with the wrong processor: Note 1: I have an older i7-2600K. Note 2: Obviously, I forced your code patch to work with my processor ID. Note 3: Power is package power measured with turbostat. Note 4: one thread. 1.) 50% load at 3.4GHz 201 hertz work / sleep frequency: 4.4-rc5 powersave 11.27 watts* 4.4-rc5 performance 12.83 watts 4.4-rc3 + PL ver 6 3 patch set (default (60)): 10.47 watts 4.4-rc3 + PL ver 6 3 patch set (setpoint 40): 12.55 watts 4.4-rc3 + PL ver 6 3 patch set (setpoint 70): 9.72 watts** 2.) 50% load at 3.4GHz 50 hertz work / sleep frequency: 4.4-rc5 powersave 12.01 watts 4.4-rc5 performance 11.90 watts 4.4-rc3 + PL ver 6 3 patch set (default (60)): 10.09 watts 4.4-rc3 + PL ver 6 3 patch set (setpoint 40): 12.01 watts 4.4-rc3 + PL ver 6 3 patch set (setpoint 70): 9.65 watts * there were 6 overruns. ** there were 3 overruns, meaning the work packet did not finish in time before the next one was supposed to start. This issue goes to step function load response time. i.e How fast does the scaling driver respond to load and ramp up the CPU frequency. My test program can catch up, but some applications might not like the delay. An example of a performance / energy trade-off test: phoronix ffmpeg test: Shorter time is better. The ffmpeg test is known to be particularly difficult for frequency scaling drivers to handle. The scenario is similar to how some games utilize all the CPUs. Your patch set (an older version) on kernel 4.4-rc1: setpoint 60: 17.84 seconds ave. 4324 package Joules. (default) setpoint 40: 12.86 seconds ave. 4822 package Joules. (noisey) or ~30% time improvement at a cost of 12% more energy, which some users might think worthwhile. For reference: intel_pstate powersave (normal processor, setpoint 97): 12.06 seconds ave. 4983 package Joules I do not have energy numbers for the below: Performance mode: 11.16 seconds ave. acpi-cpufreq powersave: 24.47 seconds ave. acpi-cpufreq ondemand: 13.35 seconds ave. acpi-cpufreq conservative: 17.60 seconds ave. [1] http://marc.info/?l=linux-pm&m=142894256520552&w=2 ... Doug -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Doug, On 14/12/2015 23:13, Doug Smythies wrote: > On 2015.12.14 08:15 Stephane Gasparini wrote: > >> Here are the results we have on a android release of WW50 >> Note that as of today Android is using Interactive Governor. > Thanks very much for your test results. > By "Intel PState CPU Load" I assume you mean using that V6 3 patch > "cpufreq: intel_pstate: account non C0 time" from Dec 4th, which I also > tested and sent an off-list reply to Philippe. > Summary: I like the patch set. These tests were done on a previous version of my patch done in WW40. We'll sent an updated version (WW50 with patch V6 3) soon. > Myself, I would like to see tests comparing the current > "powersave" governor to your "CPU Load" method, although > I do always like the reference "performance" test. However, > I suspect that in your case, there wouldn't be much difference. > After you moved the setpoint to 60 from 97, the response becomes > pretty much like performance mode.[1] In the table sent by stephane, "performance" meant "original algorithm" based on average pstate (aperf/mperf). On Atoms, a setpoint of 60 is not too high. With higher setpoints we'll not be able to reach meet some performance KPIs (some frames are dropped mainly for gaming use cases). However, we are working on a more power conservative algorithm). > > Also, you provide only power information, and no performance / energy > trade-off information. Do any of those tests reveal good power use, but > unbearable performance? What I am saying is that power by itself is not > a sufficient evaluation criteria, otherwise just lock in the minimum pstate > and be done with it, which we know isn't the right solution. Stephane has also some other performance/power figures done with Power Lab. We'll see if we can share them. > >> Atom: Intel PState Intel PState Power >> Performance CPU Load Improvment >> 50% Load 1 thread 260 mW 25 mW -90% > If I understand correctly, the CPU load is 50% regardless of CPU frequency. > If yes, then this particular test is grossly unfair and misleading. > Why? > Because using your default setpoint of 60, the CPU load method will hold > the pstate at minimum, whereas performance mode will ask for the maximum. > The result will be drastic differences in the actual amount of work done > per unit time. Yes, you are right, we need to fix that workload or to remove it to the list. Also a fixed load does not correspond to a real use case. This is why we are not using this test as a KPI. > > I think that a more comparable test would be a 50% (or whatever) load > calibrated to a nominal CPU frequency (I use the max non-turbo CPU > frequency, but it can be anything.) Meaning that the once the fixed > packet of work is done, the CPU can go idle sooner or later, depending > on the CPU frequency. Are you using an existing tool for doing that or did you developed your own tool ? > > Note also, that the work/sleep frequency used to attain the 50% load > can be relevant, particularly at lower sleep/work frequencies where > the intel_pstate driver response can have higher and higher > magnitude oscillations. By the way, in my tests, your "CPU Load" method > lower sleep/work frequency results were phenomenally good. Yes, that's what we observed also. For many use cases very often used in android (gaming, circular progress bar, audio playback, video playback, etc ...), using the load instead of the ratio avg_pstate/current_pstate is a good choice and can save a lot of power! > > Here are some results from my test computer, albeit with the wrong processor: > > Note 1: I have an older i7-2600K. > Note 2: Obviously, I forced your code patch to work with my processor ID. > Note 3: Power is package power measured with turbostat. > Note 4: one thread. > > 1.) 50% load at 3.4GHz 201 hertz work / sleep frequency: > > 4.4-rc5 powersave 11.27 watts* > 4.4-rc5 performance 12.83 watts > 4.4-rc3 + PL ver 6 3 patch set (default (60)): 10.47 watts > 4.4-rc3 + PL ver 6 3 patch set (setpoint 40): 12.55 watts > 4.4-rc3 + PL ver 6 3 patch set (setpoint 70): 9.72 watts** > > 2.) 50% load at 3.4GHz 50 hertz work / sleep frequency: > > 4.4-rc5 powersave 12.01 watts > 4.4-rc5 performance 11.90 watts > 4.4-rc3 + PL ver 6 3 patch set (default (60)): 10.09 watts > 4.4-rc3 + PL ver 6 3 patch set (setpoint 40): 12.01 watts > 4.4-rc3 + PL ver 6 3 patch set (setpoint 70): 9.65 watts > > * there were 6 overruns. > ** there were 3 overruns, meaning the work packet did not > finish in time before the next one was supposed to start. > This issue goes to step function load response time. i.e > How fast does the scaling driver respond to load and ramp up > the CPU frequency. My test program can catch up, but some > applications might not like the delay. > > An example of a performance / energy trade-off test: > > phoronix ffmpeg test: > Shorter time is better. > The ffmpeg test is known to be particularly difficult for > frequency scaling drivers to handle. The scenario is similar > to how some games utilize all the CPUs. > > Your patch set (an older version) on kernel 4.4-rc1: > setpoint 60: 17.84 seconds ave. 4324 package Joules. (default) > setpoint 40: 12.86 seconds ave. 4822 package Joules. (noisey) > > or ~30% time improvement at a cost of 12% more energy, which some > users might think worthwhile. > > For reference: > intel_pstate powersave (normal processor, setpoint 97): 12.06 seconds ave. 4983 package Joules > I do not have energy numbers for the below: > Performance mode: 11.16 seconds ave. > acpi-cpufreq powersave: 24.47 seconds ave. > acpi-cpufreq ondemand: 13.35 seconds ave. > acpi-cpufreq conservative: 17.60 seconds ave. > > [1] http://marc.info/?l=linux-pm&m=142894256520552&w=2 > > ... Doug > > Thx for the data, Philippe -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
— Steph > On Dec 15, 2015, at 11:30 AM, Philippe Longepe <philippe.longepe@linux.intel.com> wrote: > > Hi Doug, > > On 14/12/2015 23:13, Doug Smythies wrote: >> On 2015.12.14 08:15 Stephane Gasparini wrote: >> >>> Here are the results we have on a android release of WW50 >>> Note that as of today Android is using Interactive Governor. >> Thanks very much for your test results. >> By "Intel PState CPU Load" I assume you mean using that V6 3 patch >> "cpufreq: intel_pstate: account non C0 time" from Dec 4th, which I also >> tested and sent an off-list reply to Philippe. >> Summary: I like the patch set. > These tests were done on a previous version of my patch done in WW40. > We'll sent an updated version (WW50 with patch V6 3) soon. As I said we are redoing a whole sets of measure with this submission and it takes times. The measures I sent are all made without the iowait being taking into account in the load algorithm, but for IOZONE and power data. >> Myself, I would like to see tests comparing the current >> "powersave" governor to your "CPU Load" method, although This what is the comparison I sent. Intel PState Performance = intel pstate “powersave” Intel PState CPU Load ? V6 3 patch "cpufreq: intel_pstate: account non C0 time" from Dec 4th >> I do always like the reference "performance" test. However, >> I suspect that in your case, there wouldn't be much difference. >> After you moved the setpoint to 60 from 97, the response becomes >> pretty much like performance mode.[1] This is not really true as the load goes down by itself without the need to wait for 3 periods lower the requested pstate compare to ?APERF/?MPERF. The setpoint of 60 was there on “powersave" already for Atom cores prior we modified the algorithm, for performance issue. On Android we need a very great reactivity (measured by KPI to match competition). This was solved by Google by auto/touch boost feature in Interactive Governor. Today with a setpoint of 60 we are on par with Interactive Governor in terms of performance, and we are better in power. > In the table sent by stephane, "performance" meant "original algorithm" based on average pstate (aperf/mperf). > On Atoms, a setpoint of 60 is not too high. With higher setpoints we'll not be able to reach meet some performance KPIs > (some frames are dropped mainly for gaming use cases). However, we are working on a more power conservative algorithm). >> >> Also, you provide only power information, and no performance / energy >> trade-off information. Do any of those tests reveal good power use, but >> unbearable performance? What I am saying is that power by itself is not >> a sufficient evaluation criteria, otherwise just lock in the minimum pstate >> and be done with it, which we know isn't the right solution. Well you misunderstood the power numbers I guess e.g. if you watch a movie for 1h30, it will last 1h30 not matter what is the average power, so the lower is the average power the better it is. This is the same thing for Audio and all the power numbers I shared but the application install. Here are the detail for Application install Intel PState Intel PState Performance CPU Load ? 799 mW (28 s) ? 383 mW (34 s) ? 29 mW/s ? 11 mW/s > Stephane has also some other performance/power figures done with Power Lab. We'll see if we can share them. >> >>> Atom: Intel PState Intel PState Power >>> Performance CPU Load Improvment >>> 50% Load 1 thread 260 mW 25 mW -90% >> If I understand correctly, the CPU load is 50% regardless of CPU frequency. >> If yes, then this particular test is grossly unfair and misleading. >> Why? >> Because using your default setpoint of 60, the CPU load method will hold >> the pstate at minimum, whereas performance mode will ask for the maximum. >> The result will be drastic differences in the actual amount of work done >> per unit time. > Yes, you are right, we need to fix that workload or to remove it to the list. > Also a fixed load does not correspond to a real use case. > This is why we are not using this test as a KPI. > The “powersave” version on intel p-state is misbehaving for our Android device to duty cycle load. If you have a duty cycle that is below 3 sample sampling period then the “powersave” version will request to high frequency for the job to be done. Using the load solves this particular situation that is an issue for Android devices. In the power numbers I provided there is a real case of this the Circular Progress bar where the “powersave” request a too high frequency all along. Using the load allow to run at Lowest Frequency that is sufficient to run this progress bar. On this point we behave better than the Interactive Governor (Android) that is also requesting necessary high frequencies for this workload due to its auto/touch boost features. >> >> I think that a more comparable test would be a 50% (or whatever) load >> calibrated to a nominal CPU frequency (I use the max non-turbo CPU >> frequency, but it can be anything.) Meaning that the once the fixed >> packet of work is done, the CPU can go idle sooner or later, depending >> on the CPU frequency. > Are you using an existing tool for doing that or did you developed your own tool ? >> >> Note also, that the work/sleep frequency used to attain the 50% load >> can be relevant, particularly at lower sleep/work frequencies where >> the intel_pstate driver response can have higher and higher >> magnitude oscillations. By the way, in my tests, your "CPU Load" method >> lower sleep/work frequency results were phenomenally good. > Yes, that's what we observed also. For many use cases very often used in android > (gaming, circular progress bar, audio playback, video playback, etc ...), using the > load instead of the ratio avg_pstate/current_pstate is a good choice and can save > a lot of power! >> >> Here are some results from my test computer, albeit with the wrong processor: >> >> Note 1: I have an older i7-2600K. >> Note 2: Obviously, I forced your code patch to work with my processor ID. >> Note 3: Power is package power measured with turbostat. >> Note 4: one thread. >> >> 1.) 50% load at 3.4GHz 201 hertz work / sleep frequency: >> >> 4.4-rc5 powersave 11.27 watts* >> 4.4-rc5 performance 12.83 watts >> 4.4-rc3 + PL ver 6 3 patch set (default (60)): 10.47 watts >> 4.4-rc3 + PL ver 6 3 patch set (setpoint 40): 12.55 watts >> 4.4-rc3 + PL ver 6 3 patch set (setpoint 70): 9.72 watts** >> >> 2.) 50% load at 3.4GHz 50 hertz work / sleep frequency: >> >> 4.4-rc5 powersave 12.01 watts >> 4.4-rc5 performance 11.90 watts >> 4.4-rc3 + PL ver 6 3 patch set (default (60)): 10.09 watts >> 4.4-rc3 + PL ver 6 3 patch set (setpoint 40): 12.01 watts >> 4.4-rc3 + PL ver 6 3 patch set (setpoint 70): 9.65 watts >> >> * there were 6 overruns. >> ** there were 3 overruns, meaning the work packet did not >> finish in time before the next one was supposed to start. >> This issue goes to step function load response time. i.e >> How fast does the scaling driver respond to load and ramp up >> the CPU frequency. My test program can catch up, but some >> applications might not like the delay. >> An example of a performance / energy trade-off test: >> >> phoronix ffmpeg test: >> Shorter time is better. >> The ffmpeg test is known to be particularly difficult for >> frequency scaling drivers to handle. The scenario is similar >> to how some games utilize all the CPUs. >> >> Your patch set (an older version) on kernel 4.4-rc1: >> setpoint 60: 17.84 seconds ave. 4324 package Joules. (default) >> setpoint 40: 12.86 seconds ave. 4822 package Joules. (noisey) >> >> or ~30% time improvement at a cost of 12% more energy, which some >> users might think worthwhile. >> >> For reference: >> intel_pstate powersave (normal processor, setpoint 97): 12.06 seconds ave. 4983 package Joules >> I do not have energy numbers for the below: >> Performance mode: 11.16 seconds ave. >> acpi-cpufreq powersave: 24.47 seconds ave. >> acpi-cpufreq ondemand: 13.35 seconds ave. >> acpi-cpufreq conservative: 17.60 seconds ave. >> >> [1] http://marc.info/?l=linux-pm&m=142894256520552&w=2 >> >> ... Doug >> >> > Thx for the data, > > Philippe > -- > To unsubscribe from this list: send the line "unsubscribe linux-pm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Monday, December 14, 2015 11:06:22 PM Rafael J. Wysocki wrote: > On Monday, December 14, 2015 05:22:12 PM Thomas Renninger wrote: > > On Thursday, December 10, 2015 11:01:18 PM Rafael J. Wysocki wrote: > > > On Thursday, December 10, 2015 02:04:46 PM Thomas Renninger wrote: > > > > On Wednesday, December 09, 2015 12:21:53 PM Srinivas Pandruvada wrote: ... > This means that, from intel_pstate perspective and from the point of view of > finding a useful balance between energy efficiency and performace, it > doesn't make sense to use a performace-oriented algorithm on them anyway, > because energy efficiency and performance are not balanced then. Ok. But this has nothing to do with a specific CPU ID. This is not even Intel specific. It is about workload. If HW specific side-effects as "race-to-idle is an efficient strategy for the given CPU" comes into the game... This also is not Intel specific. My point is: The cpufreq subsystem nicely splitted out workload policy into governors. intel_pstate violated this concept by implementing it's own policy into the HW driver. We now know that this caused: - quite some confusion. - needs userspace to adopt to HW driver specific knobs - isn't very elegant and rather ungeneric Now even more different policy algorithms should get mangled into the HW driver. I do see why you want to do this... It's easy (for now). But I am questioning whether this is the right way to go for (on longterm)... I do not have a final solution at hand. You may want to evaluate the possibility to move to a governor based solution. If this is not feasible you should at least write to syslog which algorithm has been taken and why. > > And in the end it will show up as an "Enterprise Server" platform where > > your partners like HP, SAP, Dell,... have to explicitly state to disable > > specific powersaving features on OS level, because the CPU driver keeps > > ignoring any BIOS provided information. > > Well, so let's consider the current state of affairs for a while. Where do > we take the BIOS-provided information in question into account today? Are you talking about general BIOS-provided information? -> There are a hundred places and we cannot boot if BIOS data is wrong, right? For the pm profile I guess none. I sent patches to expose this to sysfs quite some years ago, when I realized that OEMs started to not only provide "unknown", but fill it with correct data. Since then it's easy to read out and it was always correct when I double checked. The other point is: It's going to be even more correct and used, when we use it. It's a public spec and OEMs have to stick to it and as said 99% should do. And if not: It's the old game: provide acpi.pm_profile=MyPlatform param. You/we are not to blame. What is much worse is the back and forth to use BIOS provided data. With acpi_idle BIOS data was used to identify C-states. All major server vendors provided OS independent documentation how to disable deep sleep states in BIOS for specific number cruncher or database workloads. Then Intel violated the specs by switching to intel_idle. Similar for P-states. ACPI (acpi-cpufreq) -> MSR (intel_pstate) -> partly ACPI again ?!? (commit 37afb00032424d684a48d649fc) So we, Linux kernel, have to rely on some BIOS info to be correct And the BIOS has to rely on the kernel drivers to use some BIOS info it provides... And then both should stick to it... What I want to say: It's not only the BIOS guys doing things wrong and in the end you have to trust some interfaces. > The best approach IMO is to use all of the sources of information available > and make a choice on the basis of all that information combined, but we need > to start somewhere. > > If your concern is that we are going to discard all of the BIOS-provided > information going forward, then this is not our current plan. > > Which doesn't prove that it can't be wrong and I've seen enough systems > where BIOS-provided information is simply incorrect to base any decisions > on that information alone if I have any other choice. > > > > The BIOS may always lie to us and we can't entirely rely on it for > > > figuring > > > out the system profile, > > > > We can. Because otherwise the ACPI pm profile is set "unknown". > > Well, it should be. It will be if everything is done correctly, but is it > really guaraneed in any way? > > > BTW: There were rumours that Intel's microcode had some sever bugs as well > > recently. So what, we have to rely on this piece of software as well... > > > > IMO we see a bit too much CPU ID matching and it's getting more and more. > > Perfect would be no CPU ID matching at all. > > We had this with the acpi-cpufreq driver which in the end worked out > > perfectly for 99% of all machines out there. > > Had it worked perfectly, intel_pstate wouldn't have been introduced. Simple > as that. From my experience, yes. It worked more or less perfect after years of stabilization and improvments. intel_pstate still has to go through this. > > > Hm, apropos id matching... I thought intel_pstate got introduced because > > the CPUs it supports can switch to any frequency between min and max. And > > this is not reflected by ACPI which does export a maximum amount of X > > (10?) frequency states. > > That was one of the factors taken into consideration, but not the only one. > > > Seeing this: > > static int silvermont_freq_table[] = { > > > > 83300, 100000, 133300, 116700, 80000}; > > > > static int airmont_freq_table[] = { > > > > 83300, 100000, 133300, 116700, 80000, > > 93300, 90000, 88900, 87500}; > > > > makes me think, whether these shouldn't simply use the acpi_cpufreq > > driver. > > Or in the near future we may have tons of such defines? > > And you are back at "real" governors... > > I'm not sure what you mean here. These are used for computing a single > parameter of the algorithm at the initialization time AFAICS. Ah yes, too quick. Thomas -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Monday, December 14, 2015 10:20:27 AM Srinivas Pandruvada wrote: > On Mon, 2015-12-14 at 16:13 +0100, Thomas Renninger wrote: > > On Thursday, December 10, 2015 09:28:40 AM Srinivas Pandruvada wrote: > > > On Thu, 2015-12-10 at 14:04 +0100, Thomas Renninger wrote: > > > > On Wednesday, December 09, 2015 12:21:53 PM Srinivas Pandruvada wrote: > > > > > On Wed, 2015-12-09 at 15:34 +0100, Thomas Renninger wrote: ... > > And here: > > https://en.wikipedia.org/wiki/Silvermont > > > > I get: > > List of Silvermont processors: > > Desktop processors (Bay Trail-D) > > Server processors (Avoton) > > Communications processors (Rangeley) > > Embedded/automotive processors (Bay Trail-I) > > Mobile processors (Bay Trail-M) > > Tablet processors (Bay Trail-T) > > Smartphone processors (Merrifield and Moorefield) > > > > List of Airmont processors > > Mobile processors (Braswell) > > Smartphone and Tablet processors (Cherry Trail) > > > > Not sure what specific functions you mean... > > Can you name them? > > You have them above for two micro-architectures. > But they have different cpu id when the use case calls for totally > different use case. For example server processor (Avaton above in your > list) has a cpuid of 0x4D, which we don't support for Intel Pstate. Thanks. Does this means Avaton will fall back to acpi-cpufreq? Would it make sense to initalize with intel_pstate and then switch to performance governor per default? This may need some fiddling with our certifcation tool which expects cpupfreq to work at least a bit. Thomas PS: Thanks for all the input. Summary (from my point of view): Go for the airmont/silvermont specific algorithms. Especially as they seem to be an important improvment. But at least write something to syslog. The "ondemand" compatiblity patch(es) are not that important, right? I would hold them off and evaluate whether it will make sense to get back to governors. Even if not, I am not sure it is a good idea to introduce a fake ondemand governor which is still doing things totally different than the orignal one... -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> Does this means Avaton will fall back to acpi-cpufreq? Yes. Though if we find that we can make it run better by supporting it via intel_pstate, then we reserve the right to add that support:-) > Would it make sense to initalize with intel_pstate and then switch to > performance governor per default? I don't understand this question. > This may need some fiddling with our certifcation tool which expects > cpupfreq to work at least a bit. How do your certification tool handle with a system that has NO pstates? We have low power products that run at only 1 frequency. We also have several large customers that like to implement P-states in their out-of-band firmware and hide them completely from the OS. > The "ondemand" compatiblity patch(es) are not that important, right? > I would hold them off and evaluate whether it will make sense to get back > to governors. Even if not, I am not sure it is a good idea to introduce a > fake ondemand governor which is still doing things totally different than > the orignal one... I agree, ondemand is a file in Linux, and the word should be used to describe only what that file does. thanks, Len Brown, Intel Open Source Technology Center -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 2015-12-15 at 15:24 +0100, Thomas Renninger wrote: > On Monday, December 14, 2015 10:20:27 AM Srinivas Pandruvada wrote: > > On Mon, 2015-12-14 at 16:13 +0100, Thomas Renninger wrote: > > > On Thursday, December 10, 2015 09:28:40 AM Srinivas Pandruvada wrote: > > > > On Thu, 2015-12-10 at 14:04 +0100, Thomas Renninger wrote: > > > > > On Wednesday, December 09, 2015 12:21:53 PM Srinivas Pandruvada wrote: > > > > > > On Wed, 2015-12-09 at 15:34 +0100, Thomas Renninger wrote: > > ... > > > > And here: > > > https://en.wikipedia.org/wiki/Silvermont > > > > > > I get: > > > List of Silvermont processors: > > > Desktop processors (Bay Trail-D) > > > Server processors (Avoton) > > > Communications processors (Rangeley) > > > Embedded/automotive processors (Bay Trail-I) > > > Mobile processors (Bay Trail-M) > > > Tablet processors (Bay Trail-T) > > > Smartphone processors (Merrifield and Moorefield) > > > > > > List of Airmont processors > > > Mobile processors (Braswell) > > > Smartphone and Tablet processors (Cherry Trail) > > > > > > Not sure what specific functions you mean... > > > Can you name them? > > > > You have them above for two micro-architectures. > > But they have different cpu id when the use case calls for totally > > different use case. For example server processor (Avaton above in your > > list) has a cpuid of 0x4D, which we don't support for Intel Pstate. > > Thanks. > Does this means Avaton will fall back to acpi-cpufreq? > Would it make sense to initalize with intel_pstate and then switch to > performance governor per default? > > This may need some fiddling with our certifcation tool which expects > cpupfreq to work at least a bit. > > Thomas > > PS: Thanks for all the input. Summary (from my point of view): > > Go for the airmont/silvermont specific algorithms. Especially as they seem to > be an important improvment. But at least write something to syslog. > > The "ondemand" compatiblity patch(es) are not that important, right? They are not going in current form even if I dropped powersave as you suggested. > I would hold them off and evaluate whether it will make sense to get back > to governors. Even if not, I am not sure it is a good idea to introduce a > fake ondemand governor which is still doing things totally different than > the orignal one... Idea is not to cause more confusion. Thinking of calling something different "intel_pstate_..", but needs changes in cpufreq core. I will propose some changes, and see if they are acceptable. Thanks, Srinivas > -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2015.12.15 02:30 Philippe Longepe wrote: > On 14/12/2015 23:13, Doug Smythies wrote: >> On 2015.12.14 08:15 Stephane Gasparini wrote: >> > In the table sent by stephane, "performance" meant "original algorithm" > based on average pstate (aperf/mperf). Oh. > On Atoms, a setpoint of 60 is not too high. With higher setpoints we'll > not be able to reach meet some performance KPIs Define "KPI" > (some frames are dropped mainly for gaming use cases). However, we are > working on a more power conservative algorithm). I do not think the issue / challenge is specific to Atom. >> >> I think that a more comparable test would be a 50% (or whatever) load >> calibrated to a nominal CPU frequency (I use the max non-turbo CPU >> frequency, but it can be anything.) Meaning that the once the fixed >> packet of work is done, the CPU can go idle sooner or later, depending >> on the CPU frequency. > Are you using an existing tool for doing that or did you developed your > own tool ? While he would not recognize it now, I mainly use a simple program that Peter Zijlstra gave me a few years ago, consume.c. However, I added fixed work packet mode, to satisfy a need for something more representative of real world periodic workflows. I'll send it to you off-list. On 2015.12.15 05:07 Stephane Gasparini wrote: > Well you misunderstood the power numbers I guess > e.g. if you watch a movie for 1h30, it will last 1h30 not matter what is the > average power, so the lower is the average power the better it is. O.K. I didn’t know all of those tests had to run for whatever time. But for the performance list of tests, there is no energy information. I'm just saying that it might matter for some, as in ffmpeg example I gave. > Here are the detail for Application install > > Intel PState Intel PState > Performance CPU Load > ? 799 mW (28 s) ? 383 mW (34 s) > ? 29 mW/s ? 11 mW/s Wouldn't the correct terminology be 22.4 Joules and 13.0 Joules to do the job? Or 42% energy saved at a cost of 21% extra time. Anyway, you made your point. ... Doug -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
— Steph > On Dec 16, 2015, at 12:34 AM, Doug Smythies <dsmythies@telus.net> wrote: > > On 2015.12.15 02:30 Philippe Longepe wrote: >> On 14/12/2015 23:13, Doug Smythies wrote: >>> On 2015.12.14 08:15 Stephane Gasparini wrote: >>> >> In the table sent by stephane, "performance" meant "original algorithm" >> based on average pstate (aperf/mperf). > > Oh. > >> On Atoms, a setpoint of 60 is not too high. With higher setpoints we'll >> not be able to reach meet some performance KPIs > > Define “KPI" Key Performance Indicator, basically a test that we have to reach certain performance or power criteria > >> (some frames are dropped mainly for gaming use cases). However, we are >> working on a more power conservative algorithm). > > I do not think the issue / challenge is specific to Atom. Well I agree that this would also be beneficials to Laptop for instance. However the "load based" algorithm is behind the existing “powersave” intel states in regards of performance for database I/O type tests. > >>> >>> I think that a more comparable test would be a 50% (or whatever) load >>> calibrated to a nominal CPU frequency (I use the max non-turbo CPU >>> frequency, but it can be anything.) Meaning that the once the fixed >>> packet of work is done, the CPU can go idle sooner or later, depending >>> on the CPU frequency. >> Are you using an existing tool for doing that or did you developed your >> own tool ? > > While he would not recognize it now, I mainly use a simple program that > Peter Zijlstra gave me a few years ago, consume.c. However, I added fixed > work packet mode, to satisfy a need for something more representative of > real world periodic workflows. > > I'll send it to you off-list. > > On 2015.12.15 05:07 Stephane Gasparini wrote: > >> Well you misunderstood the power numbers I guess >> e.g. if you watch a movie for 1h30, it will last 1h30 not matter what is the >> average power, so the lower is the average power the better it is. > > O.K. I didn’t know all of those tests had to run for whatever time. > But for the performance list of tests, there is no energy information. > I'm just saying that it might matter for some, as in ffmpeg example I gave. > >> Here are the detail for Application install >> >> Intel PState Intel PState >> Performance CPU Load >> ? 799 mW (28 s) ? 383 mW (34 s) >> ? 29 mW/s ? 11 mW/s > > Wouldn't the correct terminology be 22.4 Joules and 13.0 Joules > to do the job? > Or 42% energy saved at a cost of 21% extra time. > > Anyway, you made your point. We’ll try to provide all metrics (perf, J and mW) for the new measures. > > ... Doug > > -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tuesday, December 15, 2015 12:59:50 PM Len Brown wrote: > > Does this means Avaton will fall back to acpi-cpufreq? > > This may need some fiddling with our certifcation tool which expects > > cpupfreq to work at least a bit. > > How do your certification tool handle with a system that has NO pstates? > > We have low power products that run at only 1 frequency. > We also have several large customers that like to implement > P-states in their out-of-band firmware and hide them > completely from the OS. The tests had been adopted to intel_pstate, if needed they have to be adjusted again.. But it is nice to know in advance when such a bug drops in. Thomas -- To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
--- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -532,6 +532,14 @@ static struct cpu_defaults byt_params = }, }; +static struct pstate_adjust_policy adj_server_policy = { + .sample_rate_ms = 10, + .deadband = 0, + .setpoint = 30, + .p_gain_pct = 10, + .d_gain_pct = 0, + .i_gain_pct = 0, +}; static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) { @@ -862,6 +870,8 @@ static struct cpufreq_driver intel_pstat }; static int __initdata no_load; +static int __initdata vanilla_policy; +static int __initdata server_policy; static int intel_pstate_msrs_not_valid(void) { @@ -978,6 +988,9 @@ static int __init intel_pstate_init(void int cpu, rc = 0; const struct x86_cpu_id *id; struct cpu_defaults *cpu_info; +#if IS_ENABLED(CONFIG_ACPI) + const char *profile = NULL; +#endif if (no_load) return -ENODEV; @@ -1003,6 +1016,32 @@ static int __init intel_pstate_init(void pr_info("Intel P-state driver initializing.\n"); +#if IS_ENABLED(CONFIG_ACPI) + if (!vanilla_policy) { + switch (acpi_gbl_FADT.preferred_profile) { + case PM_WORKSTATION: + profile = "Workstation"; + break; + case PM_ENTERPRISE_SERVER: + profile = "Enterprise Server"; + break; + case PM_SOHO_SERVER: + profile = "SOHO Server"; + break; + case PM_PERFORMANCE_SERVER: + profile = "Performance Server"; + break; + default: + if (server_policy) + profile = "Server"; + }; + + if (profile) { + pr_info("Intel P-state setting %s policy\n", profile); + copy_pid_params(&adj_server_policy); + } + } +#endif all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); if (!all_cpu_data) return -ENOMEM; @@ -1036,6 +1075,10 @@ static int __init intel_pstate_setup(cha if (!strcmp(str, "disable")) no_load = 1; + if (!strcmp(str, "vanilla_policy")) + vanilla_policy = 1; + if (!strcmp(str, "server_policy")) + server_policy = 1; return 0; } early_param("intel_pstate", intel_pstate_setup);