Message ID | 20191113124654.18122-3-ggherdovich@suse.cz (mailing list archive) |
---|---|
State | Not Applicable, archived |
Headers | show |
Series | Add support for frequency invariance for (some) x86 | expand |
On Wed, Nov 13, 2019 at 01:46:50PM +0100, Giovanni Gherdovich wrote: > The scheduler needs the ratio freq_curr/freq_max for frequency-invariant > accounting. On SKYLAKE_X CPUs set freq_max to the highest frequency that can > be sustained by a group of at least 4 cores. > > From the changelog of commit 31e07522be56 ("tools/power turbostat: fix > decoding for GLM, DNV, SKX turbo-ratio limits"): > > > Newer processors do not hard-code the number of cpus in each bin > > to {1, 2, 3, 4, 5, 6, 7, 8}. Rather, they can specify any number > > of CPUS in each of the 8 bins: > > > > eg. > > > > ... > > 36 * 100.0 = 3600.0 MHz max turbo 4 active cores > > 37 * 100.0 = 3700.0 MHz max turbo 3 active cores > > 38 * 100.0 = 3800.0 MHz max turbo 2 active cores > > 39 * 100.0 = 3900.0 MHz max turbo 1 active cores > > > > could now look something like this: > > > > ... > > 36 * 100.0 = 3600.0 MHz max turbo 16 active cores > > 37 * 100.0 = 3700.0 MHz max turbo 8 active cores > > 38 * 100.0 = 3800.0 MHz max turbo 4 active cores > > 39 * 100.0 = 3900.0 MHz max turbo 2 active cores > > This encoding of turbo levels applies to both SKYLAKE_X and GOLDMONT/GOLDMONT_D, > but we treat these two classes in separate commits because their freq_max > values need to be different. For SKX we prefer a lower freq_max in the ratio > freq_curr/freq_max, allowing load and utilization to overshoot and the > schedutil governor to be more performance-oriented. Models from the Atom > series (such as GOLDMONT*) are handled in a forthcoming commit as they have to > favor power-efficiency over performance. Can we at least use a single function to decode both? A little like the below. I'm not married to the naming, but I think it is a little silly to have 2 different functions to decode the exact same MSRs. 
(one could even go as far as to make a boot param to override the {1,4} default core count for these things) --- Index: linux-2.6/arch/x86/kernel/smpboot.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/smpboot.c +++ linux-2.6/arch/x86/kernel/smpboot.c @@ -1863,27 +1863,6 @@ static const struct x86_cpu_id has_glm_t {} }; -static bool glm_set_cpu_max_freq(u64 *ratio, u64 *turbo_ratio) -{ - int err; - - if (!x86_match_cpu(has_glm_turbo_ratio_limits)) - return false; - - err = rdmsrl_safe(MSR_PLATFORM_INFO, ratio); - if (err) - return false; - - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, turbo_ratio); - if (err) - return false; - - *ratio = (*ratio >> 8) & 0xFF; /* max P state ratio */ - *turbo_ratio = *turbo_ratio & 0xFF; /* highest turbo ratio */ - - return true; -} - static int get_knl_turbo_ratio(u64 *turbo_ratio) { u64 msr; @@ -1933,53 +1912,35 @@ static bool knl_set_cpu_max_freq(u64 *ra return true; } -static int get_turbo_ratio_group(u64 *turbo_ratio) +static bool skx_set_cpu_max_freq(u64 *ratio, u64 *turbo_ratio, int size) { - u64 ratio, core_counts; - u32 group_size = 0; - int err, i, found = 0; + u64 ratios, counts; + u32 group_size; + int err, i; - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratio); - if (err) - return err; - - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &core_counts); + err = rdmsrl_safe(MSR_PLATFORM_INFO, ratio); if (err) - return err; - - for (i = 0; i < 64; i += 8) { - group_size = (core_counts >> i) & 0xFF; - if (group_size >= 4) { - *turbo_ratio = (ratio >> i) & 0xFF; - found = 1; - break; - } - } - - if (!found) - return 1; - - return 0; -} - -static bool skx_set_cpu_max_freq(u64 *ratio, u64 *turbo_ratio) -{ - int err; - - if (!x86_match_cpu(has_skx_turbo_ratio_limits)) return false; - err = rdmsrl_safe(MSR_PLATFORM_INFO, ratio); + *ratio = (*ratio >> 8) & 0xFF; /* max P state ratio */ + + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios); if (err) return false; - err = 
get_turbo_ratio_group(turbo_ratio); /* 4C (circa) turbo ratio */ + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts); if (err) return false; - *ratio = (*ratio >> 8) & 0xFF; /* max P state ratio */ + for (i = 0; i < 64; i += 8) { + group_size = (counts >> i) & 0xFF; + if (group_size >= size) { + *turbo_ratio = (ratios >> i) & 0xFF; + return true; + } + } - return true; + return false; } static bool core_set_cpu_max_freq(u64 *ratio, u64 *turbo_ratio) @@ -2010,13 +1971,15 @@ static void intel_set_cpu_max_freq(void) if (slv_set_cpu_max_freq(&ratio, &turbo_ratio)) goto set_value; - if (glm_set_cpu_max_freq(&ratio, &turbo_ratio)) + if (x86_match_cpu(has_glm_turbo_ratio_limits) && + skx_set_cpu_max_freq(&ratio, &turbo_ratio, 1)) goto set_value; if (knl_set_cpu_max_freq(&ratio, &turbo_ratio)) goto set_value; - if (skx_set_cpu_max_freq(&ratio, &turbo_ratio)) + if (x86_match_cpu(has_skx_turbo_ratio_limits) && + skx_set_cpu_max_freq(&ratio, &turbo_ratio, 4)) goto set_value; core_set_cpu_max_freq(&ratio, &turbo_ratio);
On Wed, 2019-12-18 at 21:06 +0100, Peter Zijlstra wrote: > On Wed, Nov 13, 2019 at 01:46:50PM +0100, Giovanni Gherdovich wrote: > > The scheduler needs the ratio freq_curr/freq_max for frequency-invariant > > accounting. On SKYLAKE_X CPUs set freq_max to the highest frequency that can > > be sustained by a group of at least 4 cores. > > > > From the changelog of commit 31e07522be56 ("tools/power turbostat: fix > > decoding for GLM, DNV, SKX turbo-ratio limits"): > > > > > Newer processors do not hard-code the number of cpus in each bin > > > to {1, 2, 3, 4, 5, 6, 7, 8}. Rather, they can specify any number > > > of CPUS in each of the 8 bins: > > > > > > eg. > > > > > > ... > > > 36 * 100.0 = 3600.0 MHz max turbo 4 active cores > > > 37 * 100.0 = 3700.0 MHz max turbo 3 active cores > > > 38 * 100.0 = 3800.0 MHz max turbo 2 active cores > > > 39 * 100.0 = 3900.0 MHz max turbo 1 active cores > > > > > > could now look something like this: > > > > > > ... > > > 36 * 100.0 = 3600.0 MHz max turbo 16 active cores > > > 37 * 100.0 = 3700.0 MHz max turbo 8 active cores > > > 38 * 100.0 = 3800.0 MHz max turbo 4 active cores > > > 39 * 100.0 = 3900.0 MHz max turbo 2 active cores > > > > This encoding of turbo levels applies to both SKYLAKE_X and GOLDMONT/GOLDMONT_D, > > but we treat these two classes in separate commits because their freq_max > > values need to be different. For SKX we prefer a lower freq_max in the ratio > > freq_curr/freq_max, allowing load and utilization to overshoot and the > > schedutil governor to be more performance-oriented. Models from the Atom > > series (such as GOLDMONT*) are handled in a forthcoming commit as they have to > > favor power-efficiency over performance. > > Can we at least use a single function to decode both? A little like the > below. I'm not married to the naming, but I think it is a little silly > to have 2 different functions to decode the exact same MSRs. 
> > (one could even go as far as to make a boot param to override the {1,4} > default core count for these things) Sure, that was actually a gross oversight on my part for not seeing that. Thanks for catching it and sketching a solution. Giovanni
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 814d7900779d..11d57d741584 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1841,23 +1841,71 @@ static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = { {} }; -static void core_set_cpu_max_freq(void) +static int get_turbo_ratio_group(u64 *turbo_ratio) +{ + u64 ratio, core_counts; + u32 group_size = 0; + int err, i, found = 0; + + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratio); + if (err) + return err; + + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &core_counts); + if (err) + return err; + + for (i = 0; i < 64; i += 8) { + group_size = (core_counts >> i) & 0xFF; + if (group_size >= 4) { + *turbo_ratio = (ratio >> i) & 0xFF; + found = 1; + break; + } + } + + if (!found) + return 1; + + return 0; +} + +static bool skx_set_cpu_max_freq(u64 *ratio, u64 *turbo_ratio) { - u64 ratio, turbo_ratio; int err; - err = rdmsrl_safe(MSR_PLATFORM_INFO, &ratio); + if (!x86_match_cpu(has_skx_turbo_ratio_limits)) + return false; + + err = rdmsrl_safe(MSR_PLATFORM_INFO, ratio); if (err) - return; + return false; - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &turbo_ratio); + err = get_turbo_ratio_group(turbo_ratio); /* 4C (circa) turbo ratio */ if (err) - return; + return false; - ratio = (ratio >> 8) & 0xFF; /* max P state ratio */ - turbo_ratio = (turbo_ratio >> 24) & 0xFF; /* 4C turbo ratio */ + *ratio = (*ratio >> 8) & 0xFF; /* max P state ratio */ - arch_max_freq = div_u64(turbo_ratio * SCHED_CAPACITY_SCALE, ratio); + return true; +} + +static bool core_set_cpu_max_freq(u64 *ratio, u64 *turbo_ratio) +{ + int err; + + err = rdmsrl_safe(MSR_PLATFORM_INFO, ratio); + if (err) + return false; + + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, turbo_ratio); + if (err) + return false; + + *ratio = (*ratio >> 8) & 0xFF; /* max P state ratio */ + *turbo_ratio = (*turbo_ratio >> 24) & 0xFF; /* 4C turbo ratio */ + + return true; } static void intel_set_cpu_max_freq(void) @@ -1865,7 +1913,6 @@ 
static void intel_set_cpu_max_freq(void) /* * TODO: add support for: * - * - Xeon Gold/Platinum * - Xeon Phi (KNM, KNL) * - Atom Goldmont * - Atom Silvermont @@ -1873,15 +1920,21 @@ static void intel_set_cpu_max_freq(void) * which all now get by default arch_max_freq = SCHED_CAPACITY_SCALE */ - static_branch_enable(&arch_scale_freq_key); + u64 ratio = 1, turbo_ratio = 1; if (turbo_disabled() || - x86_match_cpu(has_skx_turbo_ratio_limits) || x86_match_cpu(has_knl_turbo_ratio_limits) || x86_match_cpu(has_glm_turbo_ratio_limits)) return; - core_set_cpu_max_freq(); + if (skx_set_cpu_max_freq(&ratio, &turbo_ratio)) + goto set_value; + + core_set_cpu_max_freq(&ratio, &turbo_ratio); + +set_value: + arch_max_freq = div_u64(turbo_ratio * SCHED_CAPACITY_SCALE, ratio); + static_branch_enable(&arch_scale_freq_key); } static void init_scale_freq(void *arg)