Message ID | 20220415161206.478362457@linutronix.de |
---|---|
State | Handled Elsewhere, archived |
Series | x86/cpu: Consolidate APERF/MPERF code |
On Fri, Apr 15, 2022 at 9:19 PM Thomas Gleixner <tglx@linutronix.de> wrote: > > as this can share code with the preexisting APERF/MPERF code. > > No functional change. > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > --- > arch/x86/kernel/cpu/aperfmperf.c | 366 ++++++++++++++++++++++++++++++++++++++- > arch/x86/kernel/smpboot.c | 355 ------------------------------------- > 2 files changed, 362 insertions(+), 359 deletions(-) > > --- a/arch/x86/kernel/cpu/aperfmperf.c > +++ b/arch/x86/kernel/cpu/aperfmperf.c > @@ -6,15 +6,19 @@ > * Copyright (C) 2017 Intel Corp. > * Author: Len Brown <len.brown@intel.com> > */ > - > +#include <linux/cpufreq.h> > #include <linux/delay.h> > #include <linux/ktime.h> > #include <linux/math64.h> > #include <linux/percpu.h> > -#include <linux/cpufreq.h> > -#include <linux/smp.h> > -#include <linux/sched/isolation.h> > #include <linux/rcupdate.h> > +#include <linux/sched/isolation.h> > +#include <linux/sched/topology.h> > +#include <linux/smp.h> > +#include <linux/syscore_ops.h> > + > +#include <asm/cpu_device_id.h> > +#include <asm/intel-family.h> > > #include "cpu.h" > > @@ -152,3 +156,357 @@ unsigned int arch_freq_get_on_cpu(int cp > > return per_cpu(samples.khz, cpu); > } > + > +#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) > +/* > + * APERF/MPERF frequency ratio computation. > + * > + * The scheduler wants to do frequency invariant accounting and needs a <1 > + * ratio to account for the 'current' frequency, corresponding to > + * freq_curr / freq_max. > + * > + * Since the frequency freq_curr on x86 is controlled by micro-controller and > + * our P-state setting is little more than a request/hint, we need to observe > + * the effective frequency 'BusyMHz', i.e. the average frequency over a time > + * interval after discarding idle time. This is given by: > + * > + * BusyMHz = delta_APERF / delta_MPERF * freq_base > + * > + * where freq_base is the max non-turbo P-state. > + * > + * The freq_max term has to be set to a somewhat arbitrary value, because we > + * can't know which turbo states will be available at a given point in time: > + * it all depends on the thermal headroom of the entire package. We set it to > + * the turbo level with 4 cores active. > + * > + * Benchmarks show that's a good compromise between the 1C turbo ratio > + * (freq_curr/freq_max would rarely reach 1) and something close to freq_base, > + * which would ignore the entire turbo range (a conspicuous part, making > + * freq_curr/freq_max always maxed out). > + * > + * An exception to the heuristic above is the Atom uarch, where we choose the > + * highest turbo level for freq_max since Atom's are generally oriented towards > + * power efficiency. > + * > + * Setting freq_max to anything less than the 1C turbo ratio makes the ratio > + * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1. > + */ > + > +DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key); > + > +static DEFINE_PER_CPU(u64, arch_prev_aperf); > +static DEFINE_PER_CPU(u64, arch_prev_mperf); > +static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE; > +static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE; > + > +void arch_set_max_freq_ratio(bool turbo_disabled) > +{ > + arch_max_freq_ratio = turbo_disabled ? 
SCHED_CAPACITY_SCALE : > + arch_turbo_freq_ratio; > +} > +EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio); > + > +static bool turbo_disabled(void) > +{ > + u64 misc_en; > + int err; > + > + err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en); > + if (err) > + return false; > + > + return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); > +} > + > +static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) > +{ > + int err; > + > + err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq); > + if (err) > + return false; > + > + err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq); > + if (err) > + return false; > + > + *base_freq = (*base_freq >> 16) & 0x3F; /* max P state */ > + *turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */ > + > + return true; > +} > + > +#define X86_MATCH(model) \ > + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \ > + INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL) > + > +static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = { > + X86_MATCH(XEON_PHI_KNL), > + X86_MATCH(XEON_PHI_KNM), > + {} > +}; > + > +static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = { > + X86_MATCH(SKYLAKE_X), > + {} > +}; > + > +static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = { > + X86_MATCH(ATOM_GOLDMONT), > + X86_MATCH(ATOM_GOLDMONT_D), > + X86_MATCH(ATOM_GOLDMONT_PLUS), > + {} > +}; > + > +static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, > + int num_delta_fratio) > +{ > + int fratio, delta_fratio, found; > + int err, i; > + u64 msr; > + > + err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); > + if (err) > + return false; > + > + *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ > + > + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); > + if (err) > + return false; > + > + fratio = (msr >> 8) & 0xFF; > + i = 16; > + found = 0; > + do { > + if (found >= num_delta_fratio) { > + *turbo_freq = fratio; > + return true; > + } > + > + delta_fratio = (msr >> (i + 5)) & 0x7; > + > + if (delta_fratio) { > + found += 1; > + fratio -= delta_fratio; > + } > + > + i += 8; > + } while (i < 64); > + > + return true; > +} > + > +static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size) > +{ > + u64 ratios, counts; > + u32 group_size; > + int err, i; > + > + err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); > + if (err) > + return false; > + > + *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ > + > + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios); > + if (err) > + return false; > + > + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts); > + if (err) > + return false; > + > + for (i = 0; i < 64; i += 8) { > + group_size = (counts >> i) & 0xFF; > + if (group_size >= size) { > + *turbo_freq = (ratios >> i) & 0xFF; > + return true; > + } > + } > + > + return false; > +} > + > +static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) > +{ > + u64 msr; > + int err; > + > + err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); > + if (err) > + return false; > + > + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); > + if (err) > + return false; > + > + *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ > + *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */ > + > + /* The CPU may have less than 4 cores */ > + if (!*turbo_freq) > + *turbo_freq = msr & 0xFF; /* 1C turbo */ > + > + return true; > +} > + > +static bool intel_set_max_freq_ratio(void) > +{ > + u64 base_freq, turbo_freq; > + u64 turbo_ratio; > + > + if (slv_set_max_freq_ratio(&base_freq, &turbo_freq)) > + goto out; > + > + if 
(x86_match_cpu(has_glm_turbo_ratio_limits) && > + skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) > + goto out; > + > + if (x86_match_cpu(has_knl_turbo_ratio_limits) && > + knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) > + goto out; > + > + if (x86_match_cpu(has_skx_turbo_ratio_limits) && > + skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4)) > + goto out; > + > + if (core_set_max_freq_ratio(&base_freq, &turbo_freq)) > + goto out; > + > + return false; > + > +out: > + /* > + * Some hypervisors advertise X86_FEATURE_APERFMPERF > + * but then fill all MSR's with zeroes. > + * Some CPUs have turbo boost but don't declare any turbo ratio > + * in MSR_TURBO_RATIO_LIMIT. > + */ > + if (!base_freq || !turbo_freq) { > + pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n"); > + return false; > + } > + > + turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq); > + if (!turbo_ratio) { > + pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n"); > + return false; > + } > + > + arch_turbo_freq_ratio = turbo_ratio; > + arch_set_max_freq_ratio(turbo_disabled()); > + > + return true; > +} > + > +static void init_counter_refs(void) > +{ > + u64 aperf, mperf; > + > + rdmsrl(MSR_IA32_APERF, aperf); > + rdmsrl(MSR_IA32_MPERF, mperf); > + > + this_cpu_write(arch_prev_aperf, aperf); > + this_cpu_write(arch_prev_mperf, mperf); > +} > + > +#ifdef CONFIG_PM_SLEEP > +static struct syscore_ops freq_invariance_syscore_ops = { > + .resume = init_counter_refs, > +}; > + > +static void register_freq_invariance_syscore_ops(void) > +{ > + /* Bail out if registered already. */ > + if (freq_invariance_syscore_ops.node.prev) > + return; > + > + register_syscore_ops(&freq_invariance_syscore_ops); > +} > +#else > +static inline void register_freq_invariance_syscore_ops(void) {} > +#endif > + > +void init_freq_invariance(bool secondary, bool cppc_ready) > +{ > + bool ret = false; > + > + if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) > + return; > + > + if (secondary) { > + if (static_branch_likely(&arch_scale_freq_key)) { > + init_counter_refs(); > + } > + return; > + } > + > + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) > + ret = intel_set_max_freq_ratio(); > + else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { > + if (!cppc_ready) { > + return; > + } > + ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio); > + } > + > + if (ret) { > + init_counter_refs(); > + static_branch_enable(&arch_scale_freq_key); > + register_freq_invariance_syscore_ops(); > + pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); > + } else { > + pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n"); > + } > +} > + > +static void disable_freq_invariance_workfn(struct work_struct *work) > +{ > + static_branch_disable(&arch_scale_freq_key); > +} > + > +static DECLARE_WORK(disable_freq_invariance_work, > + disable_freq_invariance_workfn); > + > +DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; > + > +void arch_scale_freq_tick(void) > +{ > + u64 freq_scale; > + u64 aperf, mperf; > + u64 acnt, mcnt; > + > + if (!arch_scale_freq_invariant()) > + return; > + > + rdmsrl(MSR_IA32_APERF, aperf); > + rdmsrl(MSR_IA32_MPERF, mperf); > + > + acnt = aperf - this_cpu_read(arch_prev_aperf); > + mcnt = mperf - this_cpu_read(arch_prev_mperf); > + > + this_cpu_write(arch_prev_aperf, aperf); > + this_cpu_write(arch_prev_mperf, mperf); > + > + if 
(check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) > + goto error; > + > + if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt) > + goto error; > + > + freq_scale = div64_u64(acnt, mcnt); > + if (!freq_scale) > + goto error; > + > + if (freq_scale > SCHED_CAPACITY_SCALE) > + freq_scale = SCHED_CAPACITY_SCALE; > + > + this_cpu_write(arch_freq_scale, freq_scale); > + return; > + > +error: > + pr_warn("Scheduler frequency invariance went wobbly, disabling!\n"); > + schedule_work(&disable_freq_invariance_work); > +} > +#endif /* CONFIG_X86_64 && CONFIG_SMP */ > --- a/arch/x86/kernel/smpboot.c > +++ b/arch/x86/kernel/smpboot.c > @@ -56,7 +56,6 @@ > #include <linux/numa.h> > #include <linux/pgtable.h> > #include <linux/overflow.h> > -#include <linux/syscore_ops.h> > > #include <asm/acpi.h> > #include <asm/desc.h> > @@ -1847,357 +1846,3 @@ void native_play_dead(void) > } > > #endif > - > -#ifdef CONFIG_X86_64 > -/* > - * APERF/MPERF frequency ratio computation. > - * > - * The scheduler wants to do frequency invariant accounting and needs a <1 > - * ratio to account for the 'current' frequency, corresponding to > - * freq_curr / freq_max. > - * > - * Since the frequency freq_curr on x86 is controlled by micro-controller and > - * our P-state setting is little more than a request/hint, we need to observe > - * the effective frequency 'BusyMHz', i.e. the average frequency over a time > - * interval after discarding idle time. This is given by: > - * > - * BusyMHz = delta_APERF / delta_MPERF * freq_base > - * > - * where freq_base is the max non-turbo P-state. > - * > - * The freq_max term has to be set to a somewhat arbitrary value, because we > - * can't know which turbo states will be available at a given point in time: > - * it all depends on the thermal headroom of the entire package. We set it to > - * the turbo level with 4 cores active. > - * > - * Benchmarks show that's a good compromise between the 1C turbo ratio > - * (freq_curr/freq_max would rarely reach 1) and something close to freq_base, > - * which would ignore the entire turbo range (a conspicuous part, making > - * freq_curr/freq_max always maxed out). > - * > - * An exception to the heuristic above is the Atom uarch, where we choose the > - * highest turbo level for freq_max since Atom's are generally oriented towards > - * power efficiency. > - * > - * Setting freq_max to anything less than the 1C turbo ratio makes the ratio > - * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1. > - */ > - > -DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key); > - > -static DEFINE_PER_CPU(u64, arch_prev_aperf); > -static DEFINE_PER_CPU(u64, arch_prev_mperf); > -static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE; > -static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE; > - > -void arch_set_max_freq_ratio(bool turbo_disabled) > -{ > - arch_max_freq_ratio = turbo_disabled ? 
SCHED_CAPACITY_SCALE : > - arch_turbo_freq_ratio; > -} > -EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio); > - > -static bool turbo_disabled(void) > -{ > - u64 misc_en; > - int err; > - > - err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en); > - if (err) > - return false; > - > - return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); > -} > - > -static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) > -{ > - int err; > - > - err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq); > - if (err) > - return false; > - > - err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq); > - if (err) > - return false; > - > - *base_freq = (*base_freq >> 16) & 0x3F; /* max P state */ > - *turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */ > - > - return true; > -} > - > -#define X86_MATCH(model) \ > - X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \ > - INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL) > - > -static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = { > - X86_MATCH(XEON_PHI_KNL), > - X86_MATCH(XEON_PHI_KNM), > - {} > -}; > - > -static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = { > - X86_MATCH(SKYLAKE_X), > - {} > -}; > - > -static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = { > - X86_MATCH(ATOM_GOLDMONT), > - X86_MATCH(ATOM_GOLDMONT_D), > - X86_MATCH(ATOM_GOLDMONT_PLUS), > - {} > -}; > - > -static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, > - int num_delta_fratio) > -{ > - int fratio, delta_fratio, found; > - int err, i; > - u64 msr; > - > - err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); > - if (err) > - return false; > - > - *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ > - > - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); > - if (err) > - return false; > - > - fratio = (msr >> 8) & 0xFF; > - i = 16; > - found = 0; > - do { > - if (found >= num_delta_fratio) { > - *turbo_freq = fratio; > - return true; > - } > - > - delta_fratio = (msr >> (i + 5)) & 0x7; > - > - if (delta_fratio) { > - found += 1; > - fratio -= delta_fratio; > - } > - > - i += 8; > - } while (i < 64); > - > - return true; > -} > - > -static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size) > -{ > - u64 ratios, counts; > - u32 group_size; > - int err, i; > - > - err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); > - if (err) > - return false; > - > - *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ > - > - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios); > - if (err) > - return false; > - > - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts); > - if (err) > - return false; > - > - for (i = 0; i < 64; i += 8) { > - group_size = (counts >> i) & 0xFF; > - if (group_size >= size) { > - *turbo_freq = (ratios >> i) & 0xFF; > - return true; > - } > - } > - > - return false; > -} > - > -static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) > -{ > - u64 msr; > - int err; > - > - err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); > - if (err) > - return false; > - > - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); > - if (err) > - return false; > - > - *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ > - *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */ > - > - /* The CPU may have less than 4 cores */ > - if (!*turbo_freq) > - *turbo_freq = msr & 0xFF; /* 1C turbo */ > - > - return true; > -} > - > -static bool intel_set_max_freq_ratio(void) > -{ > - u64 base_freq, turbo_freq; > - u64 turbo_ratio; > - > - if (slv_set_max_freq_ratio(&base_freq, &turbo_freq)) > - goto out; > - > - if 
(x86_match_cpu(has_glm_turbo_ratio_limits) && > - skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) > - goto out; > - > - if (x86_match_cpu(has_knl_turbo_ratio_limits) && > - knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) > - goto out; > - > - if (x86_match_cpu(has_skx_turbo_ratio_limits) && > - skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4)) > - goto out; > - > - if (core_set_max_freq_ratio(&base_freq, &turbo_freq)) > - goto out; > - > - return false; > - > -out: > - /* > - * Some hypervisors advertise X86_FEATURE_APERFMPERF > - * but then fill all MSR's with zeroes. > - * Some CPUs have turbo boost but don't declare any turbo ratio > - * in MSR_TURBO_RATIO_LIMIT. > - */ > - if (!base_freq || !turbo_freq) { > - pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n"); > - return false; > - } > - > - turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq); > - if (!turbo_ratio) { > - pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n"); > - return false; > - } > - > - arch_turbo_freq_ratio = turbo_ratio; > - arch_set_max_freq_ratio(turbo_disabled()); > - > - return true; > -} > - > -static void init_counter_refs(void) > -{ > - u64 aperf, mperf; > - > - rdmsrl(MSR_IA32_APERF, aperf); > - rdmsrl(MSR_IA32_MPERF, mperf); > - > - this_cpu_write(arch_prev_aperf, aperf); > - this_cpu_write(arch_prev_mperf, mperf); > -} > - > -#ifdef CONFIG_PM_SLEEP > -static struct syscore_ops freq_invariance_syscore_ops = { > - .resume = init_counter_refs, > -}; > - > -static void register_freq_invariance_syscore_ops(void) > -{ > - /* Bail out if registered already. */ > - if (freq_invariance_syscore_ops.node.prev) > - return; > - > - register_syscore_ops(&freq_invariance_syscore_ops); > -} > -#else > -static inline void register_freq_invariance_syscore_ops(void) {} > -#endif > - > -void init_freq_invariance(bool secondary, bool cppc_ready) > -{ > - bool ret = false; > - > - if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) > - return; > - > - if (secondary) { > - if (static_branch_likely(&arch_scale_freq_key)) { > - init_counter_refs(); > - } > - return; > - } > - > - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) > - ret = intel_set_max_freq_ratio(); > - else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { > - if (!cppc_ready) { > - return; > - } > - ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio); > - } > - > - if (ret) { > - init_counter_refs(); > - static_branch_enable(&arch_scale_freq_key); > - register_freq_invariance_syscore_ops(); > - pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); > - } else { > - pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n"); > - } > -} > - > -static void disable_freq_invariance_workfn(struct work_struct *work) > -{ > - static_branch_disable(&arch_scale_freq_key); > -} > - > -static DECLARE_WORK(disable_freq_invariance_work, > - disable_freq_invariance_workfn); > - > -DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; > - > -void arch_scale_freq_tick(void) > -{ > - u64 freq_scale; > - u64 aperf, mperf; > - u64 acnt, mcnt; > - > - if (!arch_scale_freq_invariant()) > - return; > - > - rdmsrl(MSR_IA32_APERF, aperf); > - rdmsrl(MSR_IA32_MPERF, mperf); > - > - acnt = aperf - this_cpu_read(arch_prev_aperf); > - mcnt = mperf - this_cpu_read(arch_prev_mperf); > - > - this_cpu_write(arch_prev_aperf, aperf); > - this_cpu_write(arch_prev_mperf, mperf); > - > - if 
(check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) > - goto error; > - > - if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt) > - goto error; > - > - freq_scale = div64_u64(acnt, mcnt); > - if (!freq_scale) > - goto error; > - > - if (freq_scale > SCHED_CAPACITY_SCALE) > - freq_scale = SCHED_CAPACITY_SCALE; > - > - this_cpu_write(arch_freq_scale, freq_scale); > - return; > - > -error: > - pr_warn("Scheduler frequency invariance went wobbly, disabling!\n"); > - schedule_work(&disable_freq_invariance_work); > -} > -#endif /* CONFIG_X86_64 */ >
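To see the arithmetic in arch_scale_freq_tick() in isolation: the per-tick value reduces to a fixed-point ratio, freq_scale = (delta_APERF << 2*SCHED_CAPACITY_SHIFT) / (delta_MPERF * arch_max_freq_ratio), clamped to SCHED_CAPACITY_SCALE. The minimal userspace-style sketch below reproduces that arithmetic with invented APERF/MPERF deltas and an invented turbo/base ratio; only SCHED_CAPACITY_SHIFT/SCALE match the kernel's values, everything else is illustrative.

```c
/*
 * Standalone sketch of the freq_scale fixed-point math used in
 * arch_scale_freq_tick(). All counter and ratio values are invented.
 */
#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)	/* 1024 */

int main(void)
{
	/* Pretend these are deltas of MSR_IA32_APERF / MSR_IA32_MPERF. */
	uint64_t acnt = 1800000;	/* cycles at the current frequency */
	uint64_t mcnt = 2000000;	/* cycles at the base frequency    */

	/* Pretend arch_max_freq_ratio: 4C turbo 3.2 GHz over base 2.4 GHz. */
	uint64_t max_freq_ratio = 3200 * SCHED_CAPACITY_SCALE / 2400;	/* 1365 */

	/*
	 * Same shape as the kernel computation (minus the overflow checks):
	 * freq_scale = (acnt << 2*SHIFT) / (mcnt * max_freq_ratio)
	 */
	uint64_t a = acnt << (2 * SCHED_CAPACITY_SHIFT);
	uint64_t m = mcnt * max_freq_ratio;
	uint64_t freq_scale = a / m;

	if (freq_scale > SCHED_CAPACITY_SCALE)
		freq_scale = SCHED_CAPACITY_SCALE;

	/* 0.9x base clock against a ~1.33x turbo ceiling -> ~691/1024. */
	printf("freq_scale = %llu / %lu\n",
	       (unsigned long long)freq_scale, SCHED_CAPACITY_SCALE);
	return 0;
}
```

Running this prints freq_scale = 691 / 1024, i.e. the CPU spent the sampled interval at roughly two thirds of the assumed maximum frequency.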
--- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -6,15 +6,19 @@ * Copyright (C) 2017 Intel Corp. * Author: Len Brown <len.brown@intel.com> */ - +#include <linux/cpufreq.h> #include <linux/delay.h> #include <linux/ktime.h> #include <linux/math64.h> #include <linux/percpu.h> -#include <linux/cpufreq.h> -#include <linux/smp.h> -#include <linux/sched/isolation.h> #include <linux/rcupdate.h> +#include <linux/sched/isolation.h> +#include <linux/sched/topology.h> +#include <linux/smp.h> +#include <linux/syscore_ops.h> + +#include <asm/cpu_device_id.h> +#include <asm/intel-family.h> #include "cpu.h" @@ -152,3 +156,357 @@ unsigned int arch_freq_get_on_cpu(int cp return per_cpu(samples.khz, cpu); } + +#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) +/* + * APERF/MPERF frequency ratio computation. + * + * The scheduler wants to do frequency invariant accounting and needs a <1 + * ratio to account for the 'current' frequency, corresponding to + * freq_curr / freq_max. + * + * Since the frequency freq_curr on x86 is controlled by micro-controller and + * our P-state setting is little more than a request/hint, we need to observe + * the effective frequency 'BusyMHz', i.e. the average frequency over a time + * interval after discarding idle time. This is given by: + * + * BusyMHz = delta_APERF / delta_MPERF * freq_base + * + * where freq_base is the max non-turbo P-state. + * + * The freq_max term has to be set to a somewhat arbitrary value, because we + * can't know which turbo states will be available at a given point in time: + * it all depends on the thermal headroom of the entire package. We set it to + * the turbo level with 4 cores active. + * + * Benchmarks show that's a good compromise between the 1C turbo ratio + * (freq_curr/freq_max would rarely reach 1) and something close to freq_base, + * which would ignore the entire turbo range (a conspicuous part, making + * freq_curr/freq_max always maxed out). + * + * An exception to the heuristic above is the Atom uarch, where we choose the + * highest turbo level for freq_max since Atom's are generally oriented towards + * power efficiency. + * + * Setting freq_max to anything less than the 1C turbo ratio makes the ratio + * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1. + */ + +DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key); + +static DEFINE_PER_CPU(u64, arch_prev_aperf); +static DEFINE_PER_CPU(u64, arch_prev_mperf); +static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE; +static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE; + +void arch_set_max_freq_ratio(bool turbo_disabled) +{ + arch_max_freq_ratio = turbo_disabled ? 
SCHED_CAPACITY_SCALE : + arch_turbo_freq_ratio; +} +EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio); + +static bool turbo_disabled(void) +{ + u64 misc_en; + int err; + + err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en); + if (err) + return false; + + return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); +} + +static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) +{ + int err; + + err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq); + if (err) + return false; + + err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq); + if (err) + return false; + + *base_freq = (*base_freq >> 16) & 0x3F; /* max P state */ + *turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */ + + return true; +} + +#define X86_MATCH(model) \ + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \ + INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL) + +static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = { + X86_MATCH(XEON_PHI_KNL), + X86_MATCH(XEON_PHI_KNM), + {} +}; + +static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = { + X86_MATCH(SKYLAKE_X), + {} +}; + +static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = { + X86_MATCH(ATOM_GOLDMONT), + X86_MATCH(ATOM_GOLDMONT_D), + X86_MATCH(ATOM_GOLDMONT_PLUS), + {} +}; + +static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, + int num_delta_fratio) +{ + int fratio, delta_fratio, found; + int err, i; + u64 msr; + + err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); + if (err) + return false; + + *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ + + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); + if (err) + return false; + + fratio = (msr >> 8) & 0xFF; + i = 16; + found = 0; + do { + if (found >= num_delta_fratio) { + *turbo_freq = fratio; + return true; + } + + delta_fratio = (msr >> (i + 5)) & 0x7; + + if (delta_fratio) { + found += 1; + fratio -= delta_fratio; + } + + i += 8; + } while (i < 64); + + return true; +} + +static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size) +{ + u64 ratios, counts; + u32 group_size; + int err, i; + + err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); + if (err) + return false; + + *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ + + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios); + if (err) + return false; + + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts); + if (err) + return false; + + for (i = 0; i < 64; i += 8) { + group_size = (counts >> i) & 0xFF; + if (group_size >= size) { + *turbo_freq = (ratios >> i) & 0xFF; + return true; + } + } + + return false; +} + +static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) +{ + u64 msr; + int err; + + err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); + if (err) + return false; + + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); + if (err) + return false; + + *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ + *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */ + + /* The CPU may have less than 4 cores */ + if (!*turbo_freq) + *turbo_freq = msr & 0xFF; /* 1C turbo */ + + return true; +} + +static bool intel_set_max_freq_ratio(void) +{ + u64 base_freq, turbo_freq; + u64 turbo_ratio; + + if (slv_set_max_freq_ratio(&base_freq, &turbo_freq)) + goto out; + + if (x86_match_cpu(has_glm_turbo_ratio_limits) && + skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) + goto out; + + if (x86_match_cpu(has_knl_turbo_ratio_limits) && + knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) + goto out; + + if (x86_match_cpu(has_skx_turbo_ratio_limits) && + skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4)) + goto out; + + 
if (core_set_max_freq_ratio(&base_freq, &turbo_freq)) + goto out; + + return false; + +out: + /* + * Some hypervisors advertise X86_FEATURE_APERFMPERF + * but then fill all MSR's with zeroes. + * Some CPUs have turbo boost but don't declare any turbo ratio + * in MSR_TURBO_RATIO_LIMIT. + */ + if (!base_freq || !turbo_freq) { + pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n"); + return false; + } + + turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq); + if (!turbo_ratio) { + pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n"); + return false; + } + + arch_turbo_freq_ratio = turbo_ratio; + arch_set_max_freq_ratio(turbo_disabled()); + + return true; +} + +static void init_counter_refs(void) +{ + u64 aperf, mperf; + + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); + + this_cpu_write(arch_prev_aperf, aperf); + this_cpu_write(arch_prev_mperf, mperf); +} + +#ifdef CONFIG_PM_SLEEP +static struct syscore_ops freq_invariance_syscore_ops = { + .resume = init_counter_refs, +}; + +static void register_freq_invariance_syscore_ops(void) +{ + /* Bail out if registered already. */ + if (freq_invariance_syscore_ops.node.prev) + return; + + register_syscore_ops(&freq_invariance_syscore_ops); +} +#else +static inline void register_freq_invariance_syscore_ops(void) {} +#endif + +void init_freq_invariance(bool secondary, bool cppc_ready) +{ + bool ret = false; + + if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) + return; + + if (secondary) { + if (static_branch_likely(&arch_scale_freq_key)) { + init_counter_refs(); + } + return; + } + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + ret = intel_set_max_freq_ratio(); + else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + if (!cppc_ready) { + return; + } + ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio); + } + + if (ret) { + init_counter_refs(); + static_branch_enable(&arch_scale_freq_key); + register_freq_invariance_syscore_ops(); + pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); + } else { + pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n"); + } +} + +static void disable_freq_invariance_workfn(struct work_struct *work) +{ + static_branch_disable(&arch_scale_freq_key); +} + +static DECLARE_WORK(disable_freq_invariance_work, + disable_freq_invariance_workfn); + +DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; + +void arch_scale_freq_tick(void) +{ + u64 freq_scale; + u64 aperf, mperf; + u64 acnt, mcnt; + + if (!arch_scale_freq_invariant()) + return; + + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); + + acnt = aperf - this_cpu_read(arch_prev_aperf); + mcnt = mperf - this_cpu_read(arch_prev_mperf); + + this_cpu_write(arch_prev_aperf, aperf); + this_cpu_write(arch_prev_mperf, mperf); + + if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) + goto error; + + if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt) + goto error; + + freq_scale = div64_u64(acnt, mcnt); + if (!freq_scale) + goto error; + + if (freq_scale > SCHED_CAPACITY_SCALE) + freq_scale = SCHED_CAPACITY_SCALE; + + this_cpu_write(arch_freq_scale, freq_scale); + return; + +error: + pr_warn("Scheduler frequency invariance went wobbly, disabling!\n"); + schedule_work(&disable_freq_invariance_work); +} +#endif /* CONFIG_X86_64 && CONFIG_SMP */ --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -56,7 +56,6 @@ 
#include <linux/numa.h> #include <linux/pgtable.h> #include <linux/overflow.h> -#include <linux/syscore_ops.h> #include <asm/acpi.h> #include <asm/desc.h> @@ -1847,357 +1846,3 @@ void native_play_dead(void) } #endif - -#ifdef CONFIG_X86_64 -/* - * APERF/MPERF frequency ratio computation. - * - * The scheduler wants to do frequency invariant accounting and needs a <1 - * ratio to account for the 'current' frequency, corresponding to - * freq_curr / freq_max. - * - * Since the frequency freq_curr on x86 is controlled by micro-controller and - * our P-state setting is little more than a request/hint, we need to observe - * the effective frequency 'BusyMHz', i.e. the average frequency over a time - * interval after discarding idle time. This is given by: - * - * BusyMHz = delta_APERF / delta_MPERF * freq_base - * - * where freq_base is the max non-turbo P-state. - * - * The freq_max term has to be set to a somewhat arbitrary value, because we - * can't know which turbo states will be available at a given point in time: - * it all depends on the thermal headroom of the entire package. We set it to - * the turbo level with 4 cores active. - * - * Benchmarks show that's a good compromise between the 1C turbo ratio - * (freq_curr/freq_max would rarely reach 1) and something close to freq_base, - * which would ignore the entire turbo range (a conspicuous part, making - * freq_curr/freq_max always maxed out). - * - * An exception to the heuristic above is the Atom uarch, where we choose the - * highest turbo level for freq_max since Atom's are generally oriented towards - * power efficiency. - * - * Setting freq_max to anything less than the 1C turbo ratio makes the ratio - * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1. - */ - -DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key); - -static DEFINE_PER_CPU(u64, arch_prev_aperf); -static DEFINE_PER_CPU(u64, arch_prev_mperf); -static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE; -static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE; - -void arch_set_max_freq_ratio(bool turbo_disabled) -{ - arch_max_freq_ratio = turbo_disabled ? 
SCHED_CAPACITY_SCALE : - arch_turbo_freq_ratio; -} -EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio); - -static bool turbo_disabled(void) -{ - u64 misc_en; - int err; - - err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en); - if (err) - return false; - - return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); -} - -static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) -{ - int err; - - err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq); - if (err) - return false; - - err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq); - if (err) - return false; - - *base_freq = (*base_freq >> 16) & 0x3F; /* max P state */ - *turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */ - - return true; -} - -#define X86_MATCH(model) \ - X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \ - INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL) - -static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = { - X86_MATCH(XEON_PHI_KNL), - X86_MATCH(XEON_PHI_KNM), - {} -}; - -static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = { - X86_MATCH(SKYLAKE_X), - {} -}; - -static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = { - X86_MATCH(ATOM_GOLDMONT), - X86_MATCH(ATOM_GOLDMONT_D), - X86_MATCH(ATOM_GOLDMONT_PLUS), - {} -}; - -static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, - int num_delta_fratio) -{ - int fratio, delta_fratio, found; - int err, i; - u64 msr; - - err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); - if (err) - return false; - - *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ - - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); - if (err) - return false; - - fratio = (msr >> 8) & 0xFF; - i = 16; - found = 0; - do { - if (found >= num_delta_fratio) { - *turbo_freq = fratio; - return true; - } - - delta_fratio = (msr >> (i + 5)) & 0x7; - - if (delta_fratio) { - found += 1; - fratio -= delta_fratio; - } - - i += 8; - } while (i < 64); - - return true; -} - -static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size) -{ - u64 ratios, counts; - u32 group_size; - int err, i; - - err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); - if (err) - return false; - - *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ - - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios); - if (err) - return false; - - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts); - if (err) - return false; - - for (i = 0; i < 64; i += 8) { - group_size = (counts >> i) & 0xFF; - if (group_size >= size) { - *turbo_freq = (ratios >> i) & 0xFF; - return true; - } - } - - return false; -} - -static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) -{ - u64 msr; - int err; - - err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); - if (err) - return false; - - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); - if (err) - return false; - - *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ - *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */ - - /* The CPU may have less than 4 cores */ - if (!*turbo_freq) - *turbo_freq = msr & 0xFF; /* 1C turbo */ - - return true; -} - -static bool intel_set_max_freq_ratio(void) -{ - u64 base_freq, turbo_freq; - u64 turbo_ratio; - - if (slv_set_max_freq_ratio(&base_freq, &turbo_freq)) - goto out; - - if (x86_match_cpu(has_glm_turbo_ratio_limits) && - skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) - goto out; - - if (x86_match_cpu(has_knl_turbo_ratio_limits) && - knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) - goto out; - - if (x86_match_cpu(has_skx_turbo_ratio_limits) && - skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4)) - goto out; - - 
if (core_set_max_freq_ratio(&base_freq, &turbo_freq)) - goto out; - - return false; - -out: - /* - * Some hypervisors advertise X86_FEATURE_APERFMPERF - * but then fill all MSR's with zeroes. - * Some CPUs have turbo boost but don't declare any turbo ratio - * in MSR_TURBO_RATIO_LIMIT. - */ - if (!base_freq || !turbo_freq) { - pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n"); - return false; - } - - turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq); - if (!turbo_ratio) { - pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n"); - return false; - } - - arch_turbo_freq_ratio = turbo_ratio; - arch_set_max_freq_ratio(turbo_disabled()); - - return true; -} - -static void init_counter_refs(void) -{ - u64 aperf, mperf; - - rdmsrl(MSR_IA32_APERF, aperf); - rdmsrl(MSR_IA32_MPERF, mperf); - - this_cpu_write(arch_prev_aperf, aperf); - this_cpu_write(arch_prev_mperf, mperf); -} - -#ifdef CONFIG_PM_SLEEP -static struct syscore_ops freq_invariance_syscore_ops = { - .resume = init_counter_refs, -}; - -static void register_freq_invariance_syscore_ops(void) -{ - /* Bail out if registered already. */ - if (freq_invariance_syscore_ops.node.prev) - return; - - register_syscore_ops(&freq_invariance_syscore_ops); -} -#else -static inline void register_freq_invariance_syscore_ops(void) {} -#endif - -void init_freq_invariance(bool secondary, bool cppc_ready) -{ - bool ret = false; - - if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) - return; - - if (secondary) { - if (static_branch_likely(&arch_scale_freq_key)) { - init_counter_refs(); - } - return; - } - - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - ret = intel_set_max_freq_ratio(); - else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { - if (!cppc_ready) { - return; - } - ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio); - } - - if (ret) { - init_counter_refs(); - static_branch_enable(&arch_scale_freq_key); - register_freq_invariance_syscore_ops(); - pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); - } else { - pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n"); - } -} - -static void disable_freq_invariance_workfn(struct work_struct *work) -{ - static_branch_disable(&arch_scale_freq_key); -} - -static DECLARE_WORK(disable_freq_invariance_work, - disable_freq_invariance_workfn); - -DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; - -void arch_scale_freq_tick(void) -{ - u64 freq_scale; - u64 aperf, mperf; - u64 acnt, mcnt; - - if (!arch_scale_freq_invariant()) - return; - - rdmsrl(MSR_IA32_APERF, aperf); - rdmsrl(MSR_IA32_MPERF, mperf); - - acnt = aperf - this_cpu_read(arch_prev_aperf); - mcnt = mperf - this_cpu_read(arch_prev_mperf); - - this_cpu_write(arch_prev_aperf, aperf); - this_cpu_write(arch_prev_mperf, mperf); - - if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) - goto error; - - if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt) - goto error; - - freq_scale = div64_u64(acnt, mcnt); - if (!freq_scale) - goto error; - - if (freq_scale > SCHED_CAPACITY_SCALE) - freq_scale = SCHED_CAPACITY_SCALE; - - this_cpu_write(arch_freq_scale, freq_scale); - return; - -error: - pr_warn("Scheduler frequency invariance went wobbly, disabling!\n"); - schedule_work(&disable_freq_invariance_work); -} -#endif /* CONFIG_X86_64 */
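For readers unpacking the MSR bit fields in core_set_max_freq_ratio(): the maximum non-turbo ratio sits in bits 15:8 of MSR_PLATFORM_INFO and the 4-core turbo ratio in bits 31:24 of MSR_TURBO_RATIO_LIMIT, with bits 7:0 (1C turbo) as the fallback when the 4C field reads zero. A small sketch with fabricated MSR contents follows; on real hardware the values come from rdmsrl_safe(), not constants.

```c
/*
 * Sketch of the field extraction done by core_set_max_freq_ratio().
 * The MSR contents below are fabricated for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t platform_info     = 0x0000000000001800ULL; /* bits 15:8 = 0x18 (24) */
	uint64_t turbo_ratio_limit = 0x0000000020212223ULL; /* bits 31:24 = 0x20 (32) */

	uint64_t base_freq  = (platform_info >> 8) & 0xFF;	/* max non-turbo ratio */
	uint64_t turbo_freq = (turbo_ratio_limit >> 24) & 0xFF; /* 4C turbo ratio */

	/*
	 * Mirror the kernel fallback: CPUs with fewer than 4 cores may leave
	 * the 4C field zero, in which case the 1C ratio is used instead.
	 */
	if (!turbo_freq)
		turbo_freq = turbo_ratio_limit & 0xFF;

	/* arch_turbo_freq_ratio = turbo_freq * SCHED_CAPACITY_SCALE / base_freq */
	uint64_t ratio = turbo_freq * 1024 / base_freq;

	printf("base ratio %llu, 4C turbo ratio %llu, scaled ratio %llu/1024\n",
	       (unsigned long long)base_freq, (unsigned long long)turbo_freq,
	       (unsigned long long)ratio);
	return 0;
}
```

With a base ratio of 24 (2.4 GHz assuming the usual 100 MHz bus clock) and a 4C turbo ratio of 32, the scaled ratio comes out to 32 * 1024 / 24 = 1365, which is what arch_turbo_freq_ratio would hold on such a part.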
as this can share code with the preexisting APERF/MPERF code.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/aperfmperf.c | 366 ++++++++++++++++++++++++++++++++++++++-
 arch/x86/kernel/smpboot.c        | 355 -------------------------------------
 2 files changed, 362 insertions(+), 359 deletions(-)
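For context on the consumer side (not part of this patch): the per-CPU arch_freq_scale that arch_scale_freq_tick() maintains is what the scheduler reads back as its frequency capacity, and PELT-style accounting scales elapsed time by it so that utilization reflects work done rather than wall-clock time at an unknown frequency. The sketch below shows the shape of that scaling; the helper name follows the kernel's cap_scale() convention, but the snippet is an illustration, not a quote of the scheduler code.

```c
/*
 * Illustration of frequency-invariant time scaling, assuming a
 * capacity scale of 1024. A CPU running at ~half of freq_max
 * accrues only ~half the "invariant" time for a given interval.
 */
#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)

/* Same shape as the kernel's cap_scale() helper. */
static uint64_t cap_scale(uint64_t delta, unsigned long scale)
{
	return (delta * scale) >> SCHED_CAPACITY_SHIFT;
}

int main(void)
{
	uint64_t delta_ns = 4000000;	/* 4 ms of wall-clock runtime       */
	unsigned long freq_scale = 512;	/* pretend arch_freq_scale: ~0.5    */

	uint64_t invariant_ns = cap_scale(delta_ns, freq_scale);

	printf("%llu ns at scale %lu/1024 counts as %llu ns\n",
	       (unsigned long long)delta_ns, freq_scale,
	       (unsigned long long)invariant_ns);
	return 0;
}
```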