Message ID | 20240913132944.1880703-4-beata.michalska@arm.com (mailing list archive) |
---|---|
State | Changes Requested, archived |
Headers | show |
Series | Add support for AArch64 AMUv1-based average freq | expand |
Hi Beata, Thank you for the patches. On 13/09/24 18:59, Beata Michalska wrote: > External email: Use caution opening links or attachments > > > With the Frequency Invariance Engine (FIE) being already wired up with > sched tick and making use of relevant (core counter and constant > counter) AMU counters, getting the average frequency for a given CPU, > can be achieved by utilizing the frequency scale factor which reflects > an average CPU frequency for the last tick period length. > > The solution is partially based on APERF/MPERF implementation of > arch_freq_get_on_cpu. > > Suggested-by: Ionela Voinescu <ionela.voinescu@arm.com> > Signed-off-by: Beata Michalska <beata.michalska@arm.com> > --- > arch/arm64/kernel/topology.c | 109 +++++++++++++++++++++++++++++++---- > 1 file changed, 99 insertions(+), 10 deletions(-) > > diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c > index cb180684d10d..22e510733336 100644 > --- a/arch/arm64/kernel/topology.c > +++ b/arch/arm64/kernel/topology.c > @@ -17,6 +17,7 @@ > #include <linux/cpufreq.h> > #include <linux/init.h> > #include <linux/percpu.h> > +#include <linux/sched/isolation.h> > > #include <asm/cpu.h> > #include <asm/cputype.h> > @@ -88,18 +89,28 @@ int __init parse_acpi_topology(void) > * initialized. 
> */ > static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT); > -static DEFINE_PER_CPU(u64, arch_const_cycles_prev); > -static DEFINE_PER_CPU(u64, arch_core_cycles_prev); > static cpumask_var_t amu_fie_cpus; > > +struct amu_cntr_sample { > + u64 arch_const_cycles_prev; > + u64 arch_core_cycles_prev; > + unsigned long last_scale_update; > +}; > + > +static DEFINE_PER_CPU_SHARED_ALIGNED(struct amu_cntr_sample, cpu_amu_samples); > + > void update_freq_counters_refs(void) > { > - this_cpu_write(arch_core_cycles_prev, read_corecnt()); > - this_cpu_write(arch_const_cycles_prev, read_constcnt()); > + struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples); > + > + amu_sample->arch_core_cycles_prev = read_corecnt(); > + amu_sample->arch_const_cycles_prev = read_constcnt(); > } > > static inline bool freq_counters_valid(int cpu) > { > + struct amu_cntr_sample *amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu); > + > if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask)) > return false; > > @@ -108,8 +119,8 @@ static inline bool freq_counters_valid(int cpu) > return false; > } > > - if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) || > - !per_cpu(arch_core_cycles_prev, cpu))) { > + if (unlikely(!amu_sample->arch_const_cycles_prev || > + !amu_sample->arch_core_cycles_prev)) { > pr_debug("CPU%d: cycle counters are not enabled.\n", cpu); > return false; > } > @@ -152,17 +163,22 @@ void freq_inv_set_max_ratio(int cpu, u64 max_rate) > > static void amu_scale_freq_tick(void) > { > + struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples); > u64 prev_core_cnt, prev_const_cnt; > u64 core_cnt, const_cnt, scale; > > - prev_const_cnt = this_cpu_read(arch_const_cycles_prev); > - prev_core_cnt = this_cpu_read(arch_core_cycles_prev); > + prev_const_cnt = amu_sample->arch_const_cycles_prev; > + prev_core_cnt = amu_sample->arch_core_cycles_prev; > > update_freq_counters_refs(); > > - const_cnt = 
this_cpu_read(arch_const_cycles_prev); > - core_cnt = this_cpu_read(arch_core_cycles_prev); > + const_cnt = amu_sample->arch_const_cycles_prev; > + core_cnt = amu_sample->arch_core_cycles_prev; > > + /* > + * This should not happen unless the AMUs have been reset and the > + * counter values have not been restored - unlikely > + */ > if (unlikely(core_cnt <= prev_core_cnt || > const_cnt <= prev_const_cnt)) > return; > @@ -182,6 +198,8 @@ static void amu_scale_freq_tick(void) > > scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE); > this_cpu_write(arch_freq_scale, (unsigned long)scale); > + > + amu_sample->last_scale_update = jiffies; > } > > static struct scale_freq_data amu_sfd = { > @@ -189,6 +207,77 @@ static struct scale_freq_data amu_sfd = { > .set_freq_scale = amu_scale_freq_tick, > }; > > +static __always_inline bool amu_fie_cpu_supported(unsigned int cpu) > +{ > + return cpumask_available(amu_fie_cpus) && > + cpumask_test_cpu(cpu, amu_fie_cpus); > +} > + > +#define AMU_SAMPLE_EXP_MS 20 > + > +int arch_freq_avg_get_on_cpu(int cpu) > +{ > + struct amu_cntr_sample *amu_sample; > + unsigned int start_cpu = cpu; > + unsigned long last_update; > + unsigned int freq = 0; > + u64 scale; > + > + if (!amu_fie_cpu_supported(cpu) || !arch_scale_freq_ref(cpu)) > + return -EOPNOTSUPP; > + > +retry: > + amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu); > + > + last_update = amu_sample->last_scale_update; > + > + /* > + * For those CPUs that are in full dynticks mode, and those that have 'or those' to match with if condition? > + * not seen tick for a while, try an alternative source for the counters > + * (and thus freq scale), if available, for given policy: this boils > + * down to identifying an active cpu within the same freq domain, if any. 
> + */ > + if (!housekeeping_cpu(cpu, HK_TYPE_TICK) || > + time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) { > + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); > + int ref_cpu = cpu; > + > + if (!policy) > + return 0; > + We can skip the rest of code if policy has a single cpu. AFAIR, one of the previous versions had similar check. if (!policy_is_shared(policy)) { cpufreq_cpu_put(policy); goto freq_comput; } > + if (!cpumask_intersects(policy->related_cpus, > + housekeeping_cpumask(HK_TYPE_TICK))) { > + cpufreq_cpu_put(policy); > + return -EOPNOTSUPP; > + } > + > + > + do { > + ref_cpu = cpumask_next_wrap(ref_cpu, policy->cpus, > + start_cpu, false); > + > + } while (ref_cpu < nr_cpu_ids && idle_cpu(ref_cpu)); > + > + cpufreq_cpu_put(policy); > + > + if (ref_cpu >= nr_cpu_ids) > + /* No alternative to pull info from */ > + return 0; > + The 'cpuinfo_avg_freq' node gives 'unknown' value for single CPU per policy as 'ref_cpu' increments to 'nr_cpu_ids'. We can use the same CPU instead of returning zero if no alternative CPU. # cat /sys/devices/system/cpu/cpu2/cpufreq/cpuinfo_avg_freq <unknown> ---- if (ref_cpu >= nr_cpu_ids) /* Use same CPU if no alternative to pull info from */ goto freq_comput; .. freq_comput: scale = arch_scale_freq_capacity(cpu); freq = scale * arch_scale_freq_ref(cpu); ---- Thank you, Sumit Gupta P.S. Will be on afk for next 2 weeks with no access to email. Please expect a delay in response. > + cpu = ref_cpu; > + goto retry; > + } > + /* > + * Reversed computation to the one used to determine > + * the arch_freq_scale value > + * (see amu_scale_freq_tick for details) > + */ > + scale = arch_scale_freq_capacity(cpu); > + freq = scale * arch_scale_freq_ref(cpu); > + freq >>= SCHED_CAPACITY_SHIFT; > + return freq; > +} > + > static void amu_fie_setup(const struct cpumask *cpus) > { > int cpu; > -- > 2.25.1 >
On Tue, Sep 17, 2024 at 05:41:09PM +0530, Sumit Gupta wrote: > Hi Beata, Hi Sumit, > > Thank you for the patches. Thank you for having a look at those. > > On 13/09/24 18:59, Beata Michalska wrote: > > External email: Use caution opening links or attachments > > > > > > With the Frequency Invariance Engine (FIE) being already wired up with > > sched tick and making use of relevant (core counter and constant > > counter) AMU counters, getting the average frequency for a given CPU, > > can be achieved by utilizing the frequency scale factor which reflects > > an average CPU frequency for the last tick period length. > > > > The solution is partially based on APERF/MPERF implementation of > > arch_freq_get_on_cpu. > > > > Suggested-by: Ionela Voinescu <ionela.voinescu@arm.com> > > Signed-off-by: Beata Michalska <beata.michalska@arm.com> > > --- > > arch/arm64/kernel/topology.c | 109 +++++++++++++++++++++++++++++++---- > > 1 file changed, 99 insertions(+), 10 deletions(-) > > > > diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c > > index cb180684d10d..22e510733336 100644 > > --- a/arch/arm64/kernel/topology.c > > +++ b/arch/arm64/kernel/topology.c > > @@ -17,6 +17,7 @@ > > #include <linux/cpufreq.h> > > #include <linux/init.h> > > #include <linux/percpu.h> > > +#include <linux/sched/isolation.h> > > > > #include <asm/cpu.h> > > #include <asm/cputype.h> > > @@ -88,18 +89,28 @@ int __init parse_acpi_topology(void) > > * initialized. 
> > */ > > static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT); > > -static DEFINE_PER_CPU(u64, arch_const_cycles_prev); > > -static DEFINE_PER_CPU(u64, arch_core_cycles_prev); > > static cpumask_var_t amu_fie_cpus; > > > > +struct amu_cntr_sample { > > + u64 arch_const_cycles_prev; > > + u64 arch_core_cycles_prev; > > + unsigned long last_scale_update; > > +}; > > + > > +static DEFINE_PER_CPU_SHARED_ALIGNED(struct amu_cntr_sample, cpu_amu_samples); > > + > > void update_freq_counters_refs(void) > > { > > - this_cpu_write(arch_core_cycles_prev, read_corecnt()); > > - this_cpu_write(arch_const_cycles_prev, read_constcnt()); > > + struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples); > > + > > + amu_sample->arch_core_cycles_prev = read_corecnt(); > > + amu_sample->arch_const_cycles_prev = read_constcnt(); > > } > > > > static inline bool freq_counters_valid(int cpu) > > { > > + struct amu_cntr_sample *amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu); > > + > > if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask)) > > return false; > > > > @@ -108,8 +119,8 @@ static inline bool freq_counters_valid(int cpu) > > return false; > > } > > > > - if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) || > > - !per_cpu(arch_core_cycles_prev, cpu))) { > > + if (unlikely(!amu_sample->arch_const_cycles_prev || > > + !amu_sample->arch_core_cycles_prev)) { > > pr_debug("CPU%d: cycle counters are not enabled.\n", cpu); > > return false; > > } > > @@ -152,17 +163,22 @@ void freq_inv_set_max_ratio(int cpu, u64 max_rate) > > > > static void amu_scale_freq_tick(void) > > { > > + struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples); > > u64 prev_core_cnt, prev_const_cnt; > > u64 core_cnt, const_cnt, scale; > > > > - prev_const_cnt = this_cpu_read(arch_const_cycles_prev); > > - prev_core_cnt = this_cpu_read(arch_core_cycles_prev); > > + prev_const_cnt = amu_sample->arch_const_cycles_prev; > > + 
prev_core_cnt = amu_sample->arch_core_cycles_prev; > > > > update_freq_counters_refs(); > > > > - const_cnt = this_cpu_read(arch_const_cycles_prev); > > - core_cnt = this_cpu_read(arch_core_cycles_prev); > > + const_cnt = amu_sample->arch_const_cycles_prev; > > + core_cnt = amu_sample->arch_core_cycles_prev; > > > > + /* > > + * This should not happen unless the AMUs have been reset and the > > + * counter values have not been restored - unlikely > > + */ > > if (unlikely(core_cnt <= prev_core_cnt || > > const_cnt <= prev_const_cnt)) > > return; > > @@ -182,6 +198,8 @@ static void amu_scale_freq_tick(void) > > > > scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE); > > this_cpu_write(arch_freq_scale, (unsigned long)scale); > > + > > + amu_sample->last_scale_update = jiffies; > > } > > > > static struct scale_freq_data amu_sfd = { > > @@ -189,6 +207,77 @@ static struct scale_freq_data amu_sfd = { > > .set_freq_scale = amu_scale_freq_tick, > > }; > > > > +static __always_inline bool amu_fie_cpu_supported(unsigned int cpu) > > +{ > > + return cpumask_available(amu_fie_cpus) && > > + cpumask_test_cpu(cpu, amu_fie_cpus); > > +} > > + > > +#define AMU_SAMPLE_EXP_MS 20 > > + > > +int arch_freq_avg_get_on_cpu(int cpu) > > +{ > > + struct amu_cntr_sample *amu_sample; > > + unsigned int start_cpu = cpu; > > + unsigned long last_update; > > + unsigned int freq = 0; > > + u64 scale; > > + > > + if (!amu_fie_cpu_supported(cpu) || !arch_scale_freq_ref(cpu)) > > + return -EOPNOTSUPP; > > + > > +retry: > > + amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu); > > + > > + last_update = amu_sample->last_scale_update; > > + > > + /* > > + * For those CPUs that are in full dynticks mode, and those that have > 'or those' to match with if condition? Yeah, might be. 
> > > + * not seen tick for a while, try an alternative source for the counters > > + * (and thus freq scale), if available, for given policy: this boils > > + * down to identifying an active cpu within the same freq domain, if any. > > + */ > > + if (!housekeeping_cpu(cpu, HK_TYPE_TICK) || > > + time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) { > > + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); > > + int ref_cpu = cpu; > > + > > + if (!policy) > > + return 0; > > + > > We can skip the rest of code if policy has a single cpu. AFAIR, one of the > previous versions had similar check. > > if (!policy_is_shared(policy)) { > cpufreq_cpu_put(policy); > goto freq_comput; > } True, we could but then this case is covered by cpumask_next_wrap which for single-cpu policies will render the ref_cpu invalid, so policy_is_shared check seemed unnecessary. > > > + if (!cpumask_intersects(policy->related_cpus, > > + housekeeping_cpumask(HK_TYPE_TICK))) { > > + cpufreq_cpu_put(policy); > > + return -EOPNOTSUPP; > > + } > > + > > + > > + do { > > + ref_cpu = cpumask_next_wrap(ref_cpu, policy->cpus, > > + start_cpu, false); > > + > > + } while (ref_cpu < nr_cpu_ids && idle_cpu(ref_cpu)); > > + > > + cpufreq_cpu_put(policy); > > + > > + if (ref_cpu >= nr_cpu_ids) > > + /* No alternative to pull info from */ > > + return 0; > > + > > The 'cpuinfo_avg_freq' node gives 'unknown' value for single CPU per policy > as 'ref_cpu' increments to 'nr_cpu_ids'. We can use the same CPU instead of > returning zero if no alternative CPU. > > # cat /sys/devices/system/cpu/cpu2/cpufreq/cpuinfo_avg_freq > <unknown> > > ---- > if (ref_cpu >= nr_cpu_ids) > /* Use same CPU if no alternative to pull info from */ > goto freq_comput; > > .. > freq_comput: > scale = arch_scale_freq_capacity(cpu); > freq = scale * arch_scale_freq_ref(cpu); > ---- > This boils down to the question what that function, and the information it provides, represent really. 
The 'unknown' here simply says the CPU has been idle for a while and as such the frequency data is a bit stale and it does not represent the average freq the CPU is actually running at anymore, which is the intention here really. Or, that the given CPU is a non-housekeeping one. Either way I believe this is useful information, instead of providing stale data with no indication of whether the frequency is really the 'current' one or not. If that is somehow undesirable we can discuss this further, though I'd rather avoid exposing an interface where the feedback provided is open to interpretation at all times. --- Best Regards Beata > Thank you, > Sumit Gupta > > P.S. Will be on afk for next 2 weeks with no access to email. Please expect > a delay in response. > > > + cpu = ref_cpu; > > + goto retry; > > + } > > + /* > > + * Reversed computation to the one used to determine > > + * the arch_freq_scale value > > + * (see amu_scale_freq_tick for details) > > + */ > > + scale = arch_scale_freq_capacity(cpu); > > + freq = scale * arch_scale_freq_ref(cpu); > > + freq >>= SCHED_CAPACITY_SHIFT; > > + return freq; > > +} > > + > > > static void amu_fie_setup(const struct cpumask *cpus) > > { > > int cpu; > > -- > > 2.25.1 > >
On Thu, Sep 26, 2024 at 12:34:01PM GMT, Beata Michalska wrote: >On Tue, Sep 17, 2024 at 05:41:09PM +0530, Sumit Gupta wrote: >> Hi Beata, >Hi Sumit, >> >> Thank you for the patches. >Thank you for having a look at those. >> >> On 13/09/24 18:59, Beata Michalska wrote: >> > External email: Use caution opening links or attachments >> > >> > >> > With the Frequency Invariance Engine (FIE) being already wired up with >> > sched tick and making use of relevant (core counter and constant >> > counter) AMU counters, getting the average frequency for a given CPU, >> > can be achieved by utilizing the frequency scale factor which reflects >> > an average CPU frequency for the last tick period length. >> > >> > The solution is partially based on APERF/MPERF implementation of >> > arch_freq_get_on_cpu. >> > >> > Suggested-by: Ionela Voinescu <ionela.voinescu@arm.com> >> > Signed-off-by: Beata Michalska <beata.michalska@arm.com> >> > --- >> > arch/arm64/kernel/topology.c | 109 +++++++++++++++++++++++++++++++---- >> > 1 file changed, 99 insertions(+), 10 deletions(-) >> > >> > diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c >> > index cb180684d10d..22e510733336 100644 >> > --- a/arch/arm64/kernel/topology.c >> > +++ b/arch/arm64/kernel/topology.c >> > @@ -17,6 +17,7 @@ >> > #include <linux/cpufreq.h> >> > #include <linux/init.h> >> > #include <linux/percpu.h> >> > +#include <linux/sched/isolation.h> >> > >> > #include <asm/cpu.h> >> > #include <asm/cputype.h> >> > @@ -88,18 +89,28 @@ int __init parse_acpi_topology(void) >> > * initialized. 
>> > */ >> > static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT); >> > -static DEFINE_PER_CPU(u64, arch_const_cycles_prev); >> > -static DEFINE_PER_CPU(u64, arch_core_cycles_prev); >> > static cpumask_var_t amu_fie_cpus; >> > >> > +struct amu_cntr_sample { >> > + u64 arch_const_cycles_prev; >> > + u64 arch_core_cycles_prev; >> > + unsigned long last_scale_update; >> > +}; >> > + >> > +static DEFINE_PER_CPU_SHARED_ALIGNED(struct amu_cntr_sample, cpu_amu_samples); >> > + >> > void update_freq_counters_refs(void) >> > { >> > - this_cpu_write(arch_core_cycles_prev, read_corecnt()); >> > - this_cpu_write(arch_const_cycles_prev, read_constcnt()); >> > + struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples); >> > + >> > + amu_sample->arch_core_cycles_prev = read_corecnt(); >> > + amu_sample->arch_const_cycles_prev = read_constcnt(); >> > } >> > >> > static inline bool freq_counters_valid(int cpu) >> > { >> > + struct amu_cntr_sample *amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu); >> > + >> > if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask)) >> > return false; >> > >> > @@ -108,8 +119,8 @@ static inline bool freq_counters_valid(int cpu) >> > return false; >> > } >> > >> > - if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) || >> > - !per_cpu(arch_core_cycles_prev, cpu))) { >> > + if (unlikely(!amu_sample->arch_const_cycles_prev || >> > + !amu_sample->arch_core_cycles_prev)) { >> > pr_debug("CPU%d: cycle counters are not enabled.\n", cpu); >> > return false; >> > } >> > @@ -152,17 +163,22 @@ void freq_inv_set_max_ratio(int cpu, u64 max_rate) >> > >> > static void amu_scale_freq_tick(void) >> > { >> > + struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples); >> > u64 prev_core_cnt, prev_const_cnt; >> > u64 core_cnt, const_cnt, scale; >> > >> > - prev_const_cnt = this_cpu_read(arch_const_cycles_prev); >> > - prev_core_cnt = this_cpu_read(arch_core_cycles_prev); >> > + 
prev_const_cnt = amu_sample->arch_const_cycles_prev; >> > + prev_core_cnt = amu_sample->arch_core_cycles_prev; >> > >> > update_freq_counters_refs(); >> > >> > - const_cnt = this_cpu_read(arch_const_cycles_prev); >> > - core_cnt = this_cpu_read(arch_core_cycles_prev); >> > + const_cnt = amu_sample->arch_const_cycles_prev; >> > + core_cnt = amu_sample->arch_core_cycles_prev; >> > >> > + /* >> > + * This should not happen unless the AMUs have been reset and the >> > + * counter values have not been restored - unlikely >> > + */ >> > if (unlikely(core_cnt <= prev_core_cnt || >> > const_cnt <= prev_const_cnt)) >> > return; >> > @@ -182,6 +198,8 @@ static void amu_scale_freq_tick(void) >> > >> > scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE); >> > this_cpu_write(arch_freq_scale, (unsigned long)scale); >> > + >> > + amu_sample->last_scale_update = jiffies; >> > } >> > >> > static struct scale_freq_data amu_sfd = { >> > @@ -189,6 +207,77 @@ static struct scale_freq_data amu_sfd = { >> > .set_freq_scale = amu_scale_freq_tick, >> > }; >> > >> > +static __always_inline bool amu_fie_cpu_supported(unsigned int cpu) >> > +{ >> > + return cpumask_available(amu_fie_cpus) && >> > + cpumask_test_cpu(cpu, amu_fie_cpus); >> > +} >> > + >> > +#define AMU_SAMPLE_EXP_MS 20 >> > + >> > +int arch_freq_avg_get_on_cpu(int cpu) >> > +{ >> > + struct amu_cntr_sample *amu_sample; >> > + unsigned int start_cpu = cpu; >> > + unsigned long last_update; >> > + unsigned int freq = 0; >> > + u64 scale; >> > + >> > + if (!amu_fie_cpu_supported(cpu) || !arch_scale_freq_ref(cpu)) >> > + return -EOPNOTSUPP; >> > + >> > +retry: >> > + amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu); >> > + >> > + last_update = amu_sample->last_scale_update; >> > + >> > + /* >> > + * For those CPUs that are in full dynticks mode, and those that have >> 'or those' to match with if condition? >Yeah, might be. 
>> >> > + * not seen tick for a while, try an alternative source for the counters >> > + * (and thus freq scale), if available, for given policy: this boils >> > + * down to identifying an active cpu within the same freq domain, if any. >> > + */ >> > + if (!housekeeping_cpu(cpu, HK_TYPE_TICK) || >> > + time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) { >> > + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); >> > + int ref_cpu = cpu; >> > + >> > + if (!policy) >> > + return 0; >> > + >> >> We can skip the rest of code if policy has a single cpu. AFAIR, one of the >> previous versions had similar check. >> >> if (!policy_is_shared(policy)) { >> cpufreq_cpu_put(policy); >> goto freq_comput; >> } >True, we could but then this case is covered by cpumask_next_wrap >which for single-cpu policies will render the ref_cpu invalid, >so policy_is_shared check seemed unnecessary. >> >> > + if (!cpumask_intersects(policy->related_cpus, >> > + housekeeping_cpumask(HK_TYPE_TICK))) { >> > + cpufreq_cpu_put(policy); >> > + return -EOPNOTSUPP; >> > + } >> > + >> > + >> > + do { >> > + ref_cpu = cpumask_next_wrap(ref_cpu, policy->cpus, >> > + start_cpu, false); >> > + >> > + } while (ref_cpu < nr_cpu_ids && idle_cpu(ref_cpu)); >> > + >> > + cpufreq_cpu_put(policy); >> > + >> > + if (ref_cpu >= nr_cpu_ids) >> > + /* No alternative to pull info from */ >> > + return 0; >> > + >> >> The 'cpuinfo_avg_freq' node gives 'unknown' value for single CPU per policy >> as 'ref_cpu' increments to 'nr_cpu_ids'. We can use the same CPU instead of >> returning zero if no alternative CPU. >> >> # cat /sys/devices/system/cpu/cpu2/cpufreq/cpuinfo_avg_freq >> <unknown> >> >> ---- >> if (ref_cpu >= nr_cpu_ids) >> /* Use same CPU if no alternative to pull info from */ >> goto freq_comput; >> >> .. 
>> freq_comput: >> scale = arch_scale_freq_capacity(cpu); >> freq = scale * arch_scale_freq_ref(cpu); >> ---- >> >This boils down to the question what that function, and the information it >provides, represent really. The 'unknown' here simply says the CPU has been idle >for a while and as such the frequency data is a bit stale and it does not >represent the average freq the CPU is actually running at anymore, which is >the intention here really. Or, that the given CPU is a non-housekeeping one. >Either way I believe this is a useful information, instead of providing >stale data with no indication on whether the frequency is really the 'current' >one or not. > >If that is somehow undesirable we can discuss this further, though I'd rather >avoid exposing an interface where the feedback provided is open to >interpretation at all times. Would it make sense to identify that the frequency reporting is unknown due to cpu being idle vs some other issue like being a non-housekeeping CPU? Would returning a value of 0 make it easier for tools to represent that the CPU is currently idle? Thanks, Vanshidhar > >--- >Best Regards >Beata >> Thank you, >> Sumit Gupta >> >> P.S. Will be on afk for next 2 weeks with no access to email. Please expect >> a delay in response. >> >> > + cpu = ref_cpu; >> > + goto retry; >> > + } >> > + /* >> > + * Reversed computation to the one used to determine >> > + * the arch_freq_scale value >> > + * (see amu_scale_freq_tick for details) >> > + */ >> > + scale = arch_scale_freq_capacity(cpu); >> > + freq = scale * arch_scale_freq_ref(cpu); >> > + freq >>= SCHED_CAPACITY_SHIFT; >> > + return freq; >> > +} >> > + >> >> > static void amu_fie_setup(const struct cpumask *cpus) >> > { >> > int cpu; >> > -- >> > 2.25.1 >> >
On Thu, Sep 26, 2024 at 04:21:14PM -0700, Vanshidhar Konda wrote: > On Thu, Sep 26, 2024 at 12:34:01PM GMT, Beata Michalska wrote: > > On Tue, Sep 17, 2024 at 05:41:09PM +0530, Sumit Gupta wrote: > > > Hi Beata, > > Hi Sumit, > > > > > > Thank you for the patches. > > Thank you for having a look at those. > > > > > > On 13/09/24 18:59, Beata Michalska wrote: > > > > External email: Use caution opening links or attachments > > > > > > > > > > > > With the Frequency Invariance Engine (FIE) being already wired up with > > > > sched tick and making use of relevant (core counter and constant > > > > counter) AMU counters, getting the average frequency for a given CPU, > > > > can be achieved by utilizing the frequency scale factor which reflects > > > > an average CPU frequency for the last tick period length. > > > > > > > > The solution is partially based on APERF/MPERF implementation of > > > > arch_freq_get_on_cpu. > > > > > > > > Suggested-by: Ionela Voinescu <ionela.voinescu@arm.com> > > > > Signed-off-by: Beata Michalska <beata.michalska@arm.com> > > > > --- > > > > arch/arm64/kernel/topology.c | 109 +++++++++++++++++++++++++++++++---- > > > > 1 file changed, 99 insertions(+), 10 deletions(-) > > > > > > > > diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c > > > > index cb180684d10d..22e510733336 100644 > > > > --- a/arch/arm64/kernel/topology.c > > > > +++ b/arch/arm64/kernel/topology.c > > > > @@ -17,6 +17,7 @@ > > > > #include <linux/cpufreq.h> > > > > #include <linux/init.h> > > > > #include <linux/percpu.h> > > > > +#include <linux/sched/isolation.h> > > > > > > > > #include <asm/cpu.h> > > > > #include <asm/cputype.h> > > > > @@ -88,18 +89,28 @@ int __init parse_acpi_topology(void) > > > > * initialized. 
> > > > */ > > > > static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT); > > > > -static DEFINE_PER_CPU(u64, arch_const_cycles_prev); > > > > -static DEFINE_PER_CPU(u64, arch_core_cycles_prev); > > > > static cpumask_var_t amu_fie_cpus; > > > > > > > > +struct amu_cntr_sample { > > > > + u64 arch_const_cycles_prev; > > > > + u64 arch_core_cycles_prev; > > > > + unsigned long last_scale_update; > > > > +}; > > > > + > > > > +static DEFINE_PER_CPU_SHARED_ALIGNED(struct amu_cntr_sample, cpu_amu_samples); > > > > + > > > > void update_freq_counters_refs(void) > > > > { > > > > - this_cpu_write(arch_core_cycles_prev, read_corecnt()); > > > > - this_cpu_write(arch_const_cycles_prev, read_constcnt()); > > > > + struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples); > > > > + > > > > + amu_sample->arch_core_cycles_prev = read_corecnt(); > > > > + amu_sample->arch_const_cycles_prev = read_constcnt(); > > > > } > > > > > > > > static inline bool freq_counters_valid(int cpu) > > > > { > > > > + struct amu_cntr_sample *amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu); > > > > + > > > > if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask)) > > > > return false; > > > > > > > > @@ -108,8 +119,8 @@ static inline bool freq_counters_valid(int cpu) > > > > return false; > > > > } > > > > > > > > - if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) || > > > > - !per_cpu(arch_core_cycles_prev, cpu))) { > > > > + if (unlikely(!amu_sample->arch_const_cycles_prev || > > > > + !amu_sample->arch_core_cycles_prev)) { > > > > pr_debug("CPU%d: cycle counters are not enabled.\n", cpu); > > > > return false; > > > > } > > > > @@ -152,17 +163,22 @@ void freq_inv_set_max_ratio(int cpu, u64 max_rate) > > > > > > > > static void amu_scale_freq_tick(void) > > > > { > > > > + struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples); > > > > u64 prev_core_cnt, prev_const_cnt; > > > > u64 core_cnt, const_cnt, 
scale; > > > > > > > > - prev_const_cnt = this_cpu_read(arch_const_cycles_prev); > > > > - prev_core_cnt = this_cpu_read(arch_core_cycles_prev); > > > > + prev_const_cnt = amu_sample->arch_const_cycles_prev; > > > > + prev_core_cnt = amu_sample->arch_core_cycles_prev; > > > > > > > > update_freq_counters_refs(); > > > > > > > > - const_cnt = this_cpu_read(arch_const_cycles_prev); > > > > - core_cnt = this_cpu_read(arch_core_cycles_prev); > > > > + const_cnt = amu_sample->arch_const_cycles_prev; > > > > + core_cnt = amu_sample->arch_core_cycles_prev; > > > > > > > > + /* > > > > + * This should not happen unless the AMUs have been reset and the > > > > + * counter values have not been restored - unlikely > > > > + */ > > > > if (unlikely(core_cnt <= prev_core_cnt || > > > > const_cnt <= prev_const_cnt)) > > > > return; > > > > @@ -182,6 +198,8 @@ static void amu_scale_freq_tick(void) > > > > > > > > scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE); > > > > this_cpu_write(arch_freq_scale, (unsigned long)scale); > > > > + > > > > + amu_sample->last_scale_update = jiffies; > > > > } > > > > > > > > static struct scale_freq_data amu_sfd = { > > > > @@ -189,6 +207,77 @@ static struct scale_freq_data amu_sfd = { > > > > .set_freq_scale = amu_scale_freq_tick, > > > > }; > > > > > > > > +static __always_inline bool amu_fie_cpu_supported(unsigned int cpu) > > > > +{ > > > > + return cpumask_available(amu_fie_cpus) && > > > > + cpumask_test_cpu(cpu, amu_fie_cpus); > > > > +} > > > > + > > > > +#define AMU_SAMPLE_EXP_MS 20 > > > > + > > > > +int arch_freq_avg_get_on_cpu(int cpu) > > > > +{ > > > > + struct amu_cntr_sample *amu_sample; > > > > + unsigned int start_cpu = cpu; > > > > + unsigned long last_update; > > > > + unsigned int freq = 0; > > > > + u64 scale; > > > > + > > > > + if (!amu_fie_cpu_supported(cpu) || !arch_scale_freq_ref(cpu)) > > > > + return -EOPNOTSUPP; > > > > + > > > > +retry: > > > > + amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu); > > > > + > 
> > > + last_update = amu_sample->last_scale_update; > > > > + > > > > + /* > > > > + * For those CPUs that are in full dynticks mode, and those that have > > > 'or those' to match with if condition? > > Yeah, might be. > > > > > > > + * not seen tick for a while, try an alternative source for the counters > > > > + * (and thus freq scale), if available, for given policy: this boils > > > > + * down to identifying an active cpu within the same freq domain, if any. > > > > + */ > > > > + if (!housekeeping_cpu(cpu, HK_TYPE_TICK) || > > > > + time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) { > > > > + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); > > > > + int ref_cpu = cpu; > > > > + > > > > + if (!policy) > > > > + return 0; > > > > + > > > > > > We can skip the rest of code if policy has a single cpu. AFAIR, one of the > > > previous versions had similar check. > > > > > > if (!policy_is_shared(policy)) { > > > cpufreq_cpu_put(policy); > > > goto freq_comput; > > > } > > True, we could but then this case is covered by cpumask_next_wrap > > which for single-cpu policies will render the ref_cpu invalid, > > so policy_is_shared check seemed unnecessary. > > > > > > > + if (!cpumask_intersects(policy->related_cpus, > > > > + housekeeping_cpumask(HK_TYPE_TICK))) { > > > > + cpufreq_cpu_put(policy); > > > > + return -EOPNOTSUPP; > > > > + } > > > > + > > > > + > > > > + do { > > > > + ref_cpu = cpumask_next_wrap(ref_cpu, policy->cpus, > > > > + start_cpu, false); > > > > + > > > > + } while (ref_cpu < nr_cpu_ids && idle_cpu(ref_cpu)); > > > > + > > > > + cpufreq_cpu_put(policy); > > > > + > > > > + if (ref_cpu >= nr_cpu_ids) > > > > + /* No alternative to pull info from */ > > > > + return 0; > > > > + > > > > > > The 'cpuinfo_avg_freq' node gives 'unknown' value for single CPU per policy > > > as 'ref_cpu' increments to 'nr_cpu_ids'. We can use the same CPU instead of > > > returning zero if no alternative CPU. 
> > > > > > # cat /sys/devices/system/cpu/cpu2/cpufreq/cpuinfo_avg_freq > > > <unknown> > > > > > > ---- > > > if (ref_cpu >= nr_cpu_ids) > > > /* Use same CPU if no alternative to pull info from */ > > > goto freq_comput; > > > > > > .. > > > freq_comput: > > > scale = arch_scale_freq_capacity(cpu); > > > freq = scale * arch_scale_freq_ref(cpu); > > > ---- > > > > > This boils down to the question what that function, and the information it > > provides, represent really. The 'unknown' here simply says the CPU has been idle > > for a while and as such the frequency data is a bit stale and it does not > > represent the average freq the CPU is actually running at anymore, which is > > the intention here really. Or, that the given CPU is a non-housekeeping one. > > Either way I believe this is a useful information, instead of providing > > stale data with no indication on whether the frequency is really the 'current' > > one or not. > > > > If that is somehow undesirable we can discuss this further, though I'd rather > > avoid exposing an interface where the feedback provided is open to > > interpretation at all times. > > Would it make sense to identify that the frequency reporting is unknown due to > cpu being idle vs some other issue like being a non-housekeeping CPU? Would > returning a value of 0 make it easier for tools to represent that the CPU is > currently idle? That is an option. Another one would be to return an error for those cases. This would make it easier to distinguish between valid frequency &/| idle CPU vs tickless CPU (EINVAL vs ENOENT) ? --- BR Beata > > Thanks, > Vanshidhar > > > > > --- > > Best Regards > > Beata > > > Thank you, > > > Sumit Gupta > > > > > > P.S. Will be on afk for next 2 weeks with no access to email. Please expect > > > a delay in response. 
> > > > > > > + cpu = ref_cpu; > > > > + goto retry; > > > > + } > > > > + /* > > > > + * Reversed computation to the one used to determine > > > > + * the arch_freq_scale value > > > > + * (see amu_scale_freq_tick for details) > > > > + */ > > > > + scale = arch_scale_freq_capacity(cpu); > > > > + freq = scale * arch_scale_freq_ref(cpu); > > > > + freq >>= SCHED_CAPACITY_SHIFT; > > > > + return freq; > > > > +} > > > > + > > > > > > > static void amu_fie_setup(const struct cpumask *cpus) > > > > { > > > > int cpu; > > > > -- > > > > 2.25.1 > > > >
On Thu, Oct 03, 2024 at 11:39:54PM GMT, Beata Michalska wrote: >On Thu, Sep 26, 2024 at 04:21:14PM -0700, Vanshidhar Konda wrote: >> On Thu, Sep 26, 2024 at 12:34:01PM GMT, Beata Michalska wrote: >> > On Tue, Sep 17, 2024 at 05:41:09PM +0530, Sumit Gupta wrote: >> > > Hi Beata, >> > Hi Sumit, >> > > >> > > Thank you for the patches. >> > Thank you for having a look at those. >> > > >> > > On 13/09/24 18:59, Beata Michalska wrote: >> > > > External email: Use caution opening links or attachments >> > > > >> > > > >> > > > With the Frequency Invariance Engine (FIE) being already wired up with >> > > > sched tick and making use of relevant (core counter and constant >> > > > counter) AMU counters, getting the average frequency for a given CPU, >> > > > can be achieved by utilizing the frequency scale factor which reflects >> > > > an average CPU frequency for the last tick period length. >> > > > >> > > > The solution is partially based on APERF/MPERF implementation of >> > > > arch_freq_get_on_cpu. >> > > > >> > > > Suggested-by: Ionela Voinescu <ionela.voinescu@arm.com> >> > > > Signed-off-by: Beata Michalska <beata.michalska@arm.com> >> > > > --- >> > > > arch/arm64/kernel/topology.c | 109 +++++++++++++++++++++++++++++++---- >> > > > 1 file changed, 99 insertions(+), 10 deletions(-) >> > > > --- snip ---- >> > > >> > > .. >> > > freq_comput: >> > > scale = arch_scale_freq_capacity(cpu); >> > > freq = scale * arch_scale_freq_ref(cpu); >> > > ---- >> > > >> > This boils down to the question what that function, and the information it >> > provides, represent really. The 'unknown' here simply says the CPU has been idle >> > for a while and as such the frequency data is a bit stale and it does not >> > represent the average freq the CPU is actually running at anymore, which is >> > the intention here really. Or, that the given CPU is a non-housekeeping one. 
>> > Either way I believe this is a useful information, instead of providing >> > stale data with no indication on whether the frequency is really the 'current' >> > one or not. >> > >> > If that is somehow undesirable we can discuss this further, though I'd rather >> > avoid exposing an interface where the feedback provided is open to >> > interpretation at all times. >> >> Would it make sense to identify that the frequency reporting is unknown due to >> cpu being idle vs some other issue like being a non-housekeeping CPU? Would >> returning a value of 0 make it easier for tools to represent that the CPU is >> currently idle? >That is an option. >Another one would be to return an error for those cases. This would make it >easier to distinguish between valid frequency &/| idle CPU vs tickless CPU >(EINVAL vs ENOENT) ? > That seems like a good idea but I suspect it would be confusing to the end user. If a user runs `cat /sys/devices/system/cpu/cpu2/cpuinfo_avg_freq` they would get an error in some cases or get a number in some other iterations. Thanks, Vanshidhar >--- >BR >Beata >> >> Thanks, >> Vanshidhar >> >> > >> > --- >> > Best Regards >> > Beata >> > > Thank you, >> > > Sumit Gupta >> > > >> > > P.S. Will be on afk for next 2 weeks with no access to email. Please expect >> > > a delay in response. >> > > >> > > > + cpu = ref_cpu; >> > > > + goto retry; >> > > > + } >> > > > + /* >> > > > + * Reversed computation to the one used to determine >> > > > + * the arch_freq_scale value >> > > > + * (see amu_scale_freq_tick for details) >> > > > + */ >> > > > + scale = arch_scale_freq_capacity(cpu); >> > > > + freq = scale * arch_scale_freq_ref(cpu); >> > > > + freq >>= SCHED_CAPACITY_SHIFT; >> > > > + return freq; >> > > > +} >> > > > + >> > > >> > > > static void amu_fie_setup(const struct cpumask *cpus) >> > > > { >> > > > int cpu; >> > > > -- >> > > > 2.25.1 >> > > >
On Thu, Oct 03, 2024 at 02:54:22PM -0700, Vanshidhar Konda wrote: > On Thu, Oct 03, 2024 at 11:39:54PM GMT, Beata Michalska wrote: > > On Thu, Sep 26, 2024 at 04:21:14PM -0700, Vanshidhar Konda wrote: > > > On Thu, Sep 26, 2024 at 12:34:01PM GMT, Beata Michalska wrote: > > > > On Tue, Sep 17, 2024 at 05:41:09PM +0530, Sumit Gupta wrote: > > > > > Hi Beata, > > > > Hi Sumit, > > > > > > > > > > Thank you for the patches. > > > > Thank you for having a look at those. > > > > > > > > > > On 13/09/24 18:59, Beata Michalska wrote: > > > > > > External email: Use caution opening links or attachments > > > > > > > > > > > > > > > > > > With the Frequency Invariance Engine (FIE) being already wired up with > > > > > > sched tick and making use of relevant (core counter and constant > > > > > > counter) AMU counters, getting the average frequency for a given CPU, > > > > > > can be achieved by utilizing the frequency scale factor which reflects > > > > > > an average CPU frequency for the last tick period length. > > > > > > > > > > > > The solution is partially based on APERF/MPERF implementation of > > > > > > arch_freq_get_on_cpu. > > > > > > > > > > > > Suggested-by: Ionela Voinescu <ionela.voinescu@arm.com> > > > > > > Signed-off-by: Beata Michalska <beata.michalska@arm.com> > > > > > > --- > > > > > > arch/arm64/kernel/topology.c | 109 +++++++++++++++++++++++++++++++---- > > > > > > 1 file changed, 99 insertions(+), 10 deletions(-) > > > > > > > > --- snip ---- > > > > > > > > > > > .. > > > > > freq_comput: > > > > > scale = arch_scale_freq_capacity(cpu); > > > > > freq = scale * arch_scale_freq_ref(cpu); > > > > > ---- > > > > > > > > > This boils down to the question what that function, and the information it > > > > provides, represent really. 
The 'unknown' here simply says the CPU has been idle > > > > for a while and as such the frequency data is a bit stale and it does not > > > > represent the average freq the CPU is actually running at anymore, which is > > > > the intention here really. Or, that the given CPU is a non-housekeeping one. > > > > Either way I believe this is a useful information, instead of providing > > > > stale data with no indication on whether the frequency is really the 'current' > > > > one or not. > > > > > > > > If that is somehow undesirable we can discuss this further, though I'd rather > > > > avoid exposing an interface where the feedback provided is open to > > > > interpretation at all times. > > > > > > Would it make sense to identify that the frequency reporting is unknown due to > > > cpu being idle vs some other issue like being a non-housekeeping CPU? Would > > > returning a value of 0 make it easier for tools to represent that the CPU is > > > currently idle? > > That is an option. > > Another one would be to return an error for those cases. This would make it > > easier to distinguish between valid frequency &/| idle CPU vs tickless CPU > > (EINVAL vs ENOENT) ? > > > > That seems like a good idea but I suspect it would be confusing to the end user. > > If a user runs `cat /sys/devices/system/cpu/cpu2/cpuinfo_avg_freq` they would > get an error in some cases or get a number in some other iterations. > That is a fair point but I am not entirely convinced using '0' instead makes things any more clearer as this is in no way a valid CPU frequency. As long as we document the expected behaviour keeping the interface well defined, both options should be fine I guess. @Viresh: what is your opinion on that one ? --- BR Beata > Thanks, > Vanshidhar > > > --- > > BR > > Beata > > > > > > Thanks, > > > Vanshidhar > > > > > > > > > > > --- > > > > Best Regards > > > > Beata > > > > > Thank you, > > > > > Sumit Gupta > > > > > > > > > > P.S. 
Will be on afk for next 2 weeks with no access to email. Please expect > > > > > a delay in response. > > > > > > > > > > > + cpu = ref_cpu; > > > > > > + goto retry; > > > > > > + } > > > > > > + /* > > > > > > + * Reversed computation to the one used to determine > > > > > > + * the arch_freq_scale value > > > > > > + * (see amu_scale_freq_tick for details) > > > > > > + */ > > > > > > + scale = arch_scale_freq_capacity(cpu); > > > > > > + freq = scale * arch_scale_freq_ref(cpu); > > > > > > + freq >>= SCHED_CAPACITY_SHIFT; > > > > > > + return freq; > > > > > > +} > > > > > > + > > > > > > > > > > > static void amu_fie_setup(const struct cpumask *cpus) > > > > > > { > > > > > > int cpu; > > > > > > -- > > > > > > 2.25.1 > > > > > >
On Thu, Oct 10, 2024 at 01:08:23PM GMT, Beata Michalska wrote: >On Thu, Oct 03, 2024 at 02:54:22PM -0700, Vanshidhar Konda wrote: >> On Thu, Oct 03, 2024 at 11:39:54PM GMT, Beata Michalska wrote: >> > On Thu, Sep 26, 2024 at 04:21:14PM -0700, Vanshidhar Konda wrote: >> > > On Thu, Sep 26, 2024 at 12:34:01PM GMT, Beata Michalska wrote: >> > > > On Tue, Sep 17, 2024 at 05:41:09PM +0530, Sumit Gupta wrote: >> > > > > Hi Beata, >> > > > Hi Sumit, >> > > > > >> > > > > Thank you for the patches. >> > > > Thank you for having a look at those. >> > > > > >> > > > > On 13/09/24 18:59, Beata Michalska wrote: >> > > > > > External email: Use caution opening links or attachments >> > > > > > >> > > > > > >> > > > > > With the Frequency Invariance Engine (FIE) being already wired up with >> > > > > > sched tick and making use of relevant (core counter and constant >> > > > > > counter) AMU counters, getting the average frequency for a given CPU, >> > > > > > can be achieved by utilizing the frequency scale factor which reflects >> > > > > > an average CPU frequency for the last tick period length. >> > > > > > >> > > > > > The solution is partially based on APERF/MPERF implementation of >> > > > > > arch_freq_get_on_cpu. >> > > > > > >> > > > > > Suggested-by: Ionela Voinescu <ionela.voinescu@arm.com> >> > > > > > Signed-off-by: Beata Michalska <beata.michalska@arm.com> >> > > > > > --- >> > > > > > arch/arm64/kernel/topology.c | 109 +++++++++++++++++++++++++++++++---- >> > > > > > 1 file changed, 99 insertions(+), 10 deletions(-) >> > > > > > >> >> --- snip ---- >> >> > > > > >> > > > > .. >> > > > > freq_comput: >> > > > > scale = arch_scale_freq_capacity(cpu); >> > > > > freq = scale * arch_scale_freq_ref(cpu); >> > > > > ---- >> > > > > >> > > > This boils down to the question what that function, and the information it >> > > > provides, represent really. 
The 'unknown' here simply says the CPU has been idle >> > > > for a while and as such the frequency data is a bit stale and it does not >> > > > represent the average freq the CPU is actually running at anymore, which is >> > > > the intention here really. Or, that the given CPU is a non-housekeeping one. >> > > > Either way I believe this is a useful information, instead of providing >> > > > stale data with no indication on whether the frequency is really the 'current' >> > > > one or not. >> > > > >> > > > If that is somehow undesirable we can discuss this further, though I'd rather >> > > > avoid exposing an interface where the feedback provided is open to >> > > > interpretation at all times. >> > > >> > > Would it make sense to identify that the frequency reporting is unknown due to >> > > cpu being idle vs some other issue like being a non-housekeeping CPU? Would >> > > returning a value of 0 make it easier for tools to represent that the CPU is >> > > currently idle? >> > That is an option. >> > Another one would be to return an error for those cases. This would make it >> > easier to distinguish between valid frequency &/| idle CPU vs tickless CPU >> > (EINVAL vs ENOENT) ? >> > >> >> That seems like a good idea but I suspect it would be confusing to the end user. >> >> If a user runs `cat /sys/devices/system/cpu/cpu2/cpuinfo_avg_freq` they would >> get an error in some cases or get a number in some other iterations. >> >That is a fair point but I am not entirely convinced using '0' instead makes >things any more clearer as this is in no way a valid CPU frequency. >As long as we document the expected behaviour keeping the interface well >defined, both options should be fine I guess. > Another option could be to list out the reason as 'idle' or 'no entry' instead of returning EINVAL or ENOENT. These wouldn't be valid values either but cat on the sysfs node wouldn't return an error. Thanks, Vanshidhar >@Viresh: what is your opinion on that one ? 
> >--- >BR >Beata >> Thanks, >> Vanshidhar >> >> > --- >> > BR >> > Beata >> > > >> > > Thanks, >> > > Vanshidhar >> > > >> > > > >> > > > --- >> > > > Best Regards >> > > > Beata >> > > > > Thank you, >> > > > > Sumit Gupta >> > > > > >> > > > > P.S. Will be on afk for next 2 weeks with no access to email. Please expect >> > > > > a delay in response. >> > > > > >> > > > > > + cpu = ref_cpu; >> > > > > > + goto retry; >> > > > > > + } >> > > > > > + /* >> > > > > > + * Reversed computation to the one used to determine >> > > > > > + * the arch_freq_scale value >> > > > > > + * (see amu_scale_freq_tick for details) >> > > > > > + */ >> > > > > > + scale = arch_scale_freq_capacity(cpu); >> > > > > > + freq = scale * arch_scale_freq_ref(cpu); >> > > > > > + freq >>= SCHED_CAPACITY_SHIFT; >> > > > > > + return freq; >> > > > > > +} >> > > > > > + >> > > > > >> > > > > > static void amu_fie_setup(const struct cpumask *cpus) >> > > > > > { >> > > > > > int cpu; >> > > > > > -- >> > > > > > 2.25.1 >> > > > > >
On 11/10/24 21:59, Vanshidhar Konda wrote: > External email: Use caution opening links or attachments > > > On Thu, Oct 10, 2024 at 01:08:23PM GMT, Beata Michalska wrote: >> On Thu, Oct 03, 2024 at 02:54:22PM -0700, Vanshidhar Konda wrote: >>> On Thu, Oct 03, 2024 at 11:39:54PM GMT, Beata Michalska wrote: >>> > On Thu, Sep 26, 2024 at 04:21:14PM -0700, Vanshidhar Konda wrote: >>> > > On Thu, Sep 26, 2024 at 12:34:01PM GMT, Beata Michalska wrote: >>> > > > On Tue, Sep 17, 2024 at 05:41:09PM +0530, Sumit Gupta wrote: >>> > > > > Hi Beata, >>> > > > Hi Sumit, >>> > > > > >>> > > > > Thank you for the patches. >>> > > > Thank you for having a look at those. >>> > > > > >>> > > > > On 13/09/24 18:59, Beata Michalska wrote: >>> > > > > > External email: Use caution opening links or attachments >>> > > > > > >>> > > > > > >>> > > > > > With the Frequency Invariance Engine (FIE) being already >>> wired up with >>> > > > > > sched tick and making use of relevant (core counter and >>> constant >>> > > > > > counter) AMU counters, getting the average frequency for a >>> given CPU, >>> > > > > > can be achieved by utilizing the frequency scale factor >>> which reflects >>> > > > > > an average CPU frequency for the last tick period length. >>> > > > > > >>> > > > > > The solution is partially based on APERF/MPERF >>> implementation of >>> > > > > > arch_freq_get_on_cpu. >>> > > > > > >>> > > > > > Suggested-by: Ionela Voinescu <ionela.voinescu@arm.com> >>> > > > > > Signed-off-by: Beata Michalska <beata.michalska@arm.com> >>> > > > > > --- >>> > > > > > arch/arm64/kernel/topology.c | 109 >>> +++++++++++++++++++++++++++++++---- >>> > > > > > 1 file changed, 99 insertions(+), 10 deletions(-) >>> > > > > > >>> >>> --- snip ---- >>> >>> > > > > >>> > > > > .. 
>>> > > > > freq_comput: >>> > > > > scale = arch_scale_freq_capacity(cpu); >>> > > > > freq = scale * arch_scale_freq_ref(cpu); >>> > > > > ---- >>> > > > > >>> > > > This boils down to the question what that function, and the >>> information it >>> > > > provides, represent really. The 'unknown' here simply says the >>> CPU has been idle >>> > > > for a while and as such the frequency data is a bit stale and >>> it does not >>> > > > represent the average freq the CPU is actually running at >>> anymore, which is >>> > > > the intention here really. Or, that the given CPU is a >>> non-housekeeping one. >>> > > > Either way I believe this is a useful information, instead of >>> providing >>> > > > stale data with no indication on whether the frequency is >>> really the 'current' >>> > > > one or not. >>> > > > >>> > > > If that is somehow undesirable we can discuss this further, >>> though I'd rather >>> > > > avoid exposing an interface where the feedback provided is open to >>> > > > interpretation at all times. >>> > > >>> > > Would it make sense to identify that the frequency reporting is >>> unknown due to >>> > > cpu being idle vs some other issue like being a non-housekeeping >>> CPU? Would >>> > > returning a value of 0 make it easier for tools to represent that >>> the CPU is >>> > > currently idle? >>> > That is an option. >>> > Another one would be to return an error for those cases. This would >>> make it >>> > easier to distinguish between valid frequency &/| idle CPU vs >>> tickless CPU >>> > (EINVAL vs ENOENT) ? >>> > >>> >>> That seems like a good idea but I suspect it would be confusing to >>> the end user. >>> >>> If a user runs `cat /sys/devices/system/cpu/cpu2/cpuinfo_avg_freq` >>> they would >>> get an error in some cases or get a number in some other iterations. >>> >> That is a fair point but I am not entirely convinced using '0' instead >> makes >> things any more clearer as this is in no way a valid CPU frequency. 
>> As long as we document the expected behaviour keeping the interface well >> defined, both options should be fine I guess. >> > > Another option could be to list out the reason as 'idle' or 'no entry' > instead of > returning EINVAL or ENOENT. These wouldn't be valid values either but > cat on the > sysfs node wouldn't return an error. > > Thanks, > Vanshidhar > Ya, listing the clear reason sounds better. Thank you, Sumit Gupta >> @Viresh: what is your opinion on that one ? >> >> --- >> BR >> Beata >>> Thanks, .... >>> > > > > > + cpu = ref_cpu; >>> > > > > > + goto retry; >>> > > > > > + } >>> > > > > > + /* >>> > > > > > + * Reversed computation to the one used to determine >>> > > > > > + * the arch_freq_scale value >>> > > > > > + * (see amu_scale_freq_tick for details) >>> > > > > > + */ >>> > > > > > + scale = arch_scale_freq_capacity(cpu); >>> > > > > > + freq = scale * arch_scale_freq_ref(cpu); >>> > > > > > + freq >>= SCHED_CAPACITY_SHIFT; >>> > > > > > + return freq; >>> > > > > > +} >>> > > > > > + >>> > > > > >>> > > > > > static void amu_fie_setup(const struct cpumask *cpus) >>> > > > > > { >>> > > > > > int cpu; >>> > > > > > -- >>> > > > > > 2.25.1 >>> > > > > >
On Mon, Oct 14, 2024 at 11:16:36PM +0530, Sumit Gupta wrote: > > > On 11/10/24 21:59, Vanshidhar Konda wrote: > > External email: Use caution opening links or attachments > > > > > > On Thu, Oct 10, 2024 at 01:08:23PM GMT, Beata Michalska wrote: > > > On Thu, Oct 03, 2024 at 02:54:22PM -0700, Vanshidhar Konda wrote: > > > > On Thu, Oct 03, 2024 at 11:39:54PM GMT, Beata Michalska wrote: > > > > > On Thu, Sep 26, 2024 at 04:21:14PM -0700, Vanshidhar Konda wrote: > > > > > > On Thu, Sep 26, 2024 at 12:34:01PM GMT, Beata Michalska wrote: > > > > > > > On Tue, Sep 17, 2024 at 05:41:09PM +0530, Sumit Gupta wrote: > > > > > > > > Hi Beata, > > > > > > > Hi Sumit, > > > > > > > > > > > > > > > > Thank you for the patches. > > > > > > > Thank you for having a look at those. > > > > > > > > > > > > > > > > On 13/09/24 18:59, Beata Michalska wrote: > > > > > > > > > External email: Use caution opening links or attachments > > > > > > > > > > > > > > > > > > > > > > > > > > > With the Frequency Invariance Engine (FIE) being > > > > already wired up with > > > > > > > > > sched tick and making use of relevant (core counter > > > > and constant > > > > > > > > > counter) AMU counters, getting the average frequency > > > > for a given CPU, > > > > > > > > > can be achieved by utilizing the frequency scale > > > > factor which reflects > > > > > > > > > an average CPU frequency for the last tick period length. > > > > > > > > > > > > > > > > > > The solution is partially based on APERF/MPERF > > > > implementation of > > > > > > > > > arch_freq_get_on_cpu. 
> > > > > > > > > > > > > > > > > > Suggested-by: Ionela Voinescu <ionela.voinescu@arm.com> > > > > > > > > > Signed-off-by: Beata Michalska <beata.michalska@arm.com> > > > > > > > > > --- > > > > > > > > > arch/arm64/kernel/topology.c | 109 > > > > +++++++++++++++++++++++++++++++---- > > > > > > > > > 1 file changed, 99 insertions(+), 10 deletions(-) > > > > > > > > > > > > > > > > > --- snip ---- > > > > > > > > > > > > > > > > > > > > .. > > > > > > > > freq_comput: > > > > > > > > scale = arch_scale_freq_capacity(cpu); > > > > > > > > freq = scale * arch_scale_freq_ref(cpu); > > > > > > > > ---- > > > > > > > > > > > > > > > This boils down to the question what that function, and > > > > the information it > > > > > > > provides, represent really. The 'unknown' here simply says > > > > the CPU has been idle > > > > > > > for a while and as such the frequency data is a bit stale > > > > and it does not > > > > > > > represent the average freq the CPU is actually running at > > > > anymore, which is > > > > > > > the intention here really. Or, that the given CPU is a > > > > non-housekeeping one. > > > > > > > Either way I believe this is a useful information, instead > > > > of providing > > > > > > > stale data with no indication on whether the frequency is > > > > really the 'current' > > > > > > > one or not. > > > > > > > > > > > > > > If that is somehow undesirable we can discuss this > > > > further, though I'd rather > > > > > > > avoid exposing an interface where the feedback provided is open to > > > > > > > interpretation at all times. > > > > > > > > > > > > Would it make sense to identify that the frequency reporting > > > > is unknown due to > > > > > > cpu being idle vs some other issue like being a > > > > non-housekeeping CPU? Would > > > > > > returning a value of 0 make it easier for tools to represent > > > > that the CPU is > > > > > > currently idle? > > > > > That is an option. 
> > > > > Another one would be to return an error for those cases. This > > > > would make it > > > > > easier to distinguish between valid frequency &/| idle CPU vs > > > > tickless CPU > > > > > (EINVAL vs ENOENT) ? > > > > > > > > > > > > > That seems like a good idea but I suspect it would be confusing > > > > to the end user. > > > > > > > > If a user runs `cat > > > > /sys/devices/system/cpu/cpu2/cpuinfo_avg_freq` they would > > > > get an error in some cases or get a number in some other iterations. > > > > > > > That is a fair point but I am not entirely convinced using '0' > > > instead makes > > > things any more clearer as this is in no way a valid CPU frequency. > > > As long as we document the expected behaviour keeping the interface well > > > defined, both options should be fine I guess. > > > > > > > Another option could be to list out the reason as 'idle' or 'no entry' > > instead of > > returning EINVAL or ENOENT. These wouldn't be valid values either but > > cat on the > > sysfs node wouldn't return an error. > > > > Thanks, > > Vanshidhar > > > > Ya, listing the clear reason sounds better. > > Thank you, > Sumit Gupta > I'd still prefer returning an error as that is a clear indication on failure upon read. Furthermore, that would also make that attribute stick to single-type rule for sysfs, which is currently not the case and will not be if we return 'idle' or 'no entry'. That said, I am happy to make that change if that would be the final decision and that one is not mine, as the change is ultimately the cpufreq one. --- BR Beata > > > @Viresh: what is your opinion on that one ? > > > > > > --- > > > BR > > > Beata > > > > Thanks, > > .... 
> > > > > > > > > > + cpu = ref_cpu; > > > > > > > > > + goto retry; > > > > > > > > > + } > > > > > > > > > + /* > > > > > > > > > + * Reversed computation to the one used to determine > > > > > > > > > + * the arch_freq_scale value > > > > > > > > > + * (see amu_scale_freq_tick for details) > > > > > > > > > + */ > > > > > > > > > + scale = arch_scale_freq_capacity(cpu); > > > > > > > > > + freq = scale * arch_scale_freq_ref(cpu); > > > > > > > > > + freq >>= SCHED_CAPACITY_SHIFT; > > > > > > > > > + return freq; > > > > > > > > > +} > > > > > > > > > + > > > > > > > > > > > > > > > > > static void amu_fie_setup(const struct cpumask *cpus) > > > > > > > > > { > > > > > > > > > int cpu; > > > > > > > > > -- > > > > > > > > > 2.25.1 > > > > > > > > >
On 10-10-24, 13:08, Beata Michalska wrote: > That is a fair point but I am not entirely convinced using '0' instead makes > things any more clearer as this is in no way a valid CPU frequency. > As long as we document the expected behaviour keeping the interface well > defined, both options should be fine I guess. > > @Viresh: what is your opinion on that one ? Failing to get frequency for the CPU shouldn't be represented by 0, even if it is confusing for the user.
On Tue, Oct 29, 2024 at 12:23:19PM +0530, Viresh Kumar wrote: > On 10-10-24, 13:08, Beata Michalska wrote: > > That is a fair point but I am not entirely convinced using '0' instead makes > > things any more clearer as this is in no way a valid CPU frequency. > > As long as we document the expected behaviour keeping the interface well > > defined, both options should be fine I guess. > > > > @Viresh: what is your opinion on that one ? > > Failing to get frequency for the CPU shouldn't be represented by 0, > even if it is confusing for the user. We still need to decide whether to provide a more descriptive way of informing about such cases (whether it be 'unknown' or 'idle') or to simply return an appropriate error and leave userspace to deal with that. --- Thanks Beata > > -- > viresh
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index cb180684d10d..22e510733336 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -17,6 +17,7 @@ #include <linux/cpufreq.h> #include <linux/init.h> #include <linux/percpu.h> +#include <linux/sched/isolation.h> #include <asm/cpu.h> #include <asm/cputype.h> @@ -88,18 +89,28 @@ int __init parse_acpi_topology(void) * initialized. */ static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT); -static DEFINE_PER_CPU(u64, arch_const_cycles_prev); -static DEFINE_PER_CPU(u64, arch_core_cycles_prev); static cpumask_var_t amu_fie_cpus; +struct amu_cntr_sample { + u64 arch_const_cycles_prev; + u64 arch_core_cycles_prev; + unsigned long last_scale_update; +}; + +static DEFINE_PER_CPU_SHARED_ALIGNED(struct amu_cntr_sample, cpu_amu_samples); + void update_freq_counters_refs(void) { - this_cpu_write(arch_core_cycles_prev, read_corecnt()); - this_cpu_write(arch_const_cycles_prev, read_constcnt()); + struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples); + + amu_sample->arch_core_cycles_prev = read_corecnt(); + amu_sample->arch_const_cycles_prev = read_constcnt(); } static inline bool freq_counters_valid(int cpu) { + struct amu_cntr_sample *amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu); + if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask)) return false; @@ -108,8 +119,8 @@ static inline bool freq_counters_valid(int cpu) return false; } - if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) || - !per_cpu(arch_core_cycles_prev, cpu))) { + if (unlikely(!amu_sample->arch_const_cycles_prev || + !amu_sample->arch_core_cycles_prev)) { pr_debug("CPU%d: cycle counters are not enabled.\n", cpu); return false; } @@ -152,17 +163,22 @@ void freq_inv_set_max_ratio(int cpu, u64 max_rate) static void amu_scale_freq_tick(void) { + struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples); u64 prev_core_cnt, 
prev_const_cnt; u64 core_cnt, const_cnt, scale; - prev_const_cnt = this_cpu_read(arch_const_cycles_prev); - prev_core_cnt = this_cpu_read(arch_core_cycles_prev); + prev_const_cnt = amu_sample->arch_const_cycles_prev; + prev_core_cnt = amu_sample->arch_core_cycles_prev; update_freq_counters_refs(); - const_cnt = this_cpu_read(arch_const_cycles_prev); - core_cnt = this_cpu_read(arch_core_cycles_prev); + const_cnt = amu_sample->arch_const_cycles_prev; + core_cnt = amu_sample->arch_core_cycles_prev; + /* + * This should not happen unless the AMUs have been reset and the + * counter values have not been restored - unlikely + */ if (unlikely(core_cnt <= prev_core_cnt || const_cnt <= prev_const_cnt)) return; @@ -182,6 +198,8 @@ static void amu_scale_freq_tick(void) scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE); this_cpu_write(arch_freq_scale, (unsigned long)scale); + + amu_sample->last_scale_update = jiffies; } static struct scale_freq_data amu_sfd = { @@ -189,6 +207,77 @@ static struct scale_freq_data amu_sfd = { .set_freq_scale = amu_scale_freq_tick, }; +static __always_inline bool amu_fie_cpu_supported(unsigned int cpu) +{ + return cpumask_available(amu_fie_cpus) && + cpumask_test_cpu(cpu, amu_fie_cpus); +} + +#define AMU_SAMPLE_EXP_MS 20 + +int arch_freq_avg_get_on_cpu(int cpu) +{ + struct amu_cntr_sample *amu_sample; + unsigned int start_cpu = cpu; + unsigned long last_update; + unsigned int freq = 0; + u64 scale; + + if (!amu_fie_cpu_supported(cpu) || !arch_scale_freq_ref(cpu)) + return -EOPNOTSUPP; + +retry: + amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu); + + last_update = amu_sample->last_scale_update; + + /* + * For those CPUs that are in full dynticks mode, and those that have + * not seen tick for a while, try an alternative source for the counters + * (and thus freq scale), if available, for given policy: this boils + * down to identifying an active cpu within the same freq domain, if any. 
+ */ + if (!housekeeping_cpu(cpu, HK_TYPE_TICK) || + time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) { + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + int ref_cpu = cpu; + + if (!policy) + return 0; + + if (!cpumask_intersects(policy->related_cpus, + housekeeping_cpumask(HK_TYPE_TICK))) { + cpufreq_cpu_put(policy); + return -EOPNOTSUPP; + } + + + do { + ref_cpu = cpumask_next_wrap(ref_cpu, policy->cpus, + start_cpu, false); + + } while (ref_cpu < nr_cpu_ids && idle_cpu(ref_cpu)); + + cpufreq_cpu_put(policy); + + if (ref_cpu >= nr_cpu_ids) + /* No alternative to pull info from */ + return 0; + + cpu = ref_cpu; + goto retry; + } + /* + * Reversed computation to the one used to determine + * the arch_freq_scale value + * (see amu_scale_freq_tick for details) + */ + scale = arch_scale_freq_capacity(cpu); + freq = scale * arch_scale_freq_ref(cpu); + freq >>= SCHED_CAPACITY_SHIFT; + return freq; +} + static void amu_fie_setup(const struct cpumask *cpus) { int cpu;
With the Frequency Invariance Engine (FIE) already wired up with the sched tick and making use of the relevant AMU counters (core counter and constant counter), getting the average frequency for a given CPU can be achieved by utilizing the frequency scale factor, which reflects the average CPU frequency over the last tick period. The solution is partially based on the APERF/MPERF implementation of arch_freq_get_on_cpu. Suggested-by: Ionela Voinescu <ionela.voinescu@arm.com> Signed-off-by: Beata Michalska <beata.michalska@arm.com> --- arch/arm64/kernel/topology.c | 109 +++++++++++++++++++++++++++++++---- 1 file changed, 99 insertions(+), 10 deletions(-)