Message ID | 20240229162520.970986-4-vanshikonda@os.amperecomputing.com (mailing list archive) |
---|---|
State | Changes Requested, archived |
Headers | show |
Series | arm64: Use AMU counters for measuring CPU frequency | expand |
On Thu, Feb 29, 2024 at 5:25 PM Vanshidhar Konda <vanshikonda@os.amperecomputing.com> wrote: > > The CPPC driver reads delivered and reference counters using cpc_read > one at a time. This leads to inaccurate measurement of CPU frequency > discussed in [1]. If the firmware indicates that both the registers are > in the FFH interface the kernel can read the registers together in a > single IPI. This has two benefits: > 1. Reduces the number of IPIs needed to read the two registers > 2. The two registers will be read in close proximity resulting in more > accurate CPU frequency measurement > > [1]: https://lore.kernel.org/all/20230328193846.8757-1-yang@os.amperecomputing.com/ > > Signed-off-by: Vanshidhar Konda <vanshikonda@os.amperecomputing.com> > --- > arch/arm64/kernel/topology.c | 37 ++++++++++++++++++++++++++++++++++++ > drivers/acpi/cppc_acpi.c | 32 +++++++++++++++++++++++++++---- > include/acpi/cppc_acpi.h | 13 +++++++++++++ > 3 files changed, 78 insertions(+), 4 deletions(-) > > diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c > index 8905eb0c681f..8207565f43ee 100644 > --- a/arch/arm64/kernel/topology.c > +++ b/arch/arm64/kernel/topology.c > @@ -421,6 +421,43 @@ int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val) > return ret; > } > > +static void cpc_update_freq_counters(void *info) > +{ > + update_freq_counters_refs(); > +} > + > +int cpc_read_regs_ffh(int cpu, struct ffh_cpc_reg_values *ffh_regs) > +{ > + struct amu_counters *ctrs = per_cpu_ptr(&cpu_samples, cpu); > + int idx; > + > + if (!cpc_ffh_supported() || !freq_counters_valid(cpu)) > + return -EOPNOTSUPP; > + > + if (WARN_ON_ONCE(irqs_disabled())) > + return -EPERM; > + > + if (!idle_cpu(cpu)) > + smp_call_function_single(cpu, cpc_update_freq_counters, NULL, 1); > + > + for (idx = 0; idx < MAX_NUM_CPC_REGS_FFH; idx++) { > + > + if (!ffh_regs->regs[idx].reg) > + continue; > + > + switch ((u64)(ffh_regs->regs[idx].reg->address)) { > + case 0x0: > + ffh_regs->regs[idx].value = ctrs->core_cnt; > + break; > + case 0x1: > + ffh_regs->regs[idx].value = ctrs->const_cnt; > + break; > + } > + } > + > + return 0; > +} > + > int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) > { > return -EOPNOTSUPP; > diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c > index d155a86a8614..55ffb1915e4f 100644 > --- a/drivers/acpi/cppc_acpi.c > +++ b/drivers/acpi/cppc_acpi.c > @@ -113,6 +113,10 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr); > (cpc)->cpc_entry.reg.space_id == \ > ACPI_ADR_SPACE_SYSTEM_IO) > > +#define CPC_IN_FFH(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \ > + (cpc)->cpc_entry.reg.space_id == \ > + ACPI_ADR_SPACE_FIXED_HARDWARE) > + > /* Evaluates to True if reg is a NULL register descriptor */ > #define IS_NULL_REG(reg) ((reg)->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY && \ > (reg)->address == 0 && \ > @@ -974,6 +978,11 @@ int __weak cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) > return -ENOTSUPP; > } > > +int __weak cpc_read_regs_ffh(int cpu, struct ffh_cpc_reg_values *regs) > +{ > + return -ENOTSUPP; > +} This might return a bool value. Is there any case in which the caller would handle different error codes differently? > + > /* > * Since cpc_read and cpc_write are called while holding pcc_lock, it should be > * as fast as possible. We have already mapped the PCC subspace during init, so > @@ -1317,7 +1326,7 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) > int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); > struct cppc_pcc_data *pcc_ss_data = NULL; > u64 delivered, reference, ref_perf, ctr_wrap_time; > - int ret = 0, regs_in_pcc = 0; > + int ret = 0, regs_in_pcc = 0, regs_read_in_ffh = 0; Please use bool as the type for boolean variables. > > if (!cpc_desc) { > pr_debug("No CPC descriptor for CPU:%d\n", cpunum); > @@ -1353,8 +1362,23 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) > } > } > > - cpc_read(cpunum, delivered_reg, &delivered); > - cpc_read(cpunum, reference_reg, &reference); > + if (CPC_IN_FFH(delivered_reg) && CPC_IN_FFH(reference_reg)) { > + struct ffh_cpc_reg_values ffh_regs; > + > + ffh_regs.regs[0].reg = &(delivered_reg->cpc_entry.reg); > + ffh_regs.regs[1].reg = &(reference_reg->cpc_entry.reg); > + ret = cpc_read_regs_ffh(cpunum, &ffh_regs); > + if (!ret) { If cpc_read_regs_ffh() returned 'true' on success, the above could be written as if (cpc_read_regs_ffh(cpunum, &ffh_regs)) { > + delivered = ffh_regs.regs[0].value; > + reference = ffh_regs.regs[1].value; > + regs_read_in_ffh = 1; > + } > + } > + > + if (!regs_read_in_ffh) { > + cpc_read(cpunum, delivered_reg, &delivered); > + cpc_read(cpunum, reference_reg, &reference); > + } > cpc_read(cpunum, ref_perf_reg, &ref_perf); > > /* > @@ -1366,7 +1390,7 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) > if (CPC_SUPPORTED(ctr_wrap_reg)) > cpc_read(cpunum, ctr_wrap_reg, &ctr_wrap_time); > > - if (!delivered || !reference || !ref_perf) { > + if (!delivered || !reference || !ref_perf) { > ret = -EFAULT; > goto out_err; > } > diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h > index 3a0995f8bce8..0da614a50edd 100644 > --- a/include/acpi/cppc_acpi.h > +++ b/include/acpi/cppc_acpi.h > @@ -137,6 +137,18 @@ struct cppc_cpudata { > }; > > #ifdef CONFIG_ACPI_CPPC_LIB > + > +#define MAX_NUM_CPC_REGS_FFH 2 > + > +struct ffh_cpc_reg { > + struct cpc_reg *reg; > + u64 value; > +}; > + > +struct ffh_cpc_reg_values { > + struct ffh_cpc_reg regs[MAX_NUM_CPC_REGS_FFH]; > +}; > + > extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); > extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); > extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); > @@ -153,6 +165,7 @@ extern unsigned int cppc_get_transition_latency(int cpu); > extern bool cpc_ffh_supported(void); > extern bool cpc_supported_by_cpu(void); > extern int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val); > +extern int cpc_read_regs_ffh(int cpu, struct ffh_cpc_reg_values *regs); > extern int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val); > extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf); > extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable); > --
On Thu, Feb 29, 2024 at 06:32:59PM +0100, Rafael J. Wysocki wrote: >On Thu, Feb 29, 2024 at 5:25 PM Vanshidhar Konda ><vanshikonda@os.amperecomputing.com> wrote: >> >> The CPPC driver reads delivered and reference counters using cpc_read >> one at a time. This leads to inaccurate measurement of CPU frequency >> discussed in [1]. If the firmware indicates that both the registers are >> in the FFH interface the kernel can read the registers together in a >> single IPI. This has two benefits: >> 1. Reduces the number of IPIs needed to read the two registers >> 2. The two registers will be read in close proximity resulting in more >> accurate CPU frequency measurement >> >> [1]: https://lore.kernel.org/all/20230328193846.8757-1-yang@os.amperecomputing.com/ >> >> Signed-off-by: Vanshidhar Konda <vanshikonda@os.amperecomputing.com> >> --- >> arch/arm64/kernel/topology.c | 37 ++++++++++++++++++++++++++++++++++++ >> drivers/acpi/cppc_acpi.c | 32 +++++++++++++++++++++++++++---- >> include/acpi/cppc_acpi.h | 13 +++++++++++++ >> 3 files changed, 78 insertions(+), 4 deletions(-) >> >> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c >> index 8905eb0c681f..8207565f43ee 100644 >> --- a/arch/arm64/kernel/topology.c >> +++ b/arch/arm64/kernel/topology.c >> @@ -421,6 +421,43 @@ int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val) >> return ret; >> } >> >> +static void cpc_update_freq_counters(void *info) >> +{ >> + update_freq_counters_refs(); >> +} >> + >> +int cpc_read_regs_ffh(int cpu, struct ffh_cpc_reg_values *ffh_regs) >> +{ >> + struct amu_counters *ctrs = per_cpu_ptr(&cpu_samples, cpu); >> + int idx; >> + >> + if (!cpc_ffh_supported() || !freq_counters_valid(cpu)) >> + return -EOPNOTSUPP; >> + >> + if (WARN_ON_ONCE(irqs_disabled())) >> + return -EPERM; >> + >> + if (!idle_cpu(cpu)) >> + smp_call_function_single(cpu, cpc_update_freq_counters, NULL, 1); >> + >> + for (idx = 0; idx < MAX_NUM_CPC_REGS_FFH; idx++) { >> + >> + if (!ffh_regs->regs[idx].reg) >> + continue; >> + >> + switch ((u64)(ffh_regs->regs[idx].reg->address)) { >> + case 0x0: >> + ffh_regs->regs[idx].value = ctrs->core_cnt; >> + break; >> + case 0x1: >> + ffh_regs->regs[idx].value = ctrs->const_cnt; >> + break; >> + } >> + } >> + >> + return 0; >> +} >> + >> int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) >> { >> return -EOPNOTSUPP; >> diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c >> index d155a86a8614..55ffb1915e4f 100644 >> --- a/drivers/acpi/cppc_acpi.c >> +++ b/drivers/acpi/cppc_acpi.c >> @@ -113,6 +113,10 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr); >> (cpc)->cpc_entry.reg.space_id == \ >> ACPI_ADR_SPACE_SYSTEM_IO) >> >> +#define CPC_IN_FFH(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \ >> + (cpc)->cpc_entry.reg.space_id == \ >> + ACPI_ADR_SPACE_FIXED_HARDWARE) >> + >> /* Evaluates to True if reg is a NULL register descriptor */ >> #define IS_NULL_REG(reg) ((reg)->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY && \ >> (reg)->address == 0 && \ >> @@ -974,6 +978,11 @@ int __weak cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) >> return -ENOTSUPP; >> } >> >> +int __weak cpc_read_regs_ffh(int cpu, struct ffh_cpc_reg_values *regs) >> +{ >> + return -ENOTSUPP; >> +} > >This might return a bool value. > >Is there any case in which the caller would handle different error >codes differently? > I kept this similar to cpc_read_ffh(). I didn't see any usage of the error codes. The return value of cpc_read_ffh() is returned only from cpc_read(), but I didn't see anyone consuming the return value of cpc_read(). Additionally, checkpatch complains about using -ENOTSUPP and suggests replacing it with -EOPNOTSUPP. Does it make sense to update cpc_read/write_ffh() as well to switch to return type bool? >> + >> /* >> * Since cpc_read and cpc_write are called while holding pcc_lock, it should be >> * as fast as possible. We have already mapped the PCC subspace during init, so >> @@ -1317,7 +1326,7 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) >> int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); >> struct cppc_pcc_data *pcc_ss_data = NULL; >> u64 delivered, reference, ref_perf, ctr_wrap_time; >> - int ret = 0, regs_in_pcc = 0; >> + int ret = 0, regs_in_pcc = 0, regs_read_in_ffh = 0; > >Please use bool as the type for boolean variables. > Thanks for pointing that out. I'll do that for the next version. Thanks, Vanshi >> >> if (!cpc_desc) { >> pr_debug("No CPC descriptor for CPU:%d\n", cpunum); >> @@ -1353,8 +1362,23 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) >> } >> } >> >> - cpc_read(cpunum, delivered_reg, &delivered); >> - cpc_read(cpunum, reference_reg, &reference); >> + if (CPC_IN_FFH(delivered_reg) && CPC_IN_FFH(reference_reg)) { >> + struct ffh_cpc_reg_values ffh_regs; >> + >> + ffh_regs.regs[0].reg = &(delivered_reg->cpc_entry.reg); >> + ffh_regs.regs[1].reg = &(reference_reg->cpc_entry.reg); >> + ret = cpc_read_regs_ffh(cpunum, &ffh_regs); >> + if (!ret) { > >If cpc_read_regs_ffh() returned 'true' on success, the above could be written as > >if (cpc_read_regs_ffh(cpunum, &ffh_regs)) { > >> + delivered = ffh_regs.regs[0].value; >> + reference = ffh_regs.regs[1].value; >> + regs_read_in_ffh = 1; >> + } >> + } >> + >> + if (!regs_read_in_ffh) { >> + cpc_read(cpunum, delivered_reg, &delivered); >> + cpc_read(cpunum, reference_reg, &reference); >> + } >> cpc_read(cpunum, ref_perf_reg, &ref_perf); >> >> /* >> @@ -1366,7 +1390,7 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) >> if (CPC_SUPPORTED(ctr_wrap_reg)) >> cpc_read(cpunum, ctr_wrap_reg, &ctr_wrap_time); >> >> - if (!delivered || !reference || !ref_perf) { >> + if (!delivered || !reference || !ref_perf) { >> ret = -EFAULT; >> goto out_err; >> } >> diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h >> index 3a0995f8bce8..0da614a50edd 100644 >> --- a/include/acpi/cppc_acpi.h >> +++ b/include/acpi/cppc_acpi.h >> @@ -137,6 +137,18 @@ struct cppc_cpudata { >> }; >> >> #ifdef CONFIG_ACPI_CPPC_LIB >> + >> +#define MAX_NUM_CPC_REGS_FFH 2 >> + >> +struct ffh_cpc_reg { >> + struct cpc_reg *reg; >> + u64 value; >> +}; >> + >> +struct ffh_cpc_reg_values { >> + struct ffh_cpc_reg regs[MAX_NUM_CPC_REGS_FFH]; >> +}; >> + >> extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); >> extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); >> extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); >> @@ -153,6 +165,7 @@ extern unsigned int cppc_get_transition_latency(int cpu); >> extern bool cpc_ffh_supported(void); >> extern bool cpc_supported_by_cpu(void); >> extern int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val); >> +extern int cpc_read_regs_ffh(int cpu, struct ffh_cpc_reg_values *regs); >> extern int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val); >> extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf); >> extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable); >> --
On Thu, Feb 29, 2024 at 7:01 PM Vanshidhar Konda <vanshikonda@os.amperecomputing.com> wrote: > > On Thu, Feb 29, 2024 at 06:32:59PM +0100, Rafael J. Wysocki wrote: > >On Thu, Feb 29, 2024 at 5:25 PM Vanshidhar Konda > ><vanshikonda@os.amperecomputing.com> wrote: > >> > >> The CPPC driver reads delivered and reference counters using cpc_read > >> one at a time. This leads to inaccurate measurement of CPU frequency > >> discussed in [1]. If the firmware indicates that both the registers are > >> in the FFH interface the kernel can read the registers together in a > >> single IPI. This has two benefits: > >> 1. Reduces the number of IPIs needed to read the two registers > >> 2. The two registers will be read in close proximity resulting in more > >> accurate CPU frequency measurement > >> > >> [1]: https://lore.kernel.org/all/20230328193846.8757-1-yang@os.amperecomputing.com/ > >> > >> Signed-off-by: Vanshidhar Konda <vanshikonda@os.amperecomputing.com> > >> --- > >> arch/arm64/kernel/topology.c | 37 ++++++++++++++++++++++++++++++++++++ > >> drivers/acpi/cppc_acpi.c | 32 +++++++++++++++++++++++++++---- > >> include/acpi/cppc_acpi.h | 13 +++++++++++++ > >> 3 files changed, 78 insertions(+), 4 deletions(-) > >> > >> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c > >> index 8905eb0c681f..8207565f43ee 100644 > >> --- a/arch/arm64/kernel/topology.c > >> +++ b/arch/arm64/kernel/topology.c > >> @@ -421,6 +421,43 @@ int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val) > >> return ret; > >> } > >> > >> +static void cpc_update_freq_counters(void *info) > >> +{ > >> + update_freq_counters_refs(); > >> +} > >> + > >> +int cpc_read_regs_ffh(int cpu, struct ffh_cpc_reg_values *ffh_regs) > >> +{ > >> + struct amu_counters *ctrs = per_cpu_ptr(&cpu_samples, cpu); > >> + int idx; > >> + > >> + if (!cpc_ffh_supported() || !freq_counters_valid(cpu)) > >> + return -EOPNOTSUPP; > >> + > >> + if (WARN_ON_ONCE(irqs_disabled())) > >> + return -EPERM; > >> + > >> + if (!idle_cpu(cpu)) > >> + smp_call_function_single(cpu, cpc_update_freq_counters, NULL, 1); > >> + > >> + for (idx = 0; idx < MAX_NUM_CPC_REGS_FFH; idx++) { > >> + > >> + if (!ffh_regs->regs[idx].reg) > >> + continue; > >> + > >> + switch ((u64)(ffh_regs->regs[idx].reg->address)) { > >> + case 0x0: > >> + ffh_regs->regs[idx].value = ctrs->core_cnt; > >> + break; > >> + case 0x1: > >> + ffh_regs->regs[idx].value = ctrs->const_cnt; > >> + break; > >> + } > >> + } > >> + > >> + return 0; > >> +} > >> + > >> int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) > >> { > >> return -EOPNOTSUPP; > >> diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c > >> index d155a86a8614..55ffb1915e4f 100644 > >> --- a/drivers/acpi/cppc_acpi.c > >> +++ b/drivers/acpi/cppc_acpi.c > >> @@ -113,6 +113,10 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr); > >> (cpc)->cpc_entry.reg.space_id == \ > >> ACPI_ADR_SPACE_SYSTEM_IO) > >> > >> +#define CPC_IN_FFH(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \ > >> + (cpc)->cpc_entry.reg.space_id == \ > >> + ACPI_ADR_SPACE_FIXED_HARDWARE) > >> + > >> /* Evaluates to True if reg is a NULL register descriptor */ > >> #define IS_NULL_REG(reg) ((reg)->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY && \ > >> (reg)->address == 0 && \ > >> @@ -974,6 +978,11 @@ int __weak cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) > >> return -ENOTSUPP; > >> } > >> > >> +int __weak cpc_read_regs_ffh(int cpu, struct ffh_cpc_reg_values *regs) > >> +{ > >> + return -ENOTSUPP; > >> +} > > > >This might return a bool value. > > > >Is there any case in which the caller would handle different error > >codes differently? > > > I kept this similar to cpc_read_ffh(). I didn't see any usage of the error > codes. The return value of cpc_read_ffh() is returned only from cpc_read(), > but I didn't see anyone consuming the return value of cpc_read(). > Additionally, checkpatch complains about using -ENOTSUPP and suggests > replacing it with -EOPNOTSUPP. Does it make sense to update > cpc_read/write_ffh() as well to switch to return type bool? Probably, but in a separate patch.
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 8905eb0c681f..8207565f43ee 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -421,6 +421,43 @@ int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val) return ret; } +static void cpc_update_freq_counters(void *info) +{ + update_freq_counters_refs(); +} + +int cpc_read_regs_ffh(int cpu, struct ffh_cpc_reg_values *ffh_regs) +{ + struct amu_counters *ctrs = per_cpu_ptr(&cpu_samples, cpu); + int idx; + + if (!cpc_ffh_supported() || !freq_counters_valid(cpu)) + return -EOPNOTSUPP; + + if (WARN_ON_ONCE(irqs_disabled())) + return -EPERM; + + if (!idle_cpu(cpu)) + smp_call_function_single(cpu, cpc_update_freq_counters, NULL, 1); + + for (idx = 0; idx < MAX_NUM_CPC_REGS_FFH; idx++) { + + if (!ffh_regs->regs[idx].reg) + continue; + + switch ((u64)(ffh_regs->regs[idx].reg->address)) { + case 0x0: + ffh_regs->regs[idx].value = ctrs->core_cnt; + break; + case 0x1: + ffh_regs->regs[idx].value = ctrs->const_cnt; + break; + } + } + + return 0; +} + int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) { return -EOPNOTSUPP; diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index d155a86a8614..55ffb1915e4f 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -113,6 +113,10 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr); (cpc)->cpc_entry.reg.space_id == \ ACPI_ADR_SPACE_SYSTEM_IO) +#define CPC_IN_FFH(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \ + (cpc)->cpc_entry.reg.space_id == \ + ACPI_ADR_SPACE_FIXED_HARDWARE) + /* Evaluates to True if reg is a NULL register descriptor */ #define IS_NULL_REG(reg) ((reg)->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY && \ (reg)->address == 0 && \ @@ -974,6 +978,11 @@ int __weak cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) return -ENOTSUPP; } +int __weak cpc_read_regs_ffh(int cpu, struct ffh_cpc_reg_values *regs) +{ + return -ENOTSUPP; +} + /* * Since cpc_read and cpc_write are called while holding pcc_lock, it should be * as fast as possible. We have already mapped the PCC subspace during init, so @@ -1317,7 +1326,7 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); struct cppc_pcc_data *pcc_ss_data = NULL; u64 delivered, reference, ref_perf, ctr_wrap_time; - int ret = 0, regs_in_pcc = 0; + int ret = 0, regs_in_pcc = 0, regs_read_in_ffh = 0; if (!cpc_desc) { pr_debug("No CPC descriptor for CPU:%d\n", cpunum); @@ -1353,8 +1362,23 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) } } - cpc_read(cpunum, delivered_reg, &delivered); - cpc_read(cpunum, reference_reg, &reference); + if (CPC_IN_FFH(delivered_reg) && CPC_IN_FFH(reference_reg)) { + struct ffh_cpc_reg_values ffh_regs; + + ffh_regs.regs[0].reg = &(delivered_reg->cpc_entry.reg); + ffh_regs.regs[1].reg = &(reference_reg->cpc_entry.reg); + ret = cpc_read_regs_ffh(cpunum, &ffh_regs); + if (!ret) { + delivered = ffh_regs.regs[0].value; + reference = ffh_regs.regs[1].value; + regs_read_in_ffh = 1; + } + } + + if (!regs_read_in_ffh) { + cpc_read(cpunum, delivered_reg, &delivered); + cpc_read(cpunum, reference_reg, &reference); + } cpc_read(cpunum, ref_perf_reg, &ref_perf); /* @@ -1366,7 +1390,7 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) if (CPC_SUPPORTED(ctr_wrap_reg)) cpc_read(cpunum, ctr_wrap_reg, &ctr_wrap_time); - if (!delivered || !reference || !ref_perf) { + if (!delivered || !reference || !ref_perf) { ret = -EFAULT; goto out_err; } diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 3a0995f8bce8..0da614a50edd 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -137,6 +137,18 @@ struct cppc_cpudata { }; #ifdef CONFIG_ACPI_CPPC_LIB + +#define MAX_NUM_CPC_REGS_FFH 2 + +struct ffh_cpc_reg { + struct cpc_reg *reg; + u64 value; +}; + +struct ffh_cpc_reg_values { + struct ffh_cpc_reg regs[MAX_NUM_CPC_REGS_FFH]; +}; + extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); @@ -153,6 +165,7 @@ extern unsigned int cppc_get_transition_latency(int cpu); extern bool cpc_ffh_supported(void); extern bool cpc_supported_by_cpu(void); extern int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val); +extern int cpc_read_regs_ffh(int cpu, struct ffh_cpc_reg_values *regs); extern int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val); extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf); extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable);
The CPPC driver reads delivered and reference counters using cpc_read one at a time. This leads to inaccurate measurement of CPU frequency discussed in [1]. If the firmware indicates that both the registers are in the FFH interface the kernel can read the registers together in a single IPI. This has two benefits: 1. Reduces the number of IPIs needed to read the two registers 2. The two registers will be read in close proximity resulting in more accurate CPU frequency measurement [1]: https://lore.kernel.org/all/20230328193846.8757-1-yang@os.amperecomputing.com/ Signed-off-by: Vanshidhar Konda <vanshikonda@os.amperecomputing.com> --- arch/arm64/kernel/topology.c | 37 ++++++++++++++++++++++++++++++++++++ drivers/acpi/cppc_acpi.c | 32 +++++++++++++++++++++++++++---- include/acpi/cppc_acpi.h | 13 +++++++++++++ 3 files changed, 78 insertions(+), 4 deletions(-)