Message ID | 1580125469-23887-4-git-send-email-andrew.murray@arm.com (mailing list archive)
---|---
State | New, archived
Series | arm64: perf: Add support for ARMv8.5-PMU 64-bit counters
On 2020-01-27 11:44, Andrew Murray wrote: > At present ARMv8 event counters are limited to 32-bits, though by > using the CHAIN event it's possible to combine adjacent counters to > achieve 64-bits. The perf config1:0 bit can be set to use such a > configuration. > > With the introduction of ARMv8.5-PMU support, all event counters can > now be used as 64-bit counters. > > Let's enable 64-bit event counters where support exists. Unless the > user sets config1:0 we will adjust the counter value such that it > overflows upon 32-bit overflow. This follows the same behaviour as > the cycle counter which has always been (and remains) 64-bits. > > Signed-off-by: Andrew Murray <andrew.murray@arm.com> > Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com> > --- > arch/arm64/include/asm/perf_event.h | 3 +- > arch/arm64/include/asm/sysreg.h | 1 + > arch/arm64/kernel/perf_event.c | 86 > +++++++++++++++++++++++++++++-------- > include/linux/perf/arm_pmu.h | 1 + > 4 files changed, 73 insertions(+), 18 deletions(-) > > diff --git a/arch/arm64/include/asm/perf_event.h > b/arch/arm64/include/asm/perf_event.h > index 2bdbc79..e7765b6 100644 > --- a/arch/arm64/include/asm/perf_event.h > +++ b/arch/arm64/include/asm/perf_event.h > @@ -176,9 +176,10 @@ > #define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */ > #define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive > debug*/ > #define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter > */ > +#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */ > #define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ > #define ARMV8_PMU_PMCR_N_MASK 0x1f > -#define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */ > +#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ > > /* > * PMOVSR: counters overflow flag status reg > diff --git a/arch/arm64/include/asm/sysreg.h > b/arch/arm64/include/asm/sysreg.h > index 1009878..30c1e18 100644 > --- a/arch/arm64/include/asm/sysreg.h > +++ b/arch/arm64/include/asm/sysreg.h > @@ -675,6 +675,7 @@ > #define ID_DFR0_PERFMON_SHIFT 24 > > #define ID_DFR0_EL1_PMUVER_8_1 4 > +#define ID_DFR0_EL1_PMUVER_8_4 5 This doesn't seem right, see below. > #define ID_AA64DFR0_EL1_PMUVER_8_1 4 > > #define ID_ISAR5_RDM_SHIFT 24 > diff --git a/arch/arm64/kernel/perf_event.c > b/arch/arm64/kernel/perf_event.c > index e40b656..4e27f90 100644 > --- a/arch/arm64/kernel/perf_event.c > +++ b/arch/arm64/kernel/perf_event.c > @@ -285,6 +285,17 @@ static struct attribute_group > armv8_pmuv3_format_attr_group = { > #define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \ > (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) > > + > +/* > + * We unconditionally enable ARMv8.5-PMU long event counter support > + * (64-bit events) where supported. Indicate if this arm_pmu has long > + * event counter support. > + */ > +static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) > +{ > + return (cpu_pmu->pmuver > ID_DFR0_EL1_PMUVER_8_4); Isn't the ID_DFR0 prefix for AArch32? Although this doesn't change much the final result (the values happen to be the same on both architectures), it is nonetheless a bit confusing. 
> +} > + > /* > * We must chain two programmable counters for 64 bit events, > * except when we have allocated the 64bit cycle counter (for CPU > @@ -294,9 +305,11 @@ static struct attribute_group > armv8_pmuv3_format_attr_group = { > static inline bool armv8pmu_event_is_chained(struct perf_event *event) > { > int idx = event->hw.idx; > + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); > > return !WARN_ON(idx < 0) && > armv8pmu_event_is_64bit(event) && > + !armv8pmu_has_long_event(cpu_pmu) && > (idx != ARMV8_IDX_CYCLE_COUNTER); > } > > @@ -345,7 +358,7 @@ static inline void armv8pmu_select_counter(int idx) > isb(); > } > > -static inline u32 armv8pmu_read_evcntr(int idx) > +static inline u64 armv8pmu_read_evcntr(int idx) > { > armv8pmu_select_counter(idx); > return read_sysreg(pmxevcntr_el0); > @@ -362,6 +375,44 @@ static inline u64 armv8pmu_read_hw_counter(struct > perf_event *event) > return val; > } > > +/* > + * The cycle counter is always a 64-bit counter. When > ARMV8_PMU_PMCR_LP > + * is set the event counters also become 64-bit counters. Unless the > + * user has requested a long counter (attr.config1) then we want to > + * interrupt upon 32-bit overflow - we achieve this by applying a > bias. > + */ > +static bool armv8pmu_event_needs_bias(struct perf_event *event) > +{ > + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); > + struct hw_perf_event *hwc = &event->hw; > + int idx = hwc->idx; > + > + if (armv8pmu_event_is_64bit(event)) > + return false; > + > + if (armv8pmu_has_long_event(cpu_pmu) || > + idx == ARMV8_IDX_CYCLE_COUNTER) > + return true; > + > + return false; > +} > + > +static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 > value) > +{ > + if (armv8pmu_event_needs_bias(event)) > + value |= GENMASK(63, 32); > + > + return value; > +} > + > +static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 > value) > +{ > + if (armv8pmu_event_needs_bias(event)) > + value &= ~GENMASK(63, 32); > + > + return value; > +} > + > static u64 armv8pmu_read_counter(struct perf_event *event) > { > struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); > @@ -377,10 +428,10 @@ static u64 armv8pmu_read_counter(struct > perf_event *event) > else > value = armv8pmu_read_hw_counter(event); > > - return value; > + return armv8pmu_unbias_long_counter(event, value); > } > > -static inline void armv8pmu_write_evcntr(int idx, u32 value) > +static inline void armv8pmu_write_evcntr(int idx, u64 value) > { > armv8pmu_select_counter(idx); > write_sysreg(value, pmxevcntr_el0); > @@ -405,20 +456,14 @@ static void armv8pmu_write_counter(struct > perf_event *event, u64 value) > struct hw_perf_event *hwc = &event->hw; > int idx = hwc->idx; > > + value = armv8pmu_bias_long_counter(event, value); > + > if (!armv8pmu_counter_valid(cpu_pmu, idx)) > pr_err("CPU%u writing wrong counter %d\n", > smp_processor_id(), idx); > - else if (idx == ARMV8_IDX_CYCLE_COUNTER) { > - /* > - * The cycles counter is really a 64-bit counter. > - * When treating it as a 32-bit counter, we only count > - * the lower 32 bits, and set the upper 32-bits so that > - * we get an interrupt upon 32-bit overflow. 
> - */ > - if (!armv8pmu_event_is_64bit(event)) > - value |= 0xffffffff00000000ULL; > + else if (idx == ARMV8_IDX_CYCLE_COUNTER) > write_sysreg(value, pmccntr_el0); > - } else > + else > armv8pmu_write_hw_counter(event, value); > } > > @@ -743,7 +788,8 @@ static int armv8pmu_get_event_idx(struct > pmu_hw_events *cpuc, > /* > * Otherwise use events counters > */ > - if (armv8pmu_event_is_64bit(event)) > + if (armv8pmu_event_is_64bit(event) && > + !armv8pmu_has_long_event(cpu_pmu)) > return armv8pmu_get_chain_idx(cpuc, cpu_pmu); > else > return armv8pmu_get_single_idx(cpuc, cpu_pmu); > @@ -815,7 +861,7 @@ static int armv8pmu_filter_match(struct perf_event > *event) > static void armv8pmu_reset(void *info) > { > struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; > - u32 idx, nb_cnt = cpu_pmu->num_events; > + u32 idx, pmcr, nb_cnt = cpu_pmu->num_events; > > /* The counter and interrupt enable registers are unknown at reset. > */ > for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { > @@ -830,8 +876,13 @@ static void armv8pmu_reset(void *info) > * Initialize & Reset PMNC. Request overflow interrupt for > * 64 bit cycle counter but cheat in armv8pmu_write_counter(). > */ > - armv8pmu_pmcr_write(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | > - ARMV8_PMU_PMCR_LC); > + pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC; > + > + /* Enable long event counter support where available */ > + if (armv8pmu_has_long_event(cpu_pmu)) > + pmcr |= ARMV8_PMU_PMCR_LP; > + > + armv8pmu_pmcr_write(pmcr); > } > > static int __armv8_pmuv3_map_event(struct perf_event *event, > @@ -914,6 +965,7 @@ static void __armv8pmu_probe_pmu(void *info) > if (pmuver == 0xf || pmuver == 0) > return; > > + cpu_pmu->pmuver = pmuver; And pmuver comes from ID_AA64DFR0_PMUVER_SHIFT, so the above really needs fixing in the name of consistency. > probe->present = true; > > /* Read the nb of CNTx counters supported from PMNC */ > diff --git a/include/linux/perf/arm_pmu.h > b/include/linux/perf/arm_pmu.h > index 71f525a..5b616dd 100644 > --- a/include/linux/perf/arm_pmu.h > +++ b/include/linux/perf/arm_pmu.h > @@ -80,6 +80,7 @@ struct arm_pmu { > struct pmu pmu; > cpumask_t supported_cpus; > char *name; > + int pmuver; > irqreturn_t (*handle_irq)(struct arm_pmu *pmu); > void (*enable)(struct perf_event *event); > void (*disable)(struct perf_event *event); Thanks, M.
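For readers following along, here is a minimal standalone illustration (plain userspace C with made-up numbers, not kernel code) of why OR-ing GENMASK(63, 32) into the programmed value makes a 64-bit counter overflow, and hence raise its interrupt, with a 32-bit period:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t period = 1000; /* want an overflow after this many events */

        /* 32-bit programming value: wraps after 'period' increments */
        uint64_t start = (uint64_t)(0x100000000ULL - period);

        /* bias: force the upper 32 bits so the full 64-bit counter also
         * wraps (and signals overflow) at the same point, mirroring the
         * value |= GENMASK(63, 32) in armv8pmu_bias_long_counter() */
        uint64_t biased = start | 0xffffffff00000000ULL;

        printf("programmed value: %#llx\n", (unsigned long long)biased);
        printf("after %u events: %#llx (wrapped to zero -> overflow IRQ)\n",
               period, (unsigned long long)(biased + period));
        return 0;
}

On the read path the patch does the inverse: armv8pmu_unbias_long_counter() masks the top 32 bits off again, so perf only ever sees the 32-bit value it asked for.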
On Mon, Jan 27, 2020 at 12:32:22PM +0000, Marc Zyngier wrote: > On 2020-01-27 11:44, Andrew Murray wrote: > > At present ARMv8 event counters are limited to 32-bits, though by > > using the CHAIN event it's possible to combine adjacent counters to > > achieve 64-bits. The perf config1:0 bit can be set to use such a > > configuration. > > > > With the introduction of ARMv8.5-PMU support, all event counters can > > now be used as 64-bit counters. > > > > Let's enable 64-bit event counters where support exists. Unless the > > user sets config1:0 we will adjust the counter value such that it > > overflows upon 32-bit overflow. This follows the same behaviour as > > the cycle counter which has always been (and remains) 64-bits. > > > > Signed-off-by: Andrew Murray <andrew.murray@arm.com> > > Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com> > > --- > > arch/arm64/include/asm/perf_event.h | 3 +- > > arch/arm64/include/asm/sysreg.h | 1 + > > arch/arm64/kernel/perf_event.c | 86 > > +++++++++++++++++++++++++++++-------- > > include/linux/perf/arm_pmu.h | 1 + > > 4 files changed, 73 insertions(+), 18 deletions(-) > > > > diff --git a/arch/arm64/include/asm/perf_event.h > > b/arch/arm64/include/asm/perf_event.h > > index 2bdbc79..e7765b6 100644 > > --- a/arch/arm64/include/asm/perf_event.h > > +++ b/arch/arm64/include/asm/perf_event.h > > @@ -176,9 +176,10 @@ > > #define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */ > > #define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive > > debug*/ > > #define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter > > */ > > +#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */ > > #define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ > > #define ARMV8_PMU_PMCR_N_MASK 0x1f > > -#define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */ > > +#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ > > > > /* > > * PMOVSR: counters overflow flag status reg > > diff --git a/arch/arm64/include/asm/sysreg.h > > b/arch/arm64/include/asm/sysreg.h > > index 1009878..30c1e18 100644 > > --- a/arch/arm64/include/asm/sysreg.h > > +++ b/arch/arm64/include/asm/sysreg.h > > @@ -675,6 +675,7 @@ > > #define ID_DFR0_PERFMON_SHIFT 24 > > > > #define ID_DFR0_EL1_PMUVER_8_1 4 > > +#define ID_DFR0_EL1_PMUVER_8_4 5 > > This doesn't seem right, see below. Yes you're right - I'll rename this to ID_AA64DFR0_EL1_PMUVER_8_4 > > > #define ID_AA64DFR0_EL1_PMUVER_8_1 4 > > > > #define ID_ISAR5_RDM_SHIFT 24 > > diff --git a/arch/arm64/kernel/perf_event.c > > b/arch/arm64/kernel/perf_event.c > > index e40b656..4e27f90 100644 > > --- a/arch/arm64/kernel/perf_event.c > > +++ b/arch/arm64/kernel/perf_event.c > > @@ -285,6 +285,17 @@ static struct attribute_group > > armv8_pmuv3_format_attr_group = { > > #define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \ > > (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) > > > > + > > +/* > > + * We unconditionally enable ARMv8.5-PMU long event counter support > > + * (64-bit events) where supported. Indicate if this arm_pmu has long > > + * event counter support. > > + */ > > +static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) > > +{ > > + return (cpu_pmu->pmuver > ID_DFR0_EL1_PMUVER_8_4); > > Isn't the ID_DFR0 prefix for AArch32? Although this doesn't change much > the final result (the values happen to be the same on both architectures), > it is nonetheless a bit confusing. Yes - ID_DFR0 is the AArch32 register relating to the AArch32 state, that's mapped onto the AArch64 ID_DFR0_EL1 register. 
The ID_AA64DFR0_EL1 register relates to the AArch64 state. > > > +} > > + > > /* > > * We must chain two programmable counters for 64 bit events, > > * except when we have allocated the 64bit cycle counter (for CPU > > @@ -294,9 +305,11 @@ static struct attribute_group > > armv8_pmuv3_format_attr_group = { > > static inline bool armv8pmu_event_is_chained(struct perf_event *event) > > { > > int idx = event->hw.idx; > > + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); > > > > return !WARN_ON(idx < 0) && > > armv8pmu_event_is_64bit(event) && > > + !armv8pmu_has_long_event(cpu_pmu) && > > (idx != ARMV8_IDX_CYCLE_COUNTER); > > } > > > > @@ -345,7 +358,7 @@ static inline void armv8pmu_select_counter(int idx) > > isb(); > > } > > > > -static inline u32 armv8pmu_read_evcntr(int idx) > > +static inline u64 armv8pmu_read_evcntr(int idx) > > { > > armv8pmu_select_counter(idx); > > return read_sysreg(pmxevcntr_el0); > > @@ -362,6 +375,44 @@ static inline u64 armv8pmu_read_hw_counter(struct > > perf_event *event) > > return val; > > } > > > > +/* > > + * The cycle counter is always a 64-bit counter. When ARMV8_PMU_PMCR_LP > > + * is set the event counters also become 64-bit counters. Unless the > > + * user has requested a long counter (attr.config1) then we want to > > + * interrupt upon 32-bit overflow - we achieve this by applying a bias. > > + */ > > +static bool armv8pmu_event_needs_bias(struct perf_event *event) > > +{ > > + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); > > + struct hw_perf_event *hwc = &event->hw; > > + int idx = hwc->idx; > > + > > + if (armv8pmu_event_is_64bit(event)) > > + return false; > > + > > + if (armv8pmu_has_long_event(cpu_pmu) || > > + idx == ARMV8_IDX_CYCLE_COUNTER) > > + return true; > > + > > + return false; > > +} > > + > > +static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 > > value) > > +{ > > + if (armv8pmu_event_needs_bias(event)) > > + value |= GENMASK(63, 32); > > + > > + return value; > > +} > > + > > +static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 > > value) > > +{ > > + if (armv8pmu_event_needs_bias(event)) > > + value &= ~GENMASK(63, 32); > > + > > + return value; > > +} > > + > > static u64 armv8pmu_read_counter(struct perf_event *event) > > { > > struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); > > @@ -377,10 +428,10 @@ static u64 armv8pmu_read_counter(struct perf_event > > *event) > > else > > value = armv8pmu_read_hw_counter(event); > > > > - return value; > > + return armv8pmu_unbias_long_counter(event, value); > > } > > > > -static inline void armv8pmu_write_evcntr(int idx, u32 value) > > +static inline void armv8pmu_write_evcntr(int idx, u64 value) > > { > > armv8pmu_select_counter(idx); > > write_sysreg(value, pmxevcntr_el0); > > @@ -405,20 +456,14 @@ static void armv8pmu_write_counter(struct > > perf_event *event, u64 value) > > struct hw_perf_event *hwc = &event->hw; > > int idx = hwc->idx; > > > > + value = armv8pmu_bias_long_counter(event, value); > > + > > if (!armv8pmu_counter_valid(cpu_pmu, idx)) > > pr_err("CPU%u writing wrong counter %d\n", > > smp_processor_id(), idx); > > - else if (idx == ARMV8_IDX_CYCLE_COUNTER) { > > - /* > > - * The cycles counter is really a 64-bit counter. > > - * When treating it as a 32-bit counter, we only count > > - * the lower 32 bits, and set the upper 32-bits so that > > - * we get an interrupt upon 32-bit overflow. 
> > - */ > > - if (!armv8pmu_event_is_64bit(event)) > > - value |= 0xffffffff00000000ULL; > > + else if (idx == ARMV8_IDX_CYCLE_COUNTER) > > write_sysreg(value, pmccntr_el0); > > - } else > > + else > > armv8pmu_write_hw_counter(event, value); > > } > > > > @@ -743,7 +788,8 @@ static int armv8pmu_get_event_idx(struct > > pmu_hw_events *cpuc, > > /* > > * Otherwise use events counters > > */ > > - if (armv8pmu_event_is_64bit(event)) > > + if (armv8pmu_event_is_64bit(event) && > > + !armv8pmu_has_long_event(cpu_pmu)) > > return armv8pmu_get_chain_idx(cpuc, cpu_pmu); > > else > > return armv8pmu_get_single_idx(cpuc, cpu_pmu); > > @@ -815,7 +861,7 @@ static int armv8pmu_filter_match(struct perf_event > > *event) > > static void armv8pmu_reset(void *info) > > { > > struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; > > - u32 idx, nb_cnt = cpu_pmu->num_events; > > + u32 idx, pmcr, nb_cnt = cpu_pmu->num_events; > > > > /* The counter and interrupt enable registers are unknown at reset. */ > > for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { > > @@ -830,8 +876,13 @@ static void armv8pmu_reset(void *info) > > * Initialize & Reset PMNC. Request overflow interrupt for > > * 64 bit cycle counter but cheat in armv8pmu_write_counter(). > > */ > > - armv8pmu_pmcr_write(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | > > - ARMV8_PMU_PMCR_LC); > > + pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC; > > + > > + /* Enable long event counter support where available */ > > + if (armv8pmu_has_long_event(cpu_pmu)) > > + pmcr |= ARMV8_PMU_PMCR_LP; > > + > > + armv8pmu_pmcr_write(pmcr); > > } > > > > static int __armv8_pmuv3_map_event(struct perf_event *event, > > @@ -914,6 +965,7 @@ static void __armv8pmu_probe_pmu(void *info) > > if (pmuver == 0xf || pmuver == 0) > > return; > > > > + cpu_pmu->pmuver = pmuver; > > And pmuver comes from ID_AA64DFR0_PMUVER_SHIFT, so the above really > needs fixing in the name of consistency. > No problem, I'll respin next week. Thanks, Andrew Murray > > probe->present = true; > > > > /* Read the nb of CNTx counters supported from PMNC */ > > diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h > > index 71f525a..5b616dd 100644 > > --- a/include/linux/perf/arm_pmu.h > > +++ b/include/linux/perf/arm_pmu.h > > @@ -80,6 +80,7 @@ struct arm_pmu { > > struct pmu pmu; > > cpumask_t supported_cpus; > > char *name; > > + int pmuver; > > irqreturn_t (*handle_irq)(struct arm_pmu *pmu); > > void (*enable)(struct perf_event *event); > > void (*disable)(struct perf_event *event); > > Thanks, > > M. > -- > Jazz is not dead. It just smells funny... > _______________________________________________ > kvmarm mailing list > kvmarm@lists.cs.columbia.edu > https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
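As background to the ID_DFR0 vs ID_AA64DFR0 naming point above: both registers carry a 4-bit PMU version field, and, as Marc notes, the numeric encodings happen to line up across the AArch32 and AArch64 views. A small standalone sketch of the field extraction with hypothetical register values; the 24-bit shift matches the ID_DFR0_PERFMON_SHIFT definition quoted in the patch, and ID_AA64DFR0_EL1.PMUVer sits at bits [11:8]:

#include <stdint.h>
#include <stdio.h>

/* Field positions as discussed in the thread (illustration only) */
#define AA64DFR0_PMUVER_SHIFT 8
#define DFR0_PERFMON_SHIFT    24

static unsigned int extract4(uint64_t reg, unsigned int shift)
{
        return (unsigned int)((reg >> shift) & 0xf);
}

int main(void)
{
        /* made-up register values reporting version 6, i.e. beyond 8.4 */
        uint64_t id_aa64dfr0 = (uint64_t)6 << AA64DFR0_PMUVER_SHIFT;
        uint64_t id_dfr0 = (uint64_t)6 << DFR0_PERFMON_SHIFT;

        unsigned int pmuver = extract4(id_aa64dfr0, AA64DFR0_PMUVER_SHIFT);
        unsigned int perfmon = extract4(id_dfr0, DFR0_PERFMON_SHIFT);

        /* same encoding in both views (4 = ARMv8.1, 5 = ARMv8.4, ...);
         * the patch's armv8pmu_has_long_event() check is pmuver > 5 */
        printf("PMUVer %u, PerfMon %u -> long counters %savailable\n",
               pmuver, perfmon, pmuver > 5 ? "" : "not ");
        return 0;
}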
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 2bdbc79..e7765b6 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -176,9 +176,10 @@
 #define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */
 #define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
 #define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */
+#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */
 #define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */
 #define ARMV8_PMU_PMCR_N_MASK 0x1f
-#define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */
+#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */
 
 /*
  * PMOVSR: counters overflow flag status reg
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 1009878..30c1e18 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -675,6 +675,7 @@
 #define ID_DFR0_PERFMON_SHIFT 24
 
 #define ID_DFR0_EL1_PMUVER_8_1 4
+#define ID_DFR0_EL1_PMUVER_8_4 5
 #define ID_AA64DFR0_EL1_PMUVER_8_1 4
 
 #define ID_ISAR5_RDM_SHIFT 24
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index e40b656..4e27f90 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -285,6 +285,17 @@ static struct attribute_group armv8_pmuv3_format_attr_group = {
 #define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \
         (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
 
+
+/*
+ * We unconditionally enable ARMv8.5-PMU long event counter support
+ * (64-bit events) where supported. Indicate if this arm_pmu has long
+ * event counter support.
+ */
+static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu)
+{
+        return (cpu_pmu->pmuver > ID_DFR0_EL1_PMUVER_8_4);
+}
+
 /*
  * We must chain two programmable counters for 64 bit events,
  * except when we have allocated the 64bit cycle counter (for CPU
@@ -294,9 +305,11 @@ static struct attribute_group armv8_pmuv3_format_attr_group = {
 static inline bool armv8pmu_event_is_chained(struct perf_event *event)
 {
         int idx = event->hw.idx;
+        struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 
         return !WARN_ON(idx < 0) &&
                armv8pmu_event_is_64bit(event) &&
+               !armv8pmu_has_long_event(cpu_pmu) &&
                (idx != ARMV8_IDX_CYCLE_COUNTER);
 }
 
@@ -345,7 +358,7 @@ static inline void armv8pmu_select_counter(int idx)
         isb();
 }
 
-static inline u32 armv8pmu_read_evcntr(int idx)
+static inline u64 armv8pmu_read_evcntr(int idx)
 {
         armv8pmu_select_counter(idx);
         return read_sysreg(pmxevcntr_el0);
@@ -362,6 +375,44 @@ static inline u64 armv8pmu_read_hw_counter(struct perf_event *event)
         return val;
 }
 
+/*
+ * The cycle counter is always a 64-bit counter. When ARMV8_PMU_PMCR_LP
+ * is set the event counters also become 64-bit counters. Unless the
+ * user has requested a long counter (attr.config1) then we want to
+ * interrupt upon 32-bit overflow - we achieve this by applying a bias.
+ */
+static bool armv8pmu_event_needs_bias(struct perf_event *event)
+{
+        struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+        struct hw_perf_event *hwc = &event->hw;
+        int idx = hwc->idx;
+
+        if (armv8pmu_event_is_64bit(event))
+                return false;
+
+        if (armv8pmu_has_long_event(cpu_pmu) ||
+            idx == ARMV8_IDX_CYCLE_COUNTER)
+                return true;
+
+        return false;
+}
+
+static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value)
+{
+        if (armv8pmu_event_needs_bias(event))
+                value |= GENMASK(63, 32);
+
+        return value;
+}
+
+static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value)
+{
+        if (armv8pmu_event_needs_bias(event))
+                value &= ~GENMASK(63, 32);
+
+        return value;
+}
+
 static u64 armv8pmu_read_counter(struct perf_event *event)
 {
         struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
@@ -377,10 +428,10 @@ static u64 armv8pmu_read_counter(struct perf_event *event)
         else
                 value = armv8pmu_read_hw_counter(event);
 
-        return value;
+        return armv8pmu_unbias_long_counter(event, value);
 }
 
-static inline void armv8pmu_write_evcntr(int idx, u32 value)
+static inline void armv8pmu_write_evcntr(int idx, u64 value)
 {
         armv8pmu_select_counter(idx);
         write_sysreg(value, pmxevcntr_el0);
@@ -405,20 +456,14 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value)
         struct hw_perf_event *hwc = &event->hw;
         int idx = hwc->idx;
 
+        value = armv8pmu_bias_long_counter(event, value);
+
         if (!armv8pmu_counter_valid(cpu_pmu, idx))
                 pr_err("CPU%u writing wrong counter %d\n",
                         smp_processor_id(), idx);
-        else if (idx == ARMV8_IDX_CYCLE_COUNTER) {
-                /*
-                 * The cycles counter is really a 64-bit counter.
-                 * When treating it as a 32-bit counter, we only count
-                 * the lower 32 bits, and set the upper 32-bits so that
-                 * we get an interrupt upon 32-bit overflow.
-                 */
-                if (!armv8pmu_event_is_64bit(event))
-                        value |= 0xffffffff00000000ULL;
+        else if (idx == ARMV8_IDX_CYCLE_COUNTER)
                 write_sysreg(value, pmccntr_el0);
-        } else
+        else
                 armv8pmu_write_hw_counter(event, value);
 }
 
@@ -743,7 +788,8 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
         /*
          * Otherwise use events counters
          */
-        if (armv8pmu_event_is_64bit(event))
+        if (armv8pmu_event_is_64bit(event) &&
+            !armv8pmu_has_long_event(cpu_pmu))
                 return armv8pmu_get_chain_idx(cpuc, cpu_pmu);
         else
                 return armv8pmu_get_single_idx(cpuc, cpu_pmu);
@@ -815,7 +861,7 @@ static int armv8pmu_filter_match(struct perf_event *event)
 static void armv8pmu_reset(void *info)
 {
         struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
-        u32 idx, nb_cnt = cpu_pmu->num_events;
+        u32 idx, pmcr, nb_cnt = cpu_pmu->num_events;
 
         /* The counter and interrupt enable registers are unknown at reset. */
         for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
@@ -830,8 +876,13 @@ static void armv8pmu_reset(void *info)
          * Initialize & Reset PMNC. Request overflow interrupt for
          * 64 bit cycle counter but cheat in armv8pmu_write_counter().
          */
-        armv8pmu_pmcr_write(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C |
-                            ARMV8_PMU_PMCR_LC);
+        pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC;
+
+        /* Enable long event counter support where available */
+        if (armv8pmu_has_long_event(cpu_pmu))
+                pmcr |= ARMV8_PMU_PMCR_LP;
+
+        armv8pmu_pmcr_write(pmcr);
 }
 
 static int __armv8_pmuv3_map_event(struct perf_event *event,
@@ -914,6 +965,7 @@ static void __armv8pmu_probe_pmu(void *info)
         if (pmuver == 0xf || pmuver == 0)
                 return;
 
+        cpu_pmu->pmuver = pmuver;
         probe->present = true;
 
         /* Read the nb of CNTx counters supported from PMNC */
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 71f525a..5b616dd 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -80,6 +80,7 @@ struct arm_pmu {
         struct pmu pmu;
         cpumask_t supported_cpus;
         char *name;
+        int pmuver;
         irqreturn_t (*handle_irq)(struct arm_pmu *pmu);
         void (*enable)(struct perf_event *event);
         void (*disable)(struct perf_event *event);
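For completeness, the config1:0 bit referred to in the commit message is what the driver exposes to userspace as the "long" format attribute, so something like perf stat -e armv8_pmuv3/inst_retired,long/ (exact PMU and event names vary by system) requests a 64-bit event counter. A rough perf_event_open() sketch of the same request; the event choice and error handling are illustrative only:

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_INSTRUCTIONS;
        attr.config1 = 1;               /* config1:0 -> "long" 64-bit counter */
        attr.disabled = 1;
        attr.exclude_kernel = 1;

        fd = (int)syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }

        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... workload under measurement ... */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("instructions retired: %llu\n", (unsigned long long)count);

        close(fd);
        return 0;
}

With this patch applied, an ARMv8.5-PMU part satisfies such a request with a single long counter; on older PMUs the driver still falls back to chaining two adjacent 32-bit counters.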