Message ID | 20230615133239.442736-10-anshuman.khandual@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | arm64/perf: Enable branch stack sampling | expand |
On Thu, Jun 15, 2023 at 07:02:38PM +0530, Anshuman Khandual wrote: > This modifies the current armv8pmu_sched_task() to implement a branch records > save mechanism via armv8pmu_branch_save() when a task is scheduled out of a CPU. > BRBE is paused and disabled for all exception levels before branch records > get captured, which then get concatenated with all existing stored records > present in the task context, maintaining contiguity. The final > length of the concatenated buffer does not exceed the implemented BRBE length. > > Cc: Catalin Marinas <catalin.marinas@arm.com> > Cc: Will Deacon <will@kernel.org> > Cc: Mark Rutland <mark.rutland@arm.com> > Cc: linux-arm-kernel@lists.infradead.org > Cc: linux-kernel@vger.kernel.org > Tested-by: James Clark <james.clark@arm.com> > Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com> Acked-by: Mark Rutland <mark.rutland@arm.com> Mark. > --- > arch/arm64/include/asm/perf_event.h | 2 ++ > drivers/perf/arm_brbe.c | 30 +++++++++++++++++++++++++++++ > drivers/perf/arm_pmuv3.c | 14 ++++++++++++-- > 3 files changed, 44 insertions(+), 2 deletions(-) > > diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h > index b0c12a5882df..36e7dfb466a6 100644 > --- a/arch/arm64/include/asm/perf_event.h > +++ b/arch/arm64/include/asm/perf_event.h > @@ -40,6 +40,7 @@ void armv8pmu_branch_probe(struct arm_pmu *arm_pmu); > void armv8pmu_branch_reset(void); > int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu); > void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu); > +void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx); > #else > static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event) > { > @@ -66,6 +67,7 @@ static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu) { } > static inline void armv8pmu_branch_reset(void) { } > static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu) { return 0; } > static inline void 
armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu) { } > +static inline void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx) { } > #endif > #endif > #endif > diff --git a/drivers/perf/arm_brbe.c b/drivers/perf/arm_brbe.c > index f6693699fade..3bb17ced2b1d 100644 > --- a/drivers/perf/arm_brbe.c > +++ b/drivers/perf/arm_brbe.c > @@ -171,6 +171,36 @@ static int stitch_stored_live_entries(struct brbe_regset *stored, > return min(nr_live + nr_stored, nr_max); > } > > +static int brbe_branch_save(int nr_hw_entries, struct brbe_regset *live) > +{ > + u64 brbfcr = read_sysreg_s(SYS_BRBFCR_EL1); > + int nr_live; > + > + write_sysreg_s(brbfcr | BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1); > + isb(); > + > + nr_live = capture_brbe_regset(nr_hw_entries, live); > + > + write_sysreg_s(brbfcr & ~BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1); > + isb(); > + > + return nr_live; > +} > + > +void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx) > +{ > + struct arm64_perf_task_context *task_ctx = ctx; > + struct brbe_regset live[BRBE_MAX_ENTRIES]; > + int nr_live, nr_store, nr_hw_entries; > + > + nr_hw_entries = brbe_get_numrec(arm_pmu->reg_brbidr); > + nr_live = brbe_branch_save(nr_hw_entries, live); > + nr_store = task_ctx->nr_brbe_records; > + nr_store = stitch_stored_live_entries(task_ctx->store, live, nr_store, > + nr_live, nr_hw_entries); > + task_ctx->nr_brbe_records = nr_store; > +} > + > /* > * Generic perf branch filters supported on BRBE > * > diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c > index 3c079051a63a..53f404618891 100644 > --- a/drivers/perf/arm_pmuv3.c > +++ b/drivers/perf/arm_pmuv3.c > @@ -907,9 +907,19 @@ static int armv8pmu_user_event_idx(struct perf_event *event) > static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) > { > struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu); > + void *task_ctx = pmu_ctx ? 
pmu_ctx->task_ctx_data : NULL; > > - if (sched_in && armpmu->has_branch_stack) > - armv8pmu_branch_reset(); > + if (armpmu->has_branch_stack) { > + /* Save branch records in task_ctx on sched out */ > + if (task_ctx && !sched_in) { > + armv8pmu_branch_save(armpmu, task_ctx); > + return; > + } > + > + /* Reset branch records on sched in */ > + if (sched_in) > + armv8pmu_branch_reset(); > + } > } > > /* > -- > 2.25.1 >
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index b0c12a5882df..36e7dfb466a6 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -40,6 +40,7 @@ void armv8pmu_branch_probe(struct arm_pmu *arm_pmu); void armv8pmu_branch_reset(void); int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu); void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu); +void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx); #else static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event) { @@ -66,6 +67,7 @@ static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu) { } static inline void armv8pmu_branch_reset(void) { } static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu) { return 0; } static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu) { } +static inline void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx) { } #endif #endif #endif diff --git a/drivers/perf/arm_brbe.c b/drivers/perf/arm_brbe.c index f6693699fade..3bb17ced2b1d 100644 --- a/drivers/perf/arm_brbe.c +++ b/drivers/perf/arm_brbe.c @@ -171,6 +171,36 @@ static int stitch_stored_live_entries(struct brbe_regset *stored, return min(nr_live + nr_stored, nr_max); } +static int brbe_branch_save(int nr_hw_entries, struct brbe_regset *live) +{ + u64 brbfcr = read_sysreg_s(SYS_BRBFCR_EL1); + int nr_live; + + write_sysreg_s(brbfcr | BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1); + isb(); + + nr_live = capture_brbe_regset(nr_hw_entries, live); + + write_sysreg_s(brbfcr & ~BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1); + isb(); + + return nr_live; +} + +void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx) +{ + struct arm64_perf_task_context *task_ctx = ctx; + struct brbe_regset live[BRBE_MAX_ENTRIES]; + int nr_live, nr_store, nr_hw_entries; + + nr_hw_entries = brbe_get_numrec(arm_pmu->reg_brbidr); + nr_live = brbe_branch_save(nr_hw_entries, live); + nr_store = 
task_ctx->nr_brbe_records; + nr_store = stitch_stored_live_entries(task_ctx->store, live, nr_store, + nr_live, nr_hw_entries); + task_ctx->nr_brbe_records = nr_store; +} + /* * Generic perf branch filters supported on BRBE * diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 3c079051a63a..53f404618891 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -907,9 +907,19 @@ static int armv8pmu_user_event_idx(struct perf_event *event) static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu); + void *task_ctx = pmu_ctx ? pmu_ctx->task_ctx_data : NULL; - if (sched_in && armpmu->has_branch_stack) - armv8pmu_branch_reset(); + if (armpmu->has_branch_stack) { + /* Save branch records in task_ctx on sched out */ + if (task_ctx && !sched_in) { + armv8pmu_branch_save(armpmu, task_ctx); + return; + } + + /* Reset branch records on sched in */ + if (sched_in) + armv8pmu_branch_reset(); + } } /*