[RFC,V1,06/11] arm64/perf: Drive BRBE from perf event states

Message ID: 1642998653-21377-7-git-send-email-anshuman.khandual@arm.com
State New, archived
Series: arm64/perf: Enable branch stack sampling

Commit Message

Anshuman Khandual Jan. 24, 2022, 4:30 a.m. UTC
Branch stack sampling rides along the normal perf event and all the branch
records get captured during the PMU interrupt. This just changes perf event
handling on the arm64 platform to accommodate required BRBE operations that
will enable branch stack sampling support.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: linux-perf-users@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
 arch/arm64/kernel/perf_event.c |  6 +++++
 drivers/perf/arm_pmu.c         | 40 ++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)
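
For context, a branch stack sampling request rides in through the normal
perf_event_open(2) attributes; the PMU changes in this patch only react to
it. A minimal userspace sketch (the event choice and sample period here
are arbitrary illustrations, not taken from this series):

	#include <linux/perf_event.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int open_branch_sampling_event(pid_t pid)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;
		attr.sample_period = 100000;
		/* PERF_SAMPLE_BRANCH_STACK is what has_branch_stack() tests */
		attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
		attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
					  PERF_SAMPLE_BRANCH_USER;

		return syscall(SYS_perf_event_open, &attr, pid, -1, -1, 0);
	}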

Comments

Rob Herring (Arm) Jan. 26, 2022, 5:07 p.m. UTC | #1
On Mon, Jan 24, 2022 at 10:00:48AM +0530, Anshuman Khandual wrote:
> Branch stack sampling rides along the normal perf event and all the branch
> records get captured during the PMU interrupt. This just changes perf event
> handling on the arm64 platform to accommodate required BRBE operations that
> will enable branch stack sampling support.
> 
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: linux-perf-users@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: linux-arm-kernel@lists.infradead.org
> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> ---
>  arch/arm64/kernel/perf_event.c |  6 +++++
>  drivers/perf/arm_pmu.c         | 40 ++++++++++++++++++++++++++++++++++
>  2 files changed, 46 insertions(+)
> 
> diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
> index f6a47036b0b4..11c82c8f2eec 100644
> --- a/arch/arm64/kernel/perf_event.c
> +++ b/arch/arm64/kernel/perf_event.c
> @@ -864,6 +864,12 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
>  		if (!armpmu_event_set_period(event))
>  			continue;
>  
> +		if (has_branch_stack(event)) {
> +			cpu_pmu->brbe_read(cpuc, event);

Is has_branch_stack() guaranteed to be false on arm32? If not, this will 
be a NULL function ptr. 

To add to my other comments, this patch is where I would add 
brbe_read(), etc. to arm_pmu.

Rob
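
If has_branch_stack() could ever be true with the BRBE callbacks left
unpopulated, the interrupt-handler hunk would need to guard the function
pointers themselves; a hypothetical defensive variant (not what this patch
does):

	if (has_branch_stack(event) && cpu_pmu->brbe_read) {
		cpu_pmu->brbe_read(cpuc, event);
		data.br_stack = &cpuc->brbe_stack;
		if (cpu_pmu->brbe_reset)
			cpu_pmu->brbe_reset(cpuc);
	}

As the reply below explains, the series instead gates event creation so
that has_branch_stack() is never true without BRBE support compiled in.
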
Anshuman Khandual Jan. 27, 2022, 12:20 p.m. UTC | #2
On 1/26/22 10:37 PM, Rob Herring wrote:
> On Mon, Jan 24, 2022 at 10:00:48AM +0530, Anshuman Khandual wrote:
>> Branch stack sampling rides along the normal perf event and all the branch
>> records get captured during the PMU interrupt. This just changes perf event
>> handling on the arm64 platform to accommodate required BRBE operations that
>> will enable branch stack sampling support.
>>
>> Cc: Peter Zijlstra <peterz@infradead.org>
>> Cc: Ingo Molnar <mingo@redhat.com>
>> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
>> Cc: Mark Rutland <mark.rutland@arm.com>
>> Cc: Will Deacon <will@kernel.org>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: linux-perf-users@vger.kernel.org
>> Cc: linux-kernel@vger.kernel.org
>> Cc: linux-arm-kernel@lists.infradead.org
>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>> ---
>>  arch/arm64/kernel/perf_event.c |  6 +++++
>>  drivers/perf/arm_pmu.c         | 40 ++++++++++++++++++++++++++++++++++
>>  2 files changed, 46 insertions(+)
>>
>> diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
>> index f6a47036b0b4..11c82c8f2eec 100644
>> --- a/arch/arm64/kernel/perf_event.c
>> +++ b/arch/arm64/kernel/perf_event.c
>> @@ -864,6 +864,12 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
>>  		if (!armpmu_event_set_period(event))
>>  			continue;
>>  
>> +		if (has_branch_stack(event)) {
>> +			cpu_pmu->brbe_read(cpuc, event);
> 
> Is has_branch_stack() guaranteed to be false on arm32? If not, this will 
> be a NULL function ptr.

armpmu_event_init() blocks a perf event with a branch stack sampling
request from being created unless the CONFIG_ARM_BRBE_PMU option is
enabled, and that option depends on CONFIG_ARM64. So has_branch_stack()
is guaranteed to be false on arm32.

static int armpmu_event_init(struct perf_event *event)
{
	....
        if (has_branch_stack(event)) {
                /*
                 * BRBE support is absent. Select CONFIG_ARM_BRBE_PMU
                 * in the config, before branch stack sampling events
                 * can be requested.
                 */
                if (!IS_ENABLED(CONFIG_ARM_BRBE_PMU)) {
                        pr_warn_once("BRBE is disabled, select CONFIG_ARM_BRBE_PMU\n");
                        return -EOPNOTSUPP;
                }


config ARM_BRBE_PMU
        tristate "Enable support for Branch Record Buffer Extension (BRBE)"
        depends on ARM64 && ARM_PMU
        default y
        help
          Enable perf support for Branch Record Buffer Extension (BRBE) which
          records all branches taken in an execution path. This supports
          filtering based on some branch types and privilege levels. It
          captures additional relevant information such as cycle count,
          misprediction, branch type, and branch privilege level.

> 
> To add to my other comments, this patch is where I would add 
> brbe_read(), etc. to arm_pmu.

Because then all the new arm_pmu helpers would get added and used in the
perf driver in the same patch, although the actual helper implementations
would still come a bit later via the driver. That patch would also need
to fold in the pmu_hw_events struct updates as well.

There is no problem as such, it is just a bit subjective. I am inclined
to keep the independent infrastructure changes separate, making them
easier to review while also creating a flow.
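
For reference, the signatures of the helpers under discussion, as they can
be inferred from the call sites in this patch (a sketch, not the series'
verbatim declarations):

	struct arm_pmu {
		/* ... existing callbacks and state ... */

		/* apply branch filtering requested by the event */
		void (*brbe_filter)(struct pmu_hw_events *hw_events,
				    struct perf_event *event);
		/* enable/disable branch record capture on this CPU */
		void (*brbe_enable)(struct pmu_hw_events *hw_events);
		void (*brbe_disable)(struct pmu_hw_events *hw_events);
		/* drain captured records into hw_events->brbe_stack */
		void (*brbe_read)(struct pmu_hw_events *hw_events,
				  struct perf_event *event);
		/* discard captured records */
		void (*brbe_reset)(struct pmu_hw_events *hw_events);
	};
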
Rob Herring (Arm) Jan. 27, 2022, 2:31 p.m. UTC | #3
On Thu, Jan 27, 2022 at 6:20 AM Anshuman Khandual
<anshuman.khandual@arm.com> wrote:
>
>
> On 1/26/22 10:37 PM, Rob Herring wrote:
> > On Mon, Jan 24, 2022 at 10:00:48AM +0530, Anshuman Khandual wrote:
> >> Branch stack sampling rides along the normal perf event and all the branch
> >> records get captured during the PMU interrupt. This just changes perf event
> >> handling on the arm64 platform to accommodate required BRBE operations that
> >> will enable branch stack sampling support.
> >>
> >> Cc: Peter Zijlstra <peterz@infradead.org>
> >> Cc: Ingo Molnar <mingo@redhat.com>
> >> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> >> Cc: Mark Rutland <mark.rutland@arm.com>
> >> Cc: Will Deacon <will@kernel.org>
> >> Cc: Catalin Marinas <catalin.marinas@arm.com>
> >> Cc: linux-perf-users@vger.kernel.org
> >> Cc: linux-kernel@vger.kernel.org
> >> Cc: linux-arm-kernel@lists.infradead.org
> >> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> >> ---
> >>  arch/arm64/kernel/perf_event.c |  6 +++++
> >>  drivers/perf/arm_pmu.c         | 40 ++++++++++++++++++++++++++++++++++
> >>  2 files changed, 46 insertions(+)
> >>
> >> diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
> >> index f6a47036b0b4..11c82c8f2eec 100644
> >> --- a/arch/arm64/kernel/perf_event.c
> >> +++ b/arch/arm64/kernel/perf_event.c
> >> @@ -864,6 +864,12 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
> >>              if (!armpmu_event_set_period(event))
> >>                      continue;
> >>
> >> +            if (has_branch_stack(event)) {
> >> +                    cpu_pmu->brbe_read(cpuc, event);
> >
> > Is has_branch_stack() guaranteed to be false on arm32? If not, this will
> > be a NULL function ptr.
>
> armpmu_event_init() blocks a perf event with a branch stack sampling
> request from being created unless the CONFIG_ARM_BRBE_PMU option is
> enabled, and that option depends on CONFIG_ARM64. So has_branch_stack()
> is guaranteed to be false on arm32.

Then the stub functions in patch 3 are also not needed. The fact that
you create dummy functions makes it look like you can't have NULL
function ptrs, but you don't. This is what I mean about the structure
of the series being hard to review.

> static int armpmu_event_init(struct perf_event *event)
> {
>         ....
>         if (has_branch_stack(event)) {
>                 /*
>                  * BRBE support is absent. Select CONFIG_ARM_BRBE_PMU
>                  * in the config, before branch stack sampling events
>                  * can be requested.
>                  */
>                 if (!IS_ENABLED(CONFIG_ARM_BRBE_PMU)) {
>                         pr_warn_once("BRBE is disabled, select CONFIG_ARM_BRBE_PMU\n");
>                         return -EOPNOTSUPP;
>                 }
>
>
> config ARM_BRBE_PMU
>         tristate "Enable support for Branch Record Buffer Extension (BRBE)"
>         depends on ARM64 && ARM_PMU
>         default y
>         help
>           Enable perf support for Branch Record Buffer Extension (BRBE) which
>           records all branches taken in an execution path. This supports
>           filtering based on some branch types and privilege levels. It
>           captures additional relevant information such as cycle count,
>           misprediction, branch type, and branch privilege level.
>
> >
> > To add to my other comments, this patch is where I would add
> > brbe_read(), etc. to arm_pmu.
>
> Because then all the new arm_pmu helpers would get added and used in the
> perf driver in the same patch, although the actual helper implementations
> would still come a bit later via the driver. That patch would also need
> to fold in the pmu_hw_events struct updates as well.
>
> There is no problem as such, it is just a bit subjective. I am inclined
> to keep the independent infrastructure changes separate, making them
> easier to review while also creating a flow.

Everything about kernel development is subjective until it's a
requirement from the maintainer. I'm not the maintainer here, so it's
just advice.

Rob
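
Likewise inferred from usage in the patch below: the per-CPU state the
BRBE paths rely on in struct pmu_hw_events (a sketch; the exact type of
brbe_stack is assumed, it only needs to provide the perf_branch_stack
that data.br_stack is pointed at):

	struct pmu_hw_events {
		/* ... existing members ... */

		/* records drained by brbe_read(), published via data.br_stack */
		struct perf_branch_stack	brbe_stack;
		/* branch stack sampling events currently active on this CPU */
		unsigned int			brbe_users;
		/* task context whose records currently occupy the buffer */
		struct perf_event_context	*brbe_context;
	};
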

Patch

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index f6a47036b0b4..11c82c8f2eec 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -864,6 +864,12 @@  static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
 		if (!armpmu_event_set_period(event))
 			continue;
 
+		if (has_branch_stack(event)) {
+			cpu_pmu->brbe_read(cpuc, event);
+			data.br_stack = &cpuc->brbe_stack;
+			cpu_pmu->brbe_reset(cpuc);
+		}
+
 		/*
 		 * Perf event overflow will queue the processing of the event as
 		 * an irq_work which will be taken care of in the handling of
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 295cc7952d0e..0800c8858ed8 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -269,12 +269,22 @@  armpmu_stop(struct perf_event *event, int flags)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
 
 	/*
 	 * ARM pmu always has to update the counter, so ignore
 	 * PERF_EF_UPDATE, see comments in armpmu_start().
 	 */
 	if (!(hwc->state & PERF_HES_STOPPED)) {
+		if (has_branch_stack(event)) {
+			WARN_ON_ONCE(!hw_events->brbe_users);
+			hw_events->brbe_users--;
+			if (!hw_events->brbe_users) {
+				hw_events->brbe_context = NULL;
+				armpmu->brbe_disable(hw_events);
+			}
+		}
+
 		armpmu->disable(event);
 		armpmu_event_update(event);
 		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
@@ -285,6 +295,7 @@  static void armpmu_start(struct perf_event *event, int flags)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
 
 	/*
 	 * ARM pmu always has to reprogram the period, so ignore
@@ -302,6 +313,14 @@  static void armpmu_start(struct perf_event *event, int flags)
 	 * happened since disabling.
 	 */
 	armpmu_event_set_period(event);
+	if (has_branch_stack(event)) {
+		if (event->ctx->task && hw_events->brbe_context != event->ctx) {
+			armpmu->brbe_reset(hw_events);
+			hw_events->brbe_context = event->ctx;
+		}
+		armpmu->brbe_enable(hw_events);
+		hw_events->brbe_users++;
+	}
 	armpmu->enable(event);
 }
 
@@ -347,6 +366,10 @@  armpmu_add(struct perf_event *event, int flags)
 	hw_events->events[idx] = event;
 
 	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	if (has_branch_stack(event))
+		armpmu->brbe_filter(hw_events, event);
+
 	if (flags & PERF_EF_START)
 		armpmu_start(event, PERF_EF_RELOAD);
 
@@ -438,6 +461,7 @@  __hw_perf_event_init(struct perf_event *event)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
 	int mapping;
 
 	hwc->flags = 0;
@@ -492,6 +516,9 @@  __hw_perf_event_init(struct perf_event *event)
 			return -EINVAL;
 	}
 
+	if (has_branch_stack(event))
+		armpmu->brbe_filter(hw_events, event);
+
 	return 0;
 }
 
@@ -520,6 +547,18 @@  static int armpmu_event_init(struct perf_event *event)
 	return __hw_perf_event_init(event);
 }
 
+static void armpmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(ctx->pmu);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+
+	if (!hw_events->brbe_users)
+		return;
+
+	if (sched_in)
+		armpmu->brbe_reset(hw_events);
+}
+
 static void armpmu_enable(struct pmu *pmu)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(pmu);
@@ -877,6 +916,7 @@  static struct arm_pmu *__armpmu_alloc(gfp_t flags)
 	}
 
 	pmu->pmu = (struct pmu) {
+		.sched_task	= armpmu_sched_task,
 		.pmu_enable	= armpmu_enable,
 		.pmu_disable	= armpmu_disable,
 		.event_init	= armpmu_event_init,
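
For completeness: the records handed to data.br_stack reach userspace in
the generic perf branch record layout, abridged here from
include/uapi/linux/perf_event.h as of this kernel generation (the
kernel-internal struct perf_branch_stack wraps an nr/hw_idx header around
an array of these entries):

	struct perf_branch_entry {
		__u64	from;
		__u64	to;
		__u64	mispred:1,	/* target mispredicted */
			predicted:1,	/* target predicted */
			in_tx:1,	/* in transaction */
			abort:1,	/* transaction abort */
			cycles:16,	/* cycle count to last branch */
			type:4,		/* branch type */
			reserved:40;
	};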