diff mbox series

[V12,05/10] arm64/perf: Add branch stack support in ARMV8 PMU

Message ID 20230615133239.442736-6-anshuman.khandual@arm.com (mailing list archive)
State New, archived
Headers show
Series arm64/perf: Enable branch stack sampling | expand

Commit Message

Anshuman Khandual June 15, 2023, 1:32 p.m. UTC
This enables support for branch stack sampling events in the ARMV8 PMU by
checking has_branch_stack() on the event inside the 'struct arm_pmu'
callbacks. The branch stack helpers armv8pmu_branch_XXXXX() are just dummy
functions for now. While here, this also defines arm_pmu's sched_task()
callback as armv8pmu_sched_task(), which resets the branch record buffer on
a sched_in.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Tested-by: James Clark <james.clark@arm.com>
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
 arch/arm64/include/asm/perf_event.h | 31 +++++++++++
 drivers/perf/arm_pmuv3.c            | 86 +++++++++++++++++++++--------
 2 files changed, 93 insertions(+), 24 deletions(-)

Comments

kernel test robot June 15, 2023, 11:42 p.m. UTC | #1
Hi Anshuman,

kernel test robot noticed the following build errors:

[auto build test ERROR on arm64/for-next/core]
[also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352
base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
patch link:    https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com
patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU
config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/202306160706.Uei5XDoi-lkp@intel.com/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
reproduce (this is a W=1 build):
        mkdir -p ~/bin
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm cross compiling tool for clang build
        # apt-get install binutils-arm-linux-gnueabi
        git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
        git fetch arm64 for-next/core
        git checkout arm64/for-next/core
        b4 shazam https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual@arm.com
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202306160706.Uei5XDoi-lkp@intel.com/

All errors (new ones prefixed by >>):

         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here
     140 |         PERF_CACHE_MAP_ALL_UNSUPPORTED,
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED'
      43 |                 [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED,       \
         |                                             ^~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED'
      35 | #define CACHE_OP_UNSUPPORTED            0xFFFF
         |                                         ^~~~~~
   drivers/perf/arm_pmuv3.c:147:44: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides]
     147 |         [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:133:44: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD'
     133 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD                         0x004E
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here
     140 |         PERF_CACHE_MAP_ALL_UNSUPPORTED,
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED'
      43 |                 [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED,       \
         |                                             ^~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED'
      35 | #define CACHE_OP_UNSUPPORTED            0xFFFF
         |                                         ^~~~~~
   drivers/perf/arm_pmuv3.c:148:45: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides]
     148 |         [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
         |                                                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:134:44: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR'
     134 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR                         0x004F
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here
     140 |         PERF_CACHE_MAP_ALL_UNSUPPORTED,
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED'
      43 |                 [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED,       \
         |                                             ^~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED'
      35 | #define CACHE_OP_UNSUPPORTED            0xFFFF
         |                                         ^~~~~~
   drivers/perf/arm_pmuv3.c:149:42: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides]
     149 |         [C(DTLB)][C(OP_READ)][C(RESULT_MISS)]   = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:131:50: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD'
     131 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD                  0x004C
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here
     140 |         PERF_CACHE_MAP_ALL_UNSUPPORTED,
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED'
      43 |                 [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED,       \
         |                                             ^~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED'
      35 | #define CACHE_OP_UNSUPPORTED            0xFFFF
         |                                         ^~~~~~
   drivers/perf/arm_pmuv3.c:150:43: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides]
     150 |         [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]  = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:132:50: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR'
     132 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR                  0x004D
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here
     140 |         PERF_CACHE_MAP_ALL_UNSUPPORTED,
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED'
      43 |                 [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED,       \
         |                                             ^~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED'
      35 | #define CACHE_OP_UNSUPPORTED            0xFFFF
         |                                         ^~~~~~
   drivers/perf/arm_pmuv3.c:152:44: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides]
     152 |         [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:148:46: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD'
     148 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD                      0x0060
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here
     140 |         PERF_CACHE_MAP_ALL_UNSUPPORTED,
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED'
      43 |                 [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED,       \
         |                                             ^~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED'
      35 | #define CACHE_OP_UNSUPPORTED            0xFFFF
         |                                         ^~~~~~
   drivers/perf/arm_pmuv3.c:153:45: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides]
     153 |         [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
         |                                                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:149:46: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR'
     149 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR                      0x0061
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here
     140 |         PERF_CACHE_MAP_ALL_UNSUPPORTED,
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED'
      43 |                 [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED,       \
         |                                             ^~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED'
      35 | #define CACHE_OP_UNSUPPORTED            0xFFFF
         |                                         ^~~~~~
>> drivers/perf/arm_pmuv3.c:714:3: error: call to undeclared function 'armv8pmu_branch_enable'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     714 |                 armv8pmu_branch_enable(event);
         |                 ^
>> drivers/perf/arm_pmuv3.c:720:3: error: call to undeclared function 'armv8pmu_branch_disable'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     720 |                 armv8pmu_branch_disable(event);
         |                 ^
>> drivers/perf/arm_pmuv3.c:801:4: error: call to undeclared function 'armv8pmu_branch_read'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     801 |                         armv8pmu_branch_read(cpuc, event);
         |                         ^
   drivers/perf/arm_pmuv3.c:801:4: note: did you mean 'armv8pmu_pmcr_read'?
   drivers/perf/arm_pmuv3.c:430:19: note: 'armv8pmu_pmcr_read' declared here
     430 | static inline u32 armv8pmu_pmcr_read(void)
         |                   ^
>> drivers/perf/arm_pmuv3.c:908:3: error: call to undeclared function 'armv8pmu_branch_reset'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     908 |                 armv8pmu_branch_reset();
         |                 ^
   drivers/perf/arm_pmuv3.c:983:3: error: call to undeclared function 'armv8pmu_branch_reset'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     983 |                 armv8pmu_branch_reset();
         |                 ^
>> drivers/perf/arm_pmuv3.c:1021:34: error: call to undeclared function 'armv8pmu_branch_attr_valid'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
    1021 |         if (has_branch_stack(event) && !armv8pmu_branch_attr_valid(event))
         |                                         ^
>> drivers/perf/arm_pmuv3.c:1140:2: error: call to undeclared function 'armv8pmu_branch_probe'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
    1140 |         armv8pmu_branch_probe(cpu_pmu);
         |         ^
   55 warnings and 7 errors generated.


vim +/armv8pmu_branch_enable +714 drivers/perf/arm_pmuv3.c

   701	
   702	static void armv8pmu_enable_event(struct perf_event *event)
   703	{
   704		/*
   705		 * Enable counter and interrupt, and set the counter to count
   706		 * the event that we're interested in.
   707		 */
   708		armv8pmu_disable_event_counter(event);
   709		armv8pmu_write_event_type(event);
   710		armv8pmu_enable_event_irq(event);
   711		armv8pmu_enable_event_counter(event);
   712	
   713		if (has_branch_stack(event))
 > 714			armv8pmu_branch_enable(event);
   715	}
   716	
   717	static void armv8pmu_disable_event(struct perf_event *event)
   718	{
   719		if (has_branch_stack(event))
 > 720			armv8pmu_branch_disable(event);
   721	
   722		armv8pmu_disable_event_counter(event);
   723		armv8pmu_disable_event_irq(event);
   724	}
   725	
   726	static void armv8pmu_start(struct arm_pmu *cpu_pmu)
   727	{
   728		struct perf_event_context *ctx;
   729		int nr_user = 0;
   730	
   731		ctx = perf_cpu_task_ctx();
   732		if (ctx)
   733			nr_user = ctx->nr_user;
   734	
   735		if (sysctl_perf_user_access && nr_user)
   736			armv8pmu_enable_user_access(cpu_pmu);
   737		else
   738			armv8pmu_disable_user_access();
   739	
   740		/* Enable all counters */
   741		armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
   742	}
   743	
   744	static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
   745	{
   746		/* Disable all counters */
   747		armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
   748	}
   749	
   750	static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
   751	{
   752		u32 pmovsr;
   753		struct perf_sample_data data;
   754		struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
   755		struct pt_regs *regs;
   756		int idx;
   757	
   758		/*
   759		 * Get and reset the IRQ flags
   760		 */
   761		pmovsr = armv8pmu_getreset_flags();
   762	
   763		/*
   764		 * Did an overflow occur?
   765		 */
   766		if (!armv8pmu_has_overflowed(pmovsr))
   767			return IRQ_NONE;
   768	
   769		/*
   770		 * Handle the counter(s) overflow(s)
   771		 */
   772		regs = get_irq_regs();
   773	
   774		/*
   775		 * Stop the PMU while processing the counter overflows
   776		 * to prevent skews in group events.
   777		 */
   778		armv8pmu_stop(cpu_pmu);
   779		for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
   780			struct perf_event *event = cpuc->events[idx];
   781			struct hw_perf_event *hwc;
   782	
   783			/* Ignore if we don't have an event. */
   784			if (!event)
   785				continue;
   786	
   787			/*
   788			 * We have a single interrupt for all counters. Check that
   789			 * each counter has overflowed before we process it.
   790			 */
   791			if (!armv8pmu_counter_has_overflowed(pmovsr, idx))
   792				continue;
   793	
   794			hwc = &event->hw;
   795			armpmu_event_update(event);
   796			perf_sample_data_init(&data, 0, hwc->last_period);
   797			if (!armpmu_event_set_period(event))
   798				continue;
   799	
   800			if (has_branch_stack(event) && !WARN_ON(!cpuc->branches)) {
 > 801				armv8pmu_branch_read(cpuc, event);
   802				perf_sample_save_brstack(&data, event, &cpuc->branches->branch_stack);
   803			}
   804	
   805			/*
   806			 * Perf event overflow will queue the processing of the event as
   807			 * an irq_work which will be taken care of in the handling of
   808			 * IPI_IRQ_WORK.
   809			 */
   810			if (perf_event_overflow(event, &data, regs))
   811				cpu_pmu->disable(event);
   812		}
   813		armv8pmu_start(cpu_pmu);
   814	
   815		return IRQ_HANDLED;
   816	}
   817	
   818	static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
   819					    struct arm_pmu *cpu_pmu)
   820	{
   821		int idx;
   822	
   823		for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) {
   824			if (!test_and_set_bit(idx, cpuc->used_mask))
   825				return idx;
   826		}
   827		return -EAGAIN;
   828	}
   829	
   830	static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
   831					   struct arm_pmu *cpu_pmu)
   832	{
   833		int idx;
   834	
   835		/*
   836		 * Chaining requires two consecutive event counters, where
   837		 * the lower idx must be even.
   838		 */
   839		for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) {
   840			if (!test_and_set_bit(idx, cpuc->used_mask)) {
   841				/* Check if the preceding even counter is available */
   842				if (!test_and_set_bit(idx - 1, cpuc->used_mask))
   843					return idx;
   844				/* Release the Odd counter */
   845				clear_bit(idx, cpuc->used_mask);
   846			}
   847		}
   848		return -EAGAIN;
   849	}
   850	
   851	static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
   852					  struct perf_event *event)
   853	{
   854		struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
   855		struct hw_perf_event *hwc = &event->hw;
   856		unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
   857	
   858		/* Always prefer to place a cycle counter into the cycle counter. */
   859		if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) {
   860			if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
   861				return ARMV8_IDX_CYCLE_COUNTER;
   862			else if (armv8pmu_event_is_64bit(event) &&
   863				   armv8pmu_event_want_user_access(event) &&
   864				   !armv8pmu_has_long_event(cpu_pmu))
   865					return -EAGAIN;
   866		}
   867	
   868		/*
   869		 * Otherwise use events counters
   870		 */
   871		if (armv8pmu_event_is_chained(event))
   872			return	armv8pmu_get_chain_idx(cpuc, cpu_pmu);
   873		else
   874			return armv8pmu_get_single_idx(cpuc, cpu_pmu);
   875	}
   876	
   877	static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc,
   878					     struct perf_event *event)
   879	{
   880		int idx = event->hw.idx;
   881	
   882		clear_bit(idx, cpuc->used_mask);
   883		if (armv8pmu_event_is_chained(event))
   884			clear_bit(idx - 1, cpuc->used_mask);
   885	}
   886	
   887	static int armv8pmu_user_event_idx(struct perf_event *event)
   888	{
   889		if (!sysctl_perf_user_access || !armv8pmu_event_has_user_read(event))
   890			return 0;
   891	
   892		/*
   893		 * We remap the cycle counter index to 32 to
   894		 * match the offset applied to the rest of
   895		 * the counter indices.
   896		 */
   897		if (event->hw.idx == ARMV8_IDX_CYCLE_COUNTER)
   898			return ARMV8_IDX_CYCLE_COUNTER_USER;
   899	
   900		return event->hw.idx;
   901	}
   902	
   903	static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
   904	{
   905		struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu);
   906	
   907		if (sched_in && armpmu->has_branch_stack)
 > 908			armv8pmu_branch_reset();
   909	}
   910
Anshuman Khandual June 16, 2023, 1:27 a.m. UTC | #2
On 6/16/23 05:12, kernel test robot wrote:
> Hi Anshuman,
> 
> kernel test robot noticed the following build errors:
> 
> [auto build test ERROR on arm64/for-next/core]
> [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
> 
> url:    https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
> patch link:    https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com
> patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU
> config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/202306160706.Uei5XDoi-lkp@intel.com/config)
> compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
> reproduce (this is a W=1 build):
>         mkdir -p ~/bin
>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # install arm cross compiling tool for clang build
>         # apt-get install binutils-arm-linux-gnueabi
>         git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
>         git fetch arm64 for-next/core
>         git checkout arm64/for-next/core
>         b4 shazam https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual@arm.com
>         # save the config file
>         mkdir build_dir && cp config build_dir/.config
>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig
>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/

I am unable to reproduce this on mainline 6.4-rc6 with the default cross
compiler on a W=1 build. Looking at all the other problems reported on the
file, it seems something is not right here. The reported build problems
around these callbacks, i.e. armv8pmu_branch_XXXX(), do not make sense, as
they are available via CONFIG_PERF_EVENTS, which is also enabled along with
CONFIG_ARM_PMUV3 in this test config.
kernel test robot June 16, 2023, 3:41 a.m. UTC | #3
Hi Anshuman,

kernel test robot noticed the following build errors:

[auto build test ERROR on arm64/for-next/core]
[also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352
base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
patch link:    https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com
patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU
config: arm-allmodconfig (https://download.01.org/0day-ci/archive/20230616/202306161154.PwcAiVfV-lkp@intel.com/config)
compiler: arm-linux-gnueabi-gcc (GCC) 12.3.0
reproduce (this is a W=1 build):
        mkdir -p ~/bin
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
        git fetch arm64 for-next/core
        git checkout arm64/for-next/core
        b4 shazam https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual@arm.com
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.3.0 ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.3.0 ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202306161154.PwcAiVfV-lkp@intel.com/

All errors (new ones prefixed by >>):

   drivers/perf/arm_pmuv3.c:143:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD'
     143 |         [C(L1D)][C(OP_READ)][C(RESULT_MISS)]    = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:122:65: warning: initialized field overwritten [-Woverride-init]
     122 | #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR                       0x0041
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:144:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR'
     144 |         [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:122:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[0][1][0]')
     122 | #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR                       0x0041
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:144:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR'
     144 |         [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:124:65: warning: initialized field overwritten [-Woverride-init]
     124 | #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR                0x0043
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:145:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR'
     145 |         [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]   = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:124:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[0][1][1]')
     124 | #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR                0x0043
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:145:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR'
     145 |         [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]   = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:133:65: warning: initialized field overwritten [-Woverride-init]
     133 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD                         0x004E
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:147:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD'
     147 |         [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:133:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[3][0][0]')
     133 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD                         0x004E
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:147:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD'
     147 |         [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:134:65: warning: initialized field overwritten [-Woverride-init]
     134 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR                         0x004F
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:148:52: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR'
     148 |         [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
         |                                                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:134:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[3][1][0]')
     134 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR                         0x004F
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:148:52: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR'
     148 |         [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
         |                                                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:131:65: warning: initialized field overwritten [-Woverride-init]
     131 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD                  0x004C
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:149:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD'
     149 |         [C(DTLB)][C(OP_READ)][C(RESULT_MISS)]   = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:131:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[3][0][1]')
     131 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD                  0x004C
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:149:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD'
     149 |         [C(DTLB)][C(OP_READ)][C(RESULT_MISS)]   = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:132:65: warning: initialized field overwritten [-Woverride-init]
     132 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR                  0x004D
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:150:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR'
     150 |         [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]  = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:132:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[3][1][1]')
     132 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR                  0x004D
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:150:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR'
     150 |         [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]  = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:148:65: warning: initialized field overwritten [-Woverride-init]
     148 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD                      0x0060
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:152:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD'
     152 |         [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:148:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[6][0][0]')
     148 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD                      0x0060
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:152:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD'
     152 |         [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
         |                                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:149:65: warning: initialized field overwritten [-Woverride-init]
     149 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR                      0x0061
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:153:52: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR'
     153 |         [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
         |                                                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/perf/arm_pmuv3.h:149:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[6][1][0]')
     149 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR                      0x0061
         |                                                                 ^~~~~~
   drivers/perf/arm_pmuv3.c:153:52: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR'
     153 |         [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
         |                                                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/perf/arm_pmuv3.c: In function 'armv8pmu_enable_event':
>> drivers/perf/arm_pmuv3.c:714:17: error: implicit declaration of function 'armv8pmu_branch_enable'; did you mean 'static_branch_enable'? [-Werror=implicit-function-declaration]
     714 |                 armv8pmu_branch_enable(event);
         |                 ^~~~~~~~~~~~~~~~~~~~~~
         |                 static_branch_enable
   drivers/perf/arm_pmuv3.c: In function 'armv8pmu_disable_event':
>> drivers/perf/arm_pmuv3.c:720:17: error: implicit declaration of function 'armv8pmu_branch_disable'; did you mean 'static_branch_disable'? [-Werror=implicit-function-declaration]
     720 |                 armv8pmu_branch_disable(event);
         |                 ^~~~~~~~~~~~~~~~~~~~~~~
         |                 static_branch_disable
   drivers/perf/arm_pmuv3.c: In function 'armv8pmu_handle_irq':
>> drivers/perf/arm_pmuv3.c:801:25: error: implicit declaration of function 'armv8pmu_branch_read'; did you mean 'armv8pmu_pmcr_read'? [-Werror=implicit-function-declaration]
     801 |                         armv8pmu_branch_read(cpuc, event);
         |                         ^~~~~~~~~~~~~~~~~~~~
         |                         armv8pmu_pmcr_read
   drivers/perf/arm_pmuv3.c: In function 'armv8pmu_sched_task':
>> drivers/perf/arm_pmuv3.c:908:17: error: implicit declaration of function 'armv8pmu_branch_reset' [-Werror=implicit-function-declaration]
     908 |                 armv8pmu_branch_reset();
         |                 ^~~~~~~~~~~~~~~~~~~~~
   drivers/perf/arm_pmuv3.c: In function '__armv8_pmuv3_map_event':
>> drivers/perf/arm_pmuv3.c:1021:41: error: implicit declaration of function 'armv8pmu_branch_attr_valid' [-Werror=implicit-function-declaration]
    1021 |         if (has_branch_stack(event) && !armv8pmu_branch_attr_valid(event))
         |                                         ^~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/perf/arm_pmuv3.c: In function '__armv8pmu_probe_pmu':
>> drivers/perf/arm_pmuv3.c:1140:9: error: implicit declaration of function 'armv8pmu_branch_probe'; did you mean 'arm_pmu_acpi_probe'? [-Werror=implicit-function-declaration]
    1140 |         armv8pmu_branch_probe(cpu_pmu);
         |         ^~~~~~~~~~~~~~~~~~~~~
         |         arm_pmu_acpi_probe
   cc1: some warnings being treated as errors


vim +714 drivers/perf/arm_pmuv3.c

   701	
   702	static void armv8pmu_enable_event(struct perf_event *event)
   703	{
   704		/*
   705		 * Enable counter and interrupt, and set the counter to count
   706		 * the event that we're interested in.
   707		 */
   708		armv8pmu_disable_event_counter(event);
   709		armv8pmu_write_event_type(event);
   710		armv8pmu_enable_event_irq(event);
   711		armv8pmu_enable_event_counter(event);
   712	
   713		if (has_branch_stack(event))
 > 714			armv8pmu_branch_enable(event);
   715	}
   716	
   717	static void armv8pmu_disable_event(struct perf_event *event)
   718	{
   719		if (has_branch_stack(event))
 > 720			armv8pmu_branch_disable(event);
   721	
   722		armv8pmu_disable_event_counter(event);
   723		armv8pmu_disable_event_irq(event);
   724	}
   725	
   726	static void armv8pmu_start(struct arm_pmu *cpu_pmu)
   727	{
   728		struct perf_event_context *ctx;
   729		int nr_user = 0;
   730	
   731		ctx = perf_cpu_task_ctx();
   732		if (ctx)
   733			nr_user = ctx->nr_user;
   734	
   735		if (sysctl_perf_user_access && nr_user)
   736			armv8pmu_enable_user_access(cpu_pmu);
   737		else
   738			armv8pmu_disable_user_access();
   739	
   740		/* Enable all counters */
   741		armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
   742	}
   743	
   744	static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
   745	{
   746		/* Disable all counters */
   747		armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
   748	}
   749	
   750	static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
   751	{
   752		u32 pmovsr;
   753		struct perf_sample_data data;
   754		struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
   755		struct pt_regs *regs;
   756		int idx;
   757	
   758		/*
   759		 * Get and reset the IRQ flags
   760		 */
   761		pmovsr = armv8pmu_getreset_flags();
   762	
   763		/*
   764		 * Did an overflow occur?
   765		 */
   766		if (!armv8pmu_has_overflowed(pmovsr))
   767			return IRQ_NONE;
   768	
   769		/*
   770		 * Handle the counter(s) overflow(s)
   771		 */
   772		regs = get_irq_regs();
   773	
   774		/*
   775		 * Stop the PMU while processing the counter overflows
   776		 * to prevent skews in group events.
   777		 */
   778		armv8pmu_stop(cpu_pmu);
   779		for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
   780			struct perf_event *event = cpuc->events[idx];
   781			struct hw_perf_event *hwc;
   782	
   783			/* Ignore if we don't have an event. */
   784			if (!event)
   785				continue;
   786	
   787			/*
   788			 * We have a single interrupt for all counters. Check that
   789			 * each counter has overflowed before we process it.
   790			 */
   791			if (!armv8pmu_counter_has_overflowed(pmovsr, idx))
   792				continue;
   793	
   794			hwc = &event->hw;
   795			armpmu_event_update(event);
   796			perf_sample_data_init(&data, 0, hwc->last_period);
   797			if (!armpmu_event_set_period(event))
   798				continue;
   799	
   800			if (has_branch_stack(event) && !WARN_ON(!cpuc->branches)) {
 > 801				armv8pmu_branch_read(cpuc, event);
   802				perf_sample_save_brstack(&data, event, &cpuc->branches->branch_stack);
   803			}
   804	
   805			/*
   806			 * Perf event overflow will queue the processing of the event as
   807			 * an irq_work which will be taken care of in the handling of
   808			 * IPI_IRQ_WORK.
   809			 */
   810			if (perf_event_overflow(event, &data, regs))
   811				cpu_pmu->disable(event);
   812		}
   813		armv8pmu_start(cpu_pmu);
   814	
   815		return IRQ_HANDLED;
   816	}
   817	
   818	static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
   819					    struct arm_pmu *cpu_pmu)
   820	{
   821		int idx;
   822	
   823		for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) {
   824			if (!test_and_set_bit(idx, cpuc->used_mask))
   825				return idx;
   826		}
   827		return -EAGAIN;
   828	}
   829	
   830	static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
   831					   struct arm_pmu *cpu_pmu)
   832	{
   833		int idx;
   834	
   835		/*
   836		 * Chaining requires two consecutive event counters, where
   837		 * the lower idx must be even.
   838		 */
   839		for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) {
   840			if (!test_and_set_bit(idx, cpuc->used_mask)) {
   841				/* Check if the preceding even counter is available */
   842				if (!test_and_set_bit(idx - 1, cpuc->used_mask))
   843					return idx;
   844				/* Release the Odd counter */
   845				clear_bit(idx, cpuc->used_mask);
   846			}
   847		}
   848		return -EAGAIN;
   849	}
   850	
   851	static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
   852					  struct perf_event *event)
   853	{
   854		struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
   855		struct hw_perf_event *hwc = &event->hw;
   856		unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
   857	
   858		/* Always prefer to place a cycle counter into the cycle counter. */
   859		if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) {
   860			if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
   861				return ARMV8_IDX_CYCLE_COUNTER;
   862			else if (armv8pmu_event_is_64bit(event) &&
   863				   armv8pmu_event_want_user_access(event) &&
   864				   !armv8pmu_has_long_event(cpu_pmu))
   865					return -EAGAIN;
   866		}
   867	
   868		/*
   869		 * Otherwise use events counters
   870		 */
   871		if (armv8pmu_event_is_chained(event))
   872			return	armv8pmu_get_chain_idx(cpuc, cpu_pmu);
   873		else
   874			return armv8pmu_get_single_idx(cpuc, cpu_pmu);
   875	}
   876	
   877	static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc,
   878					     struct perf_event *event)
   879	{
   880		int idx = event->hw.idx;
   881	
   882		clear_bit(idx, cpuc->used_mask);
   883		if (armv8pmu_event_is_chained(event))
   884			clear_bit(idx - 1, cpuc->used_mask);
   885	}
   886	
   887	static int armv8pmu_user_event_idx(struct perf_event *event)
   888	{
   889		if (!sysctl_perf_user_access || !armv8pmu_event_has_user_read(event))
   890			return 0;
   891	
   892		/*
   893		 * We remap the cycle counter index to 32 to
   894		 * match the offset applied to the rest of
   895		 * the counter indices.
   896		 */
   897		if (event->hw.idx == ARMV8_IDX_CYCLE_COUNTER)
   898			return ARMV8_IDX_CYCLE_COUNTER_USER;
   899	
   900		return event->hw.idx;
   901	}
   902	
   903	static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
   904	{
   905		struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu);
   906	
   907		if (sched_in && armpmu->has_branch_stack)
 > 908			armv8pmu_branch_reset();
   909	}
   910
Catalin Marinas June 16, 2023, 9:21 a.m. UTC | #4
On Fri, Jun 16, 2023 at 06:57:52AM +0530, Anshuman Khandual wrote:
> On 6/16/23 05:12, kernel test robot wrote:
> > kernel test robot noticed the following build errors:
> > 
> > [auto build test ERROR on arm64/for-next/core]
> > [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615]
> > [If your patch is applied to the wrong git tree, kindly drop us a note.
> > And when submitting patch, we suggest to use '--base' as documented in
> > https://git-scm.com/docs/git-format-patch#_base_tree_information]
> > 
> > url:    https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352
> > base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
> > patch link:    https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com
> > patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU
> > config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/202306160706.Uei5XDoi-lkp@intel.com/config)
> > compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
> > reproduce (this is a W=1 build):
> >         mkdir -p ~/bin
> >         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> >         chmod +x ~/bin/make.cross
> >         # install arm cross compiling tool for clang build
> >         # apt-get install binutils-arm-linux-gnueabi
> >         git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
> >         git fetch arm64 for-next/core
> >         git checkout arm64/for-next/core
> >         b4 shazam https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual@arm.com
> >         # save the config file
> >         mkdir build_dir && cp config build_dir/.config
> >         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig
> >         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/
> 
> I am unable to reproduce this on mainline 6.4-rc6 via default cross compiler
> on a W=1 build. Looking at all other problems reported on the file, it seems
> something is not right here. Reported build problems around these callbacks,
> i.e armv8pmu_branch_XXXX() do not make sense as they are available via config
> CONFIG_PERF_EVENTS which is also enabled along with CONFIG_ARM_PMUV3 in this
> test config.

Have you tried applying this series on top of the arm64 for-next/core
branch? That's what the robot is testing (in the absence of a --base
option when generating the patches).
Anshuman Khandual June 19, 2023, 5:45 a.m. UTC | #5
On 6/16/23 14:51, Catalin Marinas wrote:
> On Fri, Jun 16, 2023 at 06:57:52AM +0530, Anshuman Khandual wrote:
>> On 6/16/23 05:12, kernel test robot wrote:
>>> kernel test robot noticed the following build errors:
>>>
>>> [auto build test ERROR on arm64/for-next/core]
>>> [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615]
>>> [If your patch is applied to the wrong git tree, kindly drop us a note.
>>> And when submitting patch, we suggest to use '--base' as documented in
>>> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>>>
>>> url:    https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352
>>> base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
>>> patch link:    https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com
>>> patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU
>>> config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/202306160706.Uei5XDoi-lkp@intel.com/config)
>>> compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
>>> reproduce (this is a W=1 build):
>>>         mkdir -p ~/bin
>>>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>>>         chmod +x ~/bin/make.cross
>>>         # install arm cross compiling tool for clang build
>>>         # apt-get install binutils-arm-linux-gnueabi
>>>         git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
>>>         git fetch arm64 for-next/core
>>>         git checkout arm64/for-next/core
>>>         b4 shazam https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual@arm.com
>>>         # save the config file
>>>         mkdir build_dir && cp config build_dir/.config
>>>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig
>>>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/
>>
>> I am unable to reproduce this on mainline 6.4-rc6 via default cross compiler
>> on a W=1 build. Looking at all other problems reported on the file, it seems
>> something is not right here. Reported build problems around these callbacks,
>> i.e armv8pmu_branch_XXXX() do not make sense as they are available via config
>> CONFIG_PERF_EVENTS which is also enabled along with CONFIG_ARM_PMUV3 in this
>> test config.
> 
> Have you tried applying this series on top of the arm64 for-next/core
> branch? That's what the robot it testing (in the absence of a --base
> option when generating the patches).

Right, it turned out to be a build problem on the arm (32-bit) platform instead.
After arm_pmuv3.c moved into the common ./drivers/perf from ./arch/arm64/kernel/,
it can no longer use functions defined in arch/arm64/include/asm/perf_event.h
without breaking the arm (32-bit) build. The following code block needs to be moved
out of arch/arm64/include/asm/perf_event.h into include/linux/perf/arm_pmuv3.h
(which is preferred, as all call sites are inside drivers/perf/arm_pmuv3.c) or
maybe into arm_pmu.h (which is one step higher in the abstraction).

struct pmu_hw_events;
struct arm_pmu;
struct perf_event;

#ifdef CONFIG_PERF_EVENTS
static inline bool has_branch_stack(struct perf_event *event);

#ifdef CONFIG_ARM64_BRBE
void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event);
bool armv8pmu_branch_attr_valid(struct perf_event *event);
void armv8pmu_branch_enable(struct perf_event *event);
void armv8pmu_branch_disable(struct perf_event *event);
void armv8pmu_branch_probe(struct arm_pmu *arm_pmu);
void armv8pmu_branch_reset(void);
int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu);
void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu);
void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx);
#else
static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event)
{
        WARN_ON_ONCE(!has_branch_stack(event));
}

static inline bool armv8pmu_branch_attr_valid(struct perf_event *event)
{
        WARN_ON_ONCE(!has_branch_stack(event));
        return false;
}

static inline void armv8pmu_branch_enable(struct perf_event *event)
{
        WARN_ON_ONCE(!has_branch_stack(event));
}

static inline void armv8pmu_branch_disable(struct perf_event *event)
{
        WARN_ON_ONCE(!has_branch_stack(event));
}

static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu) { }
static inline void armv8pmu_branch_reset(void) { }
static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu) { return 0; }
static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu) { }
static inline void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx) { }
#endif
#endif
Marc Zyngier June 19, 2023, 9:08 a.m. UTC | #6
On Mon, 19 Jun 2023 06:45:07 +0100,
Anshuman Khandual <anshuman.khandual@arm.com> wrote:
> 
> 
> 
> On 6/16/23 14:51, Catalin Marinas wrote:
> > On Fri, Jun 16, 2023 at 06:57:52AM +0530, Anshuman Khandual wrote:
> >> On 6/16/23 05:12, kernel test robot wrote:
> >>> kernel test robot noticed the following build errors:
> >>>
> >>> [auto build test ERROR on arm64/for-next/core]
> >>> [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615]
> >>> [If your patch is applied to the wrong git tree, kindly drop us a note.
> >>> And when submitting patch, we suggest to use '--base' as documented in
> >>> https://git-scm.com/docs/git-format-patch#_base_tree_information]
> >>>
> >>> url:    https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352
> >>> base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
> >>> patch link:    https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com
> >>> patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU
> >>> config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/202306160706.Uei5XDoi-lkp@intel.com/config)
> >>> compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
> >>> reproduce (this is a W=1 build):
> >>>         mkdir -p ~/bin
> >>>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> >>>         chmod +x ~/bin/make.cross
> >>>         # install arm cross compiling tool for clang build
> >>>         # apt-get install binutils-arm-linux-gnueabi
> >>>         git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
> >>>         git fetch arm64 for-next/core
> >>>         git checkout arm64/for-next/core
> >>>         b4 shazam https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual@arm.com
> >>>         # save the config file
> >>>         mkdir build_dir && cp config build_dir/.config
> >>>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig
> >>>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/
> >>
> >> I am unable to reproduce this on mainline 6.4-rc6 via default cross compiler
> >> on a W=1 build. Looking at all other problems reported on the file, it seems
> >> something is not right here. Reported build problems around these callbacks,
> >> i.e armv8pmu_branch_XXXX() do not make sense as they are available via config
> >> CONFIG_PERF_EVENTS which is also enabled along with CONFIG_ARM_PMUV3 in this
> >> test config.
> > 
> > Have you tried applying this series on top of the arm64 for-next/core
> > branch? That's what the robot it testing (in the absence of a --base
> > option when generating the patches).
> 
> Right, it turned out to be a build problem on arm (32 bit) platform instead.
> After arm_pmuv3.c moved into common ./drivers/perf from ./arch/arm64/kernel/,
> it can no longer access arch/arm64/include/asm/perf_event.h defined functions
> without breaking arm (32) bit. The following code block needs to be moved out
> from arch/arm64/include/asm/perf_event.h into include/linux/perf/arm_pmuv3.h
> (which is preferred as all call sites are inside drivers/perf/arm_pmuv3.c) or
> may be arm_pmu.h (which is one step higher in the abstraction).

No, that's the wrong approach. The 32bit backend must have its own
stubs for the stuff it implements or not.

Just add something like the patch below, and please *test* that a
32bit VM using PMUv3 doesn't have any regression.

Thanks,

	M.

From 017362ca518e6d6ac3262514d1f7f27e73232799 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 19 Jun 2023 10:05:52 +0100
Subject: [PATCH] 32bit hack

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm/include/asm/arm_pmuv3.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
index f4db3e75d75f..c4bcb7a18267 100644
--- a/arch/arm/include/asm/arm_pmuv3.h
+++ b/arch/arm/include/asm/arm_pmuv3.h
@@ -244,4 +244,22 @@ static inline bool is_pmuv3p5(int pmuver)
 	return pmuver >= ARMV8_PMU_DFR_VER_V3P5;
 }
 
+/* BRBE stubs */
+static inline void armv8pmu_branch_enable(struct perf_event *event) { }
+static inline void armv8pmu_branch_disable(struct perf_event *event) { }
+static inline void armv8pmu_branch_read(struct pmu_hw_events * cpuc,
+					struct perf_event *event) { }
+static inline void armv8pmu_branch_save(struct arm_pmu *armpmu, void *ctx) {}
+static inline void armv8pmu_branch_reset(void) {}
+static inline bool armv8pmu_branch_attr_valid(struct perf_event *event)
+{
+	return false;
+}
+static inline void armv8pmu_branch_probe(struct arm_pmu *armpmu) {}
+static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *armpmu)
+{
+	return 0;
+}
+static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *armpmu) {}
+
 #endif
Anshuman Khandual June 22, 2023, 1:52 a.m. UTC | #7
On 6/19/23 14:38, Marc Zyngier wrote:
> On Mon, 19 Jun 2023 06:45:07 +0100,
> Anshuman Khandual <anshuman.khandual@arm.com> wrote:
>>
>>
>>
>> On 6/16/23 14:51, Catalin Marinas wrote:
>>> On Fri, Jun 16, 2023 at 06:57:52AM +0530, Anshuman Khandual wrote:
>>>> On 6/16/23 05:12, kernel test robot wrote:
>>>>> kernel test robot noticed the following build errors:
>>>>>
>>>>> [auto build test ERROR on arm64/for-next/core]
>>>>> [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615]
>>>>> [If your patch is applied to the wrong git tree, kindly drop us a note.
>>>>> And when submitting patch, we suggest to use '--base' as documented in
>>>>> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>>>>>
>>>>> url:    https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352
>>>>> base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
>>>>> patch link:    https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com
>>>>> patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU
>>>>> config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/202306160706.Uei5XDoi-lkp@intel.com/config)
>>>>> compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
>>>>> reproduce (this is a W=1 build):
>>>>>         mkdir -p ~/bin
>>>>>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>>>>>         chmod +x ~/bin/make.cross
>>>>>         # install arm cross compiling tool for clang build
>>>>>         # apt-get install binutils-arm-linux-gnueabi
>>>>>         git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
>>>>>         git fetch arm64 for-next/core
>>>>>         git checkout arm64/for-next/core
>>>>>         b4 shazam https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual@arm.com
>>>>>         # save the config file
>>>>>         mkdir build_dir && cp config build_dir/.config
>>>>>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig
>>>>>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/
>>>>
>>>> I am unable to reproduce this on mainline 6.4-rc6 via default cross compiler
>>>> on a W=1 build. Looking at all other problems reported on the file, it seems
>>>> something is not right here. Reported build problems around these callbacks,
>>>> i.e armv8pmu_branch_XXXX() do not make sense as they are available via config
>>>> CONFIG_PERF_EVENTS which is also enabled along with CONFIG_ARM_PMUV3 in this
>>>> test config.
>>>
>>> Have you tried applying this series on top of the arm64 for-next/core
>>> branch? That's what the robot it testing (in the absence of a --base
>>> option when generating the patches).
>>
>> Right, it turned out to be a build problem on arm (32 bit) platform instead.
>> After arm_pmuv3.c moved into common ./drivers/perf from ./arch/arm64/kernel/,
>> it can no longer access arch/arm64/include/asm/perf_event.h defined functions
>> without breaking arm (32) bit. The following code block needs to be moved out
>> from arch/arm64/include/asm/perf_event.h into include/linux/perf/arm_pmuv3.h
>> (which is preferred as all call sites are inside drivers/perf/arm_pmuv3.c) or
>> may be arm_pmu.h (which is one step higher in the abstraction).
> 
> No, that's the wrong approach. The 32bit backend must have its own
> stubs for the stuff it implements or not.

Okay.


> 
> Just add something like the patch below, and please *test* that a
> 32bit VM using PMUv3 doesn't have any regression.

Sure.

> 
> Thanks,
> 
> 	M.
> 
>>From 017362ca518e6d6ac3262514d1f7f27e73232799 Mon Sep 17 00:00:00 2001
> From: Marc Zyngier <maz@kernel.org>
> Date: Mon, 19 Jun 2023 10:05:52 +0100
> Subject: [PATCH] 32bit hack
> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>  arch/arm/include/asm/arm_pmuv3.h | 18 ++++++++++++++++++
>  1 file changed, 18 insertions(+)
> 
> diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
> index f4db3e75d75f..c4bcb7a18267 100644
> --- a/arch/arm/include/asm/arm_pmuv3.h
> +++ b/arch/arm/include/asm/arm_pmuv3.h
> @@ -244,4 +244,22 @@ static inline bool is_pmuv3p5(int pmuver)
>  	return pmuver >= ARMV8_PMU_DFR_VER_V3P5;
>  }
>  
> +/* BRBE stubs */

These stubs also need to be wrapped in #ifdef CONFIG_PERF_EVENTS.

> +static inline void armv8pmu_branch_enable(struct perf_event *event) { }
> +static inline void armv8pmu_branch_disable(struct perf_event *event) { }
> +static inline void armv8pmu_branch_read(struct pmu_hw_events * cpuc,
> +					struct perf_event *event) { }
> +static inline void armv8pmu_branch_save(struct arm_pmu *armpmu, void *ctx) {}
> +static inline void armv8pmu_branch_reset(void) {}
> +static inline bool armv8pmu_branch_attr_valid(struct perf_event *event)
> +{
> +	return false;
> +}
> +static inline void armv8pmu_branch_probe(struct arm_pmu *armpmu) {}
> +static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *armpmu)
> +{
> +	return 0;
> +}
> +static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *armpmu) {}
> +
>  #endif

Sure, will make all the necessary changes.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index eb7071c9eb34..ebc392ba3559 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -24,4 +24,35 @@  extern unsigned long perf_misc_flags(struct pt_regs *regs);
 	(regs)->pstate = PSR_MODE_EL1h;	\
 }
 
+struct pmu_hw_events;
+struct arm_pmu;
+struct perf_event;
+
+#ifdef CONFIG_PERF_EVENTS
+static inline bool has_branch_stack(struct perf_event *event);
+
+static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event)
+{
+	WARN_ON_ONCE(!has_branch_stack(event));
+}
+
+static inline bool armv8pmu_branch_attr_valid(struct perf_event *event)
+{
+	WARN_ON_ONCE(!has_branch_stack(event));
+	return false;
+}
+
+static inline void armv8pmu_branch_enable(struct perf_event *event)
+{
+	WARN_ON_ONCE(!has_branch_stack(event));
+}
+
+static inline void armv8pmu_branch_disable(struct perf_event *event)
+{
+	WARN_ON_ONCE(!has_branch_stack(event));
+}
+
+static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu) { }
+static inline void armv8pmu_branch_reset(void) { }
+#endif
 #endif
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index c98e4039386d..54c80f393eb6 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -705,38 +705,21 @@  static void armv8pmu_enable_event(struct perf_event *event)
 	 * Enable counter and interrupt, and set the counter to count
 	 * the event that we're interested in.
 	 */
-
-	/*
-	 * Disable counter
-	 */
 	armv8pmu_disable_event_counter(event);
-
-	/*
-	 * Set event.
-	 */
 	armv8pmu_write_event_type(event);
-
-	/*
-	 * Enable interrupt for this counter
-	 */
 	armv8pmu_enable_event_irq(event);
-
-	/*
-	 * Enable counter
-	 */
 	armv8pmu_enable_event_counter(event);
+
+	if (has_branch_stack(event))
+		armv8pmu_branch_enable(event);
 }
 
 static void armv8pmu_disable_event(struct perf_event *event)
 {
-	/*
-	 * Disable counter
-	 */
-	armv8pmu_disable_event_counter(event);
+	if (has_branch_stack(event))
+		armv8pmu_branch_disable(event);
 
-	/*
-	 * Disable interrupt for this counter
-	 */
+	armv8pmu_disable_event_counter(event);
 	armv8pmu_disable_event_irq(event);
 }
 
@@ -814,6 +797,11 @@  static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
 		if (!armpmu_event_set_period(event))
 			continue;
 
+		if (has_branch_stack(event) && !WARN_ON(!cpuc->branches)) {
+			armv8pmu_branch_read(cpuc, event);
+			perf_sample_save_brstack(&data, event, &cpuc->branches->branch_stack);
+		}
+
 		/*
 		 * Perf event overflow will queue the processing of the event as
 		 * an irq_work which will be taken care of in the handling of
@@ -912,6 +900,14 @@  static int armv8pmu_user_event_idx(struct perf_event *event)
 	return event->hw.idx;
 }
 
+static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu);
+
+	if (sched_in && armpmu->has_branch_stack)
+		armv8pmu_branch_reset();
+}
+
 /*
  * Add an event filter to a given event.
  */
@@ -982,6 +978,9 @@  static void armv8pmu_reset(void *info)
 		pmcr |= ARMV8_PMU_PMCR_LP;
 
 	armv8pmu_pmcr_write(pmcr);
+
+	if (cpu_pmu->has_branch_stack)
+		armv8pmu_branch_reset();
 }
 
 static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu,
@@ -1019,6 +1018,9 @@  static int __armv8_pmuv3_map_event(struct perf_event *event,
 
 	hw_event_id = __armv8_pmuv3_map_event_id(armpmu, event);
 
+	if (has_branch_stack(event) && !armv8pmu_branch_attr_valid(event))
+		return -EOPNOTSUPP;
+
 	/*
 	 * CHAIN events only work when paired with an adjacent counter, and it
 	 * never makes sense for a user to open one in isolation, as they'll be
@@ -1135,6 +1137,33 @@  static void __armv8pmu_probe_pmu(void *info)
 		cpu_pmu->reg_pmmir = read_pmmir();
 	else
 		cpu_pmu->reg_pmmir = 0;
+	armv8pmu_branch_probe(cpu_pmu);
+}
+
+static int branch_records_alloc(struct arm_pmu *armpmu)
+{
+	struct branch_records __percpu *records;
+	int cpu;
+
+	records = alloc_percpu_gfp(struct branch_records, GFP_KERNEL);
+	if (!records)
+		return -ENOMEM;
+
+	/*
+	 * FIXME: The memory allocated via 'records' gets completely
+	 * consumed here and is never required to be freed up later. Hence
+	 * losing access to the on-stack 'records' handle is acceptable.
+	 * Otherwise this alloc handle would have to be saved somewhere.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct pmu_hw_events *events_cpu;
+		struct branch_records *records_cpu;
+
+		events_cpu = per_cpu_ptr(armpmu->hw_events, cpu);
+		records_cpu = per_cpu_ptr(records, cpu);
+		events_cpu->branches = records_cpu;
+	}
+	return 0;
 }
 
 static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
@@ -1151,7 +1180,15 @@  static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
 	if (ret)
 		return ret;
 
-	return probe.present ? 0 : -ENODEV;
+	if (!probe.present)
+		return -ENODEV;
+
+	if (cpu_pmu->has_branch_stack) {
+		ret = branch_records_alloc(cpu_pmu);
+		if (ret)
+			return ret;
+	}
+	return 0;
 }
 
 static void armv8pmu_disable_user_access_ipi(void *unused)
@@ -1214,6 +1251,7 @@  static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
 	cpu_pmu->set_event_filter	= armv8pmu_set_event_filter;
 
 	cpu_pmu->pmu.event_idx		= armv8pmu_user_event_idx;
+	cpu_pmu->sched_task		= armv8pmu_sched_task;
 
 	cpu_pmu->name			= name;
 	cpu_pmu->map_event		= map_event;