Message ID | tencent_D6474BDCDD18AA90A0C656BE704136ED2807@qq.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [v1] drivers/perf: apple_m1: add known PMU events | expand |
On Tue, 18 Jun 2024 14:49:48 +0100, Yangyu Chen <cyy@cyyself.name> wrote: > > This patch adds known PMU events that can be found on /usr/share/kpep in > macOS. The m1_pmu_events and m1_pmu_event_affinity are generated from > the script [1], which consumes the plist file from Apple. And then added > these events to m1_pmu_perf_map and m1_pmu_event_attrs with Apple's > documentation [2]. > > Link: https://github.com/cyyself/m1-pmu-gen [1] > Link: https://developer.apple.com/download/apple-silicon-cpu-optimization-guide/ [2] This needs registration, and is thus impossible to freely visit. > Signed-off-by: Yangyu Chen <cyy@cyyself.name> What is the licence applicable to the original source file? Does it explicitly allow redistribution in any form? > --- > drivers/perf/apple_m1_cpu_pmu.c | 204 +++++++++++++++++++++----------- > 1 file changed, 132 insertions(+), 72 deletions(-) > > diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c > index f322e5ca1114..e6045314ae97 100644 > --- a/drivers/perf/apple_m1_cpu_pmu.c > +++ b/drivers/perf/apple_m1_cpu_pmu.c > @@ -47,46 +47,79 @@ > * implementations, we'll have to introduce per cpu-type tables. 
> */ > enum m1_pmu_events { > - M1_PMU_PERFCTR_UNKNOWN_01 = 0x01, > - M1_PMU_PERFCTR_CPU_CYCLES = 0x02, > - M1_PMU_PERFCTR_INSTRUCTIONS = 0x8c, > - M1_PMU_PERFCTR_UNKNOWN_8d = 0x8d, > - M1_PMU_PERFCTR_UNKNOWN_8e = 0x8e, > - M1_PMU_PERFCTR_UNKNOWN_8f = 0x8f, > - M1_PMU_PERFCTR_UNKNOWN_90 = 0x90, > - M1_PMU_PERFCTR_UNKNOWN_93 = 0x93, > - M1_PMU_PERFCTR_UNKNOWN_94 = 0x94, > - M1_PMU_PERFCTR_UNKNOWN_95 = 0x95, > - M1_PMU_PERFCTR_UNKNOWN_96 = 0x96, > - M1_PMU_PERFCTR_UNKNOWN_97 = 0x97, > - M1_PMU_PERFCTR_UNKNOWN_98 = 0x98, > - M1_PMU_PERFCTR_UNKNOWN_99 = 0x99, > - M1_PMU_PERFCTR_UNKNOWN_9a = 0x9a, > - M1_PMU_PERFCTR_UNKNOWN_9b = 0x9b, > - M1_PMU_PERFCTR_UNKNOWN_9c = 0x9c, > - M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f, > - M1_PMU_PERFCTR_UNKNOWN_bf = 0xbf, > - M1_PMU_PERFCTR_UNKNOWN_c0 = 0xc0, > - M1_PMU_PERFCTR_UNKNOWN_c1 = 0xc1, > - M1_PMU_PERFCTR_UNKNOWN_c4 = 0xc4, > - M1_PMU_PERFCTR_UNKNOWN_c5 = 0xc5, > - M1_PMU_PERFCTR_UNKNOWN_c6 = 0xc6, > - M1_PMU_PERFCTR_UNKNOWN_c8 = 0xc8, > - M1_PMU_PERFCTR_UNKNOWN_ca = 0xca, > - M1_PMU_PERFCTR_UNKNOWN_cb = 0xcb, > - M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5, > - M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6, > - M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7, > - M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8, > - M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd, > - M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT, > + M1_PMU_PERFCTR_RETIRE_UOP = 0x1, > + M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE = 0x2, > + M1_PMU_PERFCTR_L1I_TLB_FILL = 0x4, > + M1_PMU_PERFCTR_L1D_TLB_FILL = 0x5, > + M1_PMU_PERFCTR_MMU_TABLE_WALK_INSTRUCTION = 0x7, > + M1_PMU_PERFCTR_MMU_TABLE_WALK_DATA = 0x8, > + M1_PMU_PERFCTR_L2_TLB_MISS_INSTRUCTION = 0xa, > + M1_PMU_PERFCTR_L2_TLB_MISS_DATA = 0xb, > + M1_PMU_PERFCTR_MMU_VIRTUAL_MEMORY_FAULT_NONSPEC = 0xd, > + M1_PMU_PERFCTR_SCHEDULE_UOP = 0x52, > + M1_PMU_PERFCTR_INTERRUPT_PENDING = 0x6c, > + M1_PMU_PERFCTR_MAP_STALL_DISPATCH = 0x70, > + M1_PMU_PERFCTR_MAP_REWIND = 0x75, > + M1_PMU_PERFCTR_MAP_STALL = 0x76, > + M1_PMU_PERFCTR_MAP_INT_UOP = 0x7c, > + M1_PMU_PERFCTR_MAP_LDST_UOP = 0x7d, > + 
M1_PMU_PERFCTR_MAP_SIMD_UOP = 0x7e, > + M1_PMU_PERFCTR_FLUSH_RESTART_OTHER_NONSPEC = 0x84, > + M1_PMU_PERFCTR_INST_ALL = 0x8c, > + M1_PMU_PERFCTR_INST_BRANCH = 0x8d, > + M1_PMU_PERFCTR_INST_BRANCH_CALL = 0x8e, > + M1_PMU_PERFCTR_INST_BRANCH_RET = 0x8f, > + M1_PMU_PERFCTR_INST_BRANCH_TAKEN = 0x90, > + M1_PMU_PERFCTR_INST_BRANCH_INDIR = 0x93, > + M1_PMU_PERFCTR_INST_BRANCH_COND = 0x94, > + M1_PMU_PERFCTR_INST_INT_LD = 0x95, > + M1_PMU_PERFCTR_INST_INT_ST = 0x96, > + M1_PMU_PERFCTR_INST_INT_ALU = 0x97, > + M1_PMU_PERFCTR_INST_SIMD_LD = 0x98, > + M1_PMU_PERFCTR_INST_SIMD_ST = 0x99, > + M1_PMU_PERFCTR_INST_SIMD_ALU = 0x9a, > + M1_PMU_PERFCTR_INST_LDST = 0x9b, > + M1_PMU_PERFCTR_INST_BARRIER = 0x9c, > + M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f, > + M1_PMU_PERFCTR_L1D_TLB_ACCESS = 0xa0, > + M1_PMU_PERFCTR_L1D_TLB_MISS = 0xa1, > + M1_PMU_PERFCTR_L1D_CACHE_MISS_ST = 0xa2, > + M1_PMU_PERFCTR_L1D_CACHE_MISS_LD = 0xa3, > + M1_PMU_PERFCTR_LD_UNIT_UOP = 0xa6, > + M1_PMU_PERFCTR_ST_UNIT_UOP = 0xa7, > + M1_PMU_PERFCTR_L1D_CACHE_WRITEBACK = 0xa8, > + M1_PMU_PERFCTR_LDST_X64_UOP = 0xb1, > + M1_PMU_PERFCTR_LDST_XPG_UOP = 0xb2, > + M1_PMU_PERFCTR_ATOMIC_OR_EXCLUSIVE_SUCC = 0xb3, > + M1_PMU_PERFCTR_ATOMIC_OR_EXCLUSIVE_FAIL = 0xb4, > + M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC = 0xbf, > + M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC = 0xc0, > + M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC = 0xc1, > + M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC = 0xc4, > + M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC = 0xc5, > + M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC = 0xc6, > + M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC = 0xc8, > + M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC = 0xca, > + M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC = 0xcb, > + M1_PMU_PERFCTR_L1I_TLB_MISS_DEMAND = 0xd4, > + M1_PMU_PERFCTR_MAP_DISPATCH_BUBBLE = 0xd6, > + M1_PMU_PERFCTR_L1I_CACHE_MISS_DEMAND = 0xdb, > + M1_PMU_PERFCTR_FETCH_RESTART = 0xde, > + M1_PMU_PERFCTR_ST_NT_UOP = 0xe5, > + M1_PMU_PERFCTR_LD_NT_UOP = 0xe6, > + 
M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5, > + M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6, > + M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7, > + M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8, > + M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd, > + M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT, > > /* > * From this point onwards, these are not actual HW events, > * but attributes that get stored in hw->config_base. > */ > - M1_PMU_CFG_COUNT_USER = BIT(8), > - M1_PMU_CFG_COUNT_KERNEL = BIT(9), > + M1_PMU_CFG_COUNT_USER = BIT(8), > + M1_PMU_CFG_COUNT_KERNEL = BIT(9), > }; > > /* > @@ -96,45 +129,48 @@ enum m1_pmu_events { > * counters had strange affinities. > */ > static const u16 m1_pmu_event_affinity[M1_PMU_PERFCTR_LAST + 1] = { > - [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1, > - [M1_PMU_PERFCTR_UNKNOWN_01] = BIT(7), > - [M1_PMU_PERFCTR_CPU_CYCLES] = ANY_BUT_0_1 | BIT(0), > - [M1_PMU_PERFCTR_INSTRUCTIONS] = BIT(7) | BIT(1), > - [M1_PMU_PERFCTR_UNKNOWN_8d] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_8e] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_8f] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_90] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_93] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_94] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_95] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_96] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_97] = BIT(7), > - [M1_PMU_PERFCTR_UNKNOWN_98] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_99] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_9a] = BIT(7), > - [M1_PMU_PERFCTR_UNKNOWN_9b] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_9c] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7), > - [M1_PMU_PERFCTR_UNKNOWN_bf] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_c0] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_c1] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_c4] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_c5] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_c6] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_c8] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_ca] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_cb] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_f5] = 
ONLY_2_4_6, > - [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6, > - [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6, > - [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7, > - [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6, > + [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1, > + [M1_PMU_PERFCTR_RETIRE_UOP] = BIT(7), > + [M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE] = ANY_BUT_0_1 | BIT(0), > + [M1_PMU_PERFCTR_INST_ALL] = BIT(7) | BIT(1), > + [M1_PMU_PERFCTR_INST_BRANCH] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_BRANCH_CALL] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_BRANCH_RET] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_BRANCH_TAKEN] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_BRANCH_INDIR] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_BRANCH_COND] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_INT_LD] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_INT_ST] = BIT(7), > + [M1_PMU_PERFCTR_INST_INT_ALU] = BIT(7), > + [M1_PMU_PERFCTR_INST_SIMD_LD] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_SIMD_ST] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_SIMD_ALU] = BIT(7), > + [M1_PMU_PERFCTR_INST_LDST] = BIT(7), > + [M1_PMU_PERFCTR_INST_BARRIER] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7), > + [M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6, > + [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6, > + [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6, > + [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7, > + [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6, > }; > > static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = { > 
PERF_MAP_ALL_UNSUPPORTED, > - [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CPU_CYCLES, > - [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INSTRUCTIONS, > + [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE, > + [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INST_ALL, > + [PERF_COUNT_HW_CACHE_MISSES] = M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC, > + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = M1_PMU_PERFCTR_INST_BRANCH, > + [PERF_COUNT_HW_BRANCH_MISSES] = M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC, > /* No idea about the rest yet */ > }; > > @@ -154,8 +190,32 @@ static ssize_t m1_pmu_events_sysfs_show(struct device *dev, > PMU_EVENT_ATTR_ID(name, m1_pmu_events_sysfs_show, config) > > static struct attribute *m1_pmu_event_attrs[] = { > - M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CPU_CYCLES), > - M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INSTRUCTIONS), > + M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE), > + M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INST_ALL), > + M1_PMU_EVENT_ATTR(retire-uops, M1_PMU_PERFCTR_RETIRE_UOP), > + M1_PMU_EVENT_ATTR(inst-branch, M1_PMU_PERFCTR_INST_BRANCH), > + M1_PMU_EVENT_ATTR(inst-branch-call, M1_PMU_PERFCTR_INST_BRANCH_CALL), > + M1_PMU_EVENT_ATTR(inst-branch-ret, M1_PMU_PERFCTR_INST_BRANCH_RET), > + M1_PMU_EVENT_ATTR(inst-branch-taken, M1_PMU_PERFCTR_INST_BRANCH_TAKEN), > + M1_PMU_EVENT_ATTR(inst-branch-indir, M1_PMU_PERFCTR_INST_BRANCH_INDIR), > + M1_PMU_EVENT_ATTR(inst-branch-cond, M1_PMU_PERFCTR_INST_BRANCH_COND), > + M1_PMU_EVENT_ATTR(inst-int-ld, M1_PMU_PERFCTR_INST_INT_LD), > + M1_PMU_EVENT_ATTR(inst-int-st, M1_PMU_PERFCTR_INST_INT_ST), > + M1_PMU_EVENT_ATTR(inst-int-alu, M1_PMU_PERFCTR_INST_INT_ALU), > + M1_PMU_EVENT_ATTR(inst-simd-ld, M1_PMU_PERFCTR_INST_SIMD_LD), > + M1_PMU_EVENT_ATTR(inst-simd-st, M1_PMU_PERFCTR_INST_SIMD_ST), > + M1_PMU_EVENT_ATTR(inst-simd-alu, M1_PMU_PERFCTR_INST_SIMD_ALU), > + M1_PMU_EVENT_ATTR(inst-ldst, M1_PMU_PERFCTR_INST_LDST), > + M1_PMU_EVENT_ATTR(inst-barrier, 
M1_PMU_PERFCTR_INST_BARRIER), > + M1_PMU_EVENT_ATTR(l1d-miss-ld, M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC), > + M1_PMU_EVENT_ATTR(l1d-miss-st, M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC), > + M1_PMU_EVENT_ATTR(l1d-tlb-miss, M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC), > + M1_PMU_EVENT_ATTR(st-mem-order-violation, M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC), > + M1_PMU_EVENT_ATTR(branch-cond-mispred, M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC), > + M1_PMU_EVENT_ATTR(branch-indir-mispred, M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC), > + M1_PMU_EVENT_ATTR(branch-ret-indir-mispred, M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC), > + M1_PMU_EVENT_ATTR(branch-call-indir-mispred, M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC), > + M1_PMU_EVENT_ATTR(branch-mispred, M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC), > NULL, > }; > Other than the licensing concern, why should we bloat the kernel with more of this stuff when everything is moving towards a bunch of JSON files (tools/perf/pmu-events/arch/arm64). M.
> On Jun 18, 2024, at 22:03, Marc Zyngier <maz@kernel.org> wrote: > > On Tue, 18 Jun 2024 14:49:48 +0100, > Yangyu Chen <cyy@cyyself.name> wrote: >> >> This patch adds known PMU events that can be found on /usr/share/kpep in >> macOS. The m1_pmu_events and m1_pmu_event_affinity are generated from >> the script [1], which consumes the plist file from Apple. And then added >> these events to m1_pmu_perf_map and m1_pmu_event_attrs with Apple's >> documentation [2]. >> >> Link: https://github.com/cyyself/m1-pmu-gen [1] >> Link: https://developer.apple.com/download/apple-silicon-cpu-optimization-guide/ [2] > > This needs registration, and is thus impossible to freely visit. > >> Signed-off-by: Yangyu Chen <cyy@cyyself.name> > > What is the licence applicable to the original source file? Does it > explicitly allow redistribution in any form? > Oh. It's my fault. Sorry for the trouble caused. > > Other than the licensing concern, why should we bloat the kernel with > more of this stuff when everything is moving towards a bunch of JSON > files (tools/perf/pmu-events/arch/arm64). > Thanks for this hint. So, the thing to do might be to provide a generator that consumes the Apple files and then generates a kernel patch for the Linux perf tools to use, rather than providing such details directly in the source code, as you suggested in [1]. Link: https://lore.kernel.org/lkml/87czn18zev.wl-maz@kernel.org/ [1] > M. > > -- > Without deviation from the norm, progress is not possible.
On Tue, 18 Jun 2024 16:56:48 +0100, Yangyu Chen <cyy@cyyself.name> wrote: > > > > > On Jun 18, 2024, at 22:03, Marc Zyngier <maz@kernel.org> wrote: > > > > On Tue, 18 Jun 2024 14:49:48 +0100, > > Yangyu Chen <cyy@cyyself.name> wrote: > >> > >> This patch adds known PMU events that can be found on /usr/share/kpep in > >> macOS. The m1_pmu_events and m1_pmu_event_affinity are generated from > >> the script [1], which consumes the plist file from Apple. And then added > >> these events to m1_pmu_perf_map and m1_pmu_event_attrs with Apple's > >> documentation [2]. > >> > >> Link: https://github.com/cyyself/m1-pmu-gen [1] > >> Link: https://developer.apple.com/download/apple-silicon-cpu-optimization-guide/ [2] > > > > This needs registration, and is thus impossible to freely visit. > > > >> Signed-off-by: Yangyu Chen <cyy@cyyself.name> > > > > What is the licence applicable to the original source file? Does it > > explicitly allow redistribution in any form? > > > > Oh. It's my fault. Sorry for the trouble caused. No trouble on my side. I'm just painfully aware that this is a legal landmine, and that what is perfectly allowed in one country may be a punishable offence in another. And since I'm not a lawyer, I want to see crystal clear things in writing. > > > > > Other than the licensing concern, why should we bloat the kernel with > > more of this stuff when everything is moving towards a bunch of JSON > > files (tools/perf/pmu-events/arch/arm64). > > > > Thanks for this hint. So, the thing to do might be to provide a > generator that consumes Apple files and then generates a kernel > patch for Linux perf tools to use rather than provide such details > directly in the source code as you said from [1]. > > Link: https://lore.kernel.org/lkml/87czn18zev.wl-maz@kernel.org/ [1] Even better: teach the perf tool to directly consume the plist file, but don't distribute the file or its content. 
People owning such a machine can fish the file from the machine itself (or the installer can extract it from the OS image as if it was firmware data). Thanks, M.
diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c index f322e5ca1114..e6045314ae97 100644 --- a/drivers/perf/apple_m1_cpu_pmu.c +++ b/drivers/perf/apple_m1_cpu_pmu.c @@ -47,46 +47,79 @@ * implementations, we'll have to introduce per cpu-type tables. */ enum m1_pmu_events { - M1_PMU_PERFCTR_UNKNOWN_01 = 0x01, - M1_PMU_PERFCTR_CPU_CYCLES = 0x02, - M1_PMU_PERFCTR_INSTRUCTIONS = 0x8c, - M1_PMU_PERFCTR_UNKNOWN_8d = 0x8d, - M1_PMU_PERFCTR_UNKNOWN_8e = 0x8e, - M1_PMU_PERFCTR_UNKNOWN_8f = 0x8f, - M1_PMU_PERFCTR_UNKNOWN_90 = 0x90, - M1_PMU_PERFCTR_UNKNOWN_93 = 0x93, - M1_PMU_PERFCTR_UNKNOWN_94 = 0x94, - M1_PMU_PERFCTR_UNKNOWN_95 = 0x95, - M1_PMU_PERFCTR_UNKNOWN_96 = 0x96, - M1_PMU_PERFCTR_UNKNOWN_97 = 0x97, - M1_PMU_PERFCTR_UNKNOWN_98 = 0x98, - M1_PMU_PERFCTR_UNKNOWN_99 = 0x99, - M1_PMU_PERFCTR_UNKNOWN_9a = 0x9a, - M1_PMU_PERFCTR_UNKNOWN_9b = 0x9b, - M1_PMU_PERFCTR_UNKNOWN_9c = 0x9c, - M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f, - M1_PMU_PERFCTR_UNKNOWN_bf = 0xbf, - M1_PMU_PERFCTR_UNKNOWN_c0 = 0xc0, - M1_PMU_PERFCTR_UNKNOWN_c1 = 0xc1, - M1_PMU_PERFCTR_UNKNOWN_c4 = 0xc4, - M1_PMU_PERFCTR_UNKNOWN_c5 = 0xc5, - M1_PMU_PERFCTR_UNKNOWN_c6 = 0xc6, - M1_PMU_PERFCTR_UNKNOWN_c8 = 0xc8, - M1_PMU_PERFCTR_UNKNOWN_ca = 0xca, - M1_PMU_PERFCTR_UNKNOWN_cb = 0xcb, - M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5, - M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6, - M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7, - M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8, - M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd, - M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT, + M1_PMU_PERFCTR_RETIRE_UOP = 0x1, + M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE = 0x2, + M1_PMU_PERFCTR_L1I_TLB_FILL = 0x4, + M1_PMU_PERFCTR_L1D_TLB_FILL = 0x5, + M1_PMU_PERFCTR_MMU_TABLE_WALK_INSTRUCTION = 0x7, + M1_PMU_PERFCTR_MMU_TABLE_WALK_DATA = 0x8, + M1_PMU_PERFCTR_L2_TLB_MISS_INSTRUCTION = 0xa, + M1_PMU_PERFCTR_L2_TLB_MISS_DATA = 0xb, + M1_PMU_PERFCTR_MMU_VIRTUAL_MEMORY_FAULT_NONSPEC = 0xd, + M1_PMU_PERFCTR_SCHEDULE_UOP = 0x52, + M1_PMU_PERFCTR_INTERRUPT_PENDING = 0x6c, + 
M1_PMU_PERFCTR_MAP_STALL_DISPATCH = 0x70, + M1_PMU_PERFCTR_MAP_REWIND = 0x75, + M1_PMU_PERFCTR_MAP_STALL = 0x76, + M1_PMU_PERFCTR_MAP_INT_UOP = 0x7c, + M1_PMU_PERFCTR_MAP_LDST_UOP = 0x7d, + M1_PMU_PERFCTR_MAP_SIMD_UOP = 0x7e, + M1_PMU_PERFCTR_FLUSH_RESTART_OTHER_NONSPEC = 0x84, + M1_PMU_PERFCTR_INST_ALL = 0x8c, + M1_PMU_PERFCTR_INST_BRANCH = 0x8d, + M1_PMU_PERFCTR_INST_BRANCH_CALL = 0x8e, + M1_PMU_PERFCTR_INST_BRANCH_RET = 0x8f, + M1_PMU_PERFCTR_INST_BRANCH_TAKEN = 0x90, + M1_PMU_PERFCTR_INST_BRANCH_INDIR = 0x93, + M1_PMU_PERFCTR_INST_BRANCH_COND = 0x94, + M1_PMU_PERFCTR_INST_INT_LD = 0x95, + M1_PMU_PERFCTR_INST_INT_ST = 0x96, + M1_PMU_PERFCTR_INST_INT_ALU = 0x97, + M1_PMU_PERFCTR_INST_SIMD_LD = 0x98, + M1_PMU_PERFCTR_INST_SIMD_ST = 0x99, + M1_PMU_PERFCTR_INST_SIMD_ALU = 0x9a, + M1_PMU_PERFCTR_INST_LDST = 0x9b, + M1_PMU_PERFCTR_INST_BARRIER = 0x9c, + M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f, + M1_PMU_PERFCTR_L1D_TLB_ACCESS = 0xa0, + M1_PMU_PERFCTR_L1D_TLB_MISS = 0xa1, + M1_PMU_PERFCTR_L1D_CACHE_MISS_ST = 0xa2, + M1_PMU_PERFCTR_L1D_CACHE_MISS_LD = 0xa3, + M1_PMU_PERFCTR_LD_UNIT_UOP = 0xa6, + M1_PMU_PERFCTR_ST_UNIT_UOP = 0xa7, + M1_PMU_PERFCTR_L1D_CACHE_WRITEBACK = 0xa8, + M1_PMU_PERFCTR_LDST_X64_UOP = 0xb1, + M1_PMU_PERFCTR_LDST_XPG_UOP = 0xb2, + M1_PMU_PERFCTR_ATOMIC_OR_EXCLUSIVE_SUCC = 0xb3, + M1_PMU_PERFCTR_ATOMIC_OR_EXCLUSIVE_FAIL = 0xb4, + M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC = 0xbf, + M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC = 0xc0, + M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC = 0xc1, + M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC = 0xc4, + M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC = 0xc5, + M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC = 0xc6, + M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC = 0xc8, + M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC = 0xca, + M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC = 0xcb, + M1_PMU_PERFCTR_L1I_TLB_MISS_DEMAND = 0xd4, + M1_PMU_PERFCTR_MAP_DISPATCH_BUBBLE = 0xd6, + M1_PMU_PERFCTR_L1I_CACHE_MISS_DEMAND = 0xdb, + 
M1_PMU_PERFCTR_FETCH_RESTART = 0xde, + M1_PMU_PERFCTR_ST_NT_UOP = 0xe5, + M1_PMU_PERFCTR_LD_NT_UOP = 0xe6, + M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5, + M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6, + M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7, + M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8, + M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd, + M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT, /* * From this point onwards, these are not actual HW events, * but attributes that get stored in hw->config_base. */ - M1_PMU_CFG_COUNT_USER = BIT(8), - M1_PMU_CFG_COUNT_KERNEL = BIT(9), + M1_PMU_CFG_COUNT_USER = BIT(8), + M1_PMU_CFG_COUNT_KERNEL = BIT(9), }; /* @@ -96,45 +129,48 @@ enum m1_pmu_events { * counters had strange affinities. */ static const u16 m1_pmu_event_affinity[M1_PMU_PERFCTR_LAST + 1] = { - [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1, - [M1_PMU_PERFCTR_UNKNOWN_01] = BIT(7), - [M1_PMU_PERFCTR_CPU_CYCLES] = ANY_BUT_0_1 | BIT(0), - [M1_PMU_PERFCTR_INSTRUCTIONS] = BIT(7) | BIT(1), - [M1_PMU_PERFCTR_UNKNOWN_8d] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_8e] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_8f] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_90] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_93] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_94] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_95] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_96] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_97] = BIT(7), - [M1_PMU_PERFCTR_UNKNOWN_98] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_99] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_9a] = BIT(7), - [M1_PMU_PERFCTR_UNKNOWN_9b] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_9c] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7), - [M1_PMU_PERFCTR_UNKNOWN_bf] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c0] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c1] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c4] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c5] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c6] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c8] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_ca] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_cb] = ONLY_5_6_7, - 
[M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6, - [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6, - [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6, - [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7, - [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6, + [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1, + [M1_PMU_PERFCTR_RETIRE_UOP] = BIT(7), + [M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE] = ANY_BUT_0_1 | BIT(0), + [M1_PMU_PERFCTR_INST_ALL] = BIT(7) | BIT(1), + [M1_PMU_PERFCTR_INST_BRANCH] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_BRANCH_CALL] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_BRANCH_RET] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_BRANCH_TAKEN] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_BRANCH_INDIR] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_BRANCH_COND] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_INT_LD] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_INT_ST] = BIT(7), + [M1_PMU_PERFCTR_INST_INT_ALU] = BIT(7), + [M1_PMU_PERFCTR_INST_SIMD_LD] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_SIMD_ST] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_SIMD_ALU] = BIT(7), + [M1_PMU_PERFCTR_INST_LDST] = BIT(7), + [M1_PMU_PERFCTR_INST_BARRIER] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7), + [M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7, + [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6, }; static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = 
M1_PMU_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INSTRUCTIONS, + [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE, + [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INST_ALL, + [PERF_COUNT_HW_CACHE_MISSES] = M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = M1_PMU_PERFCTR_INST_BRANCH, + [PERF_COUNT_HW_BRANCH_MISSES] = M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC, /* No idea about the rest yet */ }; @@ -154,8 +190,32 @@ static ssize_t m1_pmu_events_sysfs_show(struct device *dev, PMU_EVENT_ATTR_ID(name, m1_pmu_events_sysfs_show, config) static struct attribute *m1_pmu_event_attrs[] = { - M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CPU_CYCLES), - M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INSTRUCTIONS), + M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE), + M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INST_ALL), + M1_PMU_EVENT_ATTR(retire-uops, M1_PMU_PERFCTR_RETIRE_UOP), + M1_PMU_EVENT_ATTR(inst-branch, M1_PMU_PERFCTR_INST_BRANCH), + M1_PMU_EVENT_ATTR(inst-branch-call, M1_PMU_PERFCTR_INST_BRANCH_CALL), + M1_PMU_EVENT_ATTR(inst-branch-ret, M1_PMU_PERFCTR_INST_BRANCH_RET), + M1_PMU_EVENT_ATTR(inst-branch-taken, M1_PMU_PERFCTR_INST_BRANCH_TAKEN), + M1_PMU_EVENT_ATTR(inst-branch-indir, M1_PMU_PERFCTR_INST_BRANCH_INDIR), + M1_PMU_EVENT_ATTR(inst-branch-cond, M1_PMU_PERFCTR_INST_BRANCH_COND), + M1_PMU_EVENT_ATTR(inst-int-ld, M1_PMU_PERFCTR_INST_INT_LD), + M1_PMU_EVENT_ATTR(inst-int-st, M1_PMU_PERFCTR_INST_INT_ST), + M1_PMU_EVENT_ATTR(inst-int-alu, M1_PMU_PERFCTR_INST_INT_ALU), + M1_PMU_EVENT_ATTR(inst-simd-ld, M1_PMU_PERFCTR_INST_SIMD_LD), + M1_PMU_EVENT_ATTR(inst-simd-st, M1_PMU_PERFCTR_INST_SIMD_ST), + M1_PMU_EVENT_ATTR(inst-simd-alu, M1_PMU_PERFCTR_INST_SIMD_ALU), + M1_PMU_EVENT_ATTR(inst-ldst, M1_PMU_PERFCTR_INST_LDST), + M1_PMU_EVENT_ATTR(inst-barrier, M1_PMU_PERFCTR_INST_BARRIER), + M1_PMU_EVENT_ATTR(l1d-miss-ld, M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC), + M1_PMU_EVENT_ATTR(l1d-miss-st, 
M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC), + M1_PMU_EVENT_ATTR(l1d-tlb-miss, M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC), + M1_PMU_EVENT_ATTR(st-mem-order-violation, M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC), + M1_PMU_EVENT_ATTR(branch-cond-mispred, M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC), + M1_PMU_EVENT_ATTR(branch-indir-mispred, M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC), + M1_PMU_EVENT_ATTR(branch-ret-indir-mispred, M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC), + M1_PMU_EVENT_ATTR(branch-call-indir-mispred, M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC), + M1_PMU_EVENT_ATTR(branch-mispred, M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC), NULL, };
This patch adds the known PMU events that can be found in /usr/share/kpep on macOS. The m1_pmu_events and m1_pmu_event_affinity tables are generated by the script [1], which consumes the plist file from Apple. The events are then added to m1_pmu_perf_map and m1_pmu_event_attrs, following Apple's documentation [2]. Link: https://github.com/cyyself/m1-pmu-gen [1] Link: https://developer.apple.com/download/apple-silicon-cpu-optimization-guide/ [2] Signed-off-by: Yangyu Chen <cyy@cyyself.name> --- drivers/perf/apple_m1_cpu_pmu.c | 204 +++++++++++++++++++++----------- 1 file changed, 132 insertions(+), 72 deletions(-)