Message ID | tencent_D6474BDCDD18AA90A0C656BE704136ED2807@qq.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v1] drivers/perf: apple_m1: add known PMU events | expand |
On Tue, 18 Jun 2024 14:49:48 +0100, Yangyu Chen <cyy@cyyself.name> wrote: > > This patch adds known PMU events that can be found on /usr/share/kpep in > macOS. The m1_pmu_events and m1_pmu_event_affinity are generated from > the script [1], which consumes the plist file from Apple. And then added > these events to m1_pmu_perf_map and m1_pmu_event_attrs with Apple's > documentation [2]. > > Link: https://github.com/cyyself/m1-pmu-gen [1] > Link: https://developer.apple.com/download/apple-silicon-cpu-optimization-guide/ [2] This needs registration, and is thus impossible to freely visit. > Signed-off-by: Yangyu Chen <cyy@cyyself.name> What is the licence applicable to the original source file? Does it explicitly allow redistribution in any form? > --- > drivers/perf/apple_m1_cpu_pmu.c | 204 +++++++++++++++++++++----------- > 1 file changed, 132 insertions(+), 72 deletions(-) > > diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c > index f322e5ca1114..e6045314ae97 100644 > --- a/drivers/perf/apple_m1_cpu_pmu.c > +++ b/drivers/perf/apple_m1_cpu_pmu.c > @@ -47,46 +47,79 @@ > * implementations, we'll have to introduce per cpu-type tables. 
> */ > enum m1_pmu_events { > - M1_PMU_PERFCTR_UNKNOWN_01 = 0x01, > - M1_PMU_PERFCTR_CPU_CYCLES = 0x02, > - M1_PMU_PERFCTR_INSTRUCTIONS = 0x8c, > - M1_PMU_PERFCTR_UNKNOWN_8d = 0x8d, > - M1_PMU_PERFCTR_UNKNOWN_8e = 0x8e, > - M1_PMU_PERFCTR_UNKNOWN_8f = 0x8f, > - M1_PMU_PERFCTR_UNKNOWN_90 = 0x90, > - M1_PMU_PERFCTR_UNKNOWN_93 = 0x93, > - M1_PMU_PERFCTR_UNKNOWN_94 = 0x94, > - M1_PMU_PERFCTR_UNKNOWN_95 = 0x95, > - M1_PMU_PERFCTR_UNKNOWN_96 = 0x96, > - M1_PMU_PERFCTR_UNKNOWN_97 = 0x97, > - M1_PMU_PERFCTR_UNKNOWN_98 = 0x98, > - M1_PMU_PERFCTR_UNKNOWN_99 = 0x99, > - M1_PMU_PERFCTR_UNKNOWN_9a = 0x9a, > - M1_PMU_PERFCTR_UNKNOWN_9b = 0x9b, > - M1_PMU_PERFCTR_UNKNOWN_9c = 0x9c, > - M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f, > - M1_PMU_PERFCTR_UNKNOWN_bf = 0xbf, > - M1_PMU_PERFCTR_UNKNOWN_c0 = 0xc0, > - M1_PMU_PERFCTR_UNKNOWN_c1 = 0xc1, > - M1_PMU_PERFCTR_UNKNOWN_c4 = 0xc4, > - M1_PMU_PERFCTR_UNKNOWN_c5 = 0xc5, > - M1_PMU_PERFCTR_UNKNOWN_c6 = 0xc6, > - M1_PMU_PERFCTR_UNKNOWN_c8 = 0xc8, > - M1_PMU_PERFCTR_UNKNOWN_ca = 0xca, > - M1_PMU_PERFCTR_UNKNOWN_cb = 0xcb, > - M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5, > - M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6, > - M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7, > - M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8, > - M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd, > - M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT, > + M1_PMU_PERFCTR_RETIRE_UOP = 0x1, > + M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE = 0x2, > + M1_PMU_PERFCTR_L1I_TLB_FILL = 0x4, > + M1_PMU_PERFCTR_L1D_TLB_FILL = 0x5, > + M1_PMU_PERFCTR_MMU_TABLE_WALK_INSTRUCTION = 0x7, > + M1_PMU_PERFCTR_MMU_TABLE_WALK_DATA = 0x8, > + M1_PMU_PERFCTR_L2_TLB_MISS_INSTRUCTION = 0xa, > + M1_PMU_PERFCTR_L2_TLB_MISS_DATA = 0xb, > + M1_PMU_PERFCTR_MMU_VIRTUAL_MEMORY_FAULT_NONSPEC = 0xd, > + M1_PMU_PERFCTR_SCHEDULE_UOP = 0x52, > + M1_PMU_PERFCTR_INTERRUPT_PENDING = 0x6c, > + M1_PMU_PERFCTR_MAP_STALL_DISPATCH = 0x70, > + M1_PMU_PERFCTR_MAP_REWIND = 0x75, > + M1_PMU_PERFCTR_MAP_STALL = 0x76, > + M1_PMU_PERFCTR_MAP_INT_UOP = 0x7c, > + M1_PMU_PERFCTR_MAP_LDST_UOP = 0x7d, > + 
M1_PMU_PERFCTR_MAP_SIMD_UOP = 0x7e, > + M1_PMU_PERFCTR_FLUSH_RESTART_OTHER_NONSPEC = 0x84, > + M1_PMU_PERFCTR_INST_ALL = 0x8c, > + M1_PMU_PERFCTR_INST_BRANCH = 0x8d, > + M1_PMU_PERFCTR_INST_BRANCH_CALL = 0x8e, > + M1_PMU_PERFCTR_INST_BRANCH_RET = 0x8f, > + M1_PMU_PERFCTR_INST_BRANCH_TAKEN = 0x90, > + M1_PMU_PERFCTR_INST_BRANCH_INDIR = 0x93, > + M1_PMU_PERFCTR_INST_BRANCH_COND = 0x94, > + M1_PMU_PERFCTR_INST_INT_LD = 0x95, > + M1_PMU_PERFCTR_INST_INT_ST = 0x96, > + M1_PMU_PERFCTR_INST_INT_ALU = 0x97, > + M1_PMU_PERFCTR_INST_SIMD_LD = 0x98, > + M1_PMU_PERFCTR_INST_SIMD_ST = 0x99, > + M1_PMU_PERFCTR_INST_SIMD_ALU = 0x9a, > + M1_PMU_PERFCTR_INST_LDST = 0x9b, > + M1_PMU_PERFCTR_INST_BARRIER = 0x9c, > + M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f, > + M1_PMU_PERFCTR_L1D_TLB_ACCESS = 0xa0, > + M1_PMU_PERFCTR_L1D_TLB_MISS = 0xa1, > + M1_PMU_PERFCTR_L1D_CACHE_MISS_ST = 0xa2, > + M1_PMU_PERFCTR_L1D_CACHE_MISS_LD = 0xa3, > + M1_PMU_PERFCTR_LD_UNIT_UOP = 0xa6, > + M1_PMU_PERFCTR_ST_UNIT_UOP = 0xa7, > + M1_PMU_PERFCTR_L1D_CACHE_WRITEBACK = 0xa8, > + M1_PMU_PERFCTR_LDST_X64_UOP = 0xb1, > + M1_PMU_PERFCTR_LDST_XPG_UOP = 0xb2, > + M1_PMU_PERFCTR_ATOMIC_OR_EXCLUSIVE_SUCC = 0xb3, > + M1_PMU_PERFCTR_ATOMIC_OR_EXCLUSIVE_FAIL = 0xb4, > + M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC = 0xbf, > + M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC = 0xc0, > + M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC = 0xc1, > + M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC = 0xc4, > + M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC = 0xc5, > + M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC = 0xc6, > + M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC = 0xc8, > + M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC = 0xca, > + M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC = 0xcb, > + M1_PMU_PERFCTR_L1I_TLB_MISS_DEMAND = 0xd4, > + M1_PMU_PERFCTR_MAP_DISPATCH_BUBBLE = 0xd6, > + M1_PMU_PERFCTR_L1I_CACHE_MISS_DEMAND = 0xdb, > + M1_PMU_PERFCTR_FETCH_RESTART = 0xde, > + M1_PMU_PERFCTR_ST_NT_UOP = 0xe5, > + M1_PMU_PERFCTR_LD_NT_UOP = 0xe6, > + 
M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5, > + M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6, > + M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7, > + M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8, > + M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd, > + M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT, > > /* > * From this point onwards, these are not actual HW events, > * but attributes that get stored in hw->config_base. > */ > - M1_PMU_CFG_COUNT_USER = BIT(8), > - M1_PMU_CFG_COUNT_KERNEL = BIT(9), > + M1_PMU_CFG_COUNT_USER = BIT(8), > + M1_PMU_CFG_COUNT_KERNEL = BIT(9), > }; > > /* > @@ -96,45 +129,48 @@ enum m1_pmu_events { > * counters had strange affinities. > */ > static const u16 m1_pmu_event_affinity[M1_PMU_PERFCTR_LAST + 1] = { > - [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1, > - [M1_PMU_PERFCTR_UNKNOWN_01] = BIT(7), > - [M1_PMU_PERFCTR_CPU_CYCLES] = ANY_BUT_0_1 | BIT(0), > - [M1_PMU_PERFCTR_INSTRUCTIONS] = BIT(7) | BIT(1), > - [M1_PMU_PERFCTR_UNKNOWN_8d] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_8e] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_8f] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_90] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_93] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_94] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_95] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_96] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_97] = BIT(7), > - [M1_PMU_PERFCTR_UNKNOWN_98] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_99] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_9a] = BIT(7), > - [M1_PMU_PERFCTR_UNKNOWN_9b] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_9c] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7), > - [M1_PMU_PERFCTR_UNKNOWN_bf] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_c0] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_c1] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_c4] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_c5] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_c6] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_c8] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_ca] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_cb] = ONLY_5_6_7, > - [M1_PMU_PERFCTR_UNKNOWN_f5] = 
ONLY_2_4_6, > - [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6, > - [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6, > - [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7, > - [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6, > + [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1, > + [M1_PMU_PERFCTR_RETIRE_UOP] = BIT(7), > + [M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE] = ANY_BUT_0_1 | BIT(0), > + [M1_PMU_PERFCTR_INST_ALL] = BIT(7) | BIT(1), > + [M1_PMU_PERFCTR_INST_BRANCH] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_BRANCH_CALL] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_BRANCH_RET] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_BRANCH_TAKEN] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_BRANCH_INDIR] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_BRANCH_COND] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_INT_LD] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_INT_ST] = BIT(7), > + [M1_PMU_PERFCTR_INST_INT_ALU] = BIT(7), > + [M1_PMU_PERFCTR_INST_SIMD_LD] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_SIMD_ST] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_INST_SIMD_ALU] = BIT(7), > + [M1_PMU_PERFCTR_INST_LDST] = BIT(7), > + [M1_PMU_PERFCTR_INST_BARRIER] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7), > + [M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC] = ONLY_5_6_7, > + [M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6, > + [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6, > + [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6, > + [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7, > + [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6, > }; > > static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = { > 
PERF_MAP_ALL_UNSUPPORTED, > - [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CPU_CYCLES, > - [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INSTRUCTIONS, > + [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE, > + [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INST_ALL, > + [PERF_COUNT_HW_CACHE_MISSES] = M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC, > + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = M1_PMU_PERFCTR_INST_BRANCH, > + [PERF_COUNT_HW_BRANCH_MISSES] = M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC, > /* No idea about the rest yet */ > }; > > @@ -154,8 +190,32 @@ static ssize_t m1_pmu_events_sysfs_show(struct device *dev, > PMU_EVENT_ATTR_ID(name, m1_pmu_events_sysfs_show, config) > > static struct attribute *m1_pmu_event_attrs[] = { > - M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CPU_CYCLES), > - M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INSTRUCTIONS), > + M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE), > + M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INST_ALL), > + M1_PMU_EVENT_ATTR(retire-uops, M1_PMU_PERFCTR_RETIRE_UOP), > + M1_PMU_EVENT_ATTR(inst-branch, M1_PMU_PERFCTR_INST_BRANCH), > + M1_PMU_EVENT_ATTR(inst-branch-call, M1_PMU_PERFCTR_INST_BRANCH_CALL), > + M1_PMU_EVENT_ATTR(inst-branch-ret, M1_PMU_PERFCTR_INST_BRANCH_RET), > + M1_PMU_EVENT_ATTR(inst-branch-taken, M1_PMU_PERFCTR_INST_BRANCH_TAKEN), > + M1_PMU_EVENT_ATTR(inst-branch-indir, M1_PMU_PERFCTR_INST_BRANCH_INDIR), > + M1_PMU_EVENT_ATTR(inst-branch-cond, M1_PMU_PERFCTR_INST_BRANCH_COND), > + M1_PMU_EVENT_ATTR(inst-int-ld, M1_PMU_PERFCTR_INST_INT_LD), > + M1_PMU_EVENT_ATTR(inst-int-st, M1_PMU_PERFCTR_INST_INT_ST), > + M1_PMU_EVENT_ATTR(inst-int-alu, M1_PMU_PERFCTR_INST_INT_ALU), > + M1_PMU_EVENT_ATTR(inst-simd-ld, M1_PMU_PERFCTR_INST_SIMD_LD), > + M1_PMU_EVENT_ATTR(inst-simd-st, M1_PMU_PERFCTR_INST_SIMD_ST), > + M1_PMU_EVENT_ATTR(inst-simd-alu, M1_PMU_PERFCTR_INST_SIMD_ALU), > + M1_PMU_EVENT_ATTR(inst-ldst, M1_PMU_PERFCTR_INST_LDST), > + M1_PMU_EVENT_ATTR(inst-barrier, 
M1_PMU_PERFCTR_INST_BARRIER), > + M1_PMU_EVENT_ATTR(l1d-miss-ld, M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC), > + M1_PMU_EVENT_ATTR(l1d-miss-st, M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC), > + M1_PMU_EVENT_ATTR(l1d-tlb-miss, M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC), > + M1_PMU_EVENT_ATTR(st-mem-order-violation, M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC), > + M1_PMU_EVENT_ATTR(branch-cond-mispred, M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC), > + M1_PMU_EVENT_ATTR(branch-indir-mispred, M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC), > + M1_PMU_EVENT_ATTR(branch-ret-indir-mispred, M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC), > + M1_PMU_EVENT_ATTR(branch-call-indir-mispred, M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC), > + M1_PMU_EVENT_ATTR(branch-mispred, M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC), > NULL, > }; > Other than the licensing concern, why should we bloat the kernel with more of this stuff when everything is moving towards a bunch of JSON files (tools/perf/pmu-events/arch/arm64). M.
> On Jun 18, 2024, at 22:03, Marc Zyngier <maz@kernel.org> wrote: > > On Tue, 18 Jun 2024 14:49:48 +0100, > Yangyu Chen <cyy@cyyself.name> wrote: >> >> This patch adds known PMU events that can be found on /usr/share/kpep in >> macOS. The m1_pmu_events and m1_pmu_event_affinity are generated from >> the script [1], which consumes the plist file from Apple. And then added >> these events to m1_pmu_perf_map and m1_pmu_event_attrs with Apple's >> documentation [2]. >> >> Link: https://github.com/cyyself/m1-pmu-gen [1] >> Link: https://developer.apple.com/download/apple-silicon-cpu-optimization-guide/ [2] > > This needs registration, and is thus impossible to freely visit. > >> Signed-off-by: Yangyu Chen <cyy@cyyself.name> > > What is the licence applicable to the original source file? Does it > explicitly allow redistribution in any form? > Oh. It's my fault. Sorry for the trouble caused. > > Other than the licensing concern, why should we bloat the kernel with > more of this stuff when everything is moving towards a bunch of JSON > files (tools/perf/pmu-events/arch/arm64). > Thanks for this hint. So, the thing to do might be to provide a generator that consumes Apple files and then generates a kernel patch for Linux perf tools to use, rather than providing such details directly in the source code, as you said in [1]. Link: https://lore.kernel.org/lkml/87czn18zev.wl-maz@kernel.org/ [1] > M. > > -- > Without deviation from the norm, progress is not possible.
On Tue, 18 Jun 2024 16:56:48 +0100, Yangyu Chen <cyy@cyyself.name> wrote: > > > > > On Jun 18, 2024, at 22:03, Marc Zyngier <maz@kernel.org> wrote: > > > > On Tue, 18 Jun 2024 14:49:48 +0100, > > Yangyu Chen <cyy@cyyself.name> wrote: > >> > >> This patch adds known PMU events that can be found on /usr/share/kpep in > >> macOS. The m1_pmu_events and m1_pmu_event_affinity are generated from > >> the script [1], which consumes the plist file from Apple. And then added > >> these events to m1_pmu_perf_map and m1_pmu_event_attrs with Apple's > >> documentation [2]. > >> > >> Link: https://github.com/cyyself/m1-pmu-gen [1] > >> Link: https://developer.apple.com/download/apple-silicon-cpu-optimization-guide/ [2] > > > > This needs registration, and is thus impossible to freely visit. > > > >> Signed-off-by: Yangyu Chen <cyy@cyyself.name> > > > > What is the licence applicable to the original source file? Does it > > explicitly allow redistribution in any form? > > > > Oh. It's my fault. Sorry for the trouble caused. No trouble on my side. I'm just painfully aware that this is a legal landmine, and that what is perfectly allowed in one country may be a punishable offence in another. And since I'm not a lawyer, I want to see crystal clear things in writing. > > > > > Other than the licensing concern, why should we bloat the kernel with > > more of this stuff when everything is moving towards a bunch of JSON > > files (tools/perf/pmu-events/arch/arm64). > > > > Thanks for this hint. So, the thing to do might be to provide a > generator that consumes Apple files and then generates a kernel > patch for Linux perf tools to use rather than provide such details > directly in the source code as you said from [1]. > > Link: https://lore.kernel.org/lkml/87czn18zev.wl-maz@kernel.org/ [1] Even better: teach the perf tool to directly consume the plist file, but don't distribute the file or its content. 
People owning such a machine can fish the file from the machine itself (or the installer can extract it from the OS image as if it were firmware data). Thanks, M.
On 2024/06/19 1:58, Marc Zyngier wrote: > On Tue, 18 Jun 2024 16:56:48 +0100, > Yangyu Chen <cyy@cyyself.name> wrote: >> >> >> >>> On Jun 18, 2024, at 22:03, Marc Zyngier <maz@kernel.org> wrote: >>> >>> On Tue, 18 Jun 2024 14:49:48 +0100, >>> Yangyu Chen <cyy@cyyself.name> wrote: >>>> >>>> This patch adds known PMU events that can be found on /usr/share/kpep in >>>> macOS. The m1_pmu_events and m1_pmu_event_affinity are generated from >>>> the script [1], which consumes the plist file from Apple. And then added >>>> these events to m1_pmu_perf_map and m1_pmu_event_attrs with Apple's >>>> documentation [2]. >>>> >>>> Link: https://github.com/cyyself/m1-pmu-gen [1] >>>> Link: https://developer.apple.com/download/apple-silicon-cpu-optimization-guide/ [2] >>> >>> This needs registration, and is thus impossible to freely visit. >>> >>>> Signed-off-by: Yangyu Chen <cyy@cyyself.name> >>> >>> What is the licence applicable to the original source file? Does it >>> explicitly allow redistribution in any form? >>> >> >> Oh. It's my fault. Sorry for the trouble caused. > > No trouble on my side. I'm just painfully aware that this is a legal > landmine, and that what is perfectly allowed in one country may be a > punishable offence in another. And since I'm not a lawyer, I want to > see crystal clear things in writing. > >> >>> >>> Other than the licensing concern, why should we bloat the kernel with >>> more of this stuff when everything is moving towards a bunch of JSON >>> files (tools/perf/pmu-events/arch/arm64). >>> >> >> Thanks for this hint. So, the thing to do might be to provide a >> generator that consumes Apple files and then generates a kernel >> patch for Linux perf tools to use rather than provide such details >> directly in the source code as you said from [1]. >> >> Link: https://lore.kernel.org/lkml/87czn18zev.wl-maz@kernel.org/ [1] > > Even better: teach the perf tool to directly consume the plist file, > but don't distribute the file or its content. 
People owning such a > machine can fish the file from the machine itself (or the installer > can extract it from the OS image as if it was firmware data). Maz, That would be a waste of time. Facts about hardware are not copyrightable. I see absolutely nothing objectionable in this patch. It doesn't matter where the information was sourced as long as it was legitimately available to the person (which it was, as long as they were running macOS on one of these machines). Let's look at the license for the ARMv8-A ARM: > Proprietary Notice > This document is protected by copyright and other related rights and the practice or implementation of the information contained > in this document may be protected by one or more patents or pending patent applications. No part of this document may be > reproduced in any form by any means without the express prior written permission of Arm. No license, express or implied, by > estoppel or otherwise to any intellectual property rights is granted by this document unless specifically stated. There is absolutely nothing in there granting a license to use the information in the document and things like register names in Linux or any other OS. And yet we can do that, because those things aren't copyrightable. It would defeat the entire point of the documentation if you could not use it, even though there is in fact no explicit copyright grant to allow you to use it. It is not needed. The same exact logic applies here. The macOS license does not grant us the right to reproduce portions of macOS, but that is completely irrelevant because the portion "reproduced" in the form of this patch is not, at all, copyrightable. If it were we would have much bigger issues and all kinds of code in Linux would be a copyvio. The fact that there was some automation involved in generating the patch contents is entirely irrelevant, as long as the output does not keep a copyright interest from the author of the input. 
I also have an actual lawyer's opinion that register names are not copyrightable, which further corroborates this interpretation. As far as I'm concerned this can be merged as is. Acked-by: Hector Martin <marcan@marcan.st> - Hector
> On Jul 28, 2024, at 19:00, Hector Martin <marcan@marcan.st> wrote: > > > On 2024/06/19 1:58, Marc Zyngier wrote: >> On Tue, 18 Jun 2024 16:56:48 +0100, >> Yangyu Chen <cyy@cyyself.name> wrote: >>> >>> >>> >>>> On Jun 18, 2024, at 22:03, Marc Zyngier <maz@kernel.org> wrote: >>>> >>>> On Tue, 18 Jun 2024 14:49:48 +0100, >>>> Yangyu Chen <cyy@cyyself.name> wrote: >>>>> >>>>> This patch adds known PMU events that can be found on /usr/share/kpep in >>>>> macOS. The m1_pmu_events and m1_pmu_event_affinity are generated from >>>>> the script [1], which consumes the plist file from Apple. And then added >>>>> these events to m1_pmu_perf_map and m1_pmu_event_attrs with Apple's >>>>> documentation [2]. >>>>> >>>>> Link: https://github.com/cyyself/m1-pmu-gen [1] >>>>> Link: https://developer.apple.com/download/apple-silicon-cpu-optimization-guide/ [2] >>>> >>>> This needs registration, and is thus impossible to freely visit. >>>> >>>>> Signed-off-by: Yangyu Chen <cyy@cyyself.name> >>>> >>>> What is the licence applicable to the original source file? Does it >>>> explicitly allow redistribution in any form? >>>> >>> >>> Oh. It's my fault. Sorry for the trouble caused. >> >> No trouble on my side. I'm just painfully aware that this is a legal >> landmine, and that what is perfectly allowed in one country may be a >> punishable offence in another. And since I'm not a lawyer, I want to >> see crystal clear things in writing. >> >>> >>>> >>>> Other than the licensing concern, why should we bloat the kernel with >>>> more of this stuff when everything is moving towards a bunch of JSON >>>> files (tools/perf/pmu-events/arch/arm64). >>>> >>> >>> Thanks for this hint. So, the thing to do might be to provide a >>> generator that consumes Apple files and then generates a kernel >>> patch for Linux perf tools to use rather than provide such details >>> directly in the source code as you said from [1]. 
>>> >>> Link: https://lore.kernel.org/lkml/87czn18zev.wl-maz@kernel.org/ [1] >> >> Even better: teach the perf tool to directly consume the plist file, >> but don't distribute the file or its content. People owning such a >> machine can fish the file from the machine itself (or the installer >> can extract it from the OS image as if it was firmware data). > > Maz, > > That would be a waste of time. Facts about hardware are not > copyrightable. I see absolutely nothing objectionable in this patch. It > doesn't matter where the information was sourced as long as it was > legitimately available to the person (which it was, as long as they were > running macOS on one of these machines). > > Let's look at the license for the ARMv8-A ARM: > >> Proprietary Notice >> This document is protected by copyright and other related rights and the practice or implementation of the information contained >> in this document may be protected by one or more patents or pending patent applications. No part of this document may be >> reproduced in any form by any means without the express prior written permission of Arm. No license, express or implied, by >> estoppel or otherwise to any intellectual property rights is granted by this document unless specifically stated. > > There is absolutely nothing in there granting a license to use the > information in the document and things like register names in Linux or > any other OS. And yet we can do that, because those things aren't > copyrightable. It would defeat the entire point of the documentation if > you could not use it, even though there is in fact no explicit copyright > grant to allow you to use it. It is not needed. > > The same exact logic applies here. The macOS license does not grant us > the right to reproduce portions of macOS, but that is completely > irrelevant because the portion "reproduced" in the form of this patch is > not, at all, copyrightable. 
If it were we would have much bigger issues > and all kinds of code in Linux would be a copyvio. The fact that there > was some automation involved in generating the patch contents is > entirely irrelevant, as long as the output does not keep a copyright > interest from the author of the input. > > I also have an actual lawyer's opinion that register names are not > copyrightable, which further corroborates this interpretation. > > As far as I'm concerned this can be merged as is. Even if there are no copyright concerns, as you said in [1], I think I should remove the lines in m1_pmu_event_attrs and then patch the userspace Linux perf tools with the definitions in the JSON file. Since I haven't received any other advice on copyright concerns, I am still waiting for other suggestions before submitting the patch revision. Thanks, Yangyu Chen [1] https://lore.kernel.org/lkml/dbf17fa6-1af6-467b-8b3d-dca8476dc785@marcan.st/ > > Acked-by: Hector Martin <marcan@marcan.st> > > - Hector
On 2024/07/28 21:19, Yangyu Chen wrote: > > >> On Jul 28, 2024, at 19:00, Hector Martin <marcan@marcan.st> wrote: >> >> >> On 2024/06/19 1:58, Marc Zyngier wrote: >>> On Tue, 18 Jun 2024 16:56:48 +0100, >>> Yangyu Chen <cyy@cyyself.name> wrote: >>>> >>>> >>>> >>>>> On Jun 18, 2024, at 22:03, Marc Zyngier <maz@kernel.org> wrote: >>>>> >>>>> On Tue, 18 Jun 2024 14:49:48 +0100, >>>>> Yangyu Chen <cyy@cyyself.name> wrote: >>>>>> >>>>>> This patch adds known PMU events that can be found on /usr/share/kpep in >>>>>> macOS. The m1_pmu_events and m1_pmu_event_affinity are generated from >>>>>> the script [1], which consumes the plist file from Apple. And then added >>>>>> these events to m1_pmu_perf_map and m1_pmu_event_attrs with Apple's >>>>>> documentation [2]. >>>>>> >>>>>> Link: https://github.com/cyyself/m1-pmu-gen [1] >>>>>> Link: https://developer.apple.com/download/apple-silicon-cpu-optimization-guide/ [2] >>>>> >>>>> This needs registration, and is thus impossible to freely visit. >>>>> >>>>>> Signed-off-by: Yangyu Chen <cyy@cyyself.name> >>>>> >>>>> What is the licence applicable to the original source file? Does it >>>>> explicitly allow redistribution in any form? >>>>> >>>> >>>> Oh. It's my fault. Sorry for the trouble caused. >>> >>> No trouble on my side. I'm just painfully aware that this is a legal >>> landmine, and that what is perfectly allowed in one country may be a >>> punishable offence in another. And since I'm not a lawyer, I want to >>> see crystal clear things in writing. >>> >>>> >>>>> >>>>> Other than the licensing concern, why should we bloat the kernel with >>>>> more of this stuff when everything is moving towards a bunch of JSON >>>>> files (tools/perf/pmu-events/arch/arm64). >>>>> >>>> >>>> Thanks for this hint. 
So, the thing to do might be to provide a >>>> generator that consumes Apple files and then generates a kernel >>>> patch for Linux perf tools to use rather than provide such details >>>> directly in the source code as you said from [1]. >>>> >>>> Link: https://lore.kernel.org/lkml/87czn18zev.wl-maz@kernel.org/ [1] >>> >>> Even better: teach the perf tool to directly consume the plist file, >>> but don't distribute the file or its content. People owning such a >>> machine can fish the file from the machine itself (or the installer >>> can extract it from the OS image as if it was firmware data). >> >> Maz, >> >> That would be a waste of time. Facts about hardware are not >> copyrightable. I see absolutely nothing objectionable in this patch. It >> doesn't matter where the information was sourced as long as it was >> legitimately available to the person (which it was, as long as they were >> running macOS on one of these machines). >> >> Let's look at the license for the ARMv8-A ARM: >> >>> Proprietary Notice >>> This document is protected by copyright and other related rights and the practice or implementation of the information contained >>> in this document may be protected by one or more patents or pending patent applications. No part of this document may be >>> reproduced in any form by any means without the express prior written permission of Arm. No license, express or implied, by >>> estoppel or otherwise to any intellectual property rights is granted by this document unless specifically stated. >> >> There is absolutely nothing in there granting a license to use the >> information in the document and things like register names in Linux or >> any other OS. And yet we can do that, because those things aren't >> copyrightable. It would defeat the entire point of the documentation if >> you could not use it, even though there is in fact no explicit copyright >> grant to allow you to use it. It is not needed. >> >> The same exact logic applies here. 
The macOS license does not grant us >> the right to reproduce portions of macOS, but that is completely >> irrelevant because the portion "reproduced" in the form of this patch is >> not, at all, copyrightable. If it were we would have much bigger issues >> and all kinds of code in Linux would be a copyvio. The fact that there >> was some automation involved in generating the patch contents is >> entirely irrelevant, as long as the output does not keep a copyright >> interest from the author of the input. >> >> I also have an actual lawyer's opinion that register names are not >> copyrightable, which further corroborates this interpretation. >> >> As far as I'm concerned this can be merged as is. > > Even if there are no copyright concerns, as you said from [1], I > think I should remove the lines in m1_pmu_event_attrs and then patch > userspace Linux-perf tools with the definitions in the JSON file. Sure, that's fine. I would say go ahead and move the definitions to JSON, then just submit that. > Since I haven't received any other advice on copyright concerns, I > am still waiting for other suggestions before submitting the patch > revision. > > Thanks, > Yangyu Chen > > [1] https://lore.kernel.org/lkml/dbf17fa6-1af6-467b-8b3d-dca8476dc785@marcan.st/ - Hector
diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c index f322e5ca1114..e6045314ae97 100644 --- a/drivers/perf/apple_m1_cpu_pmu.c +++ b/drivers/perf/apple_m1_cpu_pmu.c @@ -47,46 +47,79 @@ * implementations, we'll have to introduce per cpu-type tables. */ enum m1_pmu_events { - M1_PMU_PERFCTR_UNKNOWN_01 = 0x01, - M1_PMU_PERFCTR_CPU_CYCLES = 0x02, - M1_PMU_PERFCTR_INSTRUCTIONS = 0x8c, - M1_PMU_PERFCTR_UNKNOWN_8d = 0x8d, - M1_PMU_PERFCTR_UNKNOWN_8e = 0x8e, - M1_PMU_PERFCTR_UNKNOWN_8f = 0x8f, - M1_PMU_PERFCTR_UNKNOWN_90 = 0x90, - M1_PMU_PERFCTR_UNKNOWN_93 = 0x93, - M1_PMU_PERFCTR_UNKNOWN_94 = 0x94, - M1_PMU_PERFCTR_UNKNOWN_95 = 0x95, - M1_PMU_PERFCTR_UNKNOWN_96 = 0x96, - M1_PMU_PERFCTR_UNKNOWN_97 = 0x97, - M1_PMU_PERFCTR_UNKNOWN_98 = 0x98, - M1_PMU_PERFCTR_UNKNOWN_99 = 0x99, - M1_PMU_PERFCTR_UNKNOWN_9a = 0x9a, - M1_PMU_PERFCTR_UNKNOWN_9b = 0x9b, - M1_PMU_PERFCTR_UNKNOWN_9c = 0x9c, - M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f, - M1_PMU_PERFCTR_UNKNOWN_bf = 0xbf, - M1_PMU_PERFCTR_UNKNOWN_c0 = 0xc0, - M1_PMU_PERFCTR_UNKNOWN_c1 = 0xc1, - M1_PMU_PERFCTR_UNKNOWN_c4 = 0xc4, - M1_PMU_PERFCTR_UNKNOWN_c5 = 0xc5, - M1_PMU_PERFCTR_UNKNOWN_c6 = 0xc6, - M1_PMU_PERFCTR_UNKNOWN_c8 = 0xc8, - M1_PMU_PERFCTR_UNKNOWN_ca = 0xca, - M1_PMU_PERFCTR_UNKNOWN_cb = 0xcb, - M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5, - M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6, - M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7, - M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8, - M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd, - M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT, + M1_PMU_PERFCTR_RETIRE_UOP = 0x1, + M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE = 0x2, + M1_PMU_PERFCTR_L1I_TLB_FILL = 0x4, + M1_PMU_PERFCTR_L1D_TLB_FILL = 0x5, + M1_PMU_PERFCTR_MMU_TABLE_WALK_INSTRUCTION = 0x7, + M1_PMU_PERFCTR_MMU_TABLE_WALK_DATA = 0x8, + M1_PMU_PERFCTR_L2_TLB_MISS_INSTRUCTION = 0xa, + M1_PMU_PERFCTR_L2_TLB_MISS_DATA = 0xb, + M1_PMU_PERFCTR_MMU_VIRTUAL_MEMORY_FAULT_NONSPEC = 0xd, + M1_PMU_PERFCTR_SCHEDULE_UOP = 0x52, + M1_PMU_PERFCTR_INTERRUPT_PENDING = 0x6c, + 
M1_PMU_PERFCTR_MAP_STALL_DISPATCH = 0x70, + M1_PMU_PERFCTR_MAP_REWIND = 0x75, + M1_PMU_PERFCTR_MAP_STALL = 0x76, + M1_PMU_PERFCTR_MAP_INT_UOP = 0x7c, + M1_PMU_PERFCTR_MAP_LDST_UOP = 0x7d, + M1_PMU_PERFCTR_MAP_SIMD_UOP = 0x7e, + M1_PMU_PERFCTR_FLUSH_RESTART_OTHER_NONSPEC = 0x84, + M1_PMU_PERFCTR_INST_ALL = 0x8c, + M1_PMU_PERFCTR_INST_BRANCH = 0x8d, + M1_PMU_PERFCTR_INST_BRANCH_CALL = 0x8e, + M1_PMU_PERFCTR_INST_BRANCH_RET = 0x8f, + M1_PMU_PERFCTR_INST_BRANCH_TAKEN = 0x90, + M1_PMU_PERFCTR_INST_BRANCH_INDIR = 0x93, + M1_PMU_PERFCTR_INST_BRANCH_COND = 0x94, + M1_PMU_PERFCTR_INST_INT_LD = 0x95, + M1_PMU_PERFCTR_INST_INT_ST = 0x96, + M1_PMU_PERFCTR_INST_INT_ALU = 0x97, + M1_PMU_PERFCTR_INST_SIMD_LD = 0x98, + M1_PMU_PERFCTR_INST_SIMD_ST = 0x99, + M1_PMU_PERFCTR_INST_SIMD_ALU = 0x9a, + M1_PMU_PERFCTR_INST_LDST = 0x9b, + M1_PMU_PERFCTR_INST_BARRIER = 0x9c, + M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f, + M1_PMU_PERFCTR_L1D_TLB_ACCESS = 0xa0, + M1_PMU_PERFCTR_L1D_TLB_MISS = 0xa1, + M1_PMU_PERFCTR_L1D_CACHE_MISS_ST = 0xa2, + M1_PMU_PERFCTR_L1D_CACHE_MISS_LD = 0xa3, + M1_PMU_PERFCTR_LD_UNIT_UOP = 0xa6, + M1_PMU_PERFCTR_ST_UNIT_UOP = 0xa7, + M1_PMU_PERFCTR_L1D_CACHE_WRITEBACK = 0xa8, + M1_PMU_PERFCTR_LDST_X64_UOP = 0xb1, + M1_PMU_PERFCTR_LDST_XPG_UOP = 0xb2, + M1_PMU_PERFCTR_ATOMIC_OR_EXCLUSIVE_SUCC = 0xb3, + M1_PMU_PERFCTR_ATOMIC_OR_EXCLUSIVE_FAIL = 0xb4, + M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC = 0xbf, + M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC = 0xc0, + M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC = 0xc1, + M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC = 0xc4, + M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC = 0xc5, + M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC = 0xc6, + M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC = 0xc8, + M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC = 0xca, + M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC = 0xcb, + M1_PMU_PERFCTR_L1I_TLB_MISS_DEMAND = 0xd4, + M1_PMU_PERFCTR_MAP_DISPATCH_BUBBLE = 0xd6, + M1_PMU_PERFCTR_L1I_CACHE_MISS_DEMAND = 0xdb, + 
M1_PMU_PERFCTR_FETCH_RESTART = 0xde, + M1_PMU_PERFCTR_ST_NT_UOP = 0xe5, + M1_PMU_PERFCTR_LD_NT_UOP = 0xe6, + M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5, + M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6, + M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7, + M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8, + M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd, + M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT, /* * From this point onwards, these are not actual HW events, * but attributes that get stored in hw->config_base. */ - M1_PMU_CFG_COUNT_USER = BIT(8), - M1_PMU_CFG_COUNT_KERNEL = BIT(9), + M1_PMU_CFG_COUNT_USER = BIT(8), + M1_PMU_CFG_COUNT_KERNEL = BIT(9), }; /* @@ -96,45 +129,48 @@ enum m1_pmu_events { * counters had strange affinities. */ static const u16 m1_pmu_event_affinity[M1_PMU_PERFCTR_LAST + 1] = { - [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1, - [M1_PMU_PERFCTR_UNKNOWN_01] = BIT(7), - [M1_PMU_PERFCTR_CPU_CYCLES] = ANY_BUT_0_1 | BIT(0), - [M1_PMU_PERFCTR_INSTRUCTIONS] = BIT(7) | BIT(1), - [M1_PMU_PERFCTR_UNKNOWN_8d] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_8e] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_8f] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_90] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_93] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_94] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_95] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_96] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_97] = BIT(7), - [M1_PMU_PERFCTR_UNKNOWN_98] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_99] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_9a] = BIT(7), - [M1_PMU_PERFCTR_UNKNOWN_9b] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_9c] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7), - [M1_PMU_PERFCTR_UNKNOWN_bf] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c0] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c1] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c4] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c5] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c6] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_c8] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_ca] = ONLY_5_6_7, - [M1_PMU_PERFCTR_UNKNOWN_cb] = ONLY_5_6_7, - 
[M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6, - [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6, - [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6, - [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7, - [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6, + [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1, + [M1_PMU_PERFCTR_RETIRE_UOP] = BIT(7), + [M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE] = ANY_BUT_0_1 | BIT(0), + [M1_PMU_PERFCTR_INST_ALL] = BIT(7) | BIT(1), + [M1_PMU_PERFCTR_INST_BRANCH] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_BRANCH_CALL] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_BRANCH_RET] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_BRANCH_TAKEN] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_BRANCH_INDIR] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_BRANCH_COND] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_INT_LD] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_INT_ST] = BIT(7), + [M1_PMU_PERFCTR_INST_INT_ALU] = BIT(7), + [M1_PMU_PERFCTR_INST_SIMD_LD] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_SIMD_ST] = ONLY_5_6_7, + [M1_PMU_PERFCTR_INST_SIMD_ALU] = BIT(7), + [M1_PMU_PERFCTR_INST_LDST] = BIT(7), + [M1_PMU_PERFCTR_INST_BARRIER] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7), + [M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7, + [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6, }; static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = 
M1_PMU_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INSTRUCTIONS, + [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE, + [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INST_ALL, + [PERF_COUNT_HW_CACHE_MISSES] = M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = M1_PMU_PERFCTR_INST_BRANCH, + [PERF_COUNT_HW_BRANCH_MISSES] = M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC, /* No idea about the rest yet */ }; @@ -154,8 +190,32 @@ static ssize_t m1_pmu_events_sysfs_show(struct device *dev, PMU_EVENT_ATTR_ID(name, m1_pmu_events_sysfs_show, config) static struct attribute *m1_pmu_event_attrs[] = { - M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CPU_CYCLES), - M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INSTRUCTIONS), + M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CORE_ACTIVE_CYCLE), + M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INST_ALL), + M1_PMU_EVENT_ATTR(retire-uops, M1_PMU_PERFCTR_RETIRE_UOP), + M1_PMU_EVENT_ATTR(inst-branch, M1_PMU_PERFCTR_INST_BRANCH), + M1_PMU_EVENT_ATTR(inst-branch-call, M1_PMU_PERFCTR_INST_BRANCH_CALL), + M1_PMU_EVENT_ATTR(inst-branch-ret, M1_PMU_PERFCTR_INST_BRANCH_RET), + M1_PMU_EVENT_ATTR(inst-branch-taken, M1_PMU_PERFCTR_INST_BRANCH_TAKEN), + M1_PMU_EVENT_ATTR(inst-branch-indir, M1_PMU_PERFCTR_INST_BRANCH_INDIR), + M1_PMU_EVENT_ATTR(inst-branch-cond, M1_PMU_PERFCTR_INST_BRANCH_COND), + M1_PMU_EVENT_ATTR(inst-int-ld, M1_PMU_PERFCTR_INST_INT_LD), + M1_PMU_EVENT_ATTR(inst-int-st, M1_PMU_PERFCTR_INST_INT_ST), + M1_PMU_EVENT_ATTR(inst-int-alu, M1_PMU_PERFCTR_INST_INT_ALU), + M1_PMU_EVENT_ATTR(inst-simd-ld, M1_PMU_PERFCTR_INST_SIMD_LD), + M1_PMU_EVENT_ATTR(inst-simd-st, M1_PMU_PERFCTR_INST_SIMD_ST), + M1_PMU_EVENT_ATTR(inst-simd-alu, M1_PMU_PERFCTR_INST_SIMD_ALU), + M1_PMU_EVENT_ATTR(inst-ldst, M1_PMU_PERFCTR_INST_LDST), + M1_PMU_EVENT_ATTR(inst-barrier, M1_PMU_PERFCTR_INST_BARRIER), + M1_PMU_EVENT_ATTR(l1d-miss-ld, M1_PMU_PERFCTR_L1D_CACHE_MISS_LD_NONSPEC), + M1_PMU_EVENT_ATTR(l1d-miss-st, 
M1_PMU_PERFCTR_L1D_CACHE_MISS_ST_NONSPEC), + M1_PMU_EVENT_ATTR(l1d-tlb-miss, M1_PMU_PERFCTR_L1D_TLB_MISS_NONSPEC), + M1_PMU_EVENT_ATTR(st-mem-order-violation, M1_PMU_PERFCTR_ST_MEMORY_ORDER_VIOLATION_NONSPEC), + M1_PMU_EVENT_ATTR(branch-cond-mispred, M1_PMU_PERFCTR_BRANCH_COND_MISPRED_NONSPEC), + M1_PMU_EVENT_ATTR(branch-indir-mispred, M1_PMU_PERFCTR_BRANCH_INDIR_MISPRED_NONSPEC), + M1_PMU_EVENT_ATTR(branch-ret-indir-mispred, M1_PMU_PERFCTR_BRANCH_RET_INDIR_MISPRED_NONSPEC), + M1_PMU_EVENT_ATTR(branch-call-indir-mispred, M1_PMU_PERFCTR_BRANCH_CALL_INDIR_MISPRED_NONSPEC), + M1_PMU_EVENT_ATTR(branch-mispred, M1_PMU_PERFCTR_BRANCH_MISPRED_NONSPEC), NULL, };
This patch adds known PMU events that can be found in /usr/share/kpep on macOS. The m1_pmu_events and m1_pmu_event_affinity tables are generated by the script [1], which consumes the plist file from Apple. The events are then added to m1_pmu_perf_map and m1_pmu_event_attrs according to Apple's documentation [2]. Link: https://github.com/cyyself/m1-pmu-gen [1] Link: https://developer.apple.com/download/apple-silicon-cpu-optimization-guide/ [2] Signed-off-by: Yangyu Chen <cyy@cyyself.name> --- drivers/perf/apple_m1_cpu_pmu.c | 204 +++++++++++++++++++++----------- 1 file changed, 132 insertions(+), 72 deletions(-)