Message ID | 820d80272fc5627b8d00e684663a614470217606.1566290744.git.han_mao@c-sky.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | riscv: Add perf callchain support | expand |
Hi Mao, Mao Han <han_mao@c-sky.com> 於 2019年8月20日 週二 下午4:57寫道: > > This patch add support for perf callchain sampling on riscv platform. > The return address of leaf function is retrieved from pt_regs as > it is not saved in the outmost frame. > > Signed-off-by: Mao Han <han_mao@c-sky.com> > Cc: Paul Walmsley <paul.walmsley@sifive.com> > Cc: Greentime Hu <green.hu@gmail.com> > Cc: Palmer Dabbelt <palmer@sifive.com> > Cc: linux-riscv <linux-riscv@lists.infradead.org> > Cc: Christoph Hellwig <hch@lst.de> > Cc: Guo Ren <guoren@kernel.org> > --- > arch/riscv/Makefile | 3 + > arch/riscv/kernel/Makefile | 3 +- > arch/riscv/kernel/perf_callchain.c | 115 +++++++++++++++++++++++++++++++++++++ > 3 files changed, 120 insertions(+), 1 deletion(-) > create mode 100644 arch/riscv/kernel/perf_callchain.c > > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile > index 7a117be..946565b 100644 > --- a/arch/riscv/Makefile > +++ b/arch/riscv/Makefile > @@ -54,6 +54,9 @@ endif > ifeq ($(CONFIG_MODULE_SECTIONS),y) > KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/riscv/kernel/module.lds > endif > +ifeq ($(CONFIG_PERF_EVENTS),y) > + KBUILD_CFLAGS += -fno-omit-frame-pointer > +endif > > KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) > > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile > index 2420d37..b1bea89 100644 > --- a/arch/riscv/kernel/Makefile > +++ b/arch/riscv/kernel/Makefile > @@ -38,6 +38,7 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o > obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o > obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o > > -obj-$(CONFIG_PERF_EVENTS) += perf_event.o > +obj-$(CONFIG_PERF_EVENTS) += perf_event.o > +obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o > > clean: > diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c > new file mode 100644 > index 0000000..d75d15c > --- /dev/null > +++ b/arch/riscv/kernel/perf_callchain.c > @@ -0,0 +1,115 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright 
(C) 2019 Hangzhou C-SKY Microsystems co.,ltd. */ > + > +#include <linux/perf_event.h> > +#include <linux/uaccess.h> > + > +/* Kernel callchain */ > +struct stackframe { > + unsigned long fp; > + unsigned long ra; > +}; > + > +static int unwind_frame_kernel(struct stackframe *frame) > +{ > + if (kstack_end((void *)frame->fp)) > + return -EPERM; > + if (frame->fp & 0x3 || frame->fp < TASK_SIZE) > + return -EPERM; > + if (frame->fp < CONFIG_PAGE_OFFSET) > + return -EPERM; > + > + *frame = *((struct stackframe *)frame->fp - 1); > + if (__kernel_text_address(frame->ra)) { > + int graph = 0; > + > + frame->ra = ftrace_graph_ret_addr(NULL, &graph, frame->ra, > + NULL); > + } > + return 0; > +} > + > +static void notrace walk_stackframe(struct stackframe *fr, > + struct perf_callchain_entry_ctx *entry) > +{ > + do { > + perf_callchain_store(entry, fr->ra); > + } while (unwind_frame_kernel(fr) >= 0); > +} > + > +/* > + * Get the return address for a single stackframe and return a pointer to the > + * next frame tail. 
> + */ > +static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, > + unsigned long fp, unsigned long reg_ra) > +{ > + struct stackframe buftail; > + unsigned long ra = 0; > + unsigned long *user_frame_tail = > + (unsigned long *)(fp - sizeof(struct stackframe)); > + > + /* Check accessibility of one struct frame_tail beyond */ > + if (!access_ok(user_frame_tail, sizeof(buftail))) > + return 0; > + if (__copy_from_user_inatomic(&buftail, user_frame_tail, > + sizeof(buftail))) > + return 0; > + > + if (reg_ra != 0) > + ra = reg_ra; > + else > + ra = buftail.ra; > + > + fp = buftail.fp; > + perf_callchain_store(entry, ra); > + > + return fp; > +} > + > +/* > + * This will be called when the target is in user mode > + * This function will only be called when we use > + * "PERF_SAMPLE_CALLCHAIN" in > + * kernel/events/core.c:perf_prepare_sample() > + * > + * How to trigger perf_callchain_[user/kernel] : > + * $ perf record -e cpu-clock --call-graph fp ./program > + * $ perf report --call-graph > + * > + * On RISC-V platform, the program being sampled and the C library > + * need to be compiled with -fno-omit-frame-pointer, otherwise > + * the user stack will not contain function frame. > + */ > +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, > + struct pt_regs *regs) > +{ > + unsigned long fp = 0; > + > + /* RISC-V does not support virtualization. */ > + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) > + return; > + > + fp = regs->s0; > + perf_callchain_store(entry, regs->sepc); > + > + fp = user_backtrace(entry, fp, regs->ra); > + while (fp && !(fp & 0x3) && entry->nr < entry->max_stack) > + fp = user_backtrace(entry, fp, 0); > +} > + > +void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, > + struct pt_regs *regs) > +{ > + struct stackframe fr; > + > + /* RISC-V does not support virtualization. 
*/ > + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { > + pr_warn("RISC-V does not support perf in guest mode!"); > + return; > + } > + > + fr.fp = regs->s0; > + fr.ra = regs->ra; > + walk_stackframe(&fr, entry); > +} > -- > 2.7.4 > Not sure if I did something wrong. I encountered a build error when I tried to build tools/perf/tests CC arch/riscv/util/dwarf-regs.o arch/riscv/util/dwarf-regs.c:64:5: error: no previous prototype for ‘regs_query_register_offset’ [-Werror=missing-prototypes] I simply added its prototype, and then it built successfully. These are my test results. # ./perf test 1: vmlinux symtab matches kallsyms : Skip 2: Detect openat syscall event : FAILED! 3: Detect openat syscall event on all cpus : FAILED! 4: Read samples using the mmap interface : FAILED! 5: Test data source output : Ok 6: Parse event definition strings : FAILED! 7: Simple expression parser : Ok 8: PERF_RECORD_* events & perf_sample fields : FAILED! 9: Parse perf pmu format : Ok 10: DSO data read : Ok 11: DSO data cache : Ok 12: DSO data reopen : Ok 13: Roundtrip evsel->name : Ok 14: Parse sched tracepoints fields : FAILED! 15: syscalls:sys_enter_openat event fields : FAILED! 16: Setup struct perf_event_attr : FAILED! 17: Match and link multiple hists : Ok 18: 'import perf' in python : FAILED! 19: Breakpoint overflow signal handler : FAILED! 20: Breakpoint overflow sampling : FAILED! 21: Breakpoint accounting : Skip 22: Watchpoint : 22.1: Read Only Watchpoint : FAILED! 22.2: Write Only Watchpoint : FAILED! 22.3: Read / Write Watchpoint : FAILED! 22.4: Modify Watchpoint : FAILED! 
23: Number of exit events of a simple workload : Ok 24: Software clock events period values : Ok 25: Object code reading : Ok 26: Sample parsing : Ok 27: Use a dummy software event to keep tracking : Ok 28: Parse with no sample_id_all bit set : Ok 29: Filter hist entries : Ok 30: Lookup mmap thread : Ok 31: Share thread mg : Ok 32: Sort output of hist entries : Ok 33: Cumulate child hist entries : Ok 34: Track with sched_switch : Ok 35: Filter fds with revents mask in a fdarray : Ok 36: Add fd to a fdarray, making it autogrow : Ok 37: kmod_path__parse : Ok 38: Thread map : Ok 39: LLVM search and compile : 39.1: Basic BPF llvm compile : Skip 39.2: kbuild searching : Skip 39.3: Compile source for BPF prologue generation : Skip 39.4: Compile source for BPF relocation : Skip 40: Session topology : FAILED! 41: BPF filter : 41.1: Basic BPF filtering : Skip 41.2: BPF pinning : Skip 41.3: BPF relocation checker : Skip 42: Synthesize thread map : Ok 43: Remove thread map : Ok 44: Synthesize cpu map : Ok 45: Synthesize stat config : Ok 46: Synthesize stat : Ok 47: Synthesize stat round : Ok 48: Synthesize attr update : Ok 49: Event times : Ok 50: Read backward ring buffer : Skip 51: Print cpu map : Ok 52: Probe SDT events : Skip 53: is_printable_array : Ok 54: Print bitmap : Ok 55: perf hooks : Ok 56: builtin clang support : Skip (not compiled in) 57: unit_number__scnprintf : Ok 58: mem2node : Ok 59: time utils : Ok 60: map_groups__merge_in : Ok 61: probe libc's inet_pton & backtrace it with ping : FAILED! 62: Add vfs_getname probe to get syscall args filenames : FAILED! 63: Check open filename arg using perf trace + vfs_getname: Skip 64: Use vfs_getname probe to get syscall args filenames : FAILED! 65: Zstd perf.data compression/decompression : Skip I also tried the command that Paul pointed out. ./perf record -e cpu-clock --call-graph fp /bin/ls It works fine now. It can generate a perf.data file now.
Hi Greentime, On Wed, Aug 21, 2019 at 05:16:13PM +0800, Greentime Hu wrote: > Hi Mao, > > Mao Han <han_mao@c-sky.com> 於 2019年8月20日 週二 下午4:57寫道: > > > > This patch add support for perf callchain sampling on riscv platform. > > The return address of leaf function is retrieved from pt_regs as > > it is not saved in the outmost frame. > > > > > > Not sure if I did something wrong. I encounter a build error when I > try to build tools/perf/tests > > CC arch/riscv/util/dwarf-regs.o > arch/riscv/util/dwarf-regs.c:64:5: error: no previous prototype for > ‘regs_query_register_offset’ [-Werror=missing-prototypes] > This seems to be because I didn't add PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET in tools/perf/arch/riscv/Makefile, so the prototype in ./util/include/dwarf-regs.h is not declared. I'll add that in the next version. > I simply add its prototype and it could be built pass. > This is my testing results. > # ./perf test > 1: vmlinux symtab matches kallsyms : Skip > 2: Detect openat syscall event : FAILED! > 3: Detect openat syscall event on all cpus : FAILED! > 4: Read samples using the mmap interface : FAILED! > 5: Test data source output : Ok > 6: Parse event definition strings : FAILED! > 7: Simple expression parser : Ok > 8: PERF_RECORD_* events & perf_sample fields : FAILED! > 9: Parse perf pmu format : Ok > 10: DSO data read : Ok > 11: DSO data cache : Ok > 12: DSO data reopen : Ok > 13: Roundtrip evsel->name : Ok > 14: Parse sched tracepoints fields : FAILED! > 15: syscalls:sys_enter_openat event fields : FAILED! > 16: Setup struct perf_event_attr : FAILED! > 17: Match and link multiple hists : Ok > 18: 'import perf' in python : FAILED! > > 19: Breakpoint overflow signal handler : FAILED! > 20: Breakpoint overflow sampling : FAILED! > 21: Breakpoint accounting : Skip > 22: Watchpoint : > 22.1: Read Only Watchpoint : FAILED! > 22.2: Write Only Watchpoint : FAILED! > 22.3: Read / Write Watchpoint : FAILED! > 22.4: Modify Watchpoint : FAILED! 
> 23: Number of exit events of a simple workload : Ok > 24: Software clock events period values : Ok > 25: Object code reading : Ok > 26: Sample parsing : Ok > 27: Use a dummy software event to keep tracking : Ok > 28: Parse with no sample_id_all bit set : Ok > 29: Filter hist entries : Ok > 30: Lookup mmap thread : Ok > 31: Share thread mg : Ok > 32: Sort output of hist entries : Ok > 33: Cumulate child hist entries : Ok > 34: Track with sched_switch : Ok > 35: Filter fds with revents mask in a fdarray : Ok > 36: Add fd to a fdarray, making it autogrow : Ok > 37: kmod_path__parse : Ok > 38: Thread map : Ok > 39: LLVM search and compile : > 39.1: Basic BPF llvm compile : Skip > 39.2: kbuild searching : Skip > 39.3: Compile source for BPF prologue generation : Skip > 39.4: Compile source for BPF relocation : Skip > 40: Session topology : FAILED! > 41: BPF filter : > 41.1: Basic BPF filtering : Skip > 41.2: BPF pinning : Skip > 41.3: BPF relocation checker : Skip > 42: Synthesize thread map : Ok > 43: Remove thread map : Ok > 44: Synthesize cpu map : Ok > 45: Synthesize stat config : Ok > 46: Synthesize stat : Ok > 47: Synthesize stat round : Ok > 48: Synthesize attr update : Ok > 49: Event times : Ok > 50: Read backward ring buffer : Skip > 51: Print cpu map : Ok > 52: Probe SDT events : Skip > 53: is_printable_array : Ok > 54: Print bitmap : Ok > 55: perf hooks : Ok > 56: builtin clang support : Skip (not > compiled in) > 57: unit_number__scnprintf : Ok > 58: mem2node : Ok > 59: time utils : Ok > 60: map_groups__merge_in : Ok > 61: probe libc's inet_pton & backtrace it with ping : FAILED! > 62: Add vfs_getname probe to get syscall args filenames : FAILED! > 63: Check open filename arg using perf trace + vfs_getname: Skip > 64: Use vfs_getname probe to get syscall args filenames : FAILED! > 65: Zstd perf.data compression/decompression : Skip > The perf test result I got is quite similar to yours, but with 5 fewer test cases. 
1: vmlinux symtab matches kallsyms : Skip 2: Detect openat syscall event : FAILED! 3: Detect openat syscall event on all cpus : FAILED! 4: Read samples using the mmap interface : FAILED! 5: Test data source output : Ok 6: Parse event definition strings : FAILED! 7: Simple expression parser : Ok 8: PERF_RECORD_* events & perf_sample fields : FAILED! 9: Parse perf pmu format : Ok 10: DSO data read : Ok 11: DSO data cache : Ok 12: DSO data reopen : Ok 13: Roundtrip evsel->name : Ok 14: Parse sched tracepoints fields : FAILED! 15: syscalls:sys_enter_openat event fields : FAILED! 16: Setup struct perf_event_attr : Skip 17: Match and link multiple hists : Ok 18: 'import perf' in python : Ok 19: Breakpoint overflow signal handler : FAILED! 20: Breakpoint overflow sampling : FAILED! 21: Breakpoint accounting : Skip 22: Watchpoint : 22.1: Read Only Watchpoint : FAILED! 22.2: Write Only Watchpoint : FAILED! 22.3: Read / Write Watchpoint : FAILED! 22.4: Modify Watchpoint : FAILED! 23: Number of exit events of a simple workload : Ok 24: Software clock events period values : Ok 25: Object code reading : Ok 26: Sample parsing : Ok 27: Use a dummy software event to keep tracking: Ok 28: Parse with no sample_id_all bit set : Ok 29: Filter hist entries : Ok 30: Lookup mmap thread : Ok 31: Share thread mg : Ok 32: Sort output of hist entries : Ok 33: Cumulate child hist entries : Ok 34: Track with sched_switch : Ok 35: Filter fds with revents mask in a fdarray : Ok 36: Add fd to a fdarray, making it autogrow : Ok 37: kmod_path__parse : Ok 38: Thread map : Ok 39: LLVM search and compile : 39.1: Basic BPF llvm compile : Skip 39.2: kbuild searching : Skip 39.3: Compile source for BPF prologue generation: Skip 39.4: Compile source for BPF relocation : Skip 40: Session topology : FAILED! 
41: BPF filter : 41.1: Basic BPF filtering : Skip 41.2: BPF pinning : Skip 41.3: BPF relocation checker : Skip 42: Synthesize thread map : Ok 43: Remove thread map : Ok 44: Synthesize cpu map : Ok 45: Synthesize stat config : Ok 46: Synthesize stat : Ok 47: Synthesize stat round : Ok 48: Synthesize attr update : Ok 49: Event times : Ok 50: Read backward ring buffer : Skip 51: Print cpu map : Ok 52: Probe SDT events : Skip 53: is_printable_array : Ok 54: Print bitmap : Ok 55: perf hooks : Ok 56: builtin clang support : Skip (not compiled in) 57: unit_number__scnprintf : Ok 58: mem2node : Ok 59: time utils : Ok 60: map_groups__merge_in : Ok The comparison before/after applying this patch set: /tools/perf/util# diff perf_test_before perf_test_after 1d0 < # perf test 8c7 < 7: Simple expression parser : FAILED! --- > 7: Simple expression parser : Ok
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 7a117be..946565b 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -54,6 +54,9 @@ endif ifeq ($(CONFIG_MODULE_SECTIONS),y) KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/riscv/kernel/module.lds endif +ifeq ($(CONFIG_PERF_EVENTS),y) + KBUILD_CFLAGS += -fno-omit-frame-pointer +endif KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 2420d37..b1bea89 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -38,6 +38,7 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o -obj-$(CONFIG_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o clean: diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c new file mode 100644 index 0000000..d75d15c --- /dev/null +++ b/arch/riscv/kernel/perf_callchain.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. 
*/ + +#include <linux/perf_event.h> +#include <linux/uaccess.h> + +/* Kernel callchain */ +struct stackframe { + unsigned long fp; + unsigned long ra; +}; + +static int unwind_frame_kernel(struct stackframe *frame) +{ + if (kstack_end((void *)frame->fp)) + return -EPERM; + if (frame->fp & 0x3 || frame->fp < TASK_SIZE) + return -EPERM; + if (frame->fp < CONFIG_PAGE_OFFSET) + return -EPERM; + + *frame = *((struct stackframe *)frame->fp - 1); + if (__kernel_text_address(frame->ra)) { + int graph = 0; + + frame->ra = ftrace_graph_ret_addr(NULL, &graph, frame->ra, + NULL); + } + return 0; +} + +static void notrace walk_stackframe(struct stackframe *fr, + struct perf_callchain_entry_ctx *entry) +{ + do { + perf_callchain_store(entry, fr->ra); + } while (unwind_frame_kernel(fr) >= 0); +} + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, + unsigned long fp, unsigned long reg_ra) +{ + struct stackframe buftail; + unsigned long ra = 0; + unsigned long *user_frame_tail = + (unsigned long *)(fp - sizeof(struct stackframe)); + + /* Check accessibility of one struct frame_tail beyond */ + if (!access_ok(user_frame_tail, sizeof(buftail))) + return 0; + if (__copy_from_user_inatomic(&buftail, user_frame_tail, + sizeof(buftail))) + return 0; + + if (reg_ra != 0) + ra = reg_ra; + else + ra = buftail.ra; + + fp = buftail.fp; + perf_callchain_store(entry, ra); + + return fp; +} + +/* + * This will be called when the target is in user mode + * This function will only be called when we use + * "PERF_SAMPLE_CALLCHAIN" in + * kernel/events/core.c:perf_prepare_sample() + * + * How to trigger perf_callchain_[user/kernel] : + * $ perf record -e cpu-clock --call-graph fp ./program + * $ perf report --call-graph + * + * On RISC-V platform, the program being sampled and the C library + * need to be compiled with -fno-omit-frame-pointer, otherwise + * the user 
stack will not contain function frame. + */ +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + unsigned long fp = 0; + + /* RISC-V does not support virtualization. */ + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + return; + + fp = regs->s0; + perf_callchain_store(entry, regs->sepc); + + fp = user_backtrace(entry, fp, regs->ra); + while (fp && !(fp & 0x3) && entry->nr < entry->max_stack) + fp = user_backtrace(entry, fp, 0); +} + +void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + struct stackframe fr; + + /* RISC-V does not support virtualization. */ + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + pr_warn("RISC-V does not support perf in guest mode!"); + return; + } + + fr.fp = regs->s0; + fr.ra = regs->ra; + walk_stackframe(&fr, entry); +}
This patch adds support for perf callchain sampling on the riscv platform. The return address of a leaf function is retrieved from pt_regs, as it is not saved in the outmost frame. Signed-off-by: Mao Han <han_mao@c-sky.com> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Greentime Hu <green.hu@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: linux-riscv <linux-riscv@lists.infradead.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Guo Ren <guoren@kernel.org> --- arch/riscv/Makefile | 3 + arch/riscv/kernel/Makefile | 3 +- arch/riscv/kernel/perf_callchain.c | 115 +++++++++++++++++++++++++++++++++++++ 3 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/kernel/perf_callchain.c