diff mbox series

[v5,4/4] perf tools: Support "branch-misses:pp" on arm64

Message ID 20200225115739.18740-5-james.clark@arm.com (mailing list archive)
State New, archived
Headers show
Series perf tools: Add support for some spe events and precise ip | expand

Commit Message

James Clark Feb. 25, 2020, 11:57 a.m. UTC
From: Tan Xiaojun <tanxiaojun@huawei.com>

At the suggestion of James Clark, use spe to support the precise
ip of some events. Currently its support event is:
branch-misses.

Example usage:

$ ./perf record -e branch-misses:pp dd if=/dev/zero of=/dev/null count=10000
(:p/pp/ppp is same for this case.)

$ ./perf report --stdio
("--stdio is not necessary")

--------------------------------------------------------------------
...
 # Samples: 14  of event 'branch-misses:pp'
 # Event count (approx.): 14
 #
 # Children      Self  Command  Shared Object      Symbol
 # ........  ........  .......  .................  ..........................
 #
    14.29%    14.29%  dd       [kernel.kallsyms]  [k] __arch_copy_from_user
    14.29%    14.29%  dd       libc-2.28.so       [.] _dl_addr
     7.14%     7.14%  dd       [kernel.kallsyms]  [k] __free_pages
     7.14%     7.14%  dd       [kernel.kallsyms]  [k] __pi_memcpy
     7.14%     7.14%  dd       [kernel.kallsyms]  [k] pagecache_get_page
     7.14%     7.14%  dd       [kernel.kallsyms]  [k] unmap_single_vma
     7.14%     7.14%  dd       dd                 [.] 0x00000000000025ec
     7.14%     7.14%  dd       ld-2.28.so         [.] _dl_lookup_symbol_x
     7.14%     7.14%  dd       ld-2.28.so         [.] check_match
     7.14%     7.14%  dd       libc-2.28.so       [.] __mpn_rshift
     7.14%     7.14%  dd       libc-2.28.so       [.] _nl_intern_locale_data
     7.14%     7.14%  dd       libc-2.28.so       [.] read_alias_file
...
--------------------------------------------------------------------

Signed-off-by: Tan Xiaojun <tanxiaojun@huawei.com>
Suggested-by: James Clark <James.Clark@arm.com>
Tested-by: Qi Liu <liuqi115@hisilicon.com>
Signed-off-by: James Clark <james.clark@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Tan Xiaojun <tanxiaojun@huawei.com>
Cc: Al Grant <al.grant@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/arch/arm/util/auxtrace.c | 39 +++++++++++++++++++++++++++++
 tools/perf/builtin-record.c         |  5 ++++
 tools/perf/util/arm-spe.c           |  9 +++++++
 tools/perf/util/arm-spe.h           |  3 +++
 tools/perf/util/auxtrace.h          |  6 +++++
 5 files changed, 62 insertions(+)

Comments

Mark Rutland Feb. 28, 2020, 4:03 p.m. UTC | #1
Hi James,

Sorry, I missed this v5 when replying to v4 just now, but my comments
there equally apply here: I don't think that we should be silently
overriding the event requested by the user, and I think that we can make
that request explicit without being too painful for the user.

Thanks,
Mark.

On Tue, Feb 25, 2020 at 11:57:39AM +0000, James Clark wrote:
> From: Tan Xiaojun <tanxiaojun@huawei.com>
> 
> At the suggestion of James Clark, use spe to support the precise
> ip of some events. Currently its support event is:
> branch-misses.
> 
> Example usage:
> 
> $ ./perf record -e branch-misses:pp dd if=/dev/zero of=/dev/null count=10000
> (:p/pp/ppp is same for this case.)
> 
> $ ./perf report --stdio
> ("--stdio is not necessary")
> 
> --------------------------------------------------------------------
> ...
>  # Samples: 14  of event 'branch-misses:pp'
>  # Event count (approx.): 14
>  #
>  # Children      Self  Command  Shared Object      Symbol
>  # ........  ........  .......  .................  ..........................
>  #
>     14.29%    14.29%  dd       [kernel.kallsyms]  [k] __arch_copy_from_user
>     14.29%    14.29%  dd       libc-2.28.so       [.] _dl_addr
>      7.14%     7.14%  dd       [kernel.kallsyms]  [k] __free_pages
>      7.14%     7.14%  dd       [kernel.kallsyms]  [k] __pi_memcpy
>      7.14%     7.14%  dd       [kernel.kallsyms]  [k] pagecache_get_page
>      7.14%     7.14%  dd       [kernel.kallsyms]  [k] unmap_single_vma
>      7.14%     7.14%  dd       dd                 [.] 0x00000000000025ec
>      7.14%     7.14%  dd       ld-2.28.so         [.] _dl_lookup_symbol_x
>      7.14%     7.14%  dd       ld-2.28.so         [.] check_match
>      7.14%     7.14%  dd       libc-2.28.so       [.] __mpn_rshift
>      7.14%     7.14%  dd       libc-2.28.so       [.] _nl_intern_locale_data
>      7.14%     7.14%  dd       libc-2.28.so       [.] read_alias_file
> ...
> --------------------------------------------------------------------
> 
> Signed-off-by: Tan Xiaojun <tanxiaojun@huawei.com>
> Suggested-by: James Clark <James.Clark@arm.com>
> Tested-by: Qi Liu <liuqi115@hisilicon.com>
> Signed-off-by: James Clark <james.clark@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
> Cc: Jiri Olsa <jolsa@redhat.com>
> Cc: Tan Xiaojun <tanxiaojun@huawei.com>
> Cc: Al Grant <al.grant@arm.com>
> Cc: Namhyung Kim <namhyung@kernel.org>
> ---
>  tools/perf/arch/arm/util/auxtrace.c | 39 +++++++++++++++++++++++++++++
>  tools/perf/builtin-record.c         |  5 ++++
>  tools/perf/util/arm-spe.c           |  9 +++++++
>  tools/perf/util/arm-spe.h           |  3 +++
>  tools/perf/util/auxtrace.h          |  6 +++++
>  5 files changed, 62 insertions(+)
> 
> diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
> index 0a6e75b8777a..7f412b7894ab 100644
> --- a/tools/perf/arch/arm/util/auxtrace.c
> +++ b/tools/perf/arch/arm/util/auxtrace.c
> @@ -10,11 +10,25 @@
>  
>  #include "../../util/auxtrace.h"
>  #include "../../util/debug.h"
> +#include "../../util/env.h"
>  #include "../../util/evlist.h"
>  #include "../../util/pmu.h"
>  #include "cs-etm.h"
>  #include "arm-spe.h"
>  
> +#define SPE_ATTR_TS_ENABLE		BIT(0)
> +#define SPE_ATTR_PA_ENABLE		BIT(1)
> +#define SPE_ATTR_PCT_ENABLE		BIT(2)
> +#define SPE_ATTR_JITTER			BIT(16)
> +#define SPE_ATTR_BRANCH_FILTER		BIT(32)
> +#define SPE_ATTR_LOAD_FILTER		BIT(33)
> +#define SPE_ATTR_STORE_FILTER		BIT(34)
> +
> +#define SPE_ATTR_EV_RETIRED		BIT(1)
> +#define SPE_ATTR_EV_CACHE		BIT(3)
> +#define SPE_ATTR_EV_TLB			BIT(5)
> +#define SPE_ATTR_EV_BRANCH		BIT(7)
> +
>  static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
>  {
>  	struct perf_pmu **arm_spe_pmus = NULL;
> @@ -108,3 +122,28 @@ struct auxtrace_record
>  	*err = 0;
>  	return NULL;
>  }
> +
> +void auxtrace__preprocess_evlist(struct evlist *evlist)
> +{
> +	struct evsel *evsel;
> +	struct perf_pmu *pmu;
> +
> +	evlist__for_each_entry(evlist, evsel) {
> +		/* Currently only supports precise_ip for branch-misses on arm64 */
> +		if (!strcmp(perf_env__arch(evlist->env), "arm64")
> +			&& evsel->core.attr.config == PERF_COUNT_HW_BRANCH_MISSES
> +			&& evsel->core.attr.type == PERF_TYPE_HARDWARE
> +			&& evsel->core.attr.precise_ip)
> +		{
> +			pmu = perf_pmu__find("arm_spe_0");
> +			if (pmu) {
> +				evsel->pmu_name = pmu->name;
> +				evsel->core.attr.type = pmu->type;
> +				evsel->core.attr.config = SPE_ATTR_TS_ENABLE
> +							| SPE_ATTR_BRANCH_FILTER;
> +				evsel->core.attr.config1 = SPE_ATTR_EV_BRANCH;
> +				evsel->core.attr.precise_ip = 0;
> +			}
> +		}
> +	}
> +}
> \ No newline at end of file
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 4c301466101b..3bc61f03d572 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -2451,6 +2451,11 @@ int cmd_record(int argc, const char **argv)
>  
>  	argc = parse_options(argc, argv, record_options, record_usage,
>  			    PARSE_OPT_STOP_AT_NON_OPTION);
> +
> +	if (auxtrace__preprocess_evlist) {
> +		auxtrace__preprocess_evlist(rec->evlist);
> +	}
> +
>  	if (quiet)
>  		perf_quiet_option();
>  
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index 4ef22a0775a9..b21806c97dd8 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -778,6 +778,15 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
>  	attr.sample_id_all = evsel->core.attr.sample_id_all;
>  	attr.read_format = evsel->core.attr.read_format;
>  
> +	/* If it is in the precise ip mode, there is no need to
> +	 * synthesize new events. */
> +	if (!strncmp(evsel->name, "branch-misses", 13)) {
> +		spe->sample_branch_miss = true;
> +		spe->branch_miss_id = evsel->core.id[0];
> +
> +		return 0;
> +	}
> +
>  	/* create new id val to be a fixed offset from evsel id */
>  	id = evsel->core.id[0] + 1000000000;
>  
> diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h
> index 98d3235781c3..8b1fb191d03a 100644
> --- a/tools/perf/util/arm-spe.h
> +++ b/tools/perf/util/arm-spe.h
> @@ -20,6 +20,8 @@ enum {
>  union perf_event;
>  struct perf_session;
>  struct perf_pmu;
> +struct evlist;
> +struct evsel;
>  
>  struct auxtrace_record *arm_spe_recording_init(int *err,
>  					       struct perf_pmu *arm_spe_pmu);
> @@ -28,4 +30,5 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
>  				  struct perf_session *session);
>  
>  struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu);
> +void arm_spe_precise_ip_support(struct evlist *evlist, struct evsel *evsel);
>  #endif
> diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
> index 52e148eea7f8..4be56bca54dc 100644
> --- a/tools/perf/util/auxtrace.h
> +++ b/tools/perf/util/auxtrace.h
> @@ -584,6 +584,7 @@ void auxtrace__dump_auxtrace_sample(struct perf_session *session,
>  int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool);
>  void auxtrace__free_events(struct perf_session *session);
>  void auxtrace__free(struct perf_session *session);
> +void auxtrace__preprocess_evlist(struct evlist *evlist) __attribute__((weak));
>  
>  #define ITRACE_HELP \
>  "				i:	    		synthesize instructions events\n"		\
> @@ -731,6 +732,11 @@ void auxtrace__free(struct perf_session *session __maybe_unused)
>  {
>  }
>  
> +static inline
> +void auxtrace__preprocess_evlist(struct evlist *evlist __maybe_unused)
> +{
> +}
> +
>  static inline
>  int auxtrace_index__write(int fd __maybe_unused,
>  			  struct list_head *head __maybe_unused)
> -- 
> 2.17.1
>
diff mbox series

Patch

diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
index 0a6e75b8777a..7f412b7894ab 100644
--- a/tools/perf/arch/arm/util/auxtrace.c
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -10,11 +10,25 @@ 
 
 #include "../../util/auxtrace.h"
 #include "../../util/debug.h"
+#include "../../util/env.h"
 #include "../../util/evlist.h"
 #include "../../util/pmu.h"
 #include "cs-etm.h"
 #include "arm-spe.h"
 
+#define SPE_ATTR_TS_ENABLE		BIT(0)
+#define SPE_ATTR_PA_ENABLE		BIT(1)
+#define SPE_ATTR_PCT_ENABLE		BIT(2)
+#define SPE_ATTR_JITTER			BIT(16)
+#define SPE_ATTR_BRANCH_FILTER		BIT(32)
+#define SPE_ATTR_LOAD_FILTER		BIT(33)
+#define SPE_ATTR_STORE_FILTER		BIT(34)
+
+#define SPE_ATTR_EV_RETIRED		BIT(1)
+#define SPE_ATTR_EV_CACHE		BIT(3)
+#define SPE_ATTR_EV_TLB			BIT(5)
+#define SPE_ATTR_EV_BRANCH		BIT(7)
+
 static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
 {
 	struct perf_pmu **arm_spe_pmus = NULL;
@@ -108,3 +122,28 @@  struct auxtrace_record
 	*err = 0;
 	return NULL;
 }
+
+void auxtrace__preprocess_evlist(struct evlist *evlist)
+{
+	struct evsel *evsel;
+	struct perf_pmu *pmu;
+
+	evlist__for_each_entry(evlist, evsel) {
+		/* Currently only supports precise_ip for branch-misses on arm64 */
+		if (!strcmp(perf_env__arch(evlist->env), "arm64")
+			&& evsel->core.attr.config == PERF_COUNT_HW_BRANCH_MISSES
+			&& evsel->core.attr.type == PERF_TYPE_HARDWARE
+			&& evsel->core.attr.precise_ip)
+		{
+			pmu = perf_pmu__find("arm_spe_0");
+			if (pmu) {
+				evsel->pmu_name = pmu->name;
+				evsel->core.attr.type = pmu->type;
+				evsel->core.attr.config = SPE_ATTR_TS_ENABLE
+							| SPE_ATTR_BRANCH_FILTER;
+				evsel->core.attr.config1 = SPE_ATTR_EV_BRANCH;
+				evsel->core.attr.precise_ip = 0;
+			}
+		}
+	}
+}
\ No newline at end of file
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 4c301466101b..3bc61f03d572 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2451,6 +2451,11 @@  int cmd_record(int argc, const char **argv)
 
 	argc = parse_options(argc, argv, record_options, record_usage,
 			    PARSE_OPT_STOP_AT_NON_OPTION);
+
+	if (auxtrace__preprocess_evlist) {
+		auxtrace__preprocess_evlist(rec->evlist);
+	}
+
 	if (quiet)
 		perf_quiet_option();
 
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 4ef22a0775a9..b21806c97dd8 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -778,6 +778,15 @@  arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
 	attr.sample_id_all = evsel->core.attr.sample_id_all;
 	attr.read_format = evsel->core.attr.read_format;
 
+	/* If it is in the precise ip mode, there is no need to
+	 * synthesize new events. */
+	if (!strncmp(evsel->name, "branch-misses", 13)) {
+		spe->sample_branch_miss = true;
+		spe->branch_miss_id = evsel->core.id[0];
+
+		return 0;
+	}
+
 	/* create new id val to be a fixed offset from evsel id */
 	id = evsel->core.id[0] + 1000000000;
 
diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h
index 98d3235781c3..8b1fb191d03a 100644
--- a/tools/perf/util/arm-spe.h
+++ b/tools/perf/util/arm-spe.h
@@ -20,6 +20,8 @@  enum {
 union perf_event;
 struct perf_session;
 struct perf_pmu;
+struct evlist;
+struct evsel;
 
 struct auxtrace_record *arm_spe_recording_init(int *err,
 					       struct perf_pmu *arm_spe_pmu);
@@ -28,4 +30,5 @@  int arm_spe_process_auxtrace_info(union perf_event *event,
 				  struct perf_session *session);
 
 struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu);
+void arm_spe_precise_ip_support(struct evlist *evlist, struct evsel *evsel);
 #endif
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 52e148eea7f8..4be56bca54dc 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -584,6 +584,7 @@  void auxtrace__dump_auxtrace_sample(struct perf_session *session,
 int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool);
 void auxtrace__free_events(struct perf_session *session);
 void auxtrace__free(struct perf_session *session);
+void auxtrace__preprocess_evlist(struct evlist *evlist) __attribute__((weak));
 
 #define ITRACE_HELP \
 "				i:	    		synthesize instructions events\n"		\
@@ -731,6 +732,11 @@  void auxtrace__free(struct perf_session *session __maybe_unused)
 {
 }
 
+static inline
+void auxtrace__preprocess_evlist(struct evlist *evlist __maybe_unused)
+{
+}
+
 static inline
 int auxtrace_index__write(int fd __maybe_unused,
 			  struct list_head *head __maybe_unused)