diff mbox series

[v4,4/6] perf tools: enable dwarf_callchain_users on arm64

Message ID 20211215151139.40854-5-german.gomez@arm.com (mailing list archive)
State New, archived
Headers show
Series Fix missing leaf-function callers when recording | expand

Commit Message

German Gomez Dec. 15, 2021, 3:11 p.m. UTC
From: Alexandre Truong <alexandre.truong@arm.com>

On arm64, enable dwarf_callchain_users, which is needed to perform a
DWARF unwind in order to find the caller of a leaf frame.

Signed-off-by: Alexandre Truong <alexandre.truong@arm.com>
Signed-off-by: German Gomez <german.gomez@arm.com>
---
 tools/perf/builtin-report.c | 4 ++--
 tools/perf/builtin-script.c | 4 ++--
 tools/perf/util/callchain.c | 9 ++++++++-
 tools/perf/util/callchain.h | 2 +-
 4 files changed, 13 insertions(+), 6 deletions(-)

Comments

Mark Rutland Dec. 15, 2021, 4:37 p.m. UTC | #1
On Wed, Dec 15, 2021 at 03:11:36PM +0000, German Gomez wrote:
> From: Alexandre Truong <alexandre.truong@arm.com>
> 
> On arm64, enable dwarf_callchain_users which will be needed
> to do a dwarf unwind in order to get the caller of the leaf frame.
> 
> Signed-off-by: Alexandre Truong <alexandre.truong@arm.com>
> Signed-off-by: German Gomez <german.gomez@arm.com>
> ---
>  tools/perf/builtin-report.c | 4 ++--
>  tools/perf/builtin-script.c | 4 ++--
>  tools/perf/util/callchain.c | 9 ++++++++-
>  tools/perf/util/callchain.h | 2 +-
>  4 files changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index 8167ebfe776a..a31ad60ba66e 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -410,7 +410,7 @@ static int report__setup_sample_type(struct report *rep)
>  		}
>  	}
>  
> -	callchain_param_setup(sample_type);
> +	callchain_param_setup(sample_type, perf_env__arch(&rep->session->header.env));
>  
>  	if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
>  		ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
> @@ -1124,7 +1124,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused,
>  	 * on events sample_type.
>  	 */
>  	sample_type = evlist__combined_sample_type(*pevlist);
> -	callchain_param_setup(sample_type);
> +	callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
>  	return 0;
>  }
>  
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index ab7d575f97f2..d308adfd1176 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -2318,7 +2318,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
>  	 * on events sample_type.
>  	 */
>  	sample_type = evlist__combined_sample_type(evlist);
> -	callchain_param_setup(sample_type);
> +	callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
>  
>  	/* Enable fields for callchain entries */
>  	if (symbol_conf.use_callchain &&
> @@ -3468,7 +3468,7 @@ static void script__setup_sample_type(struct perf_script *script)
>  	struct perf_session *session = script->session;
>  	u64 sample_type = evlist__combined_sample_type(session->evlist);
>  
> -	callchain_param_setup(sample_type);
> +	callchain_param_setup(sample_type, perf_env__arch(session->machines.host.env));
>  
>  	if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
>  		pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
> diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
> index 8e2777133bd9..aaab9a674807 100644
> --- a/tools/perf/util/callchain.c
> +++ b/tools/perf/util/callchain.c
> @@ -1600,7 +1600,7 @@ void callchain_cursor_reset(struct callchain_cursor *cursor)
>  		map__zput(node->ms.map);
>  }
>  
> -void callchain_param_setup(u64 sample_type)
> +void callchain_param_setup(u64 sample_type, const char *arch)
>  {
>  	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
>  		if ((sample_type & PERF_SAMPLE_REGS_USER) &&
> @@ -1612,6 +1612,13 @@ void callchain_param_setup(u64 sample_type)
>  		else
>  			callchain_param.record_mode = CALLCHAIN_FP;
>  	}
> +
> +	/*
> +	 * It's possible to determine the caller of leaf frames with omitted
> +	 * frame pointers on aarch64 using libunwind, so enable it.
> +	 */

I reckon it's worth mentioning *why* we need to do this; how about:

	/*
	 * It's necessary to use libunwind to reliably determine the caller of
	 * a leaf function on aarch64, as otherwise we cannot know whether to
	 * start from the LR or FP.
	 *
	 * Always starting from the LR can result in duplicate or entirely
	 * erroneous entries. Always skipping the LR and starting from the FP
	 * can result in missing entries.
	 */

Other than that, this looks fine to me!

Thanks,
Mark.

> +	if (callchain_param.record_mode == CALLCHAIN_FP && !strcmp(arch, "arm64"))
> +		dwarf_callchain_users = true;
>  }
>  
>  static bool chain_match(struct callchain_list *base_chain,
> diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
> index 77fba053c677..d95615daed73 100644
> --- a/tools/perf/util/callchain.h
> +++ b/tools/perf/util/callchain.h
> @@ -300,7 +300,7 @@ int callchain_branch_counts(struct callchain_root *root,
>  			    u64 *branch_count, u64 *predicted_count,
>  			    u64 *abort_count, u64 *cycles_count);
>  
> -void callchain_param_setup(u64 sample_type);
> +void callchain_param_setup(u64 sample_type, const char *arch);
>  
>  bool callchain_cnode_matched(struct callchain_node *base_cnode,
>  			     struct callchain_node *pair_cnode);
> -- 
> 2.25.1
>
German Gomez Dec. 17, 2021, 12:08 p.m. UTC | #2
On 15/12/2021 16:37, Mark Rutland wrote:
> On Wed, Dec 15, 2021 at 03:11:36PM +0000, German Gomez wrote:
>> [...]
>>
>> +
>> +	/*
>> +	 * It's possible to determine the caller of leaf frames with omitted
>> +	 * frame pointers on aarch64 using libunwind, so enable it.
>> +	 */
> I reckon it's worth mentioning *why* we need to do this; how about:
>
> 	/*
> 	 * It's necessary to use libunwind to reliably determine the caller of
> 	 * a leaf function on aarch64, as otherwise we cannot know whether to
> 	 * start from the LR or FP.
> 	 *
> 	 * Always starting from the LR can result in duplicate or entirely
> 	 * erroneous entries. Always skipping the LR and starting from the FP
> 	 * can result in missing entries.
> 	 */
>
> Other than that, this looks fine to me!
>
> Thanks,
> Mark.


Ack, I will update this

Thanks,
German

>
>> +	if (callchain_param.record_mode == CALLCHAIN_FP && !strcmp(arch, "arm64"))
>> +		dwarf_callchain_users = true;
>>  }
>>  
>>  static bool chain_match(struct callchain_list *base_chain,
>> diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
>> index 77fba053c677..d95615daed73 100644
>> --- a/tools/perf/util/callchain.h
>> +++ b/tools/perf/util/callchain.h
>> @@ -300,7 +300,7 @@ int callchain_branch_counts(struct callchain_root *root,
>>  			    u64 *branch_count, u64 *predicted_count,
>>  			    u64 *abort_count, u64 *cycles_count);
>>  
>> -void callchain_param_setup(u64 sample_type);
>> +void callchain_param_setup(u64 sample_type, const char *arch);
>>  
>>  bool callchain_cnode_matched(struct callchain_node *base_cnode,
>>  			     struct callchain_node *pair_cnode);
>> -- 
>> 2.25.1
>>
diff mbox series

Patch

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8167ebfe776a..a31ad60ba66e 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -410,7 +410,7 @@  static int report__setup_sample_type(struct report *rep)
 		}
 	}
 
-	callchain_param_setup(sample_type);
+	callchain_param_setup(sample_type, perf_env__arch(&rep->session->header.env));
 
 	if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
 		ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
@@ -1124,7 +1124,7 @@  static int process_attr(struct perf_tool *tool __maybe_unused,
 	 * on events sample_type.
 	 */
 	sample_type = evlist__combined_sample_type(*pevlist);
-	callchain_param_setup(sample_type);
+	callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
 	return 0;
 }
 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ab7d575f97f2..d308adfd1176 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2318,7 +2318,7 @@  static int process_attr(struct perf_tool *tool, union perf_event *event,
 	 * on events sample_type.
 	 */
 	sample_type = evlist__combined_sample_type(evlist);
-	callchain_param_setup(sample_type);
+	callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
 
 	/* Enable fields for callchain entries */
 	if (symbol_conf.use_callchain &&
@@ -3468,7 +3468,7 @@  static void script__setup_sample_type(struct perf_script *script)
 	struct perf_session *session = script->session;
 	u64 sample_type = evlist__combined_sample_type(session->evlist);
 
-	callchain_param_setup(sample_type);
+	callchain_param_setup(sample_type, perf_env__arch(session->machines.host.env));
 
 	if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
 		pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 8e2777133bd9..aaab9a674807 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1600,7 +1600,7 @@  void callchain_cursor_reset(struct callchain_cursor *cursor)
 		map__zput(node->ms.map);
 }
 
-void callchain_param_setup(u64 sample_type)
+void callchain_param_setup(u64 sample_type, const char *arch)
 {
 	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
 		if ((sample_type & PERF_SAMPLE_REGS_USER) &&
@@ -1612,6 +1612,13 @@  void callchain_param_setup(u64 sample_type)
 		else
 			callchain_param.record_mode = CALLCHAIN_FP;
 	}
+
+	/*
+	 * It's possible to determine the caller of leaf frames with omitted
+	 * frame pointers on aarch64 using libunwind, so enable it.
+	 */
+	if (callchain_param.record_mode == CALLCHAIN_FP && !strcmp(arch, "arm64"))
+		dwarf_callchain_users = true;
 }
 
 static bool chain_match(struct callchain_list *base_chain,
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 77fba053c677..d95615daed73 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -300,7 +300,7 @@  int callchain_branch_counts(struct callchain_root *root,
 			    u64 *branch_count, u64 *predicted_count,
 			    u64 *abort_count, u64 *cycles_count);
 
-void callchain_param_setup(u64 sample_type);
+void callchain_param_setup(u64 sample_type, const char *arch);
 
 bool callchain_cnode_matched(struct callchain_node *base_cnode,
 			     struct callchain_node *pair_cnode);