diff mbox series

perf lock contention: Add -S/--callstack-filter option

Message ID 20230126000936.3017683-1-namhyung@kernel.org (mailing list archive)
State Not Applicable
Delegated to: BPF
Headers show
Series perf lock contention: Add -S/--callstack-filter option | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-36 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 fail Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_progs_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on s390x with gcc

Commit Message

Namhyung Kim Jan. 26, 2023, 12:09 a.m. UTC
The -S/--callstack-filter option limits the displayed entries to those that
have the given string somewhere in the callstack (not only in the caller
shown in the output).

The following example shows lock contention results only if the callstack
contains the substring 'net' somewhere.  Note that the caller
'__dev_queue_xmit' does not match it, but its callstack contains
'inet6_csk_xmit'.

This applies even if you don't use the -v option to show the full callstack.

  $ sudo ./perf lock con -abv -S net sleep 1
  ...
   contended   total wait     max wait     avg wait         type   caller

           5     70.20 us     16.13 us     14.04 us     spinlock   __dev_queue_xmit+0xb6d
                          0xffffffffa5dd1c60  _raw_spin_lock+0x30
                          0xffffffffa5b8f6ed  __dev_queue_xmit+0xb6d
                          0xffffffffa5cd8267  ip6_finish_output2+0x2c7
                          0xffffffffa5cdac14  ip6_finish_output+0x1d4
                          0xffffffffa5cdb477  ip6_xmit+0x457
                          0xffffffffa5d1fd17  inet6_csk_xmit+0xd7
                          0xffffffffa5c5f4aa  __tcp_transmit_skb+0x54a
                          0xffffffffa5c6467d  tcp_keepalive_timer+0x2fd

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/Documentation/perf-lock.txt |  6 +++
 tools/perf/builtin-lock.c              | 68 +++++++++++++++++++++++++-
 tools/perf/util/bpf_lock_contention.c  |  2 +-
 tools/perf/util/lock-contention.h      |  1 +
 4 files changed, 75 insertions(+), 2 deletions(-)

Comments

Arnaldo Carvalho de Melo Feb. 2, 2023, 1:31 a.m. UTC | #1
Em Wed, Jan 25, 2023 at 04:09:36PM -0800, Namhyung Kim escreveu:
> The -S/--callstack-filter is to limit display entries having the given
> string in the callstack (not only in the caller in the output).
> 
> The following example shows lock contention results if the callstack
> has 'net' substring somewhere.  Note that the caller '__dev_queue_xmit'
> does not match to it, but it has 'inet6_csk_xmit' in the callstack.

Looks useful!


Thanks, applied.

- Arnaldo

 
> This applies even if you don't use -v option to show the full callstack.
> 
>   $ sudo ./perf lock con -abv -S net sleep 1
>   ...
>    contended   total wait     max wait     avg wait         type   caller
> 
>            5     70.20 us     16.13 us     14.04 us     spinlock   __dev_queue_xmit+0xb6d
>                           0xffffffffa5dd1c60  _raw_spin_lock+0x30
>                           0xffffffffa5b8f6ed  __dev_queue_xmit+0xb6d
>                           0xffffffffa5cd8267  ip6_finish_output2+0x2c7
>                           0xffffffffa5cdac14  ip6_finish_output+0x1d4
>                           0xffffffffa5cdb477  ip6_xmit+0x457
>                           0xffffffffa5d1fd17  inet6_csk_xmit+0xd7
>                           0xffffffffa5c5f4aa  __tcp_transmit_skb+0x54a
>                           0xffffffffa5c6467d  tcp_keepalive_timer+0x2fd
> 
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  tools/perf/Documentation/perf-lock.txt |  6 +++
>  tools/perf/builtin-lock.c              | 68 +++++++++++++++++++++++++-
>  tools/perf/util/bpf_lock_contention.c  |  2 +-
>  tools/perf/util/lock-contention.h      |  1 +
>  4 files changed, 75 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
> index 0f9f720e599d..11b8901d8d13 100644
> --- a/tools/perf/Documentation/perf-lock.txt
> +++ b/tools/perf/Documentation/perf-lock.txt
> @@ -187,6 +187,12 @@ CONTENTION OPTIONS
>  --lock-filter=<value>::
>  	Show lock contention only for given lock addresses or names (comma separated list).
>  
> +-S::
> +--callstack-filter=<value>::
> +	Show lock contention only if the callstack contains the given string.
> +	Note that it matches the substring so 'rq' would match both 'raw_spin_rq_lock'
> +	and 'irq_enter_rcu'.
> +
>  
>  SEE ALSO
>  --------
> diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
> index 506c2fe42d52..216a9a252bf4 100644
> --- a/tools/perf/builtin-lock.c
> +++ b/tools/perf/builtin-lock.c
> @@ -63,11 +63,22 @@ static unsigned long bpf_map_entries = 10240;
>  static int max_stack_depth = CONTENTION_STACK_DEPTH;
>  static int stack_skip = CONTENTION_STACK_SKIP;
>  static int print_nr_entries = INT_MAX / 2;
> +static LIST_HEAD(callstack_filters);
> +
> +struct callstack_filter {
> +	struct list_head list;
> +	char name[];
> +};
>  
>  static struct lock_filter filters;
>  
>  static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
>  
> +static bool needs_callstack(void)
> +{
> +	return verbose > 0 || !list_empty(&callstack_filters);
> +}
> +
>  static struct thread_stat *thread_stat_find(u32 tid)
>  {
>  	struct rb_node *node;
> @@ -1060,7 +1071,7 @@ static int report_lock_contention_begin_event(struct evsel *evsel,
>  		if (!ls)
>  			return -ENOMEM;
>  
> -		if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
> +		if (aggr_mode == LOCK_AGGR_CALLER && needs_callstack()) {
>  			ls->callstack = get_callstack(sample, max_stack_depth);
>  			if (ls->callstack == NULL)
>  				return -ENOMEM;
> @@ -1595,6 +1606,31 @@ static void print_contention_result(struct lock_contention *con)
>  		if (!st->wait_time_total)
>  			continue;
>  
> +		if (aggr_mode == LOCK_AGGR_CALLER && !list_empty(&callstack_filters)) {
> +			struct map *kmap;
> +			struct symbol *sym;
> +			u64 ip;
> +
> +			for (int i = 0; i < max_stack_depth; i++) {
> +				struct callstack_filter *filter;
> +
> +				if (!st->callstack || !st->callstack[i])
> +					break;
> +
> +				ip = st->callstack[i];
> +				sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
> +				if (sym == NULL)
> +					continue;
> +
> +				list_for_each_entry(filter, &callstack_filters, list) {
> +					if (strstr(sym->name, filter->name))
> +						goto found;
> +				}
> +			}
> +			continue;
> +		}
> +
> +found:
>  		list_for_each_entry(key, &lock_keys, list) {
>  			key->print(key, st);
>  			pr_info(" ");
> @@ -1743,6 +1779,7 @@ static int __cmd_contention(int argc, const char **argv)
>  		.max_stack = max_stack_depth,
>  		.stack_skip = stack_skip,
>  		.filters = &filters,
> +		.save_callstack = needs_callstack(),
>  	};
>  
>  	session = perf_session__new(use_bpf ? NULL : &data, &eops);
> @@ -2123,6 +2160,33 @@ static int parse_lock_addr(const struct option *opt __maybe_unused, const char *
>  	return ret;
>  }
>  
> +static int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
> +			   int unset __maybe_unused)
> +{
> +	char *s, *tmp, *tok;
> +	int ret = 0;
> +
> +	s = strdup(str);
> +	if (s == NULL)
> +		return -1;
> +
> +	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
> +		struct callstack_filter *entry;
> +
> +		entry = malloc(sizeof(*entry) + strlen(tok) + 1);
> +		if (entry == NULL) {
> +			pr_err("Memory allocation failure\n");
> +			return -1;
> +		}
> +
> +		strcpy(entry->name, tok);
> +		list_add_tail(&entry->list, &callstack_filters);
> +	}
> +
> +	free(s);
> +	return ret;
> +}
> +
>  int cmd_lock(int argc, const char **argv)
>  {
>  	const struct option lock_options[] = {
> @@ -2190,6 +2254,8 @@ int cmd_lock(int argc, const char **argv)
>  		     "Filter specific type of locks", parse_lock_type),
>  	OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
>  		     "Filter specific address/symbol of locks", parse_lock_addr),
> +	OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
> +		     "Filter specific function in the callstack", parse_call_stack),
>  	OPT_PARENT(lock_options)
>  	};
>  
> diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
> index 0236334fd69b..4902ac331f41 100644
> --- a/tools/perf/util/bpf_lock_contention.c
> +++ b/tools/perf/util/bpf_lock_contention.c
> @@ -268,7 +268,7 @@ int lock_contention_read(struct lock_contention *con)
>  			break;
>  		}
>  
> -		if (verbose > 0) {
> +		if (con->save_callstack) {
>  			st->callstack = memdup(stack_trace, stack_size);
>  			if (st->callstack == NULL)
>  				break;
> diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
> index b99e83fccf5c..17e594d57a61 100644
> --- a/tools/perf/util/lock-contention.h
> +++ b/tools/perf/util/lock-contention.h
> @@ -128,6 +128,7 @@ struct lock_contention {
>  	int max_stack;
>  	int stack_skip;
>  	int aggr_mode;
> +	bool save_callstack;
>  };
>  
>  #ifdef HAVE_BPF_SKEL
> -- 
> 2.39.1.456.gfc5497dd1b-goog
>
diff mbox series

Patch

diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index 0f9f720e599d..11b8901d8d13 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -187,6 +187,12 @@  CONTENTION OPTIONS
 --lock-filter=<value>::
 	Show lock contention only for given lock addresses or names (comma separated list).
 
+-S::
+--callstack-filter=<value>::
+	Show lock contention only if the callstack contains the given string.
+	Note that it matches the substring so 'rq' would match both 'raw_spin_rq_lock'
+	and 'irq_enter_rcu'.
+
 
 SEE ALSO
 --------
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 506c2fe42d52..216a9a252bf4 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -63,11 +63,22 @@  static unsigned long bpf_map_entries = 10240;
 static int max_stack_depth = CONTENTION_STACK_DEPTH;
 static int stack_skip = CONTENTION_STACK_SKIP;
 static int print_nr_entries = INT_MAX / 2;
+static LIST_HEAD(callstack_filters);
+
+struct callstack_filter {
+	struct list_head list;
+	char name[];
+};
 
 static struct lock_filter filters;
 
 static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
 
+static bool needs_callstack(void)
+{
+	return verbose > 0 || !list_empty(&callstack_filters);
+}
+
 static struct thread_stat *thread_stat_find(u32 tid)
 {
 	struct rb_node *node;
@@ -1060,7 +1071,7 @@  static int report_lock_contention_begin_event(struct evsel *evsel,
 		if (!ls)
 			return -ENOMEM;
 
-		if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
+		if (aggr_mode == LOCK_AGGR_CALLER && needs_callstack()) {
 			ls->callstack = get_callstack(sample, max_stack_depth);
 			if (ls->callstack == NULL)
 				return -ENOMEM;
@@ -1595,6 +1606,31 @@  static void print_contention_result(struct lock_contention *con)
 		if (!st->wait_time_total)
 			continue;
 
+		if (aggr_mode == LOCK_AGGR_CALLER && !list_empty(&callstack_filters)) {
+			struct map *kmap;
+			struct symbol *sym;
+			u64 ip;
+
+			for (int i = 0; i < max_stack_depth; i++) {
+				struct callstack_filter *filter;
+
+				if (!st->callstack || !st->callstack[i])
+					break;
+
+				ip = st->callstack[i];
+				sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
+				if (sym == NULL)
+					continue;
+
+				list_for_each_entry(filter, &callstack_filters, list) {
+					if (strstr(sym->name, filter->name))
+						goto found;
+				}
+			}
+			continue;
+		}
+
+found:
 		list_for_each_entry(key, &lock_keys, list) {
 			key->print(key, st);
 			pr_info(" ");
@@ -1743,6 +1779,7 @@  static int __cmd_contention(int argc, const char **argv)
 		.max_stack = max_stack_depth,
 		.stack_skip = stack_skip,
 		.filters = &filters,
+		.save_callstack = needs_callstack(),
 	};
 
 	session = perf_session__new(use_bpf ? NULL : &data, &eops);
@@ -2123,6 +2160,33 @@  static int parse_lock_addr(const struct option *opt __maybe_unused, const char *
 	return ret;
 }
 
+static int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
+			   int unset __maybe_unused)
+{
+	char *s, *tmp, *tok;
+	int ret = 0;
+
+	s = strdup(str);
+	if (s == NULL)
+		return -1;
+
+	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
+		struct callstack_filter *entry;
+
+		entry = malloc(sizeof(*entry) + strlen(tok) + 1);
+		if (entry == NULL) {
+			pr_err("Memory allocation failure\n");
+			return -1;
+		}
+
+		strcpy(entry->name, tok);
+		list_add_tail(&entry->list, &callstack_filters);
+	}
+
+	free(s);
+	return ret;
+}
+
 int cmd_lock(int argc, const char **argv)
 {
 	const struct option lock_options[] = {
@@ -2190,6 +2254,8 @@  int cmd_lock(int argc, const char **argv)
 		     "Filter specific type of locks", parse_lock_type),
 	OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
 		     "Filter specific address/symbol of locks", parse_lock_addr),
+	OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
+		     "Filter specific function in the callstack", parse_call_stack),
 	OPT_PARENT(lock_options)
 	};
 
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index 0236334fd69b..4902ac331f41 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -268,7 +268,7 @@  int lock_contention_read(struct lock_contention *con)
 			break;
 		}
 
-		if (verbose > 0) {
+		if (con->save_callstack) {
 			st->callstack = memdup(stack_trace, stack_size);
 			if (st->callstack == NULL)
 				break;
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index b99e83fccf5c..17e594d57a61 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -128,6 +128,7 @@  struct lock_contention {
 	int max_stack;
 	int stack_skip;
 	int aggr_mode;
+	bool save_callstack;
 };
 
 #ifdef HAVE_BPF_SKEL