[3/4] perf lock contention: Show per-cpu rq_lock with address

Message ID 20230313204825.2665483-4-namhyung@kernel.org
State Not Applicable
Delegated to: BPF
Series perf lock contention: Improve lock symbol display (v1)

Checks

Context                  Check     Description
netdev/tree_selection    success   Not a local patch

Commit Message

Namhyung Kim March 13, 2023, 8:48 p.m. UTC
Using the BPF_PROG_RUN mechanism, we can run a raw_tp BPF program on
demand to collect the addresses of semi-global locks such as the
per-cpu locks.  Let's add the runqueue locks using the
bpf_per_cpu_ptr() helper.

  $ sudo ./perf lock con -abl -- sleep 1
   contended   total wait     max wait     avg wait            address   symbol

         248      3.25 ms     32.23 us     13.10 us   ffff8cc75cfd2940   siglock
          60    217.91 us      9.69 us      3.63 us   ffff8cc700061c00
           8     70.23 us     13.86 us      8.78 us   ffff8cc703629484
           4     56.32 us     35.81 us     14.08 us   ffff8cc78b66f778   mmap_lock
           4     16.70 us      5.18 us      4.18 us   ffff8cc7036a0684
           3      4.99 us      2.65 us      1.66 us   ffff8d053da30c80   rq_lock
           2      3.44 us      2.28 us      1.72 us   ffff8d053dcf0c80   rq_lock
           9      2.51 us       371 ns       278 ns   ffff8ccb92479440
           2      2.11 us      1.24 us      1.06 us   ffff8d053db30c80   rq_lock
           2      2.06 us      1.69 us      1.03 us   ffff8d053d970c80   rq_lock
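
For reference, the mechanism boils down to two pieces taken from the
hunks below: a raw_tp BPF program that walks the per-cpu 'runqueues'
variable with bpf_per_cpu_ptr(), and a single BPF_PROG_RUN call from
userspace that executes it on demand right before the results are
read.  A condensed sketch (map update and error handling omitted):

  /* BPF side (lock_contention.bpf.c): needs vmlinux.h, bpf_helpers.h,
   * bpf_tracing.h */
  #define MAX_CPUS  1024		/* fixed loop bound for the verifier */

  extern struct rq runqueues __ksym;	/* per-cpu variable from kernel BTF */

  SEC("raw_tp/bpf_test_finish")
  int BPF_PROG(collect_lock_syms)
  {
  	for (int i = 0; i < MAX_CPUS; i++) {
  		struct rq *rq = bpf_per_cpu_ptr(&runqueues, i);

  		if (rq == NULL)		/* no such CPU */
  			break;
  		/* record &rq->__lock -> LOCK_CLASS_RQLOCK in lock_syms */
  	}
  	return 0;
  }

  /* user side (bpf_lock_contention.c): run the program once via
   * BPF_PROG_RUN (a.k.a. test_run) */
  DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
  	.flags = BPF_F_TEST_RUN_ON_CPU,
  );
  bpf_prog_test_run_opts(bpf_program__fd(skel->progs.collect_lock_syms),
  			&opts);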

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/bpf_lock_contention.c         | 27 ++++++++++++++--
 .../perf/util/bpf_skel/lock_contention.bpf.c  | 31 +++++++++++++++++++
 tools/perf/util/bpf_skel/lock_data.h          |  5 +++
 3 files changed, 61 insertions(+), 2 deletions(-)

Patch

diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index 51631af3b4d6..235fc7150545 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -151,6 +151,8 @@ int lock_contention_prepare(struct lock_contention *con)
 	skel->bss->needs_callstack = con->save_callstack;
 	skel->bss->lock_owner = con->owner;
 
+	bpf_program__set_autoload(skel->progs.collect_lock_syms, false);
+
 	lock_contention_bpf__attach(skel);
 	return 0;
 }
@@ -198,14 +200,26 @@ static const char *lock_contention_get_name(struct lock_contention *con,
 	}
 
 	if (con->aggr_mode == LOCK_AGGR_ADDR) {
+		int lock_fd = bpf_map__fd(skel->maps.lock_syms);
+
+		/* per-process locks set upper bits of the flags */
 		if (flags & LCD_F_MMAP_LOCK)
 			return "mmap_lock";
 		if (flags & LCD_F_SIGHAND_LOCK)
 			return "siglock";
+
+		/* global locks with symbols */
 		sym = machine__find_kernel_symbol(machine, key->lock_addr, &kmap);
 		if (sym)
-			name = sym->name;
-		return name;
+			return sym->name;
+
+		/* try semi-global locks collected separately */
+		if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr, &flags)) {
+			if (flags == LOCK_CLASS_RQLOCK)
+				return "rq_lock";
+		}
+
+		return "";
 	}
 
 	/* LOCK_AGGR_CALLER: skip lock internal functions */
@@ -258,6 +272,15 @@ int lock_contention_read(struct lock_contention *con)
 		thread__set_comm(idle, "swapper", /*timestamp=*/0);
 	}
 
+	if (con->aggr_mode == LOCK_AGGR_ADDR) {
+		DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+			.flags = BPF_F_TEST_RUN_ON_CPU,
+		);
+		int prog_fd = bpf_program__fd(skel->progs.collect_lock_syms);
+
+		bpf_prog_test_run_opts(prog_fd, &opts);
+	}
+
 	/* make sure it loads the kernel map */
 	map__load(maps__first(machine->kmaps));
 
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 4ba34caf84eb..2d50c4395733 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -10,6 +10,9 @@
 /* default buffer size */
 #define MAX_ENTRIES  10240
 
+/* for collect_lock_syms().  4096 was rejected by the verifier */
+#define MAX_CPUS  1024
+
 /* lock contention flags from include/trace/events/lock.h */
 #define LCB_F_SPIN	(1U << 0)
 #define LCB_F_READ	(1U << 1)
@@ -56,6 +59,13 @@ struct {
 	__uint(max_entries, MAX_ENTRIES);
 } task_data SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(__u64));
+	__uint(value_size, sizeof(__u32));
+	__uint(max_entries, 16384);
+} lock_syms SEC(".maps");
+
 struct {
 	__uint(type, BPF_MAP_TYPE_HASH);
 	__uint(key_size, sizeof(__u32));
@@ -378,4 +388,25 @@ int contention_end(u64 *ctx)
 	return 0;
 }
 
+extern struct rq runqueues __ksym;
+
+SEC("raw_tp/bpf_test_finish")
+int BPF_PROG(collect_lock_syms)
+{
+	__u64 lock_addr;
+	__u32 lock_flag;
+
+	for (int i = 0; i < MAX_CPUS; i++) {
+		struct rq *rq = bpf_per_cpu_ptr(&runqueues, i);
+
+		if (rq == NULL)
+			break;
+
+		lock_addr = (__u64)&rq->__lock;
+		lock_flag = LOCK_CLASS_RQLOCK;
+		bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY);
+	}
+	return 0;
+}
+
 char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
index 5ed1a0955015..e59366f2dba3 100644
--- a/tools/perf/util/bpf_skel/lock_data.h
+++ b/tools/perf/util/bpf_skel/lock_data.h
@@ -36,4 +36,9 @@ enum lock_aggr_mode {
 	LOCK_AGGR_CALLER,
 };
 
+enum lock_class_sym {
+	LOCK_CLASS_NONE,
+	LOCK_CLASS_RQLOCK,
+};
+
 #endif /* UTIL_BPF_SKEL_LOCK_DATA_H */