diff mbox series

[dwarves,3/3] dwarf_loader: Check DW_OP_[GNU_]entry_value for possible parameter matching

Message ID 20241108180524.1198900-1-yonghong.song@linux.dev (mailing list archive)
State New
Headers show
Series Check DW_OP_[GNU_]entry_value for possible parameter matching | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Yonghong Song Nov. 8, 2024, 6:05 p.m. UTC
Song Liu reported that a kernel func (perf_event_read()) cannot be traced
in certain situations since the func is not in vmlinux bTF. This happens
in kernels 6.4, 6.9 and 6.11 and the kernel is built with pahole 1.27.

The perf_event_read() signature in kernel (kernel/events/core.c):
   static int perf_event_read(struct perf_event *event, bool group)

Adding '-V' to pahole command line, and the following error msg can be found:
   skipping addition of 'perf_event_read'(perf_event_read) due to unexpected register used for parameter

Eventually the error message is attributed to the setting
(parm->unexpected_reg = 1) in parameter__new() function.

The following is the dwarf representation for perf_event_read():
    0x0334c034:   DW_TAG_subprogram
                DW_AT_low_pc    (0xffffffff812c6110)
                DW_AT_high_pc   (0xffffffff812c640a)
                DW_AT_frame_base        (DW_OP_reg7 RSP)
                DW_AT_GNU_all_call_sites        (true)
                DW_AT_name      ("perf_event_read")
                DW_AT_decl_file ("/rw/compile/kernel/events/core.c")
                DW_AT_decl_line (4641)
                DW_AT_prototyped        (true)
                DW_AT_type      (0x03324f6a "int")
    0x0334c04e:     DW_TAG_formal_parameter
                  DW_AT_location        (0x007de9fd:
                     [0xffffffff812c6115, 0xffffffff812c6141): DW_OP_reg5 RDI
                     [0xffffffff812c6141, 0xffffffff812c6323): DW_OP_reg14 R14
                     [0xffffffff812c6323, 0xffffffff812c63fe): DW_OP_GNU_entry_value(DW_OP_reg5 RDI), DW_OP_stack_value
                     [0xffffffff812c63fe, 0xffffffff812c6405): DW_OP_reg14 R14
                     [0xffffffff812c6405, 0xffffffff812c640a): DW_OP_GNU_entry_value(DW_OP_reg5 RDI), DW_OP_stack_value)
                  DW_AT_name    ("event")
                  DW_AT_decl_file       ("/rw/compile/kernel/events/core.c")
                  DW_AT_decl_line       (4641)
                  DW_AT_type    (0x0333aac2 "perf_event *")
    0x0334c05e:     DW_TAG_formal_parameter
                  DW_AT_location        (0x007dea82:
                     [0xffffffff812c6137, 0xffffffff812c63f2): DW_OP_reg12 R12
                     [0xffffffff812c63f2, 0xffffffff812c63fe): DW_OP_GNU_entry_value(DW_OP_reg4 RSI), DW_OP_stack_value
                     [0xffffffff812c63fe, 0xffffffff812c640a): DW_OP_reg12 R12)
                  DW_AT_name    ("group")
                  DW_AT_decl_file       ("/rw/compile/kernel/events/core.c")
                  DW_AT_decl_line       (4641)
                  DW_AT_type    (0x03327059 "bool")

By inspecting the binary, the second argument ("bool group") is used
in the function. The following are the disasm code:
    ffffffff812c6110 <perf_event_read>:
    ffffffff812c6110: 0f 1f 44 00 00        nopl    (%rax,%rax)
    ffffffff812c6115: 55                    pushq   %rbp
    ffffffff812c6116: 41 57                 pushq   %r15
    ffffffff812c6118: 41 56                 pushq   %r14
    ffffffff812c611a: 41 55                 pushq   %r13
    ffffffff812c611c: 41 54                 pushq   %r12
    ffffffff812c611e: 53                    pushq   %rbx
    ffffffff812c611f: 48 83 ec 18           subq    $24, %rsp
    ffffffff812c6123: 41 89 f4              movl    %esi, %r12d
    <=========== NOTE that here '%esi' is used and moved to '%r12d'.
    ffffffff812c6126: 49 89 fe              movq    %rdi, %r14
    ffffffff812c6129: 65 48 8b 04 25 28 00 00 00    movq    %gs:40, %rax
    ffffffff812c6132: 48 89 44 24 10        movq    %rax, 16(%rsp)
    ffffffff812c6137: 8b af a8 00 00 00     movl    168(%rdi), %ebp
    ffffffff812c613d: 85 ed                 testl   %ebp, %ebp
    ffffffff812c613f: 75 3f                 jne     0xffffffff812c6180 <perf_event_read+0x70>
    ffffffff812c6141: 66 2e 0f 1f 84 00 00 00 00 00 nopw    %cs:(%rax,%rax)
    ffffffff812c614b: 0f 1f 44 00 00        nopl    (%rax,%rax)
    ffffffff812c6150: 49 8b 9e 28 02 00 00  movq    552(%r14), %rbx
    ffffffff812c6157: 48 89 df              movq    %rbx, %rdi
    ffffffff812c615a: e8 c1 a0 d7 00        callq   0xffffffff82040220 <_raw_spin_lock_irqsave>
    ffffffff812c615f: 49 89 c7              movq    %rax, %r15
    ffffffff812c6162: 41 8b ae a8 00 00 00  movl    168(%r14), %ebp
    ffffffff812c6169: 85 ed                 testl   %ebp, %ebp
    ffffffff812c616b: 0f 84 9a 00 00 00     je      0xffffffff812c620b <perf_event_read+0xfb>
    ffffffff812c6171: 48 89 df              movq    %rbx, %rdi
    ffffffff812c6174: 4c 89 fe              movq    %r15, %rsi
    <=========== NOTE: %rsi is overwritten
    ......
    ffffffff812c63f0: 41 5c                 popq    %r12
    <============ POP r12
    ffffffff812c63f2: 41 5d                 popq    %r13
    ffffffff812c63f4: 41 5e                 popq    %r14
    ffffffff812c63f6: 41 5f                 popq    %r15
    ffffffff812c63f8: 5d                    popq    %rbp
    ffffffff812c63f9: e9 e2 a8 d7 00        jmp     0xffffffff82040ce0 <__x86_return_thunk>
    ffffffff812c63fe: 31 c0                 xorl    %eax, %eax
    ffffffff812c6400: e9 be fe ff ff        jmp     0xffffffff812c62c3 <perf_event_read+0x1b3>

It is not clear why dwarf didn't encode %rsi in locations. But
DW_OP_GNU_entry_value(DW_OP_reg4 RSI) tells us that RSI is live at
the entry of perf_event_read(). So this patch tries to search
DW_OP_GNU_entry_value/DW_OP_entry_value location/expression so if
the expected parameter register matchs the register in
DW_OP_GNU_entry_value/DW_OP_entry_value, then the original parameter
is not optimized.

For one of internal 6.11 kernel, there are 62498 functions in BTF and
perf_event_read() is not there. With this patch, there are 61552 functions
in BTF and perf_event_read() is included.

Reported-by: Song Liu <song@kernel.org>
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 dwarf_loader.c | 81 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 57 insertions(+), 24 deletions(-)
diff mbox series

Patch

diff --git a/dwarf_loader.c b/dwarf_loader.c
index e0b8c11..1fe44bc 100644
--- a/dwarf_loader.c
+++ b/dwarf_loader.c
@@ -1169,34 +1169,67 @@  static bool check_dwarf_locations(Dwarf_Attribute *attr, struct parameter *parm,
 		return false;
 
 #if _ELFUTILS_PREREQ(0, 157)
-	/* dwarf_getlocations() handles location lists; here we are
-	 * only interested in the first expr.
-	 */
-	if (dwarf_getlocations(attr, 0, &base, &start, &end,
-			       &loc.expr, &loc.exprlen) > 0 &&
-		loc.exprlen != 0) {
-		expr = loc.expr;
-
-		switch (expr->atom) {
-		case DW_OP_reg0 ... DW_OP_reg31:
-			/* mark parameters that use an unexpected
-			 * register to hold a parameter; these will
-			 * be problematic for users of BTF as they
-			 * violate expectations about register
-			 * contents.
+	bool reg_matched = false, reg_unmatched = false, first_expr_reg = false, ret = false;
+	ptrdiff_t offset = 0;
+	int loc_num = -1;
+
+	while ((offset = dwarf_getlocations(attr, offset, &base, &start, &end, &loc.expr, &loc.exprlen)) > 0 &&
+	       loc.exprlen != 0) {
+		ret = true;
+		loc_num++;
+
+		for (int i = 0; i < loc.exprlen; i++) {
+			Dwarf_Attribute entry_attr;
+			Dwarf_Op *entry_ops;
+			size_t entry_len;
+
+			expr = &loc.expr[i];
+			switch (expr->atom) {
+			case DW_OP_reg0 ... DW_OP_reg31:
+				/* first location, first expression */
+				if (loc_num == 0 && i == 0) {
+					if (expected_reg >= 0) {
+						if (expected_reg == expr->atom) {
+							reg_matched = true;
+							return true;
+						} else {
+							reg_unmatched = true;
+						}
+					}
+					first_expr_reg = true;
+				}
+				break;
+			/* For the following dwarf entry (arch x86_64) in parameter locations:
+			 *    DW_OP_GNU_entry_value(DW_OP_reg4 RSI), DW_OP_stack_value
+			 * RSI register should be available at the entry of the program.
 			 */
-			if (expected_reg >= 0 && expected_reg != expr->atom)
-				parm->unexpected_reg = 1;
-			break;
-		default:
-			parm->optimized = 1;
-			break;
+			case DW_OP_entry_value:
+			case DW_OP_GNU_entry_value:
+				if (reg_matched)
+					break;
+				if (dwarf_getlocation_attr (attr, expr, &entry_attr) != 0)
+					break;
+				if (dwarf_getlocation (&entry_attr, &entry_ops, &entry_len) != 0)
+					break;
+				if (entry_len != 1)
+					break;
+				if (expected_reg >= 0 && expected_reg == entry_ops->atom) {
+					reg_matched = true;
+					return true;
+				}
+				break;
+			default:
+				break;
+			}
 		}
-
-		return true;
 	}
 
-	return false;
+	if (reg_unmatched)
+		parm->unexpected_reg = 1;
+	else if (ret && !first_expr_reg)
+		parm->optimized = 1;
+
+	return ret;
 #else
 	if (dwarf_getlocation(attr, &loc.expr, &loc.exprlen) == 0 &&
 		loc.exprlen != 0) {