Message ID | 20240614081956.19832-6-johannes@sipsolutions.net (mailing list archive) |
---|---|
Headers | show |
Series | tracing: improve symbolic printing | expand |
Hi Johannes, I finally got around to testing your patches. I did the following: # cat /sys/kernel/tracing/events/*/*/format and hit this: BUG: unable to handle page fault for address: ffffffff8e6333d0 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 183c40067 P4D 183c40067 PUD 183c41063 PMD 1003ef063 PTE 800ffffe7b9cc062 Oops: Oops: 0000 [#1] PREEMPT SMP PTI CPU: 7 UID: 0 PID: 893 Comm: cat Not tainted 6.11.0-rc4-test-00004-g4ce2836f008b #56 68afcee1248519f8b3b088836c40746e4a6b69d3 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:f_show (kernel/trace/trace_events.c:1601 kernel/trace/trace_events.c:1651 kernel/trace/trace_events.c:1689) Code: 33 63 8e 48 2d d0 33 63 8e 48 c1 f8 03 85 c0 74 67 89 c0 4c 89 c3 49 8d 04 c0 48 89 04 24 eb 0a 48 83 c3 08 48 39 1c 24 74 4e <4c> 8b 3b 4d 85 ff 74 ee 49 8b 45 10 48 8b 00 49 39 07 75 e2 49 8b All code ======== 0: 33 63 8e xor -0x72(%rbx),%esp 3: 48 2d d0 33 63 8e sub $0xffffffff8e6333d0,%rax 9: 48 c1 f8 03 sar $0x3,%rax d: 85 c0 test %eax,%eax f: 74 67 je 0x78 11: 89 c0 mov %eax,%eax 13: 4c 89 c3 mov %r8,%rbx 16: 49 8d 04 c0 lea (%r8,%rax,8),%rax 1a: 48 89 04 24 mov %rax,(%rsp) 1e: eb 0a jmp 0x2a 20: 48 83 c3 08 add $0x8,%rbx 24: 48 39 1c 24 cmp %rbx,(%rsp) 28: 74 4e je 0x78 2a:* 4c 8b 3b mov (%rbx),%r15 <-- trapping instruction 2d: 4d 85 ff test %r15,%r15 30: 74 ee je 0x20 32: 49 8b 45 10 mov 0x10(%r13),%rax 36: 48 8b 00 mov (%rax),%rax 39: 49 39 07 cmp %rax,(%r15) 3c: 75 e2 jne 0x20 3e: 49 rex.WB 3f: 8b .byte 0x8b Code starting with the faulting instruction =========================================== 0: 4c 8b 3b mov (%rbx),%r15 3: 4d 85 ff test %r15,%r15 6: 74 ee je 0xfffffffffffffff6 8: 49 8b 45 10 mov 0x10(%r13),%rax c: 48 8b 00 mov (%rax),%rax f: 49 39 07 cmp %rax,(%r15) 12: 75 e2 jne 0xfffffffffffffff6 14: 49 rex.WB 15: 8b .byte 0x8b RSP: 0018:ffffb43981457ca8 EFLAGS: 00010202 RAX: ffffffff8e6333e0 RBX: ffffffff8e6333d0 RCX: 00000000000002cd RDX: ffff942b4b0bd000 RSI: 000000000000006e RDI: ffff942b42f9cc30 RBP: ffff942b42f9cc30 R08: ffffffff8e6333d0 R09: 63696c6f626d7973 R10: 735f746e6972705f R11: 2863696c6f626d79 R12: 0000000000000000 R13: ffffffff8de44880 R14: ffffffff8de44516 R15: ffffffff8de44515 FS: 00007f556c562740(0000) GS:ffff942cbdfc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffff8e6333d0 CR3: 000000010a1d0006 CR4: 0000000000170ef0 Call Trace: <TASK> ? __die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434) ? page_fault_oops (arch/x86/mm/fault.c:715) ? search_module_extables (kernel/module/main.c:3280) ? search_bpf_extables (kernel/bpf/core.c:799) ? exc_page_fault (arch/x86/mm/fault.c:1198 arch/x86/mm/fault.c:1479 arch/x86/mm/fault.c:1539) ? asm_exc_page_fault (arch/x86/include/asm/idtentry.h:623) ? f_show (kernel/trace/trace_events.c:1601 kernel/trace/trace_events.c:1651 kernel/trace/trace_events.c:1689) ? f_show (kernel/trace/trace_events.c:1623 kernel/trace/trace_events.c:1689) seq_read_iter (fs/seq_file.c:273) seq_read (fs/seq_file.c:163) vfs_read (fs/read_write.c:474) ? __handle_mm_fault (mm/memory.c:3945 mm/memory.c:5521 mm/memory.c:5664) ksys_read (fs/read_write.c:619) do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1)) ? handle_mm_fault (mm/memory.c:5744 (discriminator 1) mm/memory.c:5840 (discriminator 1)) ? exc_page_fault (arch/x86/mm/fault.c:1342 arch/x86/mm/fault.c:1481 arch/x86/mm/fault.c:1539) ? irqentry_exit_to_user_mode (arch/x86/include/asm/entry-common.h:57 (discriminator 1) include/linux/entry-common.h:330 (discriminator 1) kernel/entry/common.c:231 (discriminator 1)) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) RIP: 0033:0x7f556c65ca5d Code: 31 c0 e9 c6 fe ff ff 50 48 8d 3d a6 60 0a 00 e8 a9 08 02 00 66 0f 1f 84 00 00 00 00 00 80 3d 81 3b 0e 00 00 74 17 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 5b c3 66 2e 0f 1f 84 00 00 00 00 00 48 83 ec All code ======== 0: 31 c0 xor %eax,%eax 2: e9 c6 fe ff ff jmp 0xfffffffffffffecd 7: 50 push %rax 8: 48 8d 3d a6 60 0a 00 lea 0xa60a6(%rip),%rdi # 0xa60b5 f: e8 a9 08 02 00 call 0x208bd 14: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) 1b: 00 00 1d: 80 3d 81 3b 0e 00 00 cmpb $0x0,0xe3b81(%rip) # 0xe3ba5 24: 74 17 je 0x3d 26: 31 c0 xor %eax,%eax 28: 0f 05 syscall 2a:* 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax <-- trapping instruction 30: 77 5b ja 0x8d 32: c3 ret 33: 66 2e 0f 1f 84 00 00 cs nopw 0x0(%rax,%rax,1) 3a: 00 00 00 3d: 48 rex.W 3e: 83 .byte 0x83 3f: ec in (%dx),%al Code starting with the faulting instruction =========================================== 0: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax 6: 77 5b ja 0x63 8: c3 ret 9: 66 2e 0f 1f 84 00 00 cs nopw 0x0(%rax,%rax,1) 10: 00 00 00 13: 48 rex.W 14: 83 .byte 0x83 15: ec in (%dx),%al RSP: 002b:00007ffc88ecb878 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f556c65ca5d RDX: 0000000000020000 RSI: 00007f556c541000 RDI: 0000000000000003 RBP: 0000000000020000 R08: 00000000ffffffff R09: 0000000000000000 R10: 0000000000000022 R11: 0000000000000246 R12: 00007f556c541000 R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000000000 </TASK> Modules linked in: snd_hda_intel snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec snd_hwdep snd_hda_core CR2: ffffffff8e6333d0 ---[ end trace 0000000000000000 ]--- -- Steve
Hi Steven, > I finally got around to testing your patches. > > I did the following: > > # cat /sys/kernel/tracing/events/*/*/format > > and hit this: > > BUG: unable to handle page fault for address: ffffffff8e6333d0 Ugh. That's ... unfortunate. I couldn't reproduce this so far, do you happen to have the .config and qemu command line perhaps? > #PF: supervisor read access in kernel mode > #PF: error_code(0x0000) - not-present page > PGD 183c40067 P4D 183c40067 PUD 183c41063 PMD 1003ef063 PTE 800ffffe7b9cc062 > Oops: Oops: 0000 [#1] PREEMPT SMP PTI > CPU: 7 UID: 0 PID: 893 Comm: cat Not tainted 6.11.0-rc4-test-00004-g4ce2836f008b #56 68afcee1248519f8b3b088836c40746e4a6b69d3 I guess that's just my four patches; reproducing that (rc4 + the patches, seems there was a small conflict in skb tracing) ... > Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 > RIP: 0010:f_show (kernel/trace/trace_events.c:1601 kernel/trace/trace_events.c:1651 kernel/trace/trace_events.c:1689) means this was for (i = 0; i < n_sym_defs; i++) { unsigned int sym_len; if (!sym_defs[i]) // <--- HERE continue; which seems really strange to me. Sadly the > Code: [...] was compiled so we don't see 'i' here, and the stop value is on the stack... > All code > ======== > 0: 33 63 8e xor -0x72(%rbx),%esp > 3: 48 2d d0 33 63 8e sub $0xffffffff8e6333d0,%rax the big constant here must be __start_ftrace_sym_defs, being subtracted from an already loaded __stop_ftrace_sym_defs in %rax ... > 9: 48 c1 f8 03 sar $0x3,%rax also part of the size calculation (n_sym_defs) - we have 8 byte pointers in each entry, so >>= 3 makes sense. Given RBX is 0xffffffff8e6333d0 as well, that means __start_ftrace_sym_defs isn't a valid pointer? But then what was __stop_ftrace_sym_defs? And why would the section be non-empty but invalid to dereference? > d: 85 c0 test %eax,%eax > f: 74 67 je 0x78 Anyway this aborts if there are 0 entries so we get non-zero in %eax/%rax ... I've actually tried now with an empty section, and that also works OK. Is there something special about your build here? johannes