diff mbox series

Fix signal handler to detect crashes in qemu-linux-user

Message ID 20230812164314.352131-1-deller@gmx.de (mailing list archive)
State New, archived
Headers show
Series Fix signal handler to detect crashes in qemu-linux-user | expand

Commit Message

Helge Deller Aug. 12, 2023, 4:43 p.m. UTC
If there is an internal program error in the qemu source code which
triggers a SIGSEGV, qemu will currently assume this is a SIGSEGV of the
target and print:

(hppa-chroot)root@p100:/# cat /proc/self/maps
**
ERROR:../../home/cvs/qemu/qemu/accel/tcg/cpu-exec.c:532:cpu_exec_longjmp_cleanup: assertion failed: (cpu == current_cpu)
Bail out! ERROR:../../home/cvs/qemu/qemu/accel/tcg/cpu-exec.c:532:cpu_exec_longjmp_cleanup: assertion failed: (cpu == current_cpu)
**

This error message is very misleading for developers and end-users.

The attached patch will print instead:

(hppa-chroot)root@p100:/# cat /proc/self/maps
QEMU linux-user v8.0.93 for target parisc
QEMU internal error: signal=11, errno=0, code=1, addr=(nil)
while running: /usr/bin/cat
QEMU backtrace:
[0x7f70cd045115]
[0x7f70cd21b140]
[0x7f70cd04ec49]
[0x7f70cd04ec6b]
[0x7f70cd0597e2]
[0x7f70cd05d9ed]
[0x7f70cd064008]
[0x7f70ccffbd2d]
[0x7f70ccff57f8]
[0x7f70cd205868]
[0x7f70cd206f4f]
[0x7f70ccff60a5]

Note that glibc's backtrace() can not resolve the addresses to function
names for static binaries, which is why only addresses are show above.

Signed-off-by: Helge Deller <deller@gmx.de>

v2:
- Refined crash detection based on IP address, suggested by Richard
- More info in crash dump, e.g. qemu version and target
---
 linux-user/signal.c  | 36 ++++++++++++++++++++++++++++++++++++
 linux-user/syscall.c |  1 +
 2 files changed, 37 insertions(+)

--
2.41.0

Comments

Richard Henderson Aug. 12, 2023, 5:33 p.m. UTC | #1
On 8/12/23 09:43, Helge Deller wrote:
> +/* _init and _fini are provided by the linker */
> +extern char _init;
> +extern char _fini;
> +
>   static void host_signal_handler(int host_sig, siginfo_t *info, void *puc)
>   {
>       CPUArchState *env = thread_cpu->env_ptr;
> @@ -819,6 +848,13 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc)
>           if (host_sig == SIGSEGV) {
>               bool maperr = true;
> 
> +            /* Did segfault happened in qemu source code? */
> +            if ((pc >= (uintptr_t) &_init && pc < (uintptr_t) &_fini) ||
> +                (TARGET_ABI_BITS == 32 && HOST_LONG_BITS == 64
> +                 && !h2g_valid(host_addr))) {
> +                qemu_show_backtrace(info);
> +            }

This is incorrect.

I did mention that you should look at adjust_signal_pc,
which has a lot of commentary on the subject.

(0) Place this after the write-protect and restart check
just below, since that is not an error of any sort.

(1) If helper_retaddr == 1, then this is a fault reading
for translation and is a guest SIGSEGV.

(2) If helper_retaddr != 0, then this is a fault in some
cpu_ldst.h operation and is a guest SIGSEGV.

(3) If in_code_gen_buffer(host_signal_pc()), then this
is a fault within generated code and is a guest SIGSEGV.

Otherwise it's a host SIGSEGV.


r~
Helge Deller Aug. 12, 2023, 6:12 p.m. UTC | #2
* Richard Henderson <richard.henderson@linaro.org>:
> On 8/12/23 09:43, Helge Deller wrote:
> > +/* _init and _fini are provided by the linker */
> > +extern char _init;
> > +extern char _fini;
> > +
> >   static void host_signal_handler(int host_sig, siginfo_t *info, void *puc)
> >   {
> >       CPUArchState *env = thread_cpu->env_ptr;
> > @@ -819,6 +848,13 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc)
> >           if (host_sig == SIGSEGV) {
> >               bool maperr = true;
> >
> > +            /* Did segfault happened in qemu source code? */
> > +            if ((pc >= (uintptr_t) &_init && pc < (uintptr_t) &_fini) ||
> > +                (TARGET_ABI_BITS == 32 && HOST_LONG_BITS == 64
> > +                 && !h2g_valid(host_addr))) {
> > +                qemu_show_backtrace(info);
> > +            }
>
> This is incorrect.
>
> I did mention that you should look at adjust_signal_pc,

Yes, you did.
Maybe I was blind or didn't fully understood yet.

> which has a lot of commentary on the subject.
>
> (0) Place this after the write-protect and restart check
> just below, since that is not an error of any sort.
>
> (1) If helper_retaddr == 1, then this is a fault reading
> for translation and is a guest SIGSEGV.
>
> (2) If helper_retaddr != 0, then this is a fault in some
> cpu_ldst.h operation and is a guest SIGSEGV.

(1) and (2) means host SIGSEGV can only happen if
helper_retaddr == 0.

> (3) If in_code_gen_buffer(host_signal_pc()), then this
> is a fault within generated code and is a guest SIGSEGV.
>
> Otherwise it's a host SIGSEGV.

So, basically like this?

Helge

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 748a98f3e5..9762246eec 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -23,6 +23,7 @@

 #include <sys/ucontext.h>
 #include <sys/resource.h>
+#include <execinfo.h>

 #include "qemu.h"
 #include "user-internals.h"
@@ -32,6 +33,7 @@
 #include "signal-common.h"
 #include "host-signal.h"
 #include "user/safe-syscall.h"
+#include "tcg/tcg.h"

 static struct target_sigaction sigact_table[TARGET_NSIG];

@@ -781,6 +783,31 @@ static inline void rewind_if_in_safe_syscall(void *puc)
     }
 }

+static void qemu_show_backtrace(siginfo_t *info)
+{
+    void *array[20];
+    char **strings;
+    int size, i;
+
+    fprintf(stderr, "---------------\n");
+    fprintf(stderr, "QEMU linux-user v" QEMU_VERSION " for target "
+                     UNAME_MACHINE "\n");
+    fprintf(stderr, "QEMU internal error: signal=%d, errno=%d, "
+                    "code=%d, addr=%p\n",
+                    info->si_signo, info->si_errno, info->si_code,
+                    info->si_addr);
+    fprintf(stderr, "while running: %s\n", exec_path);
+    size = backtrace(array, ARRAY_SIZE(array));
+    strings = backtrace_symbols(array, size);
+    if (strings) {
+        fprintf(stderr, "QEMU backtrace:\n");
+        for (i = 0; i < size; i++)
+            fprintf(stderr, "%s\n", strings[i]);
+    }
+    free (strings);
+    exit(info->si_code);
+}
+
 static void host_signal_handler(int host_sig, siginfo_t *info, void *puc)
 {
     CPUArchState *env = thread_cpu->env_ptr;
@@ -800,7 +827,7 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc)
      */
     if ((host_sig == SIGSEGV || host_sig == SIGBUS) && info->si_code > 0) {
         MMUAccessType access_type;
-        uintptr_t host_addr;
+        uintptr_t host_addr, host_pc;
         abi_ptr guest_addr;
         bool is_write;

@@ -812,7 +839,8 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc)
          */
         guest_addr = h2g_nocheck(host_addr);

-        pc = host_signal_pc(uc);
+        host_pc = host_signal_pc(uc);
+        pc = host_pc;
         is_write = host_signal_write(info, uc);
         access_type = adjust_signal_pc(&pc, is_write);

@@ -837,6 +865,12 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc)
                 }
             }

+            /* Did segfault happened in qemu source code? */
+            if (helper_retaddr == 0 &&
+                !in_code_gen_buffer((void*)host_pc)) {
+                qemu_show_backtrace(info);
+            }
+
             sigprocmask(SIG_SETMASK, sigmask, NULL);
             cpu_loop_exit_sigsegv(cpu, guest_addr, access_type, maperr, pc);
         } else {
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 9353268cc1..da29d97816 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8132,6 +8132,7 @@ static int open_self_maps_1(CPUArchState *cpu_env, int fd, bool smaps)
     IntervalTreeNode *s;
     int count;

+*(int*)NULL = 1;
     for (s = interval_tree_iter_first(map_info, 0, -1); s;
          s = interval_tree_iter_next(s, 0, -1)) {
         MapInfo *e = container_of(s, MapInfo, itree);
Richard Henderson Aug. 13, 2023, 2:30 a.m. UTC | #3
On 8/12/23 11:12, Helge Deller wrote:
>> I did mention that you should look at adjust_signal_pc,
> 
> Yes, you did.
> Maybe I was blind or didn't fully understood yet.
> 
>> which has a lot of commentary on the subject.
>>
>> (0) Place this after the write-protect and restart check
>> just below, since that is not an error of any sort.
>>
>> (1) If helper_retaddr == 1, then this is a fault reading
>> for translation and is a guest SIGSEGV.
>>
>> (2) If helper_retaddr != 0, then this is a fault in some
>> cpu_ldst.h operation and is a guest SIGSEGV.
> 
> (1) and (2) means host SIGSEGV can only happen if
> helper_retaddr == 0.

Yes.

> 
>> (3) If in_code_gen_buffer(host_signal_pc()), then this
>> is a fault within generated code and is a guest SIGSEGV.
>>
>> Otherwise it's a host SIGSEGV.
> 
> So, basically like this?

Looks about right.


r~
diff mbox series

Patch

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 748a98f3e5..d445376f06 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -23,6 +23,7 @@ 

 #include <sys/ucontext.h>
 #include <sys/resource.h>
+#include <execinfo.h>

 #include "qemu.h"
 #include "user-internals.h"
@@ -781,6 +782,34 @@  static inline void rewind_if_in_safe_syscall(void *puc)
     }
 }

+static void qemu_show_backtrace(siginfo_t *info)
+{
+    void *array[20];
+    char **strings;
+    int size, i;
+
+    fprintf(stderr, "QEMU linux-user v" QEMU_VERSION " for target "
+                     UNAME_MACHINE "\n");
+    fprintf(stderr, "QEMU internal error: signal=%d, errno=%d, "
+                    "code=%d, addr=%p\n",
+                    info->si_signo, info->si_errno, info->si_code,
+                    info->si_addr);
+    fprintf(stderr, "while running: %s\n", exec_path);
+    size = backtrace(array, ARRAY_SIZE(array));
+    strings = backtrace_symbols(array, size);
+    if (strings) {
+        fprintf(stderr, "QEMU backtrace:\n");
+        for (i = 0; i < size; i++)
+            fprintf(stderr, "%s\n", strings[i]);
+    }
+    free (strings);
+    exit(info->si_code);
+}
+
+/* _init and _fini are provided by the linker */
+extern char _init;
+extern char _fini;
+
 static void host_signal_handler(int host_sig, siginfo_t *info, void *puc)
 {
     CPUArchState *env = thread_cpu->env_ptr;
@@ -819,6 +848,13 @@  static void host_signal_handler(int host_sig, siginfo_t *info, void *puc)
         if (host_sig == SIGSEGV) {
             bool maperr = true;

+            /* Did segfault happened in qemu source code? */
+            if ((pc >= (uintptr_t) &_init && pc < (uintptr_t) &_fini) ||
+                (TARGET_ABI_BITS == 32 && HOST_LONG_BITS == 64
+                 && !h2g_valid(host_addr))) {
+                qemu_show_backtrace(info);
+            }
+
             if (info->si_code == SEGV_ACCERR && h2g_valid(host_addr)) {
                 /* If this was a write to a TB protected page, restart. */
                 if (is_write &&
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 9353268cc1..da29d97816 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8132,6 +8132,7 @@  static int open_self_maps_1(CPUArchState *cpu_env, int fd, bool smaps)
     IntervalTreeNode *s;
     int count;

+*(int*)NULL = 1;
     for (s = interval_tree_iter_first(map_info, 0, -1); s;
          s = interval_tree_iter_next(s, 0, -1)) {
         MapInfo *e = container_of(s, MapInfo, itree);