diff mbox series

[02/23] tcg: Manage splitwx in tc_ptr_to_region_tree by hand

Message ID 20210128082331.196801-3-richard.henderson@linaro.org (mailing list archive)
State New, archived
Headers show
Series TCI fixes and cleanups | expand

Commit Message

Richard Henderson Jan. 28, 2021, 8:23 a.m. UTC
The use in tcg_tb_lookup is given a random pc that comes from the pc
of a signal handler.  Do not assert that the pointer is already within
the code gen buffer at all, much less the writable mirror of it.

Fixes: db0c51a3803
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---

For TCI, this indicates a bug in handle_cpu_signal, in that we
are taking PC from the host signal frame.  Which is, nearly,
unrelated to TCI at all.

The TCI "pc" is tci_tb_ptr (fixed in the next patch to at least
be thread-local).  We update this only on calls, since we don't
expect SEGV during the interpretation loop.  Which works ok for
softmmu, in which we pass down pc by hand to the helpers, but
is not ok for user-only, where we simply perform the raw memory
operation.

I don't know how to fix this, exactly.  Probably by storing to
tci_tb_ptr before each qemu_ld/qemu_st operation, with barriers.
Then Doing the Right Thing in handle_cpu_signal.  And perhaps
by clearing tci_tb_ptr whenever we're not expecting a SEGV on
behalf of the guest (and thus anything left is a qemu host bug).


r~

---
 tcg/tcg.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

Comments

Alex Bennée Jan. 28, 2021, 1:09 p.m. UTC | #1
Richard Henderson <richard.henderson@linaro.org> writes:

> The use in tcg_tb_lookup is given a random pc that comes from the pc
> of a signal handler.  Do not assert that the pointer is already within
> the code gen buffer at all, much less the writable mirror of it.
>
> Fixes: db0c51a3803
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

OK I bisected to this regression while running:

  "cd builds/bisect && rm -rf * && ../../configure --disable-docs --target-list=m68k-linux-user && make -j30 && make check-tcg"

which ultimately fails during the threadcount test for m68k-linux-user.
I'm just testing now to see if that also broke my ARM system test
images.

> ---
>
> For TCI, this indicates a bug in handle_cpu_signal, in that we
> are taking PC from the host signal frame.  Which is, nearly,
> unrelated to TCI at all.
>
> The TCI "pc" is tci_tb_ptr (fixed in the next patch to at least
> be thread-local).  We update this only on calls, since we don't
> expect SEGV during the interpretation loop.  Which works ok for
> softmmu, in which we pass down pc by hand to the helpers, but
> is not ok for user-only, where we simply perform the raw memory
> operation.
>
> I don't know how to fix this, exactly.  Probably by storing to
> tci_tb_ptr before each qemu_ld/qemu_st operation, with barriers.
> Then Doing the Right Thing in handle_cpu_signal.  And perhaps
> by clearing tci_tb_ptr whenever we're not expecting a SEGV on
> behalf of the guest (and thus anything left is a qemu host bug).
>
>
> r~
>
> ---
>  tcg/tcg.c | 23 ++++++++++++++++++++---
>  1 file changed, 20 insertions(+), 3 deletions(-)
>
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 9e1b0d73c7..78701cf359 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -407,11 +407,21 @@ static void tcg_region_trees_init(void)
>      }
>  }
>  
> -static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
> +static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
>  {
> -    void *p = tcg_splitwx_to_rw(cp);
>      size_t region_idx;
>  
> +    /*
> +     * Like tcg_splitwx_to_rw, with no assert.  The pc may come from
> +     * a signal handler over which the caller has no control.
> +     */
> +    if (!in_code_gen_buffer(p)) {
> +        p -= tcg_splitwx_diff;
> +        if (!in_code_gen_buffer(p)) {
> +            return NULL;
> +        }
> +    }
> +
>      if (p < region.start_aligned) {
>          region_idx = 0;
>      } else {
> @@ -430,6 +440,7 @@ void tcg_tb_insert(TranslationBlock *tb)
>  {
>      struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
>  
> +    g_assert(rt != NULL);
>      qemu_mutex_lock(&rt->lock);
>      g_tree_insert(rt->tree, &tb->tc, tb);
>      qemu_mutex_unlock(&rt->lock);
> @@ -439,6 +450,7 @@ void tcg_tb_remove(TranslationBlock *tb)
>  {
>      struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
>  
> +    g_assert(rt != NULL);
>      qemu_mutex_lock(&rt->lock);
>      g_tree_remove(rt->tree, &tb->tc);
>      qemu_mutex_unlock(&rt->lock);
> @@ -453,8 +465,13 @@ TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
>  {
>      struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
>      TranslationBlock *tb;
> -    struct tb_tc s = { .ptr = (void *)tc_ptr };
> +    struct tb_tc s;
>  
> +    if (rt == NULL) {
> +        return NULL;
> +    }
> +
> +    s.ptr = (void *)tc_ptr;
>      qemu_mutex_lock(&rt->lock);
>      tb = g_tree_lookup(rt->tree, &s);
>      qemu_mutex_unlock(&rt->lock);
Alex Bennée Jan. 28, 2021, 1:54 p.m. UTC | #2
Alex Bennée <alex.bennee@linaro.org> writes:

> Richard Henderson <richard.henderson@linaro.org> writes:
>
>> The use in tcg_tb_lookup is given a random pc that comes from the pc
>> of a signal handler.  Do not assert that the pointer is already within
>> the code gen buffer at all, much less the writable mirror of it.
>>
>> Fixes: db0c51a3803
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>
> OK I bisected to this regression while running:
>
>   "cd builds/bisect && rm -rf * && ../../configure --disable-docs --target-list=m68k-linux-user && make -j30 && make check-tcg"
>
> which ultimately fails during the threadcount test for m68k-linux-user.
> I'm just testing now to see if that also broke my ARM system test
> images.

For my ARM test system:

./qemu-system-aarch64 -machine virt,virtualization=on -cpu cortex-a57
-serial mon:stdio -nic user,model=virtio-net-pci,hostfwd=tcp::2222-:22
-device virtio-scsi-pci -drive
file=/dev/zvol/hackpool-0/debian-bullseye-arm64,id=hd0,index=0,if=none,format=raw
-device scsi-hd,drive=hd0 -display none -m 4096 -smp 4 -drive
if=pflash,file=/usr/share/AAVMF/AAVMF_CODE.fd,format=raw,readonly -drive
if=pflash,file=$HOME/images/AAVMF_VARS.fd,format=raw


Yep with this:

  [    2.948980] Checked W+X mappings: passed, no W+X pages found
  [    2.949443] Run /init as init process
  [    2.959082] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
  [    2.959563] CPU: 3 PID: 1 Comm: init Not tainted 5.10.0-1-arm64 #1 Debian 5.10.4-1
  [    2.959768] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
  [    2.960144] Call trace:
  [    2.960267]  dump_backtrace+0x0/0x1e4
  [    2.960491]  show_stack+0x24/0x6c
  [    2.960699]  dump_stack+0xd0/0x12c
  [    2.960862]  panic+0x168/0x370
  [    2.960990]  do_exit+0x9a8/0x9c0
  [    2.961072]  do_group_exit+0x44/0xac
  [    2.961163]  get_signal+0x174/0x910
  [    2.961256]  do_notify_resume+0x22c/0x9a0
  [    2.961355]  work_pending+0xc/0x39c
  [    2.961849] SMP: stopping secondary CPUs
  [    2.962341] Kernel Offset: disabled
  [    2.962585] CPU features: 0x0240022,61006082
  [    2.962729] Memory Limit: none
  [    2.963158] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
  QEMU 5.2.50 monitor - type 'help' for more information
  (qemu) quit

Where as I can boot from the commit before....

>
>> ---
>>
>> For TCI, this indicates a bug in handle_cpu_signal, in that we
>> are taking PC from the host signal frame.  Which is, nearly,
>> unrelated to TCI at all.
>>
>> The TCI "pc" is tci_tb_ptr (fixed in the next patch to at least
>> be thread-local).  We update this only on calls, since we don't
>> expect SEGV during the interpretation loop.  Which works ok for
>> softmmu, in which we pass down pc by hand to the helpers, but
>> is not ok for user-only, where we simply perform the raw memory
>> operation.
>>
>> I don't know how to fix this, exactly.  Probably by storing to
>> tci_tb_ptr before each qemu_ld/qemu_st operation, with barriers.
>> Then Doing the Right Thing in handle_cpu_signal.  And perhaps
>> by clearing tci_tb_ptr whenever we're not expecting a SEGV on
>> behalf of the guest (and thus anything left is a qemu host bug).
>>
>>
>> r~
>>
>> ---
>>  tcg/tcg.c | 23 ++++++++++++++++++++---
>>  1 file changed, 20 insertions(+), 3 deletions(-)
>>
>> diff --git a/tcg/tcg.c b/tcg/tcg.c
>> index 9e1b0d73c7..78701cf359 100644
>> --- a/tcg/tcg.c
>> +++ b/tcg/tcg.c
>> @@ -407,11 +407,21 @@ static void tcg_region_trees_init(void)
>>      }
>>  }
>>  
>> -static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
>> +static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
>>  {
>> -    void *p = tcg_splitwx_to_rw(cp);
>>      size_t region_idx;
>>  
>> +    /*
>> +     * Like tcg_splitwx_to_rw, with no assert.  The pc may come from
>> +     * a signal handler over which the caller has no control.
>> +     */
>> +    if (!in_code_gen_buffer(p)) {
>> +        p -= tcg_splitwx_diff;
>> +        if (!in_code_gen_buffer(p)) {
>> +            return NULL;
>> +        }
>> +    }
>> +
>>      if (p < region.start_aligned) {
>>          region_idx = 0;
>>      } else {
>> @@ -430,6 +440,7 @@ void tcg_tb_insert(TranslationBlock *tb)
>>  {
>>      struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
>>  
>> +    g_assert(rt != NULL);
>>      qemu_mutex_lock(&rt->lock);
>>      g_tree_insert(rt->tree, &tb->tc, tb);
>>      qemu_mutex_unlock(&rt->lock);
>> @@ -439,6 +450,7 @@ void tcg_tb_remove(TranslationBlock *tb)
>>  {
>>      struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
>>  
>> +    g_assert(rt != NULL);
>>      qemu_mutex_lock(&rt->lock);
>>      g_tree_remove(rt->tree, &tb->tc);
>>      qemu_mutex_unlock(&rt->lock);
>> @@ -453,8 +465,13 @@ TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
>>  {
>>      struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
>>      TranslationBlock *tb;
>> -    struct tb_tc s = { .ptr = (void *)tc_ptr };
>> +    struct tb_tc s;
>>  
>> +    if (rt == NULL) {
>> +        return NULL;
>> +    }
>> +
>> +    s.ptr = (void *)tc_ptr;
>>      qemu_mutex_lock(&rt->lock);
>>      tb = g_tree_lookup(rt->tree, &s);
>>      qemu_mutex_unlock(&rt->lock);
diff mbox series

Patch

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 9e1b0d73c7..78701cf359 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -407,11 +407,21 @@  static void tcg_region_trees_init(void)
     }
 }
 
-static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
+static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
 {
-    void *p = tcg_splitwx_to_rw(cp);
     size_t region_idx;
 
+    /*
+     * Like tcg_splitwx_to_rw, with no assert.  The pc may come from
+     * a signal handler over which the caller has no control.
+     */
+    if (!in_code_gen_buffer(p)) {
+        p -= tcg_splitwx_diff;
+        if (!in_code_gen_buffer(p)) {
+            return NULL;
+        }
+    }
+
     if (p < region.start_aligned) {
         region_idx = 0;
     } else {
@@ -430,6 +440,7 @@  void tcg_tb_insert(TranslationBlock *tb)
 {
     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
 
+    g_assert(rt != NULL);
     qemu_mutex_lock(&rt->lock);
     g_tree_insert(rt->tree, &tb->tc, tb);
     qemu_mutex_unlock(&rt->lock);
@@ -439,6 +450,7 @@  void tcg_tb_remove(TranslationBlock *tb)
 {
     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
 
+    g_assert(rt != NULL);
     qemu_mutex_lock(&rt->lock);
     g_tree_remove(rt->tree, &tb->tc);
     qemu_mutex_unlock(&rt->lock);
@@ -453,8 +465,13 @@  TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
 {
     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
     TranslationBlock *tb;
-    struct tb_tc s = { .ptr = (void *)tc_ptr };
+    struct tb_tc s;
 
+    if (rt == NULL) {
+        return NULL;
+    }
+
+    s.ptr = (void *)tc_ptr;
     qemu_mutex_lock(&rt->lock);
     tb = g_tree_lookup(rt->tree, &s);
     qemu_mutex_unlock(&rt->lock);