diff mbox series

[1/4] cpu: free cpu->tb_jmp_cache with RCU

Message ID 20230108163905.59601-2-cota@braap.org (mailing list archive)
State New, archived
Headers show
Series tsan fixes | expand

Commit Message

Emilio Cota Jan. 8, 2023, 4:39 p.m. UTC
Fixes the appended use-after-free. The root cause is that
during tb invalidation we use CPU_FOREACH, and therefore
to safely free a vCPU we must wait for an RCU grace period
to elapse.

$ x86_64-linux-user/qemu-x86_64 tests/tcg/x86_64-linux-user/munmap-pthread
=================================================================
==1800604==ERROR: AddressSanitizer: heap-use-after-free on address 0x62d0005f7418 at pc 0x5593da6704eb bp 0x7f4961a7ac70 sp 0x7f4961a7ac60
READ of size 8 at 0x62d0005f7418 thread T2
    #0 0x5593da6704ea in tb_jmp_cache_inval_tb ../accel/tcg/tb-maint.c:244
    #1 0x5593da6704ea in do_tb_phys_invalidate ../accel/tcg/tb-maint.c:290
    #2 0x5593da670631 in tb_phys_invalidate__locked ../accel/tcg/tb-maint.c:306
    #3 0x5593da670631 in tb_invalidate_phys_page_range__locked ../accel/tcg/tb-maint.c:542
    #4 0x5593da67106d in tb_invalidate_phys_range ../accel/tcg/tb-maint.c:614
    #5 0x5593da6a64d4 in target_munmap ../linux-user/mmap.c:766
    #6 0x5593da6dba05 in do_syscall1 ../linux-user/syscall.c:10105
    #7 0x5593da6f564c in do_syscall ../linux-user/syscall.c:13329
    #8 0x5593da49e80c in cpu_loop ../linux-user/x86_64/../i386/cpu_loop.c:233
    #9 0x5593da6be28c in clone_func ../linux-user/syscall.c:6633
    #10 0x7f496231cb42 in start_thread nptl/pthread_create.c:442
    #11 0x7f49623ae9ff  (/lib/x86_64-linux-gnu/libc.so.6+0x1269ff)

0x62d0005f7418 is located 28696 bytes inside of 32768-byte region [0x62d0005f0400,0x62d0005f8400)
freed by thread T148 here:
    #0 0x7f49627b6460 in __interceptor_free ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:52
    #1 0x5593da5ac057 in cpu_exec_unrealizefn ../cpu.c:180
    #2 0x5593da81f851  (/home/cota/src/qemu/build/qemu-x86_64+0x484851)

Signed-off-by: Emilio Cota <cota@braap.org>
---
 accel/tcg/cpu-exec.c  | 1 -
 cpu.c                 | 7 +++++++
 include/hw/core/cpu.h | 3 +++
 3 files changed, 10 insertions(+), 1 deletion(-)

Comments

Richard Henderson Jan. 8, 2023, 7:19 p.m. UTC | #1
On 1/8/23 08:39, Emilio Cota wrote:
> Fixes the appended use-after-free. The root cause is that
> during tb invalidation we use CPU_FOREACH, and therefore
> to safely free a vCPU we must wait for an RCU grace period
> to elapse.
> 
> $ x86_64-linux-user/qemu-x86_64 tests/tcg/x86_64-linux-user/munmap-pthread
> =================================================================
> ==1800604==ERROR: AddressSanitizer: heap-use-after-free on address 0x62d0005f7418 at pc 0x5593da6704eb bp 0x7f4961a7ac70 sp 0x7f4961a7ac60
> READ of size 8 at 0x62d0005f7418 thread T2
>      #0 0x5593da6704ea in tb_jmp_cache_inval_tb ../accel/tcg/tb-maint.c:244
>      #1 0x5593da6704ea in do_tb_phys_invalidate ../accel/tcg/tb-maint.c:290
>      #2 0x5593da670631 in tb_phys_invalidate__locked ../accel/tcg/tb-maint.c:306
>      #3 0x5593da670631 in tb_invalidate_phys_page_range__locked ../accel/tcg/tb-maint.c:542
>      #4 0x5593da67106d in tb_invalidate_phys_range ../accel/tcg/tb-maint.c:614
>      #5 0x5593da6a64d4 in target_munmap ../linux-user/mmap.c:766
>      #6 0x5593da6dba05 in do_syscall1 ../linux-user/syscall.c:10105
>      #7 0x5593da6f564c in do_syscall ../linux-user/syscall.c:13329
>      #8 0x5593da49e80c in cpu_loop ../linux-user/x86_64/../i386/cpu_loop.c:233
>      #9 0x5593da6be28c in clone_func ../linux-user/syscall.c:6633
>      #10 0x7f496231cb42 in start_thread nptl/pthread_create.c:442
>      #11 0x7f49623ae9ff  (/lib/x86_64-linux-gnu/libc.so.6+0x1269ff)
> 
> 0x62d0005f7418 is located 28696 bytes inside of 32768-byte region [0x62d0005f0400,0x62d0005f8400)
> freed by thread T148 here:
>      #0 0x7f49627b6460 in __interceptor_free ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:52
>      #1 0x5593da5ac057 in cpu_exec_unrealizefn ../cpu.c:180
>      #2 0x5593da81f851  (/home/cota/src/qemu/build/qemu-x86_64+0x484851)
> 
> Signed-off-by: Emilio Cota <cota@braap.org>
> ---
>   accel/tcg/cpu-exec.c  | 1 -
>   cpu.c                 | 7 +++++++
>   include/hw/core/cpu.h | 3 +++
>   3 files changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
> index 356fe348de..ca95d21528 100644
> --- a/accel/tcg/cpu-exec.c
> +++ b/accel/tcg/cpu-exec.c
> @@ -1070,7 +1070,6 @@ void tcg_exec_unrealizefn(CPUState *cpu)
>   #endif /* !CONFIG_USER_ONLY */
>   
>       tlb_destroy(cpu);
> -    g_free(cpu->tb_jmp_cache);

Can you simply use g_free_rcu here?

>   }
>   
>   #ifndef CONFIG_USER_ONLY
> diff --git a/cpu.c b/cpu.c
> index 4a7d865427..564200559f 100644
> --- a/cpu.c
> +++ b/cpu.c
> @@ -164,6 +164,12 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
>   #endif /* CONFIG_USER_ONLY */
>   }
>   
> +static void cpu_free_rcu(CPUState *cpu)
> +{
> +    /* .tb_jmp_cache is NULL except under TCG */
> +    g_free(cpu->tb_jmp_cache);
> +}
> +
>   void cpu_exec_unrealizefn(CPUState *cpu)
>   {
>   #ifndef CONFIG_USER_ONLY
> @@ -181,6 +187,7 @@ void cpu_exec_unrealizefn(CPUState *cpu)
>       }
>   
>       cpu_list_remove(cpu);
> +    call_rcu(cpu, cpu_free_rcu, rcu);

Certainly this seems wrong, exposing tb_jmp_cache beyond tcg.


r~

Emilio Cota Jan. 9, 2023, 9:52 p.m. UTC | #2
On Sun, Jan 08, 2023 at 11:19:53 -0800, Richard Henderson wrote:
> On 1/8/23 08:39, Emilio Cota wrote:
(snip)
> > diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
> > index 356fe348de..ca95d21528 100644
> > --- a/accel/tcg/cpu-exec.c
> > +++ b/accel/tcg/cpu-exec.c
> > @@ -1070,7 +1070,6 @@ void tcg_exec_unrealizefn(CPUState *cpu)
> >   #endif /* !CONFIG_USER_ONLY */
> >       tlb_destroy(cpu);
> > -    g_free(cpu->tb_jmp_cache);
> 
> Can you simply use g_free_rcu here?

Yes, although the CPU must already have been removed from the
RCU-protected CPU list before doing so.

> > diff --git a/cpu.c b/cpu.c
> > index 4a7d865427..564200559f 100644
> > --- a/cpu.c
> > +++ b/cpu.c
> > @@ -164,6 +164,12 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
> >   #endif /* CONFIG_USER_ONLY */
> >   }
> > +static void cpu_free_rcu(CPUState *cpu)
> > +{
> > +    /* .tb_jmp_cache is NULL except under TCG */
> > +    g_free(cpu->tb_jmp_cache);
> > +}
> > +
> >   void cpu_exec_unrealizefn(CPUState *cpu)
> >   {
> >   #ifndef CONFIG_USER_ONLY
> > @@ -181,6 +187,7 @@ void cpu_exec_unrealizefn(CPUState *cpu)
> >       }
> >       cpu_list_remove(cpu);
> > +    call_rcu(cpu, cpu_free_rcu, rcu);
> 
> Certainly this seems wrong, exposing tb_jmp_cache beyond tcg.

I've changed this in v2 to call tcg_exec_unrealizefn after
cpu_list_remove.

An alternative would be to call the whole cpu_exec_unrealizefn
after an RCU grace period, but I think that might be more trouble
than it's worth.

Thanks,
		Emilio
diff mbox series

Patch

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 356fe348de..ca95d21528 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -1070,7 +1070,6 @@  void tcg_exec_unrealizefn(CPUState *cpu)
 #endif /* !CONFIG_USER_ONLY */
 
     tlb_destroy(cpu);
-    g_free(cpu->tb_jmp_cache);
 }
 
 #ifndef CONFIG_USER_ONLY
diff --git a/cpu.c b/cpu.c
index 4a7d865427..564200559f 100644
--- a/cpu.c
+++ b/cpu.c
@@ -164,6 +164,12 @@  void cpu_exec_realizefn(CPUState *cpu, Error **errp)
 #endif /* CONFIG_USER_ONLY */
 }
 
+static void cpu_free_rcu(CPUState *cpu)
+{
+    /* .tb_jmp_cache is NULL except under TCG */
+    g_free(cpu->tb_jmp_cache);
+}
+
 void cpu_exec_unrealizefn(CPUState *cpu)
 {
 #ifndef CONFIG_USER_ONLY
@@ -181,6 +187,7 @@  void cpu_exec_unrealizefn(CPUState *cpu)
     }
 
     cpu_list_remove(cpu);
+    call_rcu(cpu, cpu_free_rcu, rcu);
 }
 
 /*
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 8830546121..9fefad5656 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -266,6 +266,7 @@  struct qemu_work_item;
 
 /**
  * CPUState:
+ * @rcu: Used for safe deferred memory reclamation.
  * @cpu_index: CPU index (informative).
  * @cluster_index: Identifies which cluster this CPU is in.
  *   For boards which don't define clusters or for "loose" CPUs not assigned
@@ -321,6 +322,8 @@  struct qemu_work_item;
  * State of one CPU core or thread.
  */
 struct CPUState {
+    struct rcu_head rcu;
+
     /*< private >*/
     DeviceState parent_obj;
     /* cache to avoid expensive CPU_GET_CLASS */