Message ID | 1466375313-7562-7-git-send-email-sergey.fedorov@linaro.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Sergey Fedorov <sergey.fedorov@linaro.org> writes: > From: Sergey Fedorov <serge.fdrv@gmail.com> > > Make CPU work core functions common between system and user-mode > emulation. User-mode does not have BQL, so flush_queued_work() is > protected by 'exclusive_lock'. > > Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com> > Signed-off-by: Sergey Fedorov <sergey.fedorov@linaro.org> > --- > cpu-exec-common.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++ > cpus.c | 87 ++----------------------------------------------- > include/exec/exec-all.h | 4 +++ > linux-user/main.c | 13 ++++++++ > 4 files changed, 102 insertions(+), 85 deletions(-) > > diff --git a/cpu-exec-common.c b/cpu-exec-common.c > index 0cb4ae60eff9..8184e0662cbd 100644 > --- a/cpu-exec-common.c > +++ b/cpu-exec-common.c > @@ -77,3 +77,86 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc) > } > siglongjmp(cpu->jmp_env, 1); > } > + > +static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi) > +{ > + qemu_mutex_lock(&cpu->work_mutex); > + if (cpu->queued_work_first == NULL) { > + cpu->queued_work_first = wi; > + } else { > + cpu->queued_work_last->next = wi; > + } > + cpu->queued_work_last = wi; > + wi->next = NULL; > + wi->done = false; > + qemu_mutex_unlock(&cpu->work_mutex); > + > + qemu_cpu_kick(cpu); > +} > + > +void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data) > +{ > + struct qemu_work_item wi; > + > + if (qemu_cpu_is_self(cpu)) { > + func(cpu, data); > + return; > + } > + > + wi.func = func; > + wi.data = data; > + wi.free = false; > + > + queue_work_on_cpu(cpu, &wi); > + while (!atomic_mb_read(&wi.done)) { > + CPUState *self_cpu = current_cpu; > + > + wait_cpu_work(); > + current_cpu = self_cpu; > + } > +} > + > +void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data) > +{ > + struct qemu_work_item *wi; > + > + if (qemu_cpu_is_self(cpu)) { > + func(cpu, data); > + return; > + } > + > + wi = g_malloc0(sizeof(struct qemu_work_item)); > + wi->func = func; > + wi->data = data; > + wi->free = true; > + > + queue_work_on_cpu(cpu, wi); > +} > + > +void flush_queued_work(CPUState *cpu) > +{ > + struct qemu_work_item *wi; > + > + if (cpu->queued_work_first == NULL) { > + return; > + } > + > + qemu_mutex_lock(&cpu->work_mutex); > + while (cpu->queued_work_first != NULL) { > + wi = cpu->queued_work_first; > + cpu->queued_work_first = wi->next; > + if (!cpu->queued_work_first) { > + cpu->queued_work_last = NULL; > + } > + qemu_mutex_unlock(&cpu->work_mutex); > + wi->func(cpu, wi->data); > + qemu_mutex_lock(&cpu->work_mutex); > + if (wi->free) { > + g_free(wi); > + } else { > + atomic_mb_set(&wi->done, true); > + } > + } > + qemu_mutex_unlock(&cpu->work_mutex); > + signal_cpu_work(); > +} > diff --git a/cpus.c b/cpus.c > index f123eb707cc6..98f60f6f98f5 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -910,71 +910,16 @@ void qemu_init_cpu_loop(void) > qemu_thread_get_self(&io_thread); > } > > -static void wait_cpu_work(void) > +void wait_cpu_work(void) > { > qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex); > } > > -static void signal_cpu_work(void) > +void signal_cpu_work(void) > { > qemu_cond_broadcast(&qemu_work_cond); > } > > -static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi) > -{ > - qemu_mutex_lock(&cpu->work_mutex); > - if (cpu->queued_work_first == NULL) { > - cpu->queued_work_first = wi; > - } else { > - cpu->queued_work_last->next = wi; > - } > - cpu->queued_work_last = wi; > - wi->next = NULL; > - wi->done = false; > - qemu_mutex_unlock(&cpu->work_mutex); > - > - qemu_cpu_kick(cpu); > -} > - > -void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data) > -{ > - struct qemu_work_item wi; > - > - if (qemu_cpu_is_self(cpu)) { > - func(cpu, data); > - return; > - } > - > - wi.func = func; > - wi.data = data; > - wi.free = false; > - > - queue_work_on_cpu(cpu, &wi); > - while (!atomic_mb_read(&wi.done)) { > - CPUState *self_cpu = current_cpu; > - > - wait_cpu_work(); > - current_cpu = self_cpu; > - } > -} > - > -void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data) > -{ > - struct qemu_work_item *wi; > - > - if (qemu_cpu_is_self(cpu)) { > - func(cpu, data); > - return; > - } > - > - wi = g_malloc0(sizeof(struct qemu_work_item)); > - wi->func = func; > - wi->data = data; > - wi->free = true; > - > - queue_work_on_cpu(cpu, wi); > -} > - > static void qemu_kvm_destroy_vcpu(CPUState *cpu) > { > if (kvm_destroy_vcpu(cpu) < 0) { > @@ -987,34 +932,6 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu) > { > } > > -static void flush_queued_work(CPUState *cpu) > -{ > - struct qemu_work_item *wi; > - > - if (cpu->queued_work_first == NULL) { > - return; > - } > - > - qemu_mutex_lock(&cpu->work_mutex); > - while (cpu->queued_work_first != NULL) { > - wi = cpu->queued_work_first; > - cpu->queued_work_first = wi->next; > - if (!cpu->queued_work_first) { > - cpu->queued_work_last = NULL; > - } > - qemu_mutex_unlock(&cpu->work_mutex); > - wi->func(cpu, wi->data); > - qemu_mutex_lock(&cpu->work_mutex); > - if (wi->free) { > - g_free(wi); > - } else { > - atomic_mb_set(&wi->done, true); > - } > - } > - qemu_mutex_unlock(&cpu->work_mutex); > - signal_cpu_work(); > -} > - > static void qemu_wait_io_event_common(CPUState *cpu) > { > if (cpu->stop) { > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index c1f59fa59d2c..23b4b50e0a45 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -407,4 +407,8 @@ extern int singlestep; > extern CPUState *tcg_current_cpu; > extern bool exit_request; > > +void wait_cpu_work(void); > +void signal_cpu_work(void); > +void flush_queued_work(CPUState *cpu); > + Now these are public APIs (and have multiple implementations) some doc comments would be useful here. > #endif > diff --git a/linux-user/main.c b/linux-user/main.c > index 0093a8008c8e..5a68651159c2 100644 > --- a/linux-user/main.c > +++ b/linux-user/main.c > @@ -111,6 +111,7 @@ static pthread_mutex_t cpu_list_mutex = PTHREAD_MUTEX_INITIALIZER; > static pthread_mutex_t exclusive_lock = PTHREAD_MUTEX_INITIALIZER; > static pthread_cond_t exclusive_cond = PTHREAD_COND_INITIALIZER; > static pthread_cond_t exclusive_resume = PTHREAD_COND_INITIALIZER; > +static pthread_cond_t work_cond = PTHREAD_COND_INITIALIZER; > static bool exclusive_pending; > static int tcg_pending_cpus; > > @@ -140,6 +141,7 @@ void fork_end(int child) > pthread_mutex_init(&cpu_list_mutex, NULL); > pthread_cond_init(&exclusive_cond, NULL); > pthread_cond_init(&exclusive_resume, NULL); > + pthread_cond_init(&work_cond, NULL); > qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); > gdbserver_fork(thread_cpu); > } else { > @@ -148,6 +150,16 @@ void fork_end(int child) > } > } > > +void wait_cpu_work(void) > +{ > + pthread_cond_wait(&work_cond, &exclusive_lock); > +} > + > +void signal_cpu_work(void) > +{ > + pthread_cond_broadcast(&work_cond); > +} > + > /* Wait for pending exclusive operations to complete. The exclusive lock > must be held. */ > static inline void exclusive_idle(void) > @@ -206,6 +218,7 @@ static inline void cpu_exec_end(CPUState *cpu) > pthread_cond_broadcast(&exclusive_cond); > } > exclusive_idle(); > + flush_queued_work(cpu); > pthread_mutex_unlock(&exclusive_lock); > } -- Alex Bennée
On 27/06/16 12:31, Alex Bennée wrote: > Sergey Fedorov <sergey.fedorov@linaro.org> writes: > >> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h >> index c1f59fa59d2c..23b4b50e0a45 100644 >> --- a/include/exec/exec-all.h >> +++ b/include/exec/exec-all.h >> @@ -407,4 +407,8 @@ extern int singlestep; >> extern CPUState *tcg_current_cpu; >> extern bool exit_request; >> >> +void wait_cpu_work(void); >> +void signal_cpu_work(void); >> +void flush_queued_work(CPUState *cpu); >> + > Now these are public APIs (and have multiple implementations) some doc > comments would be useful here. Sure, I'll do this as soon as I'm sure that this is a right approach. Thanks, Sergey
Sergey Fedorov <sergey.fedorov@linaro.org> writes: > From: Sergey Fedorov <serge.fdrv@gmail.com> > > Make CPU work core functions common between system and user-mode > emulation. User-mode does not have BQL, so flush_queued_work() is > protected by 'exclusive_lock'. > > Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com> > Signed-off-by: Sergey Fedorov <sergey.fedorov@linaro.org> > --- > cpu-exec-common.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++ > cpus.c | 87 ++----------------------------------------------- > include/exec/exec-all.h | 4 +++ > linux-user/main.c | 13 ++++++++ > 4 files changed, 102 insertions(+), 85 deletions(-) > > diff --git a/cpu-exec-common.c b/cpu-exec-common.c > index 0cb4ae60eff9..8184e0662cbd 100644 > --- a/cpu-exec-common.c > +++ b/cpu-exec-common.c > @@ -77,3 +77,86 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc) > } > siglongjmp(cpu->jmp_env, 1); > } > + > +static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi) > +{ > + qemu_mutex_lock(&cpu->work_mutex); > + if (cpu->queued_work_first == NULL) { > + cpu->queued_work_first = wi; > + } else { > + cpu->queued_work_last->next = wi; > + } > + cpu->queued_work_last = wi; > + wi->next = NULL; > + wi->done = false; > + qemu_mutex_unlock(&cpu->work_mutex); > + > + qemu_cpu_kick(cpu); > +} > + > +void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data) > +{ > + struct qemu_work_item wi; > + > + if (qemu_cpu_is_self(cpu)) { > + func(cpu, data); > + return; > + } > + > + wi.func = func; > + wi.data = data; > + wi.free = false; > + > + queue_work_on_cpu(cpu, &wi); > + while (!atomic_mb_read(&wi.done)) { > + CPUState *self_cpu = current_cpu; > + > + wait_cpu_work(); > + current_cpu = self_cpu; > + } > +} > + > +void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data) > +{ > + struct qemu_work_item *wi; > + > + if (qemu_cpu_is_self(cpu)) { > + func(cpu, data); > + return; > + } > + > + wi = g_malloc0(sizeof(struct qemu_work_item)); > + wi->func = func; > + wi->data = data; > + wi->free = true; > + > + queue_work_on_cpu(cpu, wi); > +} > + > +void flush_queued_work(CPUState *cpu) > +{ > + struct qemu_work_item *wi; > + > + if (cpu->queued_work_first == NULL) { > + return; > + } > + > + qemu_mutex_lock(&cpu->work_mutex); > + while (cpu->queued_work_first != NULL) { > + wi = cpu->queued_work_first; > + cpu->queued_work_first = wi->next; > + if (!cpu->queued_work_first) { > + cpu->queued_work_last = NULL; > + } > + qemu_mutex_unlock(&cpu->work_mutex); > + wi->func(cpu, wi->data); > + qemu_mutex_lock(&cpu->work_mutex); > + if (wi->free) { > + g_free(wi); > + } else { > + atomic_mb_set(&wi->done, true); > + } > + } > + qemu_mutex_unlock(&cpu->work_mutex); > + signal_cpu_work(); > +} > diff --git a/cpus.c b/cpus.c > index f123eb707cc6..98f60f6f98f5 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -910,71 +910,16 @@ void qemu_init_cpu_loop(void) > qemu_thread_get_self(&io_thread); > } > > -static void wait_cpu_work(void) > +void wait_cpu_work(void) > { > qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex); > } > > -static void signal_cpu_work(void) > +void signal_cpu_work(void) > { > qemu_cond_broadcast(&qemu_work_cond); > } > > -static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi) > -{ > - qemu_mutex_lock(&cpu->work_mutex); > - if (cpu->queued_work_first == NULL) { > - cpu->queued_work_first = wi; > - } else { > - cpu->queued_work_last->next = wi; > - } > - cpu->queued_work_last = wi; > - wi->next = NULL; > - wi->done = false; > - qemu_mutex_unlock(&cpu->work_mutex); > - > - qemu_cpu_kick(cpu); > -} > - > -void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data) > -{ > - struct qemu_work_item wi; > - > - if (qemu_cpu_is_self(cpu)) { > - func(cpu, data); > - return; > - } > - > - wi.func = func; > - wi.data = data; > - wi.free = false; > - > - queue_work_on_cpu(cpu, &wi); > - while (!atomic_mb_read(&wi.done)) { > - CPUState *self_cpu = current_cpu; > - > - wait_cpu_work(); > - current_cpu = self_cpu; > - } > -} > - > -void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data) > -{ > - struct qemu_work_item *wi; > - > - if (qemu_cpu_is_self(cpu)) { > - func(cpu, data); > - return; > - } > - > - wi = g_malloc0(sizeof(struct qemu_work_item)); > - wi->func = func; > - wi->data = data; > - wi->free = true; > - > - queue_work_on_cpu(cpu, wi); > -} > - > static void qemu_kvm_destroy_vcpu(CPUState *cpu) > { > if (kvm_destroy_vcpu(cpu) < 0) { > @@ -987,34 +932,6 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu) > { > } > > -static void flush_queued_work(CPUState *cpu) > -{ > - struct qemu_work_item *wi; > - > - if (cpu->queued_work_first == NULL) { > - return; > - } > - > - qemu_mutex_lock(&cpu->work_mutex); > - while (cpu->queued_work_first != NULL) { > - wi = cpu->queued_work_first; > - cpu->queued_work_first = wi->next; > - if (!cpu->queued_work_first) { > - cpu->queued_work_last = NULL; > - } > - qemu_mutex_unlock(&cpu->work_mutex); > - wi->func(cpu, wi->data); > - qemu_mutex_lock(&cpu->work_mutex); > - if (wi->free) { > - g_free(wi); > - } else { > - atomic_mb_set(&wi->done, true); > - } > - } > - qemu_mutex_unlock(&cpu->work_mutex); > - signal_cpu_work(); > -} > - > static void qemu_wait_io_event_common(CPUState *cpu) > { > if (cpu->stop) { > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index c1f59fa59d2c..23b4b50e0a45 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -407,4 +407,8 @@ extern int singlestep; > extern CPUState *tcg_current_cpu; > extern bool exit_request; > > +void wait_cpu_work(void); > +void signal_cpu_work(void); > +void flush_queued_work(CPUState *cpu); > + > #endif > diff --git a/linux-user/main.c b/linux-user/main.c > index 0093a8008c8e..5a68651159c2 100644 > --- a/linux-user/main.c > +++ b/linux-user/main.c > @@ -111,6 +111,7 @@ static pthread_mutex_t cpu_list_mutex = PTHREAD_MUTEX_INITIALIZER; > static pthread_mutex_t exclusive_lock = PTHREAD_MUTEX_INITIALIZER; > static pthread_cond_t exclusive_cond = PTHREAD_COND_INITIALIZER; > static pthread_cond_t exclusive_resume = PTHREAD_COND_INITIALIZER; > +static pthread_cond_t work_cond = PTHREAD_COND_INITIALIZER; > static bool exclusive_pending; > static int tcg_pending_cpus; > > @@ -140,6 +141,7 @@ void fork_end(int child) > pthread_mutex_init(&cpu_list_mutex, NULL); > pthread_cond_init(&exclusive_cond, NULL); > pthread_cond_init(&exclusive_resume, NULL); > + pthread_cond_init(&work_cond, NULL); > qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); > gdbserver_fork(thread_cpu); > } else { > @@ -148,6 +150,16 @@ void fork_end(int child) > } > } > > +void wait_cpu_work(void) > +{ > + pthread_cond_wait(&work_cond, &exclusive_lock); > +} > + > +void signal_cpu_work(void) > +{ > + pthread_cond_broadcast(&work_cond); > +} > + > /* Wait for pending exclusive operations to complete. The exclusive lock > must be held. */ > static inline void exclusive_idle(void) > @@ -206,6 +218,7 @@ static inline void cpu_exec_end(CPUState *cpu) > pthread_cond_broadcast(&exclusive_cond); > } > exclusive_idle(); > + flush_queued_work(cpu); > pthread_mutex_unlock(&exclusive_lock); > } So I think there is a deadlock we can get with the async work: (gdb) thread apply all bt Thread 11 (Thread 0x7ffefeca7700 (LWP 2912)): #0 pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185 #1 0x00005555555cb777 in wait_cpu_work () at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:155 #2 0x00005555555a0cee in wait_safe_cpu_work () at /home/alex/lsrc/qemu/qemu.git/cpu-exec-common.c:87 #3 0x00005555555cb8fe in cpu_exec_end (cpu=0x555555bb67e0) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:222 #4 0x00005555555cc7a7 in cpu_loop (env=0x555555bbea58) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:749 #5 0x00005555555db0b2 in clone_func (arg=0x7fffffffc9c0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:5424 #6 0x00007ffff6bed6fa in start_thread (arg=0x7ffefeca7700) at pthread_create.c:333 #7 0x00007ffff6923b5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109 <a bunch of other threads doing the same and then...> Thread 3 (Thread 0x7ffff7f38700 (LWP 2904)): #0 0x00005555555faf5d in safe_syscall_base () #1 0x00005555555cfeaf in safe_futex (uaddr=0x7ffff528a0a4, op=128, val=1, timeout=0x0, uaddr2=0x0, val3=-162668384) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:706 #2 0x00005555555dd7cc in do_futex (uaddr=4132298916, op=128, val=1, timeout=0, uaddr2=0, val3=-162668384) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:6246 #3 0x00005555555e8cdb in do_syscall (cpu_env=0x555555a81118, num=240, arg1=-162668380, arg2=128, arg3=1, arg4=0, arg5=0, arg6=-162668384, arg7=0, arg8=0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:10642 #4 0x00005555555cd20e in cpu_loop (env=0x555555a81118) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:883 #5 0x00005555555db0b2 in clone_func (arg=0x7fffffffc9c0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:5424 #6 0x00007ffff6bed6fa in start_thread (arg=0x7ffff7f38700) at pthread_create.c:333 #7 0x00007ffff6923b5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109 So everything is stalled awaiting this thread waking up and draining its queue. So for linux-user I think we need some mechanism to kick these syscalls which I assume means throwing a signal at it. -- Alex Bennée
On 29/06/16 19:17, Alex Bennée wrote: > So I think there is a deadlock we can get with the async work: > > (gdb) thread apply all bt > > Thread 11 (Thread 0x7ffefeca7700 (LWP 2912)): > #0 pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185 > #1 0x00005555555cb777 in wait_cpu_work () at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:155 > #2 0x00005555555a0cee in wait_safe_cpu_work () at /home/alex/lsrc/qemu/qemu.git/cpu-exec-common.c:87 > #3 0x00005555555cb8fe in cpu_exec_end (cpu=0x555555bb67e0) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:222 > #4 0x00005555555cc7a7 in cpu_loop (env=0x555555bbea58) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:749 > #5 0x00005555555db0b2 in clone_func (arg=0x7fffffffc9c0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:5424 > #6 0x00007ffff6bed6fa in start_thread (arg=0x7ffefeca7700) at pthread_create.c:333 > #7 0x00007ffff6923b5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109 > > <a bunch of other threads doing the same and then...> > > Thread 3 (Thread 0x7ffff7f38700 (LWP 2904)): > #0 0x00005555555faf5d in safe_syscall_base () > #1 0x00005555555cfeaf in safe_futex (uaddr=0x7ffff528a0a4, op=128, val=1, timeout=0x0, uaddr2=0x0, val3=-162668384) > at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:706 > #2 0x00005555555dd7cc in do_futex (uaddr=4132298916, op=128, val=1, timeout=0, uaddr2=0, val3=-162668384) > at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:6246 > #3 0x00005555555e8cdb in do_syscall (cpu_env=0x555555a81118, num=240, arg1=-162668380, arg2=128, arg3=1, arg4=0, arg5=0, arg6=-162668384, > arg7=0, arg8=0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:10642 > #4 0x00005555555cd20e in cpu_loop (env=0x555555a81118) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:883 > #5 0x00005555555db0b2 in clone_func (arg=0x7fffffffc9c0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:5424 > #6 0x00007ffff6bed6fa in start_thread (arg=0x7ffff7f38700) at pthread_create.c:333 > #7 0x00007ffff6923b5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109 > > So everything is stalled awaiting this thread waking up and draining > its queue. So for linux-user I think we need some mechanism to kick > these syscalls which I assume means throwing a signal at it. Nice catch! How did you get it? We always go through cpu_exec_end() before serving a guest syscall and always go through cpu_exec_start() before entering the guest code execution loop. If we always schedule safe work on the current thread's queue then I think there's a way to make it safe and avoid kicking syscalls. Thanks, Sergey
Sergey Fedorov <serge.fdrv@gmail.com> writes: > On 29/06/16 19:17, Alex Bennée wrote: >> So I think there is a deadlock we can get with the async work: >> >> (gdb) thread apply all bt >> >> Thread 11 (Thread 0x7ffefeca7700 (LWP 2912)): >> #0 pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185 >> #1 0x00005555555cb777 in wait_cpu_work () at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:155 >> #2 0x00005555555a0cee in wait_safe_cpu_work () at /home/alex/lsrc/qemu/qemu.git/cpu-exec-common.c:87 >> #3 0x00005555555cb8fe in cpu_exec_end (cpu=0x555555bb67e0) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:222 >> #4 0x00005555555cc7a7 in cpu_loop (env=0x555555bbea58) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:749 >> #5 0x00005555555db0b2 in clone_func (arg=0x7fffffffc9c0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:5424 >> #6 0x00007ffff6bed6fa in start_thread (arg=0x7ffefeca7700) at pthread_create.c:333 >> #7 0x00007ffff6923b5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109 >> >> <a bunch of other threads doing the same and then...> >> >> Thread 3 (Thread 0x7ffff7f38700 (LWP 2904)): >> #0 0x00005555555faf5d in safe_syscall_base () >> #1 0x00005555555cfeaf in safe_futex (uaddr=0x7ffff528a0a4, op=128, val=1, timeout=0x0, uaddr2=0x0, val3=-162668384) >> at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:706 >> #2 0x00005555555dd7cc in do_futex (uaddr=4132298916, op=128, val=1, timeout=0, uaddr2=0, val3=-162668384) >> at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:6246 >> #3 0x00005555555e8cdb in do_syscall (cpu_env=0x555555a81118, num=240, arg1=-162668380, arg2=128, arg3=1, arg4=0, arg5=0, arg6=-162668384, >> arg7=0, arg8=0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:10642 >> #4 0x00005555555cd20e in cpu_loop (env=0x555555a81118) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:883 >> #5 0x00005555555db0b2 in clone_func (arg=0x7fffffffc9c0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:5424 >> #6 0x00007ffff6bed6fa in start_thread (arg=0x7ffff7f38700) at pthread_create.c:333 >> #7 0x00007ffff6923b5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109 >> >> So everything is stalled awaiting this thread waking up and draining >> its queue. So for linux-user I think we need some mechanism to kick >> these syscalls which I assume means throwing a signal at it. > > Nice catch! How did you get it? Running pigz (armhf, debian) to compress stuff. > We always go through cpu_exec_end() > before serving a guest syscall and always go through cpu_exec_start() > before entering the guest code execution loop. If we always schedule > safe work on the current thread's queue then I think there's a way to > make it safe and avoid kicking syscalls. Not let the signals complete until safe work is done? > > Thanks, > Sergey -- Alex Bennée
On 30/06/16 13:32, Alex Bennée wrote: > Sergey Fedorov <serge.fdrv@gmail.com> writes: > >> On 29/06/16 19:17, Alex Bennée wrote: >>> So I think there is a deadlock we can get with the async work: >>> >>> (gdb) thread apply all bt >>> >>> Thread 11 (Thread 0x7ffefeca7700 (LWP 2912)): >>> #0 pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185 >>> #1 0x00005555555cb777 in wait_cpu_work () at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:155 >>> #2 0x00005555555a0cee in wait_safe_cpu_work () at /home/alex/lsrc/qemu/qemu.git/cpu-exec-common.c:87 >>> #3 0x00005555555cb8fe in cpu_exec_end (cpu=0x555555bb67e0) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:222 >>> #4 0x00005555555cc7a7 in cpu_loop (env=0x555555bbea58) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:749 >>> #5 0x00005555555db0b2 in clone_func (arg=0x7fffffffc9c0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:5424 >>> #6 0x00007ffff6bed6fa in start_thread (arg=0x7ffefeca7700) at pthread_create.c:333 >>> #7 0x00007ffff6923b5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109 >>> >>> <a bunch of other threads doing the same and then...> >>> >>> Thread 3 (Thread 0x7ffff7f38700 (LWP 2904)): >>> #0 0x00005555555faf5d in safe_syscall_base () >>> #1 0x00005555555cfeaf in safe_futex (uaddr=0x7ffff528a0a4, op=128, val=1, timeout=0x0, uaddr2=0x0, val3=-162668384) >>> at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:706 >>> #2 0x00005555555dd7cc in do_futex (uaddr=4132298916, op=128, val=1, timeout=0, uaddr2=0, val3=-162668384) >>> at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:6246 >>> #3 0x00005555555e8cdb in do_syscall (cpu_env=0x555555a81118, num=240, arg1=-162668380, arg2=128, arg3=1, arg4=0, arg5=0, arg6=-162668384, >>> arg7=0, arg8=0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:10642 >>> #4 0x00005555555cd20e in cpu_loop (env=0x555555a81118) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:883 >>> #5 0x00005555555db0b2 in clone_func (arg=0x7fffffffc9c0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:5424 >>> #6 0x00007ffff6bed6fa in start_thread (arg=0x7ffff7f38700) at pthread_create.c:333 >>> #7 0x00007ffff6923b5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109 >>> >>> So everything is stalled awaiting this thread waking up and draining >>> its queue. So for linux-user I think we need some mechanism to kick >>> these syscalls which I assume means throwing a signal at it. >> Nice catch! How did you get it? > Running pigz (armhf, debian) to compress stuff. > >> We always go through cpu_exec_end() >> before serving a guest syscall and always go through cpu_exec_start() >> before entering the guest code execution loop. If we always schedule >> safe work on the current thread's queue then I think there's a way to >> make it safe and avoid kicking syscalls. > Not let the signals complete until safe work is done? I'm thinking of waiting for completion of safe works in cpu_exec_start() as well as in cpu_exec_end(). Regards, Sergey
diff --git a/cpu-exec-common.c b/cpu-exec-common.c index 0cb4ae60eff9..8184e0662cbd 100644 --- a/cpu-exec-common.c +++ b/cpu-exec-common.c @@ -77,3 +77,86 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc) } siglongjmp(cpu->jmp_env, 1); } + +static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi) +{ + qemu_mutex_lock(&cpu->work_mutex); + if (cpu->queued_work_first == NULL) { + cpu->queued_work_first = wi; + } else { + cpu->queued_work_last->next = wi; + } + cpu->queued_work_last = wi; + wi->next = NULL; + wi->done = false; + qemu_mutex_unlock(&cpu->work_mutex); + + qemu_cpu_kick(cpu); +} + +void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data) +{ + struct qemu_work_item wi; + + if (qemu_cpu_is_self(cpu)) { + func(cpu, data); + return; + } + + wi.func = func; + wi.data = data; + wi.free = false; + + queue_work_on_cpu(cpu, &wi); + while (!atomic_mb_read(&wi.done)) { + CPUState *self_cpu = current_cpu; + + wait_cpu_work(); + current_cpu = self_cpu; + } +} + +void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data) +{ + struct qemu_work_item *wi; + + if (qemu_cpu_is_self(cpu)) { + func(cpu, data); + return; + } + + wi = g_malloc0(sizeof(struct qemu_work_item)); + wi->func = func; + wi->data = data; + wi->free = true; + + queue_work_on_cpu(cpu, wi); +} + +void flush_queued_work(CPUState *cpu) +{ + struct qemu_work_item *wi; + + if (cpu->queued_work_first == NULL) { + return; + } + + qemu_mutex_lock(&cpu->work_mutex); + while (cpu->queued_work_first != NULL) { + wi = cpu->queued_work_first; + cpu->queued_work_first = wi->next; + if (!cpu->queued_work_first) { + cpu->queued_work_last = NULL; + } + qemu_mutex_unlock(&cpu->work_mutex); + wi->func(cpu, wi->data); + qemu_mutex_lock(&cpu->work_mutex); + if (wi->free) { + g_free(wi); + } else { + atomic_mb_set(&wi->done, true); + } + } + qemu_mutex_unlock(&cpu->work_mutex); + signal_cpu_work(); +} diff --git a/cpus.c b/cpus.c index f123eb707cc6..98f60f6f98f5 100644 --- a/cpus.c +++ b/cpus.c @@ -910,71 +910,16 @@ void qemu_init_cpu_loop(void) qemu_thread_get_self(&io_thread); } -static void wait_cpu_work(void) +void wait_cpu_work(void) { qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex); } -static void signal_cpu_work(void) +void signal_cpu_work(void) { qemu_cond_broadcast(&qemu_work_cond); } -static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi) -{ - qemu_mutex_lock(&cpu->work_mutex); - if (cpu->queued_work_first == NULL) { - cpu->queued_work_first = wi; - } else { - cpu->queued_work_last->next = wi; - } - cpu->queued_work_last = wi; - wi->next = NULL; - wi->done = false; - qemu_mutex_unlock(&cpu->work_mutex); - - qemu_cpu_kick(cpu); -} - -void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data) -{ - struct qemu_work_item wi; - - if (qemu_cpu_is_self(cpu)) { - func(cpu, data); - return; - } - - wi.func = func; - wi.data = data; - wi.free = false; - - queue_work_on_cpu(cpu, &wi); - while (!atomic_mb_read(&wi.done)) { - CPUState *self_cpu = current_cpu; - - wait_cpu_work(); - current_cpu = self_cpu; - } -} - -void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data) -{ - struct qemu_work_item *wi; - - if (qemu_cpu_is_self(cpu)) { - func(cpu, data); - return; - } - - wi = g_malloc0(sizeof(struct qemu_work_item)); - wi->func = func; - wi->data = data; - wi->free = true; - - queue_work_on_cpu(cpu, wi); -} - static void qemu_kvm_destroy_vcpu(CPUState *cpu) { if (kvm_destroy_vcpu(cpu) < 0) { @@ -987,34 +932,6 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu) { } -static void flush_queued_work(CPUState *cpu) -{ - struct qemu_work_item *wi; - - if (cpu->queued_work_first == NULL) { - return; - } - - qemu_mutex_lock(&cpu->work_mutex); - while (cpu->queued_work_first != NULL) { - wi = cpu->queued_work_first; - cpu->queued_work_first = wi->next; - if (!cpu->queued_work_first) { - cpu->queued_work_last = NULL; - } - qemu_mutex_unlock(&cpu->work_mutex); - wi->func(cpu, wi->data); - qemu_mutex_lock(&cpu->work_mutex); - if (wi->free) { - g_free(wi); - } else { - atomic_mb_set(&wi->done, true); - } - } - qemu_mutex_unlock(&cpu->work_mutex); - signal_cpu_work(); -} - static void qemu_wait_io_event_common(CPUState *cpu) { if (cpu->stop) { diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index c1f59fa59d2c..23b4b50e0a45 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -407,4 +407,8 @@ extern int singlestep; extern CPUState *tcg_current_cpu; extern bool exit_request; +void wait_cpu_work(void); +void signal_cpu_work(void); +void flush_queued_work(CPUState *cpu); + #endif diff --git a/linux-user/main.c b/linux-user/main.c index 0093a8008c8e..5a68651159c2 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -111,6 +111,7 @@ static pthread_mutex_t cpu_list_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t exclusive_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t exclusive_cond = PTHREAD_COND_INITIALIZER; static pthread_cond_t exclusive_resume = PTHREAD_COND_INITIALIZER; +static pthread_cond_t work_cond = PTHREAD_COND_INITIALIZER; static bool exclusive_pending; static int tcg_pending_cpus; @@ -140,6 +141,7 @@ void fork_end(int child) pthread_mutex_init(&cpu_list_mutex, NULL); pthread_cond_init(&exclusive_cond, NULL); pthread_cond_init(&exclusive_resume, NULL); + pthread_cond_init(&work_cond, NULL); qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); gdbserver_fork(thread_cpu); } else { @@ -148,6 +150,16 @@ void fork_end(int child) } } +void wait_cpu_work(void) +{ + pthread_cond_wait(&work_cond, &exclusive_lock); +} + +void signal_cpu_work(void) +{ + pthread_cond_broadcast(&work_cond); +} + /* Wait for pending exclusive operations to complete. The exclusive lock must be held. */ static inline void exclusive_idle(void) @@ -206,6 +218,7 @@ static inline void cpu_exec_end(CPUState *cpu) pthread_cond_broadcast(&exclusive_cond); } exclusive_idle(); + flush_queued_work(cpu); pthread_mutex_unlock(&exclusive_lock); }