diff mbox

[RFC,6/8] linux-user: Support CPU work queue

Message ID 1466375313-7562-7-git-send-email-sergey.fedorov@linaro.org (mailing list archive)
State New, archived
Headers show

Commit Message

sergey.fedorov@linaro.org June 19, 2016, 10:28 p.m. UTC
From: Sergey Fedorov <serge.fdrv@gmail.com>

Make CPU work core functions common between system and user-mode
emulation. User-mode does not have BQL, so flush_queued_work() is
protected by 'exclusive_lock'.

Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com>
Signed-off-by: Sergey Fedorov <sergey.fedorov@linaro.org>
---
 cpu-exec-common.c       | 83 ++++++++++++++++++++++++++++++++++++++++++++++
 cpus.c                  | 87 ++-----------------------------------------------
 include/exec/exec-all.h |  4 +++
 linux-user/main.c       | 13 ++++++++
 4 files changed, 102 insertions(+), 85 deletions(-)

Comments

Alex Bennée June 27, 2016, 9:31 a.m. UTC | #1
Sergey Fedorov <sergey.fedorov@linaro.org> writes:

> From: Sergey Fedorov <serge.fdrv@gmail.com>
>
> Make CPU work core functions common between system and user-mode
> emulation. User-mode does not have BQL, so flush_queued_work() is
> protected by 'exclusive_lock'.
>
> Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com>
> Signed-off-by: Sergey Fedorov <sergey.fedorov@linaro.org>
> ---
>  cpu-exec-common.c       | 83 ++++++++++++++++++++++++++++++++++++++++++++++
>  cpus.c                  | 87 ++-----------------------------------------------
>  include/exec/exec-all.h |  4 +++
>  linux-user/main.c       | 13 ++++++++
>  4 files changed, 102 insertions(+), 85 deletions(-)
>
> diff --git a/cpu-exec-common.c b/cpu-exec-common.c
> index 0cb4ae60eff9..8184e0662cbd 100644
> --- a/cpu-exec-common.c
> +++ b/cpu-exec-common.c
> @@ -77,3 +77,86 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
>      }
>      siglongjmp(cpu->jmp_env, 1);
>  }
> +
> +static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
> +{
> +    qemu_mutex_lock(&cpu->work_mutex);
> +    if (cpu->queued_work_first == NULL) {
> +        cpu->queued_work_first = wi;
> +    } else {
> +        cpu->queued_work_last->next = wi;
> +    }
> +    cpu->queued_work_last = wi;
> +    wi->next = NULL;
> +    wi->done = false;
> +    qemu_mutex_unlock(&cpu->work_mutex);
> +
> +    qemu_cpu_kick(cpu);
> +}
> +
> +void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
> +{
> +    struct qemu_work_item wi;
> +
> +    if (qemu_cpu_is_self(cpu)) {
> +        func(cpu, data);
> +        return;
> +    }
> +
> +    wi.func = func;
> +    wi.data = data;
> +    wi.free = false;
> +
> +    queue_work_on_cpu(cpu, &wi);
> +    while (!atomic_mb_read(&wi.done)) {
> +        CPUState *self_cpu = current_cpu;
> +
> +        wait_cpu_work();
> +        current_cpu = self_cpu;
> +    }
> +}
> +
> +void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
> +{
> +    struct qemu_work_item *wi;
> +
> +    if (qemu_cpu_is_self(cpu)) {
> +        func(cpu, data);
> +        return;
> +    }
> +
> +    wi = g_malloc0(sizeof(struct qemu_work_item));
> +    wi->func = func;
> +    wi->data = data;
> +    wi->free = true;
> +
> +    queue_work_on_cpu(cpu, wi);
> +}
> +
> +void flush_queued_work(CPUState *cpu)
> +{
> +    struct qemu_work_item *wi;
> +
> +    if (cpu->queued_work_first == NULL) {
> +        return;
> +    }
> +
> +    qemu_mutex_lock(&cpu->work_mutex);
> +    while (cpu->queued_work_first != NULL) {
> +        wi = cpu->queued_work_first;
> +        cpu->queued_work_first = wi->next;
> +        if (!cpu->queued_work_first) {
> +            cpu->queued_work_last = NULL;
> +        }
> +        qemu_mutex_unlock(&cpu->work_mutex);
> +        wi->func(cpu, wi->data);
> +        qemu_mutex_lock(&cpu->work_mutex);
> +        if (wi->free) {
> +            g_free(wi);
> +        } else {
> +            atomic_mb_set(&wi->done, true);
> +        }
> +    }
> +    qemu_mutex_unlock(&cpu->work_mutex);
> +    signal_cpu_work();
> +}
> diff --git a/cpus.c b/cpus.c
> index f123eb707cc6..98f60f6f98f5 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -910,71 +910,16 @@ void qemu_init_cpu_loop(void)
>      qemu_thread_get_self(&io_thread);
>  }
>
> -static void wait_cpu_work(void)
> +void wait_cpu_work(void)
>  {
>      qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
>  }
>
> -static void signal_cpu_work(void)
> +void signal_cpu_work(void)
>  {
>      qemu_cond_broadcast(&qemu_work_cond);
>  }
>
> -static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
> -{
> -    qemu_mutex_lock(&cpu->work_mutex);
> -    if (cpu->queued_work_first == NULL) {
> -        cpu->queued_work_first = wi;
> -    } else {
> -        cpu->queued_work_last->next = wi;
> -    }
> -    cpu->queued_work_last = wi;
> -    wi->next = NULL;
> -    wi->done = false;
> -    qemu_mutex_unlock(&cpu->work_mutex);
> -
> -    qemu_cpu_kick(cpu);
> -}
> -
> -void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
> -{
> -    struct qemu_work_item wi;
> -
> -    if (qemu_cpu_is_self(cpu)) {
> -        func(cpu, data);
> -        return;
> -    }
> -
> -    wi.func = func;
> -    wi.data = data;
> -    wi.free = false;
> -
> -    queue_work_on_cpu(cpu, &wi);
> -    while (!atomic_mb_read(&wi.done)) {
> -        CPUState *self_cpu = current_cpu;
> -
> -        wait_cpu_work();
> -        current_cpu = self_cpu;
> -    }
> -}
> -
> -void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
> -{
> -    struct qemu_work_item *wi;
> -
> -    if (qemu_cpu_is_self(cpu)) {
> -        func(cpu, data);
> -        return;
> -    }
> -
> -    wi = g_malloc0(sizeof(struct qemu_work_item));
> -    wi->func = func;
> -    wi->data = data;
> -    wi->free = true;
> -
> -    queue_work_on_cpu(cpu, wi);
> -}
> -
>  static void qemu_kvm_destroy_vcpu(CPUState *cpu)
>  {
>      if (kvm_destroy_vcpu(cpu) < 0) {
> @@ -987,34 +932,6 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu)
>  {
>  }
>
> -static void flush_queued_work(CPUState *cpu)
> -{
> -    struct qemu_work_item *wi;
> -
> -    if (cpu->queued_work_first == NULL) {
> -        return;
> -    }
> -
> -    qemu_mutex_lock(&cpu->work_mutex);
> -    while (cpu->queued_work_first != NULL) {
> -        wi = cpu->queued_work_first;
> -        cpu->queued_work_first = wi->next;
> -        if (!cpu->queued_work_first) {
> -            cpu->queued_work_last = NULL;
> -        }
> -        qemu_mutex_unlock(&cpu->work_mutex);
> -        wi->func(cpu, wi->data);
> -        qemu_mutex_lock(&cpu->work_mutex);
> -        if (wi->free) {
> -            g_free(wi);
> -        } else {
> -            atomic_mb_set(&wi->done, true);
> -        }
> -    }
> -    qemu_mutex_unlock(&cpu->work_mutex);
> -    signal_cpu_work();
> -}
> -
>  static void qemu_wait_io_event_common(CPUState *cpu)
>  {
>      if (cpu->stop) {
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index c1f59fa59d2c..23b4b50e0a45 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -407,4 +407,8 @@ extern int singlestep;
>  extern CPUState *tcg_current_cpu;
>  extern bool exit_request;
>
> +void wait_cpu_work(void);
> +void signal_cpu_work(void);
> +void flush_queued_work(CPUState *cpu);
> +

Now these are public APIs (and have multiple implementations) some doc
comments would be useful here.

>  #endif
> diff --git a/linux-user/main.c b/linux-user/main.c
> index 0093a8008c8e..5a68651159c2 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -111,6 +111,7 @@ static pthread_mutex_t cpu_list_mutex = PTHREAD_MUTEX_INITIALIZER;
>  static pthread_mutex_t exclusive_lock = PTHREAD_MUTEX_INITIALIZER;
>  static pthread_cond_t exclusive_cond = PTHREAD_COND_INITIALIZER;
>  static pthread_cond_t exclusive_resume = PTHREAD_COND_INITIALIZER;
> +static pthread_cond_t work_cond = PTHREAD_COND_INITIALIZER;
>  static bool exclusive_pending;
>  static int tcg_pending_cpus;
>
> @@ -140,6 +141,7 @@ void fork_end(int child)
>          pthread_mutex_init(&cpu_list_mutex, NULL);
>          pthread_cond_init(&exclusive_cond, NULL);
>          pthread_cond_init(&exclusive_resume, NULL);
> +        pthread_cond_init(&work_cond, NULL);
>          qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
>          gdbserver_fork(thread_cpu);
>      } else {
> @@ -148,6 +150,16 @@ void fork_end(int child)
>      }
>  }
>
> +void wait_cpu_work(void)
> +{
> +    pthread_cond_wait(&work_cond, &exclusive_lock);
> +}
> +
> +void signal_cpu_work(void)
> +{
> +    pthread_cond_broadcast(&work_cond);
> +}
> +
>  /* Wait for pending exclusive operations to complete.  The exclusive lock
>     must be held.  */
>  static inline void exclusive_idle(void)
> @@ -206,6 +218,7 @@ static inline void cpu_exec_end(CPUState *cpu)
>          pthread_cond_broadcast(&exclusive_cond);
>      }
>      exclusive_idle();
> +    flush_queued_work(cpu);
>      pthread_mutex_unlock(&exclusive_lock);
>  }


--
Alex Bennée
Sergey Fedorov June 29, 2016, 2:59 p.m. UTC | #2
On 27/06/16 12:31, Alex Bennée wrote:
> Sergey Fedorov <sergey.fedorov@linaro.org> writes:
>
>> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
>> index c1f59fa59d2c..23b4b50e0a45 100644
>> --- a/include/exec/exec-all.h
>> +++ b/include/exec/exec-all.h
>> @@ -407,4 +407,8 @@ extern int singlestep;
>>  extern CPUState *tcg_current_cpu;
>>  extern bool exit_request;
>>
>> +void wait_cpu_work(void);
>> +void signal_cpu_work(void);
>> +void flush_queued_work(CPUState *cpu);
>> +
> Now these are public APIs (and have multiple implementations) some doc
> comments would be useful here.


Sure, I'll do this as soon as I'm sure that this is a right approach.

Thanks,
Sergey
Alex Bennée June 29, 2016, 4:17 p.m. UTC | #3
Sergey Fedorov <sergey.fedorov@linaro.org> writes:

> From: Sergey Fedorov <serge.fdrv@gmail.com>
>
> Make CPU work core functions common between system and user-mode
> emulation. User-mode does not have BQL, so flush_queued_work() is
> protected by 'exclusive_lock'.
>
> Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com>
> Signed-off-by: Sergey Fedorov <sergey.fedorov@linaro.org>
> ---
>  cpu-exec-common.c       | 83 ++++++++++++++++++++++++++++++++++++++++++++++
>  cpus.c                  | 87 ++-----------------------------------------------
>  include/exec/exec-all.h |  4 +++
>  linux-user/main.c       | 13 ++++++++
>  4 files changed, 102 insertions(+), 85 deletions(-)
>
> diff --git a/cpu-exec-common.c b/cpu-exec-common.c
> index 0cb4ae60eff9..8184e0662cbd 100644
> --- a/cpu-exec-common.c
> +++ b/cpu-exec-common.c
> @@ -77,3 +77,86 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
>      }
>      siglongjmp(cpu->jmp_env, 1);
>  }
> +
> +static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
> +{
> +    qemu_mutex_lock(&cpu->work_mutex);
> +    if (cpu->queued_work_first == NULL) {
> +        cpu->queued_work_first = wi;
> +    } else {
> +        cpu->queued_work_last->next = wi;
> +    }
> +    cpu->queued_work_last = wi;
> +    wi->next = NULL;
> +    wi->done = false;
> +    qemu_mutex_unlock(&cpu->work_mutex);
> +
> +    qemu_cpu_kick(cpu);
> +}
> +
> +void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
> +{
> +    struct qemu_work_item wi;
> +
> +    if (qemu_cpu_is_self(cpu)) {
> +        func(cpu, data);
> +        return;
> +    }
> +
> +    wi.func = func;
> +    wi.data = data;
> +    wi.free = false;
> +
> +    queue_work_on_cpu(cpu, &wi);
> +    while (!atomic_mb_read(&wi.done)) {
> +        CPUState *self_cpu = current_cpu;
> +
> +        wait_cpu_work();
> +        current_cpu = self_cpu;
> +    }
> +}
> +
> +void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
> +{
> +    struct qemu_work_item *wi;
> +
> +    if (qemu_cpu_is_self(cpu)) {
> +        func(cpu, data);
> +        return;
> +    }
> +
> +    wi = g_malloc0(sizeof(struct qemu_work_item));
> +    wi->func = func;
> +    wi->data = data;
> +    wi->free = true;
> +
> +    queue_work_on_cpu(cpu, wi);
> +}
> +
> +void flush_queued_work(CPUState *cpu)
> +{
> +    struct qemu_work_item *wi;
> +
> +    if (cpu->queued_work_first == NULL) {
> +        return;
> +    }
> +
> +    qemu_mutex_lock(&cpu->work_mutex);
> +    while (cpu->queued_work_first != NULL) {
> +        wi = cpu->queued_work_first;
> +        cpu->queued_work_first = wi->next;
> +        if (!cpu->queued_work_first) {
> +            cpu->queued_work_last = NULL;
> +        }
> +        qemu_mutex_unlock(&cpu->work_mutex);
> +        wi->func(cpu, wi->data);
> +        qemu_mutex_lock(&cpu->work_mutex);
> +        if (wi->free) {
> +            g_free(wi);
> +        } else {
> +            atomic_mb_set(&wi->done, true);
> +        }
> +    }
> +    qemu_mutex_unlock(&cpu->work_mutex);
> +    signal_cpu_work();
> +}
> diff --git a/cpus.c b/cpus.c
> index f123eb707cc6..98f60f6f98f5 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -910,71 +910,16 @@ void qemu_init_cpu_loop(void)
>      qemu_thread_get_self(&io_thread);
>  }
>
> -static void wait_cpu_work(void)
> +void wait_cpu_work(void)
>  {
>      qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
>  }
>
> -static void signal_cpu_work(void)
> +void signal_cpu_work(void)
>  {
>      qemu_cond_broadcast(&qemu_work_cond);
>  }
>
> -static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
> -{
> -    qemu_mutex_lock(&cpu->work_mutex);
> -    if (cpu->queued_work_first == NULL) {
> -        cpu->queued_work_first = wi;
> -    } else {
> -        cpu->queued_work_last->next = wi;
> -    }
> -    cpu->queued_work_last = wi;
> -    wi->next = NULL;
> -    wi->done = false;
> -    qemu_mutex_unlock(&cpu->work_mutex);
> -
> -    qemu_cpu_kick(cpu);
> -}
> -
> -void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
> -{
> -    struct qemu_work_item wi;
> -
> -    if (qemu_cpu_is_self(cpu)) {
> -        func(cpu, data);
> -        return;
> -    }
> -
> -    wi.func = func;
> -    wi.data = data;
> -    wi.free = false;
> -
> -    queue_work_on_cpu(cpu, &wi);
> -    while (!atomic_mb_read(&wi.done)) {
> -        CPUState *self_cpu = current_cpu;
> -
> -        wait_cpu_work();
> -        current_cpu = self_cpu;
> -    }
> -}
> -
> -void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
> -{
> -    struct qemu_work_item *wi;
> -
> -    if (qemu_cpu_is_self(cpu)) {
> -        func(cpu, data);
> -        return;
> -    }
> -
> -    wi = g_malloc0(sizeof(struct qemu_work_item));
> -    wi->func = func;
> -    wi->data = data;
> -    wi->free = true;
> -
> -    queue_work_on_cpu(cpu, wi);
> -}
> -
>  static void qemu_kvm_destroy_vcpu(CPUState *cpu)
>  {
>      if (kvm_destroy_vcpu(cpu) < 0) {
> @@ -987,34 +932,6 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu)
>  {
>  }
>
> -static void flush_queued_work(CPUState *cpu)
> -{
> -    struct qemu_work_item *wi;
> -
> -    if (cpu->queued_work_first == NULL) {
> -        return;
> -    }
> -
> -    qemu_mutex_lock(&cpu->work_mutex);
> -    while (cpu->queued_work_first != NULL) {
> -        wi = cpu->queued_work_first;
> -        cpu->queued_work_first = wi->next;
> -        if (!cpu->queued_work_first) {
> -            cpu->queued_work_last = NULL;
> -        }
> -        qemu_mutex_unlock(&cpu->work_mutex);
> -        wi->func(cpu, wi->data);
> -        qemu_mutex_lock(&cpu->work_mutex);
> -        if (wi->free) {
> -            g_free(wi);
> -        } else {
> -            atomic_mb_set(&wi->done, true);
> -        }
> -    }
> -    qemu_mutex_unlock(&cpu->work_mutex);
> -    signal_cpu_work();
> -}
> -
>  static void qemu_wait_io_event_common(CPUState *cpu)
>  {
>      if (cpu->stop) {
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index c1f59fa59d2c..23b4b50e0a45 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -407,4 +407,8 @@ extern int singlestep;
>  extern CPUState *tcg_current_cpu;
>  extern bool exit_request;
>
> +void wait_cpu_work(void);
> +void signal_cpu_work(void);
> +void flush_queued_work(CPUState *cpu);
> +
>  #endif
> diff --git a/linux-user/main.c b/linux-user/main.c
> index 0093a8008c8e..5a68651159c2 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -111,6 +111,7 @@ static pthread_mutex_t cpu_list_mutex = PTHREAD_MUTEX_INITIALIZER;
>  static pthread_mutex_t exclusive_lock = PTHREAD_MUTEX_INITIALIZER;
>  static pthread_cond_t exclusive_cond = PTHREAD_COND_INITIALIZER;
>  static pthread_cond_t exclusive_resume = PTHREAD_COND_INITIALIZER;
> +static pthread_cond_t work_cond = PTHREAD_COND_INITIALIZER;
>  static bool exclusive_pending;
>  static int tcg_pending_cpus;
>
> @@ -140,6 +141,7 @@ void fork_end(int child)
>          pthread_mutex_init(&cpu_list_mutex, NULL);
>          pthread_cond_init(&exclusive_cond, NULL);
>          pthread_cond_init(&exclusive_resume, NULL);
> +        pthread_cond_init(&work_cond, NULL);
>          qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
>          gdbserver_fork(thread_cpu);
>      } else {
> @@ -148,6 +150,16 @@ void fork_end(int child)
>      }
>  }
>
> +void wait_cpu_work(void)
> +{
> +    pthread_cond_wait(&work_cond, &exclusive_lock);
> +}
> +
> +void signal_cpu_work(void)
> +{
> +    pthread_cond_broadcast(&work_cond);
> +}
> +
>  /* Wait for pending exclusive operations to complete.  The exclusive lock
>     must be held.  */
>  static inline void exclusive_idle(void)
> @@ -206,6 +218,7 @@ static inline void cpu_exec_end(CPUState *cpu)
>          pthread_cond_broadcast(&exclusive_cond);
>      }
>      exclusive_idle();
> +    flush_queued_work(cpu);
>      pthread_mutex_unlock(&exclusive_lock);
>  }

So I think there is a deadlock we can get with the async work:

(gdb) thread apply all bt

Thread 11 (Thread 0x7ffefeca7700 (LWP 2912)):
#0  pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
#1  0x00005555555cb777 in wait_cpu_work () at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:155
#2  0x00005555555a0cee in wait_safe_cpu_work () at /home/alex/lsrc/qemu/qemu.git/cpu-exec-common.c:87
#3  0x00005555555cb8fe in cpu_exec_end (cpu=0x555555bb67e0) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:222
#4  0x00005555555cc7a7 in cpu_loop (env=0x555555bbea58) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:749
#5  0x00005555555db0b2 in clone_func (arg=0x7fffffffc9c0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:5424
#6  0x00007ffff6bed6fa in start_thread (arg=0x7ffefeca7700) at pthread_create.c:333
#7  0x00007ffff6923b5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109

<a bunch of other threads doing the same and then...>

Thread 3 (Thread 0x7ffff7f38700 (LWP 2904)):
#0  0x00005555555faf5d in safe_syscall_base ()
#1  0x00005555555cfeaf in safe_futex (uaddr=0x7ffff528a0a4, op=128, val=1, timeout=0x0, uaddr2=0x0, val3=-162668384)
    at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:706
#2  0x00005555555dd7cc in do_futex (uaddr=4132298916, op=128, val=1, timeout=0, uaddr2=0, val3=-162668384)
    at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:6246
#3  0x00005555555e8cdb in do_syscall (cpu_env=0x555555a81118, num=240, arg1=-162668380, arg2=128, arg3=1, arg4=0, arg5=0, arg6=-162668384,
    arg7=0, arg8=0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:10642
#4  0x00005555555cd20e in cpu_loop (env=0x555555a81118) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:883
#5  0x00005555555db0b2 in clone_func (arg=0x7fffffffc9c0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:5424
#6  0x00007ffff6bed6fa in start_thread (arg=0x7ffff7f38700) at pthread_create.c:333
#7  0x00007ffff6923b5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109

So everything is stalled awaiting this thread waking up and draining
its queue. So for linux-user I think we need some mechanism to kick
these syscalls which I assume means throwing a signal at it.

--
Alex Bennée
Sergey Fedorov June 30, 2016, 9:39 a.m. UTC | #4
On 29/06/16 19:17, Alex Bennée wrote:
> So I think there is a deadlock we can get with the async work:
>
> (gdb) thread apply all bt
>
> Thread 11 (Thread 0x7ffefeca7700 (LWP 2912)):
> #0  pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
> #1  0x00005555555cb777 in wait_cpu_work () at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:155
> #2  0x00005555555a0cee in wait_safe_cpu_work () at /home/alex/lsrc/qemu/qemu.git/cpu-exec-common.c:87
> #3  0x00005555555cb8fe in cpu_exec_end (cpu=0x555555bb67e0) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:222
> #4  0x00005555555cc7a7 in cpu_loop (env=0x555555bbea58) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:749
> #5  0x00005555555db0b2 in clone_func (arg=0x7fffffffc9c0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:5424
> #6  0x00007ffff6bed6fa in start_thread (arg=0x7ffefeca7700) at pthread_create.c:333
> #7  0x00007ffff6923b5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
>
> <a bunch of other threads doing the same and then...>
>
> Thread 3 (Thread 0x7ffff7f38700 (LWP 2904)):
> #0  0x00005555555faf5d in safe_syscall_base ()
> #1  0x00005555555cfeaf in safe_futex (uaddr=0x7ffff528a0a4, op=128, val=1, timeout=0x0, uaddr2=0x0, val3=-162668384)
>     at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:706
> #2  0x00005555555dd7cc in do_futex (uaddr=4132298916, op=128, val=1, timeout=0, uaddr2=0, val3=-162668384)
>     at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:6246
> #3  0x00005555555e8cdb in do_syscall (cpu_env=0x555555a81118, num=240, arg1=-162668380, arg2=128, arg3=1, arg4=0, arg5=0, arg6=-162668384,
>     arg7=0, arg8=0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:10642
> #4  0x00005555555cd20e in cpu_loop (env=0x555555a81118) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:883
> #5  0x00005555555db0b2 in clone_func (arg=0x7fffffffc9c0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:5424
> #6  0x00007ffff6bed6fa in start_thread (arg=0x7ffff7f38700) at pthread_create.c:333
> #7  0x00007ffff6923b5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
>
> So everything is stalled awaiting this thread waking up and draining
> its queue. So for linux-user I think we need some mechanism to kick
> these syscalls which I assume means throwing a signal at it.

Nice catch! How did you get it? We always go through cpu_exec_end()
before serving a guest syscall and always go through cpu_exec_start()
before entering the guest code execution loop. If we always schedule
safe work on the current thread's queue then I think there's a way to
make it safe and avoid kicking syscalls.

Thanks,
Sergey
Alex Bennée June 30, 2016, 10:32 a.m. UTC | #5
Sergey Fedorov <serge.fdrv@gmail.com> writes:

> On 29/06/16 19:17, Alex Bennée wrote:
>> So I think there is a deadlock we can get with the async work:
>>
>> (gdb) thread apply all bt
>>
>> Thread 11 (Thread 0x7ffefeca7700 (LWP 2912)):
>> #0  pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
>> #1  0x00005555555cb777 in wait_cpu_work () at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:155
>> #2  0x00005555555a0cee in wait_safe_cpu_work () at /home/alex/lsrc/qemu/qemu.git/cpu-exec-common.c:87
>> #3  0x00005555555cb8fe in cpu_exec_end (cpu=0x555555bb67e0) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:222
>> #4  0x00005555555cc7a7 in cpu_loop (env=0x555555bbea58) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:749
>> #5  0x00005555555db0b2 in clone_func (arg=0x7fffffffc9c0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:5424
>> #6  0x00007ffff6bed6fa in start_thread (arg=0x7ffefeca7700) at pthread_create.c:333
>> #7  0x00007ffff6923b5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
>>
>> <a bunch of other threads doing the same and then...>
>>
>> Thread 3 (Thread 0x7ffff7f38700 (LWP 2904)):
>> #0  0x00005555555faf5d in safe_syscall_base ()
>> #1  0x00005555555cfeaf in safe_futex (uaddr=0x7ffff528a0a4, op=128, val=1, timeout=0x0, uaddr2=0x0, val3=-162668384)
>>     at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:706
>> #2  0x00005555555dd7cc in do_futex (uaddr=4132298916, op=128, val=1, timeout=0, uaddr2=0, val3=-162668384)
>>     at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:6246
>> #3  0x00005555555e8cdb in do_syscall (cpu_env=0x555555a81118, num=240, arg1=-162668380, arg2=128, arg3=1, arg4=0, arg5=0, arg6=-162668384,
>>     arg7=0, arg8=0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:10642
>> #4  0x00005555555cd20e in cpu_loop (env=0x555555a81118) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:883
>> #5  0x00005555555db0b2 in clone_func (arg=0x7fffffffc9c0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:5424
>> #6  0x00007ffff6bed6fa in start_thread (arg=0x7ffff7f38700) at pthread_create.c:333
>> #7  0x00007ffff6923b5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
>>
>> So everything is stalled awaiting this thread waking up and draining
>> its queue. So for linux-user I think we need some mechanism to kick
>> these syscalls which I assume means throwing a signal at it.
>
> Nice catch! How did you get it?

Running pigz (armhf, debian) to compress stuff.

> We always go through cpu_exec_end()
> before serving a guest syscall and always go through cpu_exec_start()
> before entering the guest code execution loop. If we always schedule
> safe work on the current thread's queue then I think there's a way to
> make it safe and avoid kicking syscalls.

Not let the signals complete until safe work is done?
>
> Thanks,
> Sergey


--
Alex Bennée
Sergey Fedorov June 30, 2016, 10:35 a.m. UTC | #6
On 30/06/16 13:32, Alex Bennée wrote:
> Sergey Fedorov <serge.fdrv@gmail.com> writes:
>
>> On 29/06/16 19:17, Alex Bennée wrote:
>>> So I think there is a deadlock we can get with the async work:
>>>
>>> (gdb) thread apply all bt
>>>
>>> Thread 11 (Thread 0x7ffefeca7700 (LWP 2912)):
>>> #0  pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
>>> #1  0x00005555555cb777 in wait_cpu_work () at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:155
>>> #2  0x00005555555a0cee in wait_safe_cpu_work () at /home/alex/lsrc/qemu/qemu.git/cpu-exec-common.c:87
>>> #3  0x00005555555cb8fe in cpu_exec_end (cpu=0x555555bb67e0) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:222
>>> #4  0x00005555555cc7a7 in cpu_loop (env=0x555555bbea58) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:749
>>> #5  0x00005555555db0b2 in clone_func (arg=0x7fffffffc9c0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:5424
>>> #6  0x00007ffff6bed6fa in start_thread (arg=0x7ffefeca7700) at pthread_create.c:333
>>> #7  0x00007ffff6923b5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
>>>
>>> <a bunch of other threads doing the same and then...>
>>>
>>> Thread 3 (Thread 0x7ffff7f38700 (LWP 2904)):
>>> #0  0x00005555555faf5d in safe_syscall_base ()
>>> #1  0x00005555555cfeaf in safe_futex (uaddr=0x7ffff528a0a4, op=128, val=1, timeout=0x0, uaddr2=0x0, val3=-162668384)
>>>     at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:706
>>> #2  0x00005555555dd7cc in do_futex (uaddr=4132298916, op=128, val=1, timeout=0, uaddr2=0, val3=-162668384)
>>>     at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:6246
>>> #3  0x00005555555e8cdb in do_syscall (cpu_env=0x555555a81118, num=240, arg1=-162668380, arg2=128, arg3=1, arg4=0, arg5=0, arg6=-162668384,
>>>     arg7=0, arg8=0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:10642
>>> #4  0x00005555555cd20e in cpu_loop (env=0x555555a81118) at /home/alex/lsrc/qemu/qemu.git/linux-user/main.c:883
>>> #5  0x00005555555db0b2 in clone_func (arg=0x7fffffffc9c0) at /home/alex/lsrc/qemu/qemu.git/linux-user/syscall.c:5424
>>> #6  0x00007ffff6bed6fa in start_thread (arg=0x7ffff7f38700) at pthread_create.c:333
>>> #7  0x00007ffff6923b5d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
>>>
>>> So everything is stalled awaiting this thread waking up and draining
>>> its queue. So for linux-user I think we need some mechanism to kick
>>> these syscalls which I assume means throwing a signal at it.
>> Nice catch! How did you get it?
> Running pigz (armhf, debian) to compress stuff.
>
>> We always go through cpu_exec_end()
>> before serving a guest syscall and always go through cpu_exec_start()
>> before entering the guest code execution loop. If we always schedule
>> safe work on the current thread's queue then I think there's a way to
>> make it safe and avoid kicking syscalls.
> Not let the signals complete until safe work is done?

I'm thinking of waiting for completion of safe works in cpu_exec_start()
as well as in cpu_exec_end().

Regards,
Sergey
diff mbox

Patch

diff --git a/cpu-exec-common.c b/cpu-exec-common.c
index 0cb4ae60eff9..8184e0662cbd 100644
--- a/cpu-exec-common.c
+++ b/cpu-exec-common.c
@@ -77,3 +77,86 @@  void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
     }
     siglongjmp(cpu->jmp_env, 1);
 }
+
+static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
+{
+    qemu_mutex_lock(&cpu->work_mutex);
+    if (cpu->queued_work_first == NULL) {
+        cpu->queued_work_first = wi;
+    } else {
+        cpu->queued_work_last->next = wi;
+    }
+    cpu->queued_work_last = wi;
+    wi->next = NULL;
+    wi->done = false;
+    qemu_mutex_unlock(&cpu->work_mutex);
+
+    qemu_cpu_kick(cpu);
+}
+
+void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
+{
+    struct qemu_work_item wi;
+
+    if (qemu_cpu_is_self(cpu)) {
+        func(cpu, data);
+        return;
+    }
+
+    wi.func = func;
+    wi.data = data;
+    wi.free = false;
+
+    queue_work_on_cpu(cpu, &wi);
+    while (!atomic_mb_read(&wi.done)) {
+        CPUState *self_cpu = current_cpu;
+
+        wait_cpu_work();
+        current_cpu = self_cpu;
+    }
+}
+
+void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
+{
+    struct qemu_work_item *wi;
+
+    if (qemu_cpu_is_self(cpu)) {
+        func(cpu, data);
+        return;
+    }
+
+    wi = g_malloc0(sizeof(struct qemu_work_item));
+    wi->func = func;
+    wi->data = data;
+    wi->free = true;
+
+    queue_work_on_cpu(cpu, wi);
+}
+
+void flush_queued_work(CPUState *cpu)
+{
+    struct qemu_work_item *wi;
+
+    if (cpu->queued_work_first == NULL) {
+        return;
+    }
+
+    qemu_mutex_lock(&cpu->work_mutex);
+    while (cpu->queued_work_first != NULL) {
+        wi = cpu->queued_work_first;
+        cpu->queued_work_first = wi->next;
+        if (!cpu->queued_work_first) {
+            cpu->queued_work_last = NULL;
+        }
+        qemu_mutex_unlock(&cpu->work_mutex);
+        wi->func(cpu, wi->data);
+        qemu_mutex_lock(&cpu->work_mutex);
+        if (wi->free) {
+            g_free(wi);
+        } else {
+            atomic_mb_set(&wi->done, true);
+        }
+    }
+    qemu_mutex_unlock(&cpu->work_mutex);
+    signal_cpu_work();
+}
diff --git a/cpus.c b/cpus.c
index f123eb707cc6..98f60f6f98f5 100644
--- a/cpus.c
+++ b/cpus.c
@@ -910,71 +910,16 @@  void qemu_init_cpu_loop(void)
     qemu_thread_get_self(&io_thread);
 }
 
-static void wait_cpu_work(void)
+void wait_cpu_work(void)
 {
     qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
 }
 
-static void signal_cpu_work(void)
+void signal_cpu_work(void)
 {
     qemu_cond_broadcast(&qemu_work_cond);
 }
 
-static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
-{
-    qemu_mutex_lock(&cpu->work_mutex);
-    if (cpu->queued_work_first == NULL) {
-        cpu->queued_work_first = wi;
-    } else {
-        cpu->queued_work_last->next = wi;
-    }
-    cpu->queued_work_last = wi;
-    wi->next = NULL;
-    wi->done = false;
-    qemu_mutex_unlock(&cpu->work_mutex);
-
-    qemu_cpu_kick(cpu);
-}
-
-void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
-{
-    struct qemu_work_item wi;
-
-    if (qemu_cpu_is_self(cpu)) {
-        func(cpu, data);
-        return;
-    }
-
-    wi.func = func;
-    wi.data = data;
-    wi.free = false;
-
-    queue_work_on_cpu(cpu, &wi);
-    while (!atomic_mb_read(&wi.done)) {
-        CPUState *self_cpu = current_cpu;
-
-        wait_cpu_work();
-        current_cpu = self_cpu;
-    }
-}
-
-void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
-{
-    struct qemu_work_item *wi;
-
-    if (qemu_cpu_is_self(cpu)) {
-        func(cpu, data);
-        return;
-    }
-
-    wi = g_malloc0(sizeof(struct qemu_work_item));
-    wi->func = func;
-    wi->data = data;
-    wi->free = true;
-
-    queue_work_on_cpu(cpu, wi);
-}
-
 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
 {
     if (kvm_destroy_vcpu(cpu) < 0) {
@@ -987,34 +932,6 @@  static void qemu_tcg_destroy_vcpu(CPUState *cpu)
 {
 }
 
-static void flush_queued_work(CPUState *cpu)
-{
-    struct qemu_work_item *wi;
-
-    if (cpu->queued_work_first == NULL) {
-        return;
-    }
-
-    qemu_mutex_lock(&cpu->work_mutex);
-    while (cpu->queued_work_first != NULL) {
-        wi = cpu->queued_work_first;
-        cpu->queued_work_first = wi->next;
-        if (!cpu->queued_work_first) {
-            cpu->queued_work_last = NULL;
-        }
-        qemu_mutex_unlock(&cpu->work_mutex);
-        wi->func(cpu, wi->data);
-        qemu_mutex_lock(&cpu->work_mutex);
-        if (wi->free) {
-            g_free(wi);
-        } else {
-            atomic_mb_set(&wi->done, true);
-        }
-    }
-    qemu_mutex_unlock(&cpu->work_mutex);
-    signal_cpu_work();
-}
-
 static void qemu_wait_io_event_common(CPUState *cpu)
 {
     if (cpu->stop) {
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index c1f59fa59d2c..23b4b50e0a45 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -407,4 +407,8 @@  extern int singlestep;
 extern CPUState *tcg_current_cpu;
 extern bool exit_request;
 
+void wait_cpu_work(void);
+void signal_cpu_work(void);
+void flush_queued_work(CPUState *cpu);
+
 #endif
diff --git a/linux-user/main.c b/linux-user/main.c
index 0093a8008c8e..5a68651159c2 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -111,6 +111,7 @@  static pthread_mutex_t cpu_list_mutex = PTHREAD_MUTEX_INITIALIZER;
 static pthread_mutex_t exclusive_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_cond_t exclusive_cond = PTHREAD_COND_INITIALIZER;
 static pthread_cond_t exclusive_resume = PTHREAD_COND_INITIALIZER;
+static pthread_cond_t work_cond = PTHREAD_COND_INITIALIZER;
 static bool exclusive_pending;
 static int tcg_pending_cpus;
 
@@ -140,6 +141,7 @@  void fork_end(int child)
         pthread_mutex_init(&cpu_list_mutex, NULL);
         pthread_cond_init(&exclusive_cond, NULL);
         pthread_cond_init(&exclusive_resume, NULL);
+        pthread_cond_init(&work_cond, NULL);
         qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
         gdbserver_fork(thread_cpu);
     } else {
@@ -148,6 +150,16 @@  void fork_end(int child)
     }
 }
 
+void wait_cpu_work(void)
+{
+    pthread_cond_wait(&work_cond, &exclusive_lock);
+}
+
+void signal_cpu_work(void)
+{
+    pthread_cond_broadcast(&work_cond);
+}
+
 /* Wait for pending exclusive operations to complete.  The exclusive lock
    must be held.  */
 static inline void exclusive_idle(void)
@@ -206,6 +218,7 @@  static inline void cpu_exec_end(CPUState *cpu)
         pthread_cond_broadcast(&exclusive_cond);
     }
     exclusive_idle();
+    flush_queued_work(cpu);
     pthread_mutex_unlock(&exclusive_lock);
 }