| Message ID | 20160715185726.10181-12-sergey.fedorov@linaro.org (mailing list archive) |
| --- | --- |
| State | New, archived |
Sergey Fedorov <sergey.fedorov@linaro.org> writes:

> From: Sergey Fedorov <serge.fdrv@gmail.com>
>
> This patch is based on the ideas found in the work of KONRAD Frederic [1],
> Alex Bennée [2], and Alvise Rigo [3].
>
> This mechanism allows an operation to be performed safely in a quiescent
> state. Quiescent state means: (1) no vCPU is running and (2) the BQL in
> system-mode or 'exclusive_lock' in user-mode emulation is held while
> performing the operation. This functionality is required e.g. for
> performing a translation buffer flush safely in multi-threaded user-mode
> emulation.

<snip>

>
>  QemuCond qemu_work_cond;
> +QemuCond qemu_safe_work_cond;
> +QemuCond qemu_exclusive_cond;
> +
> +static int safe_work_pending;
> +
> +void wait_safe_cpu_work(void)
> +{
> +    while (atomic_mb_read(&safe_work_pending) > 0) {
> +        qemu_cond_wait(&qemu_safe_work_cond, qemu_get_cpu_work_mutex());
> +    }
> +}

Testing in system mode I got a deadlock with one thread sitting here (I
assume after doing its work) and the other threads all waiting on the
exclusive condition. I think we need this:

void wait_safe_cpu_work(void)
{
    while (atomic_mb_read(&safe_work_pending) > 0) {
        /*
         * If there is pending safe work and no pending threads we
         * need to signal another thread to start its work.
         */
        if (tcg_pending_threads == 0) {
            qemu_cond_signal(&qemu_exclusive_cond);
        }
        qemu_cond_wait(&qemu_safe_work_cond, qemu_get_cpu_work_mutex());
    }
}

>
>  static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
>  {
> @@ -91,9 +103,18 @@ static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
>      cpu->queued_work_last = wi;
>      wi->next = NULL;
>      wi->done = false;
> +    if (wi->safe) {
> +        atomic_inc(&safe_work_pending);
> +    }
>      qemu_mutex_unlock(&cpu->work_mutex);
>
> -    qemu_cpu_kick(cpu);
> +    if (!wi->safe) {
> +        qemu_cpu_kick(cpu);
> +    } else {
> +        CPU_FOREACH(cpu) {
> +            qemu_cpu_kick(cpu);
> +        }
> +    }
>  }
>
>  void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
> @@ -108,6 +129,7 @@ void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
>      wi.func = func;
>      wi.data = data;
>      wi.free = false;
> +    wi.safe = false;
>
>      queue_work_on_cpu(cpu, &wi);
>      while (!atomic_mb_read(&wi.done)) {
> @@ -131,6 +153,20 @@ void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
>      wi->func = func;
>      wi->data = data;
>      wi->free = true;
> +    wi->safe = false;
> +
> +    queue_work_on_cpu(cpu, wi);
> +}
> +
> +void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
> +{
> +    struct qemu_work_item *wi;
> +
> +    wi = g_malloc0(sizeof(struct qemu_work_item));
> +    wi->func = func;
> +    wi->data = data;
> +    wi->free = true;
> +    wi->safe = true;
>
>      queue_work_on_cpu(cpu, wi);
>  }
> @@ -150,9 +186,20 @@ void process_queued_cpu_work(CPUState *cpu)
>          if (!cpu->queued_work_first) {
>              cpu->queued_work_last = NULL;
>          }
> +        if (wi->safe) {
> +            while (tcg_pending_threads) {
> +                qemu_cond_wait(&qemu_exclusive_cond,
> +                               qemu_get_cpu_work_mutex());
> +            }
> +        }
>          qemu_mutex_unlock(&cpu->work_mutex);
>          wi->func(cpu, wi->data);
>          qemu_mutex_lock(&cpu->work_mutex);
> +        if (wi->safe) {
> +            if (!atomic_dec_fetch(&safe_work_pending)) {
> +                qemu_cond_broadcast(&qemu_safe_work_cond);
> +            }
> +        }
>          if (wi->free) {
>              g_free(wi);
>          } else {
> diff --git a/cpus.c b/cpus.c
> index 282d7e399902..b7122043f650 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -903,6 +903,8 @@ void qemu_init_cpu_loop(void)
>      qemu_cond_init(&qemu_cpu_cond);
>      qemu_cond_init(&qemu_pause_cond);
>      qemu_cond_init(&qemu_work_cond);
> +    qemu_cond_init(&qemu_safe_work_cond);
> +    qemu_cond_init(&qemu_exclusive_cond);
>      qemu_cond_init(&qemu_io_proceeded_cond);
>      qemu_mutex_init(&qemu_global_mutex);
>
> @@ -926,6 +928,20 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu)
>  {
>  }
>
> +/* called with qemu_global_mutex held */
> +static inline void tcg_cpu_exec_start(CPUState *cpu)
> +{
> +    tcg_pending_threads++;
> +}
> +
> +/* called with qemu_global_mutex held */
> +static inline void tcg_cpu_exec_end(CPUState *cpu)
> +{
> +    if (--tcg_pending_threads) {
> +        qemu_cond_broadcast(&qemu_exclusive_cond);

Is the broadcast a bit excessive here? Shouldn't we just wake up one
thread with a qemu_cond_signal?

> +    }
> +}
> +
>  static void qemu_wait_io_event_common(CPUState *cpu)
>  {
>      if (cpu->stop) {
> @@ -950,6 +966,8 @@ static void qemu_tcg_wait_io_event(CPUState *cpu)
>      CPU_FOREACH(cpu) {
>          qemu_wait_io_event_common(cpu);
>      }
> +
> +    wait_safe_cpu_work();
>  }
>
>  static void qemu_kvm_wait_io_event(CPUState *cpu)
> @@ -1485,7 +1503,9 @@ static void tcg_exec_all(void)
>                            (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
>
>          if (cpu_can_run(cpu)) {
> +            tcg_cpu_exec_start(cpu);
>              r = tcg_cpu_exec(cpu);
> +            tcg_cpu_exec_end(cpu);
>              if (r == EXCP_DEBUG) {
>                  cpu_handle_guest_debug(cpu);
>                  break;
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 8d5c7dbcf5a9..1f0272beb362 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -405,12 +405,22 @@ extern int singlestep;
>
>  /* cpu-exec.c, accessed with atomic_mb_read/atomic_mb_set */
>  extern CPUState *tcg_current_cpu;
> +extern int tcg_pending_threads;
>  extern bool exit_request;
>
>  /**
>   * qemu_work_cond - condition to wait for CPU work items completion
>   */
>  extern QemuCond qemu_work_cond;
> +/**
> + * qemu_safe_work_cond - condition to wait for safe CPU work items completion
> + */
> +extern QemuCond qemu_safe_work_cond;
> +/**
> + * qemu_exclusive_cond - condition to wait for all TCG threads to be out of
> + *                       guest code execution loop
> + */
> +extern QemuCond qemu_exclusive_cond;
>
>  /**
>   * qemu_get_cpu_work_mutex() - get the mutex which protects CPU work execution
> @@ -423,5 +433,9 @@ QemuMutex *qemu_get_cpu_work_mutex(void);
>   * @cpu: The CPU which work queue to process.
>   */
>  void process_queued_cpu_work(CPUState *cpu);
> +/**
> + * wait_safe_cpu_work() - wait until all safe CPU work items has processed
> + */
> +void wait_safe_cpu_work(void);
>
>  #endif
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index c19a673f9f68..ab67bf2ba19f 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -238,6 +238,7 @@ struct qemu_work_item {
>      void *data;
>      int done;
>      bool free;
> +    bool safe;
>  };
>
>  /**
> @@ -632,6 +633,19 @@ void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
>  void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
>
>  /**
> + * async_safe_run_on_cpu:
> + * @cpu: The vCPU to run on.
> + * @func: The function to be executed.
> + * @data: Data to pass to the function.
> + *
> + * Schedules the function @func for execution on the vCPU @cpu asynchronously
> + * and in quiescent state. Quiescent state means: (1) all other vCPUs are
> + * halted and (2) #qemu_global_mutex (a.k.a. BQL) in system-mode or
> + * #exclusive_lock in user-mode emulation is held while @func is executing.
> + */
> +void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
> +
> +/**
>   * qemu_get_cpu:
>   * @index: The CPUState@cpu_index value of the CPU to obtain.
>   *
> diff --git a/linux-user/main.c b/linux-user/main.c
> index fce61d5a35fc..d0ff5f9976e5 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -110,18 +110,17 @@ int cpu_get_pic_interrupt(CPUX86State *env)
>     which requires quite a lot of per host/target work. */
>  static QemuMutex cpu_list_mutex;
>  static QemuMutex exclusive_lock;
> -static QemuCond exclusive_cond;
>  static QemuCond exclusive_resume;
>  static bool exclusive_pending;
> -static int tcg_pending_threads;
>
>  void qemu_init_cpu_loop(void)
>  {
>      qemu_mutex_init(&cpu_list_mutex);
>      qemu_mutex_init(&exclusive_lock);
> -    qemu_cond_init(&exclusive_cond);
>      qemu_cond_init(&exclusive_resume);
>      qemu_cond_init(&qemu_work_cond);
> +    qemu_cond_init(&qemu_safe_work_cond);
> +    qemu_cond_init(&qemu_exclusive_cond);
>  }
>
>  /* Make sure everything is in a consistent state for calling fork(). */
> @@ -148,9 +147,10 @@ void fork_end(int child)
>          exclusive_pending = false;
>          qemu_mutex_init(&exclusive_lock);
>          qemu_mutex_init(&cpu_list_mutex);
> -        qemu_cond_init(&exclusive_cond);
>          qemu_cond_init(&exclusive_resume);
>          qemu_cond_init(&qemu_work_cond);
> +        qemu_cond_init(&qemu_safe_work_cond);
> +        qemu_cond_init(&qemu_exclusive_cond);
>          qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
>          gdbserver_fork(thread_cpu);
>      } else {
> @@ -190,7 +190,7 @@ static inline void start_exclusive(void)
>          }
>      }
>      while (tcg_pending_threads) {
> -        qemu_cond_wait(&exclusive_cond, &exclusive_lock);
> +        qemu_cond_wait(&qemu_exclusive_cond, &exclusive_lock);
>      }
>  }
>
> @@ -219,10 +219,11 @@ static inline void cpu_exec_end(CPUState *cpu)
>      cpu->running = false;
>      tcg_pending_threads--;
>      if (!tcg_pending_threads) {
> -        qemu_cond_signal(&exclusive_cond);
> +        qemu_cond_broadcast(&qemu_exclusive_cond);
>      }
>      exclusive_idle();
>      process_queued_cpu_work(cpu);
> +    wait_safe_cpu_work();
>      qemu_mutex_unlock(&exclusive_lock);
>  }

--
Alex Bennée
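To make the handshake under discussion easier to follow, here is a toy model of the two-condition scheme in plain pthreads. It is a sketch of the locking pattern only, not QEMU code: every name in it is invented, with "pending" standing in for safe_work_pending and "running" for tcg_pending_threads, and a thread that queues a safe work item is assumed to increment "pending" under the same lock.

    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    /* plays the role of qemu_safe_work_cond */
    static pthread_cond_t safe_done = PTHREAD_COND_INITIALIZER;
    /* plays the role of qemu_exclusive_cond */
    static pthread_cond_t all_stopped = PTHREAD_COND_INITIALIZER;
    static int pending;  /* queued safe work items not yet run */
    static int running;  /* threads still inside the exec loop */

    /* Enter the (simulated) guest execution loop. */
    static void exec_loop_enter(void)
    {
        pthread_mutex_lock(&lock);
        running++;
        pthread_mutex_unlock(&lock);
    }

    /* Leave the loop; the last thread out wakes everyone parked on
     * all_stopped so that pending safe work can run. */
    static void exec_loop_exit(void)
    {
        pthread_mutex_lock(&lock);
        if (--running == 0) {
            pthread_cond_broadcast(&all_stopped);
        }
        pthread_mutex_unlock(&lock);
    }

    /* Run one safe work item exclusively: wait until no thread is left
     * in the exec loop, run it, and report completion. The lock is held
     * across func() here for brevity; QEMU drops the work mutex around
     * the call. */
    static void run_safe_work_item(void (*func)(void *), void *data)
    {
        pthread_mutex_lock(&lock);
        while (running > 0) {
            pthread_cond_wait(&all_stopped, &lock);
        }
        func(data);
        if (--pending == 0) {
            pthread_cond_broadcast(&safe_done);
        }
        pthread_mutex_unlock(&lock);
    }

    /* Mirrors Alex's fixed wait_safe_cpu_work(): before sleeping, a
     * waiter that finds the exec loop empty must wake a parked thread,
     * otherwise every thread ends up asleep on one of the two
     * conditions and the remaining safe work never runs (the deadlock
     * described above). */
    static void wait_for_safe_work(void)
    {
        pthread_mutex_lock(&lock);
        while (pending > 0) {
            if (running == 0) {
                pthread_cond_signal(&all_stopped);
            }
            pthread_cond_wait(&safe_done, &lock);
        }
        pthread_mutex_unlock(&lock);
    }

In this model, as in the proposed fix, a single pthread_cond_signal is enough in wait_for_safe_work() because any one woken thread will either run the remaining work or, on its next pass, wake another.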
diff --git a/bsd-user/main.c b/bsd-user/main.c
index f738dd64d691..5433bca0fca6 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -66,9 +66,10 @@ int cpu_get_pic_interrupt(CPUX86State *env)
 void qemu_init_cpu_loop(void)
 {
     /* We need to do this becuase process_queued_cpu_work() calls
-     * qemu_cond_broadcast() on it
+     * qemu_cond_broadcast() on them
      */
     qemu_cond_init(&qemu_work_cond);
+    qemu_cond_init(&qemu_safe_work_cond);
 }
 
 QemuMutex *qemu_get_cpu_work_mutex(void)
diff --git a/cpu-exec-common.c b/cpu-exec-common.c
index a233f0124559..6f278d6d3b70 100644
--- a/cpu-exec-common.c
+++ b/cpu-exec-common.c
@@ -25,6 +25,7 @@
 
 bool exit_request;
 CPUState *tcg_current_cpu;
+int tcg_pending_threads;
 
 /* exit the current TB, but without causing any exception to be raised */
 void cpu_loop_exit_noexc(CPUState *cpu)
@@ -79,6 +80,17 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
 }
 
 QemuCond qemu_work_cond;
+QemuCond qemu_safe_work_cond;
+QemuCond qemu_exclusive_cond;
+
+static int safe_work_pending;
+
+void wait_safe_cpu_work(void)
+{
+    while (atomic_mb_read(&safe_work_pending) > 0) {
+        qemu_cond_wait(&qemu_safe_work_cond, qemu_get_cpu_work_mutex());
+    }
+}
 
 static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
 {
@@ -91,9 +103,18 @@ static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
     cpu->queued_work_last = wi;
     wi->next = NULL;
     wi->done = false;
+    if (wi->safe) {
+        atomic_inc(&safe_work_pending);
+    }
     qemu_mutex_unlock(&cpu->work_mutex);
 
-    qemu_cpu_kick(cpu);
+    if (!wi->safe) {
+        qemu_cpu_kick(cpu);
+    } else {
+        CPU_FOREACH(cpu) {
+            qemu_cpu_kick(cpu);
+        }
+    }
 }
 
 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
@@ -108,6 +129,7 @@ void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
     wi.func = func;
     wi.data = data;
     wi.free = false;
+    wi.safe = false;
 
     queue_work_on_cpu(cpu, &wi);
     while (!atomic_mb_read(&wi.done)) {
@@ -131,6 +153,20 @@ void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
     wi->func = func;
     wi->data = data;
     wi->free = true;
+    wi->safe = false;
+
+    queue_work_on_cpu(cpu, wi);
+}
+
+void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
+{
+    struct qemu_work_item *wi;
+
+    wi = g_malloc0(sizeof(struct qemu_work_item));
+    wi->func = func;
+    wi->data = data;
+    wi->free = true;
+    wi->safe = true;
 
     queue_work_on_cpu(cpu, wi);
 }
@@ -150,9 +186,20 @@ void process_queued_cpu_work(CPUState *cpu)
         if (!cpu->queued_work_first) {
             cpu->queued_work_last = NULL;
         }
+        if (wi->safe) {
+            while (tcg_pending_threads) {
+                qemu_cond_wait(&qemu_exclusive_cond,
+                               qemu_get_cpu_work_mutex());
+            }
+        }
         qemu_mutex_unlock(&cpu->work_mutex);
         wi->func(cpu, wi->data);
         qemu_mutex_lock(&cpu->work_mutex);
+        if (wi->safe) {
+            if (!atomic_dec_fetch(&safe_work_pending)) {
+                qemu_cond_broadcast(&qemu_safe_work_cond);
+            }
+        }
         if (wi->free) {
             g_free(wi);
         } else {
diff --git a/cpus.c b/cpus.c
index 282d7e399902..b7122043f650 100644
--- a/cpus.c
+++ b/cpus.c
@@ -903,6 +903,8 @@ void qemu_init_cpu_loop(void)
     qemu_cond_init(&qemu_cpu_cond);
     qemu_cond_init(&qemu_pause_cond);
     qemu_cond_init(&qemu_work_cond);
+    qemu_cond_init(&qemu_safe_work_cond);
+    qemu_cond_init(&qemu_exclusive_cond);
     qemu_cond_init(&qemu_io_proceeded_cond);
     qemu_mutex_init(&qemu_global_mutex);
 
@@ -926,6 +928,20 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu)
 {
 }
 
+/* called with qemu_global_mutex held */
+static inline void tcg_cpu_exec_start(CPUState *cpu)
+{
+    tcg_pending_threads++;
+}
+
+/* called with qemu_global_mutex held */
+static inline void tcg_cpu_exec_end(CPUState *cpu)
+{
+    if (--tcg_pending_threads) {
+        qemu_cond_broadcast(&qemu_exclusive_cond);
+    }
+}
+
 static void qemu_wait_io_event_common(CPUState *cpu)
 {
     if (cpu->stop) {
@@ -950,6 +966,8 @@ static void qemu_tcg_wait_io_event(CPUState *cpu)
     CPU_FOREACH(cpu) {
         qemu_wait_io_event_common(cpu);
     }
+
+    wait_safe_cpu_work();
 }
 
 static void qemu_kvm_wait_io_event(CPUState *cpu)
@@ -1485,7 +1503,9 @@ static void tcg_exec_all(void)
                           (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
 
         if (cpu_can_run(cpu)) {
+            tcg_cpu_exec_start(cpu);
             r = tcg_cpu_exec(cpu);
+            tcg_cpu_exec_end(cpu);
             if (r == EXCP_DEBUG) {
                 cpu_handle_guest_debug(cpu);
                 break;
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 8d5c7dbcf5a9..1f0272beb362 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -405,12 +405,22 @@ extern int singlestep;
 
 /* cpu-exec.c, accessed with atomic_mb_read/atomic_mb_set */
 extern CPUState *tcg_current_cpu;
+extern int tcg_pending_threads;
 extern bool exit_request;
 
 /**
  * qemu_work_cond - condition to wait for CPU work items completion
  */
 extern QemuCond qemu_work_cond;
+/**
+ * qemu_safe_work_cond - condition to wait for safe CPU work items completion
+ */
+extern QemuCond qemu_safe_work_cond;
+/**
+ * qemu_exclusive_cond - condition to wait for all TCG threads to be out of
+ *                       guest code execution loop
+ */
+extern QemuCond qemu_exclusive_cond;
 
 /**
  * qemu_get_cpu_work_mutex() - get the mutex which protects CPU work execution
@@ -423,5 +433,9 @@ QemuMutex *qemu_get_cpu_work_mutex(void);
  * @cpu: The CPU which work queue to process.
  */
 void process_queued_cpu_work(CPUState *cpu);
+/**
+ * wait_safe_cpu_work() - wait until all safe CPU work items has processed
+ */
+void wait_safe_cpu_work(void);
 
 #endif
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index c19a673f9f68..ab67bf2ba19f 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -238,6 +238,7 @@ struct qemu_work_item {
     void *data;
     int done;
     bool free;
+    bool safe;
 };
 
 /**
@@ -632,6 +633,19 @@ void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
 void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
 
 /**
+ * async_safe_run_on_cpu:
+ * @cpu: The vCPU to run on.
+ * @func: The function to be executed.
+ * @data: Data to pass to the function.
+ *
+ * Schedules the function @func for execution on the vCPU @cpu asynchronously
+ * and in quiescent state. Quiescent state means: (1) all other vCPUs are
+ * halted and (2) #qemu_global_mutex (a.k.a. BQL) in system-mode or
+ * #exclusive_lock in user-mode emulation is held while @func is executing.
+ */
+void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
+
+/**
  * qemu_get_cpu:
  * @index: The CPUState@cpu_index value of the CPU to obtain.
  *
diff --git a/linux-user/main.c b/linux-user/main.c
index fce61d5a35fc..d0ff5f9976e5 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -110,18 +110,17 @@ int cpu_get_pic_interrupt(CPUX86State *env)
    which requires quite a lot of per host/target work. */
 static QemuMutex cpu_list_mutex;
 static QemuMutex exclusive_lock;
-static QemuCond exclusive_cond;
 static QemuCond exclusive_resume;
 static bool exclusive_pending;
-static int tcg_pending_threads;
 
 void qemu_init_cpu_loop(void)
 {
     qemu_mutex_init(&cpu_list_mutex);
     qemu_mutex_init(&exclusive_lock);
-    qemu_cond_init(&exclusive_cond);
     qemu_cond_init(&exclusive_resume);
     qemu_cond_init(&qemu_work_cond);
+    qemu_cond_init(&qemu_safe_work_cond);
+    qemu_cond_init(&qemu_exclusive_cond);
 }
 
 /* Make sure everything is in a consistent state for calling fork(). */
@@ -148,9 +147,10 @@ void fork_end(int child)
         exclusive_pending = false;
         qemu_mutex_init(&exclusive_lock);
         qemu_mutex_init(&cpu_list_mutex);
-        qemu_cond_init(&exclusive_cond);
         qemu_cond_init(&exclusive_resume);
         qemu_cond_init(&qemu_work_cond);
+        qemu_cond_init(&qemu_safe_work_cond);
+        qemu_cond_init(&qemu_exclusive_cond);
         qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
         gdbserver_fork(thread_cpu);
     } else {
@@ -190,7 +190,7 @@ static inline void start_exclusive(void)
         }
     }
     while (tcg_pending_threads) {
-        qemu_cond_wait(&exclusive_cond, &exclusive_lock);
+        qemu_cond_wait(&qemu_exclusive_cond, &exclusive_lock);
     }
 }
 
@@ -219,10 +219,11 @@ static inline void cpu_exec_end(CPUState *cpu)
     cpu->running = false;
     tcg_pending_threads--;
     if (!tcg_pending_threads) {
-        qemu_cond_signal(&exclusive_cond);
+        qemu_cond_broadcast(&qemu_exclusive_cond);
     }
     exclusive_idle();
     process_queued_cpu_work(cpu);
+    wait_safe_cpu_work();
     qemu_mutex_unlock(&exclusive_lock);
 }
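For context on how the new API is meant to be used: the commit message names a safe translation buffer flush in multi-threaded user-mode emulation as the motivating case. The sketch below relies only on the async_safe_run_on_cpu() contract documented in the qom/cpu.h hunk above; flush_tb_work, schedule_tb_flush and the tb_flush() call are illustrative assumptions, not code from this series.

    /* Hypothetical work function. Per the async_safe_run_on_cpu()
     * contract, it runs with all other vCPUs halted and with the BQL
     * (system-mode) or exclusive_lock (user-mode) held, so no vCPU can
     * be executing translated code while the TB cache is torn down. */
    static void flush_tb_work(CPUState *cpu, void *data)
    {
        tb_flush(cpu);
    }

    /* Instead of flushing in place, queue a safe work item; every vCPU
     * gets kicked and the flush runs once the system is quiescent. */
    static void schedule_tb_flush(CPUState *cpu)
    {
        async_safe_run_on_cpu(cpu, flush_tb_work, NULL);
    }

Because the item is queued with wi->safe set, queue_work_on_cpu() kicks every vCPU rather than just @cpu, and process_queued_cpu_work() delays the call until tcg_pending_threads has dropped to zero.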