Message ID | 1459870344-16773-6-git-send-email-alex.bennee@linaro.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 05/04/16 18:32, Alex Bennée wrote: (snip) > diff --git a/cpu-exec.c b/cpu-exec.c > index 74065d9..bd50fef 100644 > --- a/cpu-exec.c > +++ b/cpu-exec.c > @@ -205,18 +205,24 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, > if (max_cycles > CF_COUNT_MASK) > max_cycles = CF_COUNT_MASK; > > + tb_lock(); > cpu->tb_invalidated_flag = false; > tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags, > max_cycles | CF_NOCACHE > | (ignore_icount ? CF_IGNORE_ICOUNT : 0)); > tb->orig_tb = cpu->tb_invalidated_flag ? NULL : orig_tb; > cpu->current_tb = tb; > + tb_unlock(); > + > /* execute the generated code */ > trace_exec_tb_nocache(tb, tb->pc); > - cpu_tb_exec(cpu, tb); > + cpu_tb_exec(cpu, tb->tc_ptr); Very suspicious change. I can't even find which patch changes cpu_tb_exec() accordingly. > + > + tb_lock(); > cpu->current_tb = NULL; > tb_phys_invalidate(tb, -1); > tb_free(tb); > + tb_unlock(); > } > #endif > > diff --git a/exec.c b/exec.c > index 17f390e..c46c123 100644 > --- a/exec.c > +++ b/exec.c > @@ -2111,6 +2111,9 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags) > continue; > } > cpu->watchpoint_hit = wp; > + > + /* Unlocked by cpu_loop_exit or cpu_resume_from_signal. */ In fact, neither cpu_resume_from_signal() nor cpu_loop_exit() unlocks the lock by itself, it gets unlocked after sigsetjmp() returns via siglongjmp() back to cpu_exec(). So maybe it would be more clear to say something like "'tb_lock' gets unlocked after siglongjmp()"? > + tb_lock(); > tb_check_watchpoint(cpu); > if (wp->flags & BP_STOP_BEFORE_ACCESS) { > cpu->exception_index = EXCP_DEBUG; (snip) > diff --git a/translate-all.c b/translate-all.c > index a7ff5e7..935d24c 100644 > --- a/translate-all.c > +++ b/translate-all.c > @@ -834,7 +834,9 @@ static void page_flush_tb(void) > } > > /* flush all the translation blocks */ > -/* XXX: tb_flush is currently not thread safe */ > +/* XXX: tb_flush is currently not thread safe. 
System emulation calls it only > + * with tb_lock taken or from safe_work, so no need to take tb_lock here. > + */ "System emulation"? What about user-mode emulation? > void tb_flush(CPUState *cpu) > { > #if defined(DEBUG_FLUSH) > @@ -1350,6 +1352,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, > /* we remove all the TBs in the range [start, end[ */ > /* XXX: see if in some cases it could be faster to invalidate all > the code */ > + tb_lock(); Don't we need also protect a call to page_find() above? page_find() calls page_find_alloc() which is noted to be called with 'tb_lock' held. However, it might depend on the way we treat 'mmap_lock' in system mode emulation. We might also consider taking the lock outside of tb_invalidate_phys*() functions because they can be called after page_find(). > tb = p->first_tb; > while (tb != NULL) { > n = (uintptr_t)tb & 3; > @@ -1417,12 +1420,13 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, > if (current_tb_modified) { > /* we generate a block containing just the instruction > modifying the memory. It will ensure that it cannot modify > - itself */ > + itself. cpu_resume_from_signal unlocks tb_lock. */ > cpu->current_tb = NULL; > tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1); > cpu_resume_from_signal(cpu, NULL); > } > #endif > + tb_unlock(); > } > > #ifdef CONFIG_SOFTMMU (snip) > @ -1627,6 +1636,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) > target_ulong pc, cs_base; > uint64_t flags; > > + tb_lock(); We don't have to take 'tb_lock' for nether tb_find_pc() nor cpu_restore_state_from_tb() because the lock does not protect from tb_flush() anyway. I think the lock should be taken just before the first call to tb_phys_invalidate() in this function. 
> tb = tb_find_pc(retaddr); > if (!tb) { > cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p", > @@ -1678,11 +1688,15 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) > /* FIXME: In theory this could raise an exception. In practice > we have already translated the block once so it's probably ok. */ > tb_gen_code(cpu, pc, cs_base, flags, cflags); > - /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not > - the first in the TB) then we end up generating a whole new TB and > - repeating the fault, which is horribly inefficient. > - Better would be to execute just this insn uncached, or generate a > - second new TB. */ > + > + /* This unlocks the tb_lock. > + * > + * TODO: If env->pc != tb->pc (i.e. the faulting instruction was not > + * the first in the TB) then we end up generating a whole new TB and > + * repeating the fault, which is horribly inefficient. > + * Better would be to execute just this insn uncached, or generate a > + * second new TB. > + */ > cpu_resume_from_signal(cpu, NULL); > } (snip) Kind regards, Sergey
Just a couple answers/remarks. On 11/05/2016 14:45, Sergey Fedorov wrote: >> diff --git a/exec.c b/exec.c >> index 17f390e..c46c123 100644 >> --- a/exec.c >> +++ b/exec.c >> @@ -2111,6 +2111,9 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags) >> continue; >> } >> cpu->watchpoint_hit = wp; >> + >> + /* Unlocked by cpu_loop_exit or cpu_resume_from_signal. */ > > In fact, neither cpu_resume_from_signal() nor cpu_loop_exit() unlocks > the lock by itself, it gets unlocked after sigsetjmp() returns via > siglongjmp() back to cpu_exec(). So maybe it would be more clear to say > something like "'tb_lock' gets unlocked after siglongjmp()"? Yes, or "cpu_exec() unlocks tb_lock after cpu_loop_exit or cpu_resume_from_signal". Something like that, anyway. >> void tb_flush(CPUState *cpu) >> { >> #if defined(DEBUG_FLUSH) >> @@ -1350,6 +1352,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, >> /* we remove all the TBs in the range [start, end[ */ >> /* XXX: see if in some cases it could be faster to invalidate all >> the code */ >> + tb_lock(); > > Don't we need also protect a call to page_find() above? page_find() > calls page_find_alloc() which is noted to be called with 'tb_lock' held. Only if alloc=1; page_find calls it with alloc=0. > However, it might depend on the way we treat 'mmap_lock' in system mode > emulation. It's just not there; generally speaking it's replaced with tb_lock. >> @ -1627,6 +1636,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) >> target_ulong pc, cs_base; >> uint64_t flags; >> >> + tb_lock(); > > We don't have to take 'tb_lock' for nether tb_find_pc() nor > cpu_restore_state_from_tb() because the lock does not protect from > tb_flush() anyway. I think the lock should be taken just before the > first call to tb_phys_invalidate() in this function. Indeed, this dates back to when cpu_restore_state_from_tb did recompilation. 
In general, I don't have a big problem with slightly bigger critical sections than necessary, if they aren't in a hot path or they avoid repeated lock-unlock. Thanks, Paolo
On 11/05/16 15:52, Paolo Bonzini wrote: > Just a couple answers/remarks. > > On 11/05/2016 14:45, Sergey Fedorov wrote: (snip) >>> @@ -1350,6 +1352,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, >>> /* we remove all the TBs in the range [start, end[ */ >>> /* XXX: see if in some cases it could be faster to invalidate all >>> the code */ >>> + tb_lock(); >> Don't we need also protect a call to page_find() above? page_find() >> calls page_find_alloc() which is noted to be called with 'tb_lock' held. > Only if alloc=1; page_find calls it with alloc=0. Yeah, right :) >> However, it might depend on the way we treat 'mmap_lock' in system mode >> emulation. > It's just not there; generally speaking it's replaced with tb_lock. So why do we need yet another lock, 'mmap_lock', for user-mode emulation and don't need it for system mode? Kind regards, Sergey
Sergey Fedorov <serge.fdrv@gmail.com> writes: > On 05/04/16 18:32, Alex Bennée wrote: > (snip) >> diff --git a/cpu-exec.c b/cpu-exec.c >> index 74065d9..bd50fef 100644 >> --- a/cpu-exec.c >> +++ b/cpu-exec.c >> @@ -205,18 +205,24 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, >> if (max_cycles > CF_COUNT_MASK) >> max_cycles = CF_COUNT_MASK; >> >> + tb_lock(); >> cpu->tb_invalidated_flag = false; >> tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags, >> max_cycles | CF_NOCACHE >> | (ignore_icount ? CF_IGNORE_ICOUNT : 0)); >> tb->orig_tb = cpu->tb_invalidated_flag ? NULL : orig_tb; >> cpu->current_tb = tb; >> + tb_unlock(); >> + >> /* execute the generated code */ >> trace_exec_tb_nocache(tb, tb->pc); >> - cpu_tb_exec(cpu, tb); >> + cpu_tb_exec(cpu, tb->tc_ptr); > > Very suspicious change. I can't even find which patch changes > cpu_tb_exec() accordingly. I think that came from a patch this series was based on. It's gone now. > >> + >> + tb_lock(); >> cpu->current_tb = NULL; >> tb_phys_invalidate(tb, -1); >> tb_free(tb); >> + tb_unlock(); >> } >> #endif >> >> diff --git a/exec.c b/exec.c >> index 17f390e..c46c123 100644 >> --- a/exec.c >> +++ b/exec.c >> @@ -2111,6 +2111,9 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags) >> continue; >> } >> cpu->watchpoint_hit = wp; >> + >> + /* Unlocked by cpu_loop_exit or cpu_resume_from_signal. */ > > In fact, neither cpu_resume_from_signal() nor cpu_loop_exit() unlocks > the lock by itself, it gets unlocked after sigsetjmp() returns via > siglongjmp() back to cpu_exec(). So maybe it would be more clear to say > something like "'tb_lock' gets unlocked after siglongjmp()"? "Locks are reset when we longjmp back to the main cpu_exec loop"? Looking at where the patch is though I think I need to bring that bit forward from the main series. 
> >> + tb_lock(); >> tb_check_watchpoint(cpu); >> if (wp->flags & BP_STOP_BEFORE_ACCESS) { >> cpu->exception_index = EXCP_DEBUG; > (snip) >> diff --git a/translate-all.c b/translate-all.c >> index a7ff5e7..935d24c 100644 >> --- a/translate-all.c >> +++ b/translate-all.c >> @@ -834,7 +834,9 @@ static void page_flush_tb(void) >> } >> >> /* flush all the translation blocks */ >> -/* XXX: tb_flush is currently not thread safe */ >> +/* XXX: tb_flush is currently not thread safe. System emulation calls it only >> + * with tb_lock taken or from safe_work, so no need to take tb_lock here. >> + */ > > "System emulation"? What about user-mode emulation? It's still not thread safe ;-) It's a harder problem to solve because we can't just suspend all threads to reset the translation buffer. I'm not sure we want to try and fix it in this series. > >> void tb_flush(CPUState *cpu) >> { >> #if defined(DEBUG_FLUSH) >> @@ -1350,6 +1352,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, >> /* we remove all the TBs in the range [start, end[ */ >> /* XXX: see if in some cases it could be faster to invalidate all >> the code */ >> + tb_lock(); > > Don't we need also protect a call to page_find() above? page_find() > calls page_find_alloc() which is noted to be called with 'tb_lock' held. > However, it might depend on the way we treat 'mmap_lock' in system mode > emulation. We might also consider taking the lock outside of > tb_invalidate_phys*() functions because they can be called after > page_find(). > >> tb = p->first_tb; >> while (tb != NULL) { >> n = (uintptr_t)tb & 3; >> @@ -1417,12 +1420,13 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, >> if (current_tb_modified) { >> /* we generate a block containing just the instruction >> modifying the memory. It will ensure that it cannot modify >> - itself */ >> + itself. cpu_resume_from_signal unlocks tb_lock. 
*/ >> cpu->current_tb = NULL; >> tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1); >> cpu_resume_from_signal(cpu, NULL); >> } >> #endif >> + tb_unlock(); >> } >> >> #ifdef CONFIG_SOFTMMU > (snip) >> @ -1627,6 +1636,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) >> target_ulong pc, cs_base; >> uint64_t flags; >> >> + tb_lock(); > > We don't have to take 'tb_lock' for nether tb_find_pc() nor > cpu_restore_state_from_tb() because the lock does not protect from > tb_flush() anyway. I think the lock should be taken just before the > first call to tb_phys_invalidate() in this function. > >> tb = tb_find_pc(retaddr); >> if (!tb) { >> cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p", >> @@ -1678,11 +1688,15 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) >> /* FIXME: In theory this could raise an exception. In practice >> we have already translated the block once so it's probably ok. */ >> tb_gen_code(cpu, pc, cs_base, flags, cflags); >> - /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not >> - the first in the TB) then we end up generating a whole new TB and >> - repeating the fault, which is horribly inefficient. >> - Better would be to execute just this insn uncached, or generate a >> - second new TB. */ >> + >> + /* This unlocks the tb_lock. >> + * >> + * TODO: If env->pc != tb->pc (i.e. the faulting instruction was not >> + * the first in the TB) then we end up generating a whole new TB and >> + * repeating the fault, which is horribly inefficient. >> + * Better would be to execute just this insn uncached, or generate a >> + * second new TB. >> + */ >> cpu_resume_from_signal(cpu, NULL); >> } > (snip) > > Kind regards, > Sergey -- Alex Bennée
On 01/06/16 13:30, Alex Bennée wrote: > Sergey Fedorov <serge.fdrv@gmail.com> writes: > >> On 05/04/16 18:32, Alex Bennée wrote: >> (snip) >>> diff --git a/exec.c b/exec.c >>> index 17f390e..c46c123 100644 >>> --- a/exec.c >>> +++ b/exec.c >>> @@ -2111,6 +2111,9 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags) >>> continue; >>> } >>> cpu->watchpoint_hit = wp; >>> + >>> + /* Unlocked by cpu_loop_exit or cpu_resume_from_signal. */ >> In fact, neither cpu_resume_from_signal() nor cpu_loop_exit() unlocks >> the lock by itself, it gets unlocked after sigsetjmp() returns via >> siglongjmp() back to cpu_exec(). So maybe it would be more clear to say >> something like "'tb_lock' gets unlocked after siglongjmp()"? > > "Locks are reset when we longjmp back to the main cpu_exec loop"? Yes, this looks fine. > Looking at where the patch is though I think I need to bring that bit forward from the main series. > >>> + tb_lock(); >>> tb_check_watchpoint(cpu); >>> if (wp->flags & BP_STOP_BEFORE_ACCESS) { >>> cpu->exception_index = EXCP_DEBUG; >> (snip) >>> diff --git a/translate-all.c b/translate-all.c >>> index a7ff5e7..935d24c 100644 >>> --- a/translate-all.c >>> +++ b/translate-all.c >>> @@ -834,7 +834,9 @@ static void page_flush_tb(void) >>> } >>> >>> /* flush all the translation blocks */ >>> -/* XXX: tb_flush is currently not thread safe */ >>> +/* XXX: tb_flush is currently not thread safe. System emulation calls it only >>> + * with tb_lock taken or from safe_work, so no need to take tb_lock here. >>> + */ >> "System emulation"? What about user-mode emulation? > It's still not thread safe ;-) > > It's a harder problem to solve because we can't just suspend all > threads to reset the translation buffer. I'm not sure we want to try and > fix it in this series. I think it could be possible to do something like start_exclusive() to achieve this in user-only emulation. 
>>> void tb_flush(CPUState *cpu) >>> { >>> #if defined(DEBUG_FLUSH) Kind regards, Sergey
diff --git a/cpu-exec.c b/cpu-exec.c index 74065d9..bd50fef 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -205,18 +205,24 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, if (max_cycles > CF_COUNT_MASK) max_cycles = CF_COUNT_MASK; + tb_lock(); cpu->tb_invalidated_flag = false; tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags, max_cycles | CF_NOCACHE | (ignore_icount ? CF_IGNORE_ICOUNT : 0)); tb->orig_tb = cpu->tb_invalidated_flag ? NULL : orig_tb; cpu->current_tb = tb; + tb_unlock(); + /* execute the generated code */ trace_exec_tb_nocache(tb, tb->pc); - cpu_tb_exec(cpu, tb); + cpu_tb_exec(cpu, tb->tc_ptr); + + tb_lock(); cpu->current_tb = NULL; tb_phys_invalidate(tb, -1); tb_free(tb); + tb_unlock(); } #endif diff --git a/exec.c b/exec.c index 17f390e..c46c123 100644 --- a/exec.c +++ b/exec.c @@ -2111,6 +2111,9 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags) continue; } cpu->watchpoint_hit = wp; + + /* Unlocked by cpu_loop_exit or cpu_resume_from_signal. */ + tb_lock(); tb_check_watchpoint(cpu); if (wp->flags & BP_STOP_BEFORE_ACCESS) { cpu->exception_index = EXCP_DEBUG; diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c index c69f374..7c0d542 100644 --- a/hw/i386/kvmvapic.c +++ b/hw/i386/kvmvapic.c @@ -14,6 +14,7 @@ #include "sysemu/kvm.h" #include "hw/i386/apic_internal.h" #include "hw/sysbus.h" +#include "tcg/tcg.h" #define VAPIC_IO_PORT 0x7e @@ -446,6 +447,8 @@ static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip) resume_all_vcpus(); if (!kvm_enabled()) { + /* Unlocked by cpu_resume_from_signal. 
*/ + tb_lock(); cs->current_tb = NULL; tb_gen_code(cs, current_pc, current_cs_base, current_flags, 1); cpu_resume_from_signal(cs, NULL); diff --git a/translate-all.c b/translate-all.c index a7ff5e7..935d24c 100644 --- a/translate-all.c +++ b/translate-all.c @@ -834,7 +834,9 @@ static void page_flush_tb(void) } /* flush all the translation blocks */ -/* XXX: tb_flush is currently not thread safe */ +/* XXX: tb_flush is currently not thread safe. System emulation calls it only + * with tb_lock taken or from safe_work, so no need to take tb_lock here. + */ void tb_flush(CPUState *cpu) { #if defined(DEBUG_FLUSH) @@ -1350,6 +1352,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, /* we remove all the TBs in the range [start, end[ */ /* XXX: see if in some cases it could be faster to invalidate all the code */ + tb_lock(); tb = p->first_tb; while (tb != NULL) { n = (uintptr_t)tb & 3; @@ -1417,12 +1420,13 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, if (current_tb_modified) { /* we generate a block containing just the instruction modifying the memory. It will ensure that it cannot modify - itself */ + itself. cpu_resume_from_signal unlocks tb_lock. */ cpu->current_tb = NULL; tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1); cpu_resume_from_signal(cpu, NULL); } #endif + tb_unlock(); } #ifdef CONFIG_SOFTMMU @@ -1489,6 +1493,8 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr, if (!p) { return; } + + tb_lock(); tb = p->first_tb; #ifdef TARGET_HAS_PRECISE_SMC if (tb && pc != 0) { @@ -1530,9 +1536,12 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr, if (locked) { mmap_unlock(); } + + /* tb_lock released by cpu_resume_from_signal. 
*/ cpu_resume_from_signal(cpu, puc); } #endif + tb_unlock(); } #endif @@ -1627,6 +1636,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) target_ulong pc, cs_base; uint64_t flags; + tb_lock(); tb = tb_find_pc(retaddr); if (!tb) { cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p", @@ -1678,11 +1688,15 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) /* FIXME: In theory this could raise an exception. In practice we have already translated the block once so it's probably ok. */ tb_gen_code(cpu, pc, cs_base, flags, cflags); - /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not - the first in the TB) then we end up generating a whole new TB and - repeating the fault, which is horribly inefficient. - Better would be to execute just this insn uncached, or generate a - second new TB. */ + + /* This unlocks the tb_lock. + * + * TODO: If env->pc != tb->pc (i.e. the faulting instruction was not + * the first in the TB) then we end up generating a whole new TB and + * repeating the fault, which is horribly inefficient. + * Better would be to execute just this insn uncached, or generate a + * second new TB. + */ cpu_resume_from_signal(cpu, NULL); } @@ -1707,6 +1721,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) int direct_jmp_count, direct_jmp2_count, cross_page; TranslationBlock *tb; + tb_lock(); + target_code_size = 0; max_target_code_size = 0; cross_page = 0; @@ -1762,6 +1778,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) tcg_ctx.tb_ctx.tb_phys_invalidate_count); cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count); tcg_dump_info(f, cpu_fprintf); + + tb_unlock(); } void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf)