Message ID | 57190B52.7070409@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Sergey Fedorov <serge.fdrv@gmail.com> writes: > On 21/04/16 19:16, Sergey Fedorov wrote: >> On 21/04/16 18:55, Alex Bennée wrote: >>> Sergey Fedorov <serge.fdrv@gmail.com> writes: >>> >>>> On 18/04/16 20:51, Sergey Fedorov wrote: >>>>> On 18/04/16 20:17, Alex Bennée wrote: >>>>>> Sergey Fedorov <serge.fdrv@gmail.com> writes: >>>>>>> On 18/04/16 17:09, Alex Bennée wrote: >>>>>>>> Sergey Fedorov <sergey.fedorov@linaro.org> writes: >>>>>>>>> diff --git a/cpu-exec.c b/cpu-exec.c >>>>>>> (snip) >>>>>>>>> @@ -507,14 +510,12 @@ int cpu_exec(CPUState *cpu) >>>>>>>>> } >>>>>>>>> tb_lock(); >>>>>>>>> tb = tb_find_fast(cpu); >>>>>>>>> - /* Note: we do it here to avoid a gcc bug on Mac OS X when >>>>>>>>> - doing it in tb_find_slow */ >>>>>>>> Is this still true? Would it make more sense to push the patching down >>>>>>>> to the gen_code? >>>>>>> This comment comes up to the commit: >>>>>>> >>>>>>> commit 1538800276aa7228d74f9d00bf275f54dc9e9b43 >>>>>>> Author: bellard <bellard@c046a42c-6fe2-441c-8c8c-71466251a162> >>>>>>> Date: Mon Dec 19 01:42:32 2005 +0000 >>>>>>> >>>>>>> workaround for gcc bug on PowerPC >>>>>>> >>>>>>> >>>>>>> It was added more than ten years ago. Anyway, now this code is here not >>>>>>> because of the bug: we need to reset 'next_tb' which is a local variable >>>>>>> in cpu_exec(). Personally, I don't think it would be neater to hide it >>>>>>> into gen_code(). Do you have some thoughts on how we could benefit from >>>>>>> doing so? BTW, I had a feeling that it may be useful to reorganize >>>>>>> cpu_exec() a bit, although I don't have a solid idea of how to do this >>>>>>> so far. >>>>>> I'm mainly eyeing the tb_lock/unlock which would be nice to push further >>>>>> down the call chain if we can, especially if the need to lock >>>>>> tb_find_fast can be removed later on. >>>>> Yes, it would be nice to possibly have all tb_lock/unlock() calls (or at >>>>> least their pairs) in the same block. 
There is a lot to be thought over :) >>>> It's not so simple because tb_find_fast() is also called in replay mode >>>> to find a TB for cpu_exec_nocache()... I'm not sure it's worth touching >>>> it now. >>> If the locking is pushed into tb_find_fast or further down is this an >>> issue? >> We would have to pass 'next_tb' (or 'last_tb' and 'tb_exit' after >> cleaning it up) if we move TB chaining code to tb_find_fast(). But >> tb_find_fast() is also called in replay mode to find a TB for >> cpu_exec_nocache() where we don't bother with TB chaining... Do you >> think it would be fine to make those changes? > > Are you thinking about something like this: > > diff --git a/cpu-exec.c b/cpu-exec.c > index 1d12e8bc2739..07e9ede49193 100644 > --- a/cpu-exec.c > +++ b/cpu-exec.c > @@ -320,7 +320,9 @@ found: > return tb; > } > > -static inline TranslationBlock *tb_find_fast(CPUState *cpu) > +static inline TranslationBlock *tb_find_fast(CPUState *cpu, > + TranslationBlock **last_tb, > + int tb_exit) > { > CPUArchState *env = (CPUArchState *)cpu->env_ptr; > TranslationBlock *tb; > @@ -331,11 +333,27 @@ static inline TranslationBlock > *tb_find_fast(CPUState *cpu) > always be the same before a given translated block > is executed. */ > cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); > + tb_lock(); > tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]; > if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base || > tb->flags != flags)) { > tb = tb_find_slow(cpu, pc, cs_base, flags); > } > + if (cpu->tb_flushed) { > + /* Ensure that no TB jump will be modified as the > + * translation buffer has been flushed. > + */ > + *last_tb = NULL; > + cpu->tb_flushed = false; > + } > + /* see if we can patch the calling TB. When the TB > + spans two pages, we cannot safely do a direct > + jump. 
*/ > + if (*last_tb != NULL && tb->page_addr[1] == -1 > + && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { > + tb_add_jump(*last_tb, tb_exit, tb); > + } > + tb_unlock(); > return tb; > } > > @@ -441,7 +459,8 @@ int cpu_exec(CPUState *cpu) > } else if (replay_has_exception() > && cpu->icount_decr.u16.low + cpu->icount_extra > == 0) { > /* try to cause an exception pending in the log */ > - cpu_exec_nocache(cpu, 1, tb_find_fast(cpu), true); > + last_tb = NULL; /* Avoid chaining TBs */ > + cpu_exec_nocache(cpu, 1, tb_find_fast(cpu, &last_tb, > 0), true); > ret = -1; > break; > #endif > @@ -511,23 +530,7 @@ int cpu_exec(CPUState *cpu) > cpu->exception_index = EXCP_INTERRUPT; > cpu_loop_exit(cpu); > } > - tb_lock(); > - tb = tb_find_fast(cpu); > - if (cpu->tb_flushed) { > - /* Ensure that no TB jump will be modified as the > - * translation buffer has been flushed. > - */ > - last_tb = NULL; > - cpu->tb_flushed = false; > - } > - /* see if we can patch the calling TB. When the TB > - spans two pages, we cannot safely do a direct > - jump. */ > - if (last_tb != NULL && tb->page_addr[1] == -1 > - && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { > - tb_add_jump(last_tb, tb_exit, tb); > - } > - tb_unlock(); > + tb = tb_find_fast(cpu, &last_tb, tb_exit); > if (likely(!cpu->exit_request)) { > uintptr_t ret; > trace_exec_tb(tb, tb->pc); > > ... right? Yeah that sort of thing. > > Kind regards, > Sergey -- Alex Bennée
On 22/04/16 00:54, Alex Bennée wrote: > Sergey Fedorov <serge.fdrv@gmail.com> writes: > >> On 21/04/16 19:16, Sergey Fedorov wrote: >>> On 21/04/16 18:55, Alex Bennée wrote: >>>> Sergey Fedorov <serge.fdrv@gmail.com> writes: >>>> >>>>> On 18/04/16 20:51, Sergey Fedorov wrote: >>>>>> On 18/04/16 20:17, Alex Bennée wrote: >>>>>>> Sergey Fedorov <serge.fdrv@gmail.com> writes: >>>>>>>> On 18/04/16 17:09, Alex Bennée wrote: >>>>>>>>> Sergey Fedorov <sergey.fedorov@linaro.org> writes: >>>>>>>>>> diff --git a/cpu-exec.c b/cpu-exec.c >>>>>>>> (snip) >>>>>>>>>> @@ -507,14 +510,12 @@ int cpu_exec(CPUState *cpu) >>>>>>>>>> } >>>>>>>>>> tb_lock(); >>>>>>>>>> tb = tb_find_fast(cpu); >>>>>>>>>> - /* Note: we do it here to avoid a gcc bug on Mac OS X when >>>>>>>>>> - doing it in tb_find_slow */ >>>>>>>>> Is this still true? Would it make more sense to push the patching down >>>>>>>>> to the gen_code? >>>>>>>> This comment comes up to the commit: >>>>>>>> >>>>>>>> commit 1538800276aa7228d74f9d00bf275f54dc9e9b43 >>>>>>>> Author: bellard <bellard@c046a42c-6fe2-441c-8c8c-71466251a162> >>>>>>>> Date: Mon Dec 19 01:42:32 2005 +0000 >>>>>>>> >>>>>>>> workaround for gcc bug on PowerPC >>>>>>>> >>>>>>>> >>>>>>>> It was added more than ten years ago. Anyway, now this code is here not >>>>>>>> because of the bug: we need to reset 'next_tb' which is a local variable >>>>>>>> in cpu_exec(). Personally, I don't think it would be neater to hide it >>>>>>>> into gen_code(). Do you have some thoughts on how we could benefit from >>>>>>>> doing so? BTW, I had a feeling that it may be useful to reorganize >>>>>>>> cpu_exec() a bit, although I don't have a solid idea of how to do this >>>>>>>> so far. >>>>>>> I'm mainly eyeing the tb_lock/unlock which would be nice to push further >>>>>>> down the call chain if we can, especially if the need to lock >>>>>>> tb_find_fast can be removed later on. 
>>>>>> Yes, it would be nice to possibly have all tb_lock/unlock() calls (or at >>>>>> least their pairs) in the same block. There is a lot to be thought over :) >>>>> It's not so simple because tb_find_fast() is also called in replay mode >>>>> to find a TB for cpu_exec_nocache()... I'm not sure it's worth touching >>>>> it now. >>>> If the locking is pushed into tb_find_fast or further down is this an >>>> issue? >>> We would have to pass 'next_tb' (or 'last_tb' and 'tb_exit' after >>> cleaning it up) if we move TB chaining code to tb_find_fast(). But >>> tb_find_fast() is also called in replay mode to find a TB for >>> cpu_exec_nocache() where we don't bother with TB chaining... Do you >>> think it would be fine to make those changes? >> Are you thinking about something like this: >> >> diff --git a/cpu-exec.c b/cpu-exec.c >> index 1d12e8bc2739..07e9ede49193 100644 >> --- a/cpu-exec.c >> +++ b/cpu-exec.c >> @@ -320,7 +320,9 @@ found: >> return tb; >> } >> >> -static inline TranslationBlock *tb_find_fast(CPUState *cpu) >> +static inline TranslationBlock *tb_find_fast(CPUState *cpu, >> + TranslationBlock **last_tb, >> + int tb_exit) >> { >> CPUArchState *env = (CPUArchState *)cpu->env_ptr; >> TranslationBlock *tb; >> @@ -331,11 +333,27 @@ static inline TranslationBlock >> *tb_find_fast(CPUState *cpu) >> always be the same before a given translated block >> is executed. */ >> cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); >> + tb_lock(); >> tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]; >> if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base || >> tb->flags != flags)) { >> tb = tb_find_slow(cpu, pc, cs_base, flags); >> } >> + if (cpu->tb_flushed) { >> + /* Ensure that no TB jump will be modified as the >> + * translation buffer has been flushed. >> + */ >> + *last_tb = NULL; >> + cpu->tb_flushed = false; >> + } >> + /* see if we can patch the calling TB. When the TB >> + spans two pages, we cannot safely do a direct >> + jump. 
*/ >> + if (*last_tb != NULL && tb->page_addr[1] == -1 >> + && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { >> + tb_add_jump(*last_tb, tb_exit, tb); >> + } >> + tb_unlock(); >> return tb; >> } >> >> @@ -441,7 +459,8 @@ int cpu_exec(CPUState *cpu) >> } else if (replay_has_exception() >> && cpu->icount_decr.u16.low + cpu->icount_extra >> == 0) { >> /* try to cause an exception pending in the log */ >> - cpu_exec_nocache(cpu, 1, tb_find_fast(cpu), true); >> + last_tb = NULL; /* Avoid chaining TBs */ >> + cpu_exec_nocache(cpu, 1, tb_find_fast(cpu, &last_tb, >> 0), true); >> ret = -1; >> break; >> #endif >> @@ -511,23 +530,7 @@ int cpu_exec(CPUState *cpu) >> cpu->exception_index = EXCP_INTERRUPT; >> cpu_loop_exit(cpu); >> } >> - tb_lock(); >> - tb = tb_find_fast(cpu); >> - if (cpu->tb_flushed) { >> - /* Ensure that no TB jump will be modified as the >> - * translation buffer has been flushed. >> - */ >> - last_tb = NULL; >> - cpu->tb_flushed = false; >> - } >> - /* see if we can patch the calling TB. When the TB >> - spans two pages, we cannot safely do a direct >> - jump. */ >> - if (last_tb != NULL && tb->page_addr[1] == -1 >> - && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { >> - tb_add_jump(last_tb, tb_exit, tb); >> - } >> - tb_unlock(); >> + tb = tb_find_fast(cpu, &last_tb, tb_exit); >> if (likely(!cpu->exit_request)) { >> uintptr_t ret; >> trace_exec_tb(tb, tb->pc); >> >> ... right? > Yeah that sort of thing. Okay, I'll include this in the next respin. Kind regards, Sergey
```diff
diff --git a/cpu-exec.c b/cpu-exec.c
index 1d12e8bc2739..07e9ede49193 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -320,7 +320,9 @@ found:
     return tb;
 }
 
-static inline TranslationBlock *tb_find_fast(CPUState *cpu)
+static inline TranslationBlock *tb_find_fast(CPUState *cpu,
+                                             TranslationBlock **last_tb,
+                                             int tb_exit)
 {
     CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
@@ -331,11 +333,27 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu)
        always be the same before a given translated block
        is executed. */
     cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+    tb_lock();
     tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
     if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
                  tb->flags != flags)) {
         tb = tb_find_slow(cpu, pc, cs_base, flags);
     }
+    if (cpu->tb_flushed) {
+        /* Ensure that no TB jump will be modified as the
+         * translation buffer has been flushed.
+         */
+        *last_tb = NULL;
+        cpu->tb_flushed = false;
+    }
+    /* see if we can patch the calling TB. When the TB
+       spans two pages, we cannot safely do a direct
+       jump. */
+    if (*last_tb != NULL && tb->page_addr[1] == -1
+        && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
+        tb_add_jump(*last_tb, tb_exit, tb);
+    }
+    tb_unlock();
     return tb;
 }
 
```