| Message ID | 1467735496-16256-2-git-send-email-alex.bennee@linaro.org (mailing list archive) |
| --- | --- |
| State | New, archived |
I was not sure if the language I used in the source code comments is 100%
correct. So it would be fine if someone could check if it is easy to
understand ;)

Thanks,
Sergey

On 05/07/16 19:18, Alex Bennée wrote:
> From: Sergey Fedorov <serge.fdrv@gmail.com>
>
> First, ensure atomicity of CPU's 'tb_jmp_cache' access by:
>  * using atomic_read() to look up a TB when not holding 'tb_lock';
>  * using atomic_write() to remove a TB from each CPU's local cache on
>    TB invalidation.
>
> Second, add some memory barriers to ensure we don't put the TB being
> invalidated back to CPU's 'tb_jmp_cache'. If we fail to look up a TB in
> CPU's local cache because it is being invalidated by some other thread
> then it must not be found in the shared TB hash table. Otherwise we'd
> put it back to CPU's local cache.
>
> Note that this patch does *not* make CPU's TLB invalidation safe if it
> is done from some other thread while the CPU is in its execution loop.
>
> Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com>
> Signed-off-by: Sergey Fedorov <sergey.fedorov@linaro.org>
> [AJB: fixed missing atomic set, tweak title]
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Reviewed-by: Richard Henderson <rth@twiddle.net>
> Reviewed-by: Emilio G. Cota <cota@braap.org>
>
> ---
> v1 (AJB):
>   - tweak title
>   - fixed missing set of tb_jmp_cache
> v2
>   - fix spelling s/con't/can't/
>   - add atomic_read while clearing tb_jmp_cache
>   - add r-b tags
> ---
>  cpu-exec.c      | 9 +++++++--
>  translate-all.c | 9 +++++++--
>  2 files changed, 14 insertions(+), 4 deletions(-)
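The ordering the commit message relies on can be illustrated outside of QEMU. Below is a minimal, standalone sketch of the same pattern, assuming C11 atomics and fences in place of QEMU's atomic_read()/atomic_set() and smp_rmb()/smp_wmb(). The names shared_table, local_cache, invalidate() and lookup() are hypothetical stand-ins for the shared TB hash table, cpu->tb_jmp_cache, tb_phys_invalidate() and tb_find_fast()/tb_find_slow(); this is not QEMU code.

```c
#include <stdatomic.h>
#include <stddef.h>

#define CACHE_SIZE 256

typedef struct Entry {
    int key;
} Entry;

static Entry *_Atomic shared_table[CACHE_SIZE];  /* ~ shared TB hash table */
static Entry *_Atomic local_cache[CACHE_SIZE];   /* ~ cpu->tb_jmp_cache */

static unsigned hashfn(int key)
{
    return (unsigned)key % CACHE_SIZE;
}

/* Invalidation side, cf. tb_phys_invalidate(). */
void invalidate(Entry *e)
{
    unsigned h = hashfn(e->key);

    /* 1. Unlink the entry from the shared table first. */
    atomic_store_explicit(&shared_table[h], NULL, memory_order_relaxed);

    /* 2. Publish that removal before touching the local cache.
     *    Stands in for smp_wmb(). */
    atomic_thread_fence(memory_order_release);

    /* 3. Clear the cached pointer so readers fall back to the
     *    (already emptied) shared table. */
    if (atomic_load_explicit(&local_cache[h], memory_order_relaxed) == e) {
        atomic_store_explicit(&local_cache[h], NULL, memory_order_relaxed);
    }
}

/* Lookup side, cf. tb_find_fast()/tb_find_slow(). */
Entry *lookup(int key)
{
    unsigned h = hashfn(key);
    Entry *e = atomic_load_explicit(&local_cache[h], memory_order_relaxed);

    if (e == NULL || e->key != key) {
        /* Cache miss: order the shared-table read after the cache read.
         * Stands in for smp_rmb().  If the miss happened because
         * invalidate() cleared the slot, this fence pairing guarantees
         * the load below sees the NULL stored in step 1. */
        atomic_thread_fence(memory_order_acquire);
        e = atomic_load_explicit(&shared_table[h], memory_order_relaxed);
        if (e != NULL && e->key == key) {
            /* Safe to repopulate: an entry that is being invalidated can
             * no longer be found here once its cache slot was cleared. */
            atomic_store_explicit(&local_cache[h], e, memory_order_relaxed);
        }
    }
    return e;
}
```

If a reader misses in local_cache because invalidate() has already cleared the slot, the release/acquire pairing guarantees that the subsequent shared_table load also returns NULL, so the stale entry is never written back into the cache. That is the same property the commit message states for tb_jmp_cache. (The commit message's atomic_write() presumably refers to the atomic_set() calls visible in the diff.)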
On 05/07/16 19:18, Alex Bennée wrote:
> diff --git a/translate-all.c b/translate-all.c
> index eaa95e4..96efe48 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -1004,11 +1004,16 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
>          invalidate_page_bitmap(p);
>      }
>
> +    /* Ensure that we won't find the TB in the shared hash table
> +     * if we can't see it in CPU's local cache.
> +     * Pairs with smp_rmb() in tb_find_slow(). */
> +    smp_wmb();
> +
>      /* remove the TB from the hash list */
>      h = tb_jmp_cache_hash_func(tb->pc);
>      CPU_FOREACH(cpu) {
> -        if (cpu->tb_jmp_cache[h] == tb) {
> -            cpu->tb_jmp_cache[h] = NULL;
> +        if (atomic_read(&cpu->tb_jmp_cache[h]) == tb) {

I think this can be a simple read because we can't race here with setting
'tb_jmp_cache' in tb_find_slow().

Kind regards,
Sergey

> +            atomic_set(&cpu->tb_jmp_cache[h], NULL);
>          }
>      }
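The point of this review comment, as I read it, is that the writers of a tb_jmp_cache slot are serialized (tb_find_slow() populates the cache with 'tb_lock' held, and tb_phys_invalidate() does not race with it), so the check on the writer side does not need an atomic read; only the store needs to be atomic, so that a reader looking at the slot without the lock never sees a half-written pointer. A rough sketch of that situation, again using C11 atomics and POSIX threads rather than QEMU's primitives, with hypothetical names (big_lock, slot):

```c
#include <stdatomic.h>
#include <stddef.h>
#include <pthread.h>

/* 'big_lock' plays the role of tb_lock; 'slot' plays the role of one
 * tb_jmp_cache entry. */
static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
static void *_Atomic slot;

/* All writers of 'slot' take big_lock, so they are serialized against
 * each other.  The load below therefore has no concurrent writer to
 * race with and needs no ordering (a plain read would do if the object
 * were not declared _Atomic); the store, however, must still be a
 * single atomic write so that a lock-free reader never observes a torn
 * pointer. */
void clear_if_matches(void *victim)
{
    pthread_mutex_lock(&big_lock);
    if (atomic_load_explicit(&slot, memory_order_relaxed) == victim) {
        atomic_store_explicit(&slot, NULL, memory_order_relaxed);
    }
    pthread_mutex_unlock(&big_lock);
}

/* A reader that does not take big_lock: it relies on the accesses to
 * 'slot' being atomic, not on the lock. */
void *peek(void)
{
    return atomic_load_explicit(&slot, memory_order_relaxed);
}
```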
diff --git a/cpu-exec.c b/cpu-exec.c
index b840e1d..10ce1cb 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -285,6 +285,11 @@ static TranslationBlock *tb_find_slow(CPUState *cpu,
 {
     TranslationBlock *tb;

+    /* Ensure that we won't find a TB in the shared hash table
+     * if it is being invalidated by some other thread.
+     * Otherwise we'd put it back to CPU's local cache.
+     * Pairs with smp_wmb() in tb_phys_invalidate(). */
+    smp_rmb();
     tb = tb_find_physical(cpu, pc, cs_base, flags);
     if (tb) {
         goto found;
@@ -315,7 +320,7 @@ static TranslationBlock *tb_find_slow(CPUState *cpu,

 found:
     /* we add the TB in the virtual pc hash table */
-    cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
+    atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
     return tb;
 }

@@ -333,7 +338,7 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu,
        is executed. */
     cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
     tb_lock();
-    tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
+    tb = atomic_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
     if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
                  tb->flags != flags)) {
         tb = tb_find_slow(cpu, pc, cs_base, flags);
diff --git a/translate-all.c b/translate-all.c
index eaa95e4..96efe48 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -1004,11 +1004,16 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
         invalidate_page_bitmap(p);
     }

+    /* Ensure that we won't find the TB in the shared hash table
+     * if we can't see it in CPU's local cache.
+     * Pairs with smp_rmb() in tb_find_slow(). */
+    smp_wmb();
+
     /* remove the TB from the hash list */
     h = tb_jmp_cache_hash_func(tb->pc);
     CPU_FOREACH(cpu) {
-        if (cpu->tb_jmp_cache[h] == tb) {
-            cpu->tb_jmp_cache[h] = NULL;
+        if (atomic_read(&cpu->tb_jmp_cache[h]) == tb) {
+            atomic_set(&cpu->tb_jmp_cache[h], NULL);
         }
     }