@@ -27,6 +27,7 @@
#include "exec/address-spaces.h"
#include "qemu/rcu.h"
#include "exec/tb-hash.h"
+#include "translate-all.h"
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
#include "hw/i386/apic.h"
#endif
@@ -224,7 +225,7 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
phys_pc = get_page_addr_code(env, pc);
phys_page1 = phys_pc & TARGET_PAGE_MASK;
h = tb_phys_hash_func(phys_pc);
- ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
+ ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[cpu->tb_phys_idx][h];
for(;;) {
tb = *ptb1;
if (!tb) {
@@ -253,8 +254,8 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
/* Move the TB to the head of the list */
*ptb1 = tb->phys_hash_next;
- tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
- tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
+ tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[cpu->tb_phys_idx][h];
+ tcg_ctx.tb_ctx.tb_phys_hash[cpu->tb_phys_idx][h] = tb;
return tb;
}
@@ -488,6 +489,16 @@ int cpu_exec(CPUState *cpu)
cpu->exception_index = EXCP_INTERRUPT;
cpu_loop_exit(cpu);
}
+ if (unlikely(tcg_ctx.tb_ctx.tb_phys_hash_size_req !=
+ tcg_ctx.tb_ctx.tb_phys_hash_size)) {
+ if (tb_caches_apply() < 0) {
+ next_tb = 0;
+ }
+ }
+ if (unlikely(cpu->tb_phys_idx != cpu->tb_phys_idx_req)) {
+ cpu_tb_cache_apply(cpu);
+ next_tb = 0;
+ }
tb_lock();
tb = tb_find_fast(cpu);
/* Note: we do it here to avoid a gcc bug on Mac OS X when
@@ -177,6 +177,10 @@ static inline void tlb_flush_by_mmuidx(CPUState *cpu, ...)
#define USE_DIRECT_JUMP
#endif
+/**
+ * TranslationBlock:
+ * @phys_idx: Index of physical TB cache where this TB has been allocated.
+ */
struct TranslationBlock {
target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */
target_ulong cs_base; /* CS base for this block */
@@ -216,6 +220,8 @@ struct TranslationBlock {
jmp_first */
struct TranslationBlock *jmp_next[2];
struct TranslationBlock *jmp_first;
+
+ unsigned int phys_idx;
};
#include "qemu/thread.h"
@@ -225,7 +231,9 @@ typedef struct TBContext TBContext;
struct TBContext {
TranslationBlock *tbs;
- TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+ TranslationBlock ***tb_phys_hash;
+ size_t tb_phys_hash_size;
+ size_t tb_phys_hash_size_req;
int nb_tbs;
/* any access to the tbs or the page table must use this lock */
QemuMutex tb_lock;
@@ -250,6 +250,8 @@ struct kvm_run;
* @kvm_fd: vCPU file descriptor for KVM.
* @work_mutex: Lock to prevent multiple access to queued_work_*.
* @queued_work_first: First asynchronous work pending.
+ * @tb_phys_idx: Index of the current physical TB cache.
+ * @tb_phys_idx_req: Index of the requested physical TB cache.
*
* State of one CPU core or thread.
*/
@@ -314,6 +316,9 @@ struct CPUState {
struct KVMState *kvm_state;
struct kvm_run *kvm_run;
+ unsigned int tb_phys_idx;
+ unsigned int tb_phys_idx_req;
+
/* TODO Move common fields from CPUArchState here. */
int cpu_index; /* used by alpha TCG */
uint32_t halted; /* used by alpha, cris, ppc TCG */
@@ -363,6 +363,14 @@ static void cpu_class_init(ObjectClass *klass, void *data)
dc->cannot_instantiate_with_device_add_yet = true;
}
+static void cpu_init(Object *obj)
+{
+ CPUState *cpu = CPU(obj);
+
+ cpu->tb_phys_idx = 0;
+ cpu->tb_phys_idx_req = 0;
+}
+
static const TypeInfo cpu_type_info = {
.name = TYPE_CPU,
.parent = TYPE_DEVICE,
@@ -372,6 +380,7 @@ static const TypeInfo cpu_type_info = {
.abstract = true,
.class_size = sizeof(CPUClass),
.class_init = cpu_class_init,
+ .instance_init = cpu_init,
};
static void cpu_register_types(void)
@@ -163,9 +163,22 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
tb_page_addr_t phys_page2);
static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
+static void tb_phys_cache_alloc(unsigned int idx)
+{
+ size_t size = sizeof(tcg_ctx.tb_ctx.tb_phys_hash[0][0]) *
+ CODE_GEN_PHYS_HASH_SIZE;
+ tcg_ctx.tb_ctx.tb_phys_hash[idx] = malloc(size);
+ memset(tcg_ctx.tb_ctx.tb_phys_hash[idx], 0, size);
+}
+
void cpu_gen_init(void)
{
tcg_context_init(&tcg_ctx);
+
+ tcg_ctx.tb_ctx.tb_phys_hash_size = 0;
+ tcg_ctx.tb_ctx.tb_phys_hash_size_req = 1;
+ tcg_ctx.tb_ctx.tb_phys_hash = NULL;
+ tb_caches_apply();
}
/* Encode VAL as a signed leb128 sequence at P.
@@ -849,7 +862,12 @@ void tb_flush(CPUState *cpu)
tb_flush_jmp_cache_all(cpu);
}
- memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash));
+ unsigned int cache;
+ for (cache = 0; cache < tb_caches_get(); cache++) {
+ memset(tcg_ctx.tb_ctx.tb_phys_hash[cache], 0,
+ (sizeof(tcg_ctx.tb_ctx.tb_phys_hash[0][0]) *
+ CODE_GEN_PHYS_HASH_SIZE));
+ }
page_flush_tb();
tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
@@ -863,16 +881,21 @@ void tb_flush(CPUState *cpu)
static void tb_invalidate_check(target_ulong address)
{
TranslationBlock *tb;
+ unsigned int cache;
int i;
address &= TARGET_PAGE_MASK;
- for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
- for (tb = tb_ctx.tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
- if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
- address >= tb->pc + tb->size)) {
- printf("ERROR invalidate: address=" TARGET_FMT_lx
- " PC=%08lx size=%04x\n",
- address, (long)tb->pc, tb->size);
+ for (cache = 0; cache < tb_caches_get(); cache++) {
+ for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+ for (tb = tcg_ctx.tb_ctx.tb_phys_hash[cache][i];
+ tb != NULL;
+ tb = tb->phys_hash_next) {
+ if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
+ address >= tb->pc + tb->size)) {
+ printf("ERROR invalidate: address=" TARGET_FMT_lx
+ " PC=%08lx size=%04x\n",
+ address, (long)tb->pc, tb->size);
+ }
}
}
}
@@ -882,16 +905,20 @@ static void tb_invalidate_check(target_ulong address)
static void tb_page_check(void)
{
TranslationBlock *tb;
+ unsigned int cache;
int i, flags1, flags2;
- for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
- for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL;
- tb = tb->phys_hash_next) {
- flags1 = page_get_flags(tb->pc);
- flags2 = page_get_flags(tb->pc + tb->size - 1);
- if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
- printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
- (long)tb->pc, tb->size, flags1, flags2);
+ for (cache = 0; cache < tb_caches_get(); cache++) {
+ for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+ for (tb = tcg_ctx.tb_ctx.tb_phys_hash[cache][i];
+ tb != NULL;
+ tb = tb->phys_hash_next) {
+ flags1 = page_get_flags(tb->pc);
+ flags2 = page_get_flags(tb->pc + tb->size - 1);
+ if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
+ printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
+ (long)tb->pc, tb->size, flags1, flags2);
+ }
}
}
}
@@ -978,7 +1005,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
/* remove the TB from the hash list */
phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
h = tb_phys_hash_func(phys_pc);
- tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb);
+ tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[tb->phys_idx][h], tb);
/* remove the TB from the page list */
if (tb->page_addr[0] != page_addr) {
@@ -1053,6 +1080,86 @@ static void build_page_bitmap(PageDesc *p)
}
}
+size_t tb_caches_get(void)
+{
+ return tcg_ctx.tb_ctx.tb_phys_hash_size;
+}
+
+void tb_caches_set(size_t count)
+{
+ assert(count > 0);
+#ifndef NDEBUG
+ /* ensure no CPU is going to switch/stay in one of the removed caches */
+ CPUState *cpu;
+ CPU_FOREACH(cpu) {
+ assert(cpu->tb_phys_idx_req < count);
+ }
+#endif
+ tcg_ctx.tb_ctx.tb_phys_hash_size_req = count;
+}
+
+int tb_caches_apply(void)
+{
+ struct TBContext *tb_ctx = &tcg_ctx.tb_ctx;
+
+ if (likely(tb_ctx->tb_phys_hash_size_req == tb_ctx->tb_phys_hash_size)) {
+ return 0;
+ }
+
+ int res = tb_ctx->tb_phys_hash_size_req < tb_ctx->tb_phys_hash_size ?
+ -1 : 1;
+
+ if (res < 0) {
+ int i;
+ for (i = tb_ctx->tb_phys_hash_size_req;
+ i < tb_ctx->tb_phys_hash_size;
+ i++) {
+ free(tb_ctx->tb_phys_hash[i]);
+ }
+
+ CPUState *cpu;
+ CPU_FOREACH(cpu) {
+ if (cpu->tb_phys_idx >= tb_ctx->tb_phys_hash_size_req) {
+ fprintf(stderr,
+ "CPU %d is using a deleted TB cache\n", cpu->cpu_index);
+ exit(1);
+ }
+ }
+ }
+
+ size_t size = sizeof(tb_ctx->tb_phys_hash[0]) *
+ tb_ctx->tb_phys_hash_size_req;
+ tb_ctx->tb_phys_hash = realloc(tb_ctx->tb_phys_hash, size);
+ int i;
+ for (i = tb_ctx->tb_phys_hash_size;
+ i < tb_ctx->tb_phys_hash_size_req;
+ i++) {
+ tb_phys_cache_alloc(i);
+ }
+
+ tb_ctx->tb_phys_hash_size = tb_ctx->tb_phys_hash_size_req;
+ return res;
+}
+
+unsigned int cpu_tb_cache_get(CPUState *cpu)
+{
+ return cpu->tb_phys_idx;
+}
+
+void cpu_tb_cache_set(CPUState *cpu, unsigned int index)
+{
+ assert(index < tcg_ctx.tb_ctx.tb_phys_hash_size_req);
+ cpu->tb_phys_idx_req = index;
+ cpu->tcg_exit_req = true;
+}
+
+void cpu_tb_cache_apply(CPUState *cpu)
+{
+ cpu->tb_phys_idx = cpu->tb_phys_idx_req;
+ tb_flush_jmp_cache_all(cpu);
+}
+
/* Called with mmap_lock held for user mode emulation. */
TranslationBlock *tb_gen_code(CPUState *cpu,
target_ulong pc, target_ulong cs_base,
@@ -1090,6 +1197,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb->cs_base = cs_base;
tb->flags = flags;
tb->cflags = cflags;
+ tb->phys_idx = ENV_GET_CPU(env)->tb_phys_idx;
#ifdef CONFIG_PROFILER
tcg_ctx.tb_count1++; /* includes aborted translations because of
@@ -1480,7 +1588,7 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
/* add in the physical hash table */
h = tb_phys_hash_func(phys_pc);
- ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h];
+ ptb = &tcg_ctx.tb_ctx.tb_phys_hash[tb->phys_idx][h];
tb->phys_hash_next = *ptb;
*ptb = tb;
@@ -1643,6 +1751,8 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
pc = tb->pc;
cs_base = tb->cs_base;
flags = tb->flags;
+ /* XXX: It is OK to invalidate only this TB, as this is the one triggering
+ * the memory access */
tb_phys_invalidate(tb, -1);
if (tb->cflags & CF_NOCACHE) {
if (tb->orig_tb) {
@@ -19,6 +19,55 @@
#ifndef TRANSLATE_ALL_H
#define TRANSLATE_ALL_H
+
+/**
+ * tb_caches_get:
+ *
+ * Get the current number of physical TB caches.
+ */
+size_t tb_caches_get(void);
+/**
+ * tb_caches_set:
+ *
+ * Request a new number of physical TB caches.
+ */
+void tb_caches_set(size_t count);
+/**
+ * tb_caches_apply:
+ *
+ * Apply the changes for a tb_caches_set() request.
+ *
+ * Returns: -1/1 if the number of caches has been shrunk/grown; 0 otherwise.
+ *
+ * Note: All TBs of eliminated caches are invalidated.
+ *
+ * Precondition: No vCPU uses any of the caches that will be removed (if any;
+ * see cpu_tb_cache_set() and tb_caches_set()).
+ */
+int tb_caches_apply(void);
+/**
+ * cpu_tb_cache_get:
+ *
+ * Get the physical TB cache index for the given CPU.
+ */
+unsigned int cpu_tb_cache_get(CPUState *cpu);
+/**
+ * cpu_tb_cache_set:
+ *
+ * Set the physical TB cache index for the given CPU.
+ *
+ * Takes effect at the beginning of the next executed TB.
+ */
+void cpu_tb_cache_set(CPUState *cpu, unsigned int index);
+/**
+ * cpu_tb_cache_apply:
+ *
+ * Apply the changes for a cpu_tb_cache_set() request.
+ *
+ * Note: Invalidates the jump cache of the given CPU.
+ */
+void cpu_tb_cache_apply(CPUState *cpu);
+
/* translate-all.c */
void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len);
void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
The physical translation block cache is split into as many caches as
wanted, and the virtual TB cache on each guest CPU uses a (potentially)
different physical TB cache.  This is later exploited to support
different tracing event states on a per-vCPU basis.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
---
 cpu-exec.c              |   17 +++++
 include/exec/exec-all.h |   10 +++
 include/qom/cpu.h       |    5 ++
 qom/cpu.c               |    9 +++
 translate-all.c         |  146 +++++++++++++++++++++++++++++++++++++++++------
 translate-all.h         |   49 ++++++++++++++++
 6 files changed, 214 insertions(+), 22 deletions(-)
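
As an illustration of the intended usage, here is a minimal sketch (not
part of the diff above): the two example_* helpers and the
one-private-cache-per-vCPU policy are assumptions for illustration only;
only tb_caches_set(), tb_caches_apply(), cpu_tb_cache_set(),
cpu_tb_cache_apply() and CPU_FOREACH() come from the patched headers.

    #include "qom/cpu.h"        /* CPUState, CPU_FOREACH() */
    #include "translate-all.h"  /* tb_caches_set(), cpu_tb_cache_set() */

    /* Grow the pool so that every vCPU can own a private physical TB cache,
     * keeping index 0 as the default shared cache.  The new count is only
     * requested here; cpu_exec() later applies it via tb_caches_apply(). */
    static void example_reserve_private_caches(void)
    {
        CPUState *cpu;
        size_t count = 1;

        CPU_FOREACH(cpu) {
            count++;
        }
        tb_caches_set(count);
    }

    /* Ask one vCPU to switch to its private cache.  cpu_tb_cache_set() also
     * raises tcg_exit_req, so the request is honoured by
     * cpu_tb_cache_apply() before the next TB is looked up. */
    static void example_switch_to_private_cache(CPUState *cpu)
    {
        cpu_tb_cache_set(cpu, cpu->cpu_index + 1);
    }

Whatever policy a later series adopts, the request/apply split means the
actual cache switch only happens at the top of cpu_exec()'s execution
loop, which is the safe point added by this patch.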