Message ID | 20240206092423.3005995-8-pierrick.bouvier@linaro.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | TCG Plugin inline operation enhancement | expand |
On 2/6/24 19:24, Pierrick Bouvier wrote: > Instead of working on a fixed memory location, allow to address it based > on cpu_index, an element size and a given offset. > Result address: ptr + offset + cpu_index * element_size. > > With this, we can target a member in a struct array from a base pointer. > > Current semantic is not modified, thus inline operation still targets > always the same memory location. > > Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> > --- > plugins/plugin.h | 2 +- > accel/tcg/plugin-gen.c | 65 +++++++++++++++++++++++++++++++++++------- > plugins/api.c | 3 +- > plugins/core.c | 12 +++++--- > 4 files changed, 65 insertions(+), 17 deletions(-) > > diff --git a/plugins/plugin.h b/plugins/plugin.h > index fd93a372803..77ed10689ca 100644 > --- a/plugins/plugin.h > +++ b/plugins/plugin.h > @@ -100,7 +100,7 @@ void plugin_register_vcpu_mem_cb(GArray **arr, > enum qemu_plugin_mem_rw rw, > void *udata); > > -void exec_inline_op(struct qemu_plugin_dyn_cb *cb); > +void exec_inline_op(struct qemu_plugin_dyn_cb *cb, int cpu_index); > > int plugin_num_vcpus(void); > > diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c > index b37ce7683e6..68dee4c68d3 100644 > --- a/accel/tcg/plugin-gen.c > +++ b/accel/tcg/plugin-gen.c > @@ -132,16 +132,28 @@ static void gen_empty_udata_cb_no_rwg(void) > */ > static void gen_empty_inline_cb(void) > { > + TCGv_i32 cpu_index = tcg_temp_ebb_new_i32(); > + TCGv_ptr cpu_index_as_ptr = tcg_temp_ebb_new_ptr(); > TCGv_i64 val = tcg_temp_ebb_new_i64(); > TCGv_ptr ptr = tcg_temp_ebb_new_ptr(); > > + tcg_gen_ld_i32(cpu_index, tcg_env, > + -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index)); > + /* pass an immediate != 0 so that it doesn't get optimized away */ > + tcg_gen_muli_i32(cpu_index, cpu_index, 0xdeadbeef); You don't need a random immediate here. You can just as easily use tcg_gen_mul_i32(cpu_index, cpu_index, cpu_index); with a similar comment about the true size being inserted later. 
Otherwise, Reviewed-by: Richard Henderson <richard.henderson@linaro.org> r~
On 2/7/24 07:42, Richard Henderson wrote: > On 2/6/24 19:24, Pierrick Bouvier wrote: >> Instead of working on a fixed memory location, allow to address it based >> on cpu_index, an element size and a given offset. >> Result address: ptr + offset + cpu_index * element_size. >> >> With this, we can target a member in a struct array from a base pointer. >> >> Current semantic is not modified, thus inline operation still targets >> always the same memory location. >> >> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> >> --- >> plugins/plugin.h | 2 +- >> accel/tcg/plugin-gen.c | 65 +++++++++++++++++++++++++++++++++++------- >> plugins/api.c | 3 +- >> plugins/core.c | 12 +++++--- >> 4 files changed, 65 insertions(+), 17 deletions(-) >> >> diff --git a/plugins/plugin.h b/plugins/plugin.h >> index fd93a372803..77ed10689ca 100644 >> --- a/plugins/plugin.h >> +++ b/plugins/plugin.h >> @@ -100,7 +100,7 @@ void plugin_register_vcpu_mem_cb(GArray **arr, >> enum qemu_plugin_mem_rw rw, >> void *udata); >> >> -void exec_inline_op(struct qemu_plugin_dyn_cb *cb); >> +void exec_inline_op(struct qemu_plugin_dyn_cb *cb, int cpu_index); >> >> int plugin_num_vcpus(void); >> >> diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c >> index b37ce7683e6..68dee4c68d3 100644 >> --- a/accel/tcg/plugin-gen.c >> +++ b/accel/tcg/plugin-gen.c >> @@ -132,16 +132,28 @@ static void gen_empty_udata_cb_no_rwg(void) >> */ >> static void gen_empty_inline_cb(void) >> { >> + TCGv_i32 cpu_index = tcg_temp_ebb_new_i32(); >> + TCGv_ptr cpu_index_as_ptr = tcg_temp_ebb_new_ptr(); >> TCGv_i64 val = tcg_temp_ebb_new_i64(); >> TCGv_ptr ptr = tcg_temp_ebb_new_ptr(); >> >> + tcg_gen_ld_i32(cpu_index, tcg_env, >> + -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index)); >> + /* pass an immediate != 0 so that it doesn't get optimized away */ >> + tcg_gen_muli_i32(cpu_index, cpu_index, 0xdeadbeef); > > You don't need a random immediate here. 
> You can just as easily use > > tcg_gen_mul_i32(cpu_index, cpu_index, cpu_index); > > with a similar comment about the true size being inserted later. > Followed the tcg_gen_addi_i64 that was using this pattern in the same file. I'll change this to what you recommend. > Otherwise, > Reviewed-by: Richard Henderson <richard.henderson@linaro.org> > > > r~
diff --git a/plugins/plugin.h b/plugins/plugin.h index fd93a372803..77ed10689ca 100644 --- a/plugins/plugin.h +++ b/plugins/plugin.h @@ -100,7 +100,7 @@ void plugin_register_vcpu_mem_cb(GArray **arr, enum qemu_plugin_mem_rw rw, void *udata); -void exec_inline_op(struct qemu_plugin_dyn_cb *cb); +void exec_inline_op(struct qemu_plugin_dyn_cb *cb, int cpu_index); int plugin_num_vcpus(void); diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c index b37ce7683e6..68dee4c68d3 100644 --- a/accel/tcg/plugin-gen.c +++ b/accel/tcg/plugin-gen.c @@ -132,16 +132,28 @@ static void gen_empty_udata_cb_no_rwg(void) */ static void gen_empty_inline_cb(void) { + TCGv_i32 cpu_index = tcg_temp_ebb_new_i32(); + TCGv_ptr cpu_index_as_ptr = tcg_temp_ebb_new_ptr(); TCGv_i64 val = tcg_temp_ebb_new_i64(); TCGv_ptr ptr = tcg_temp_ebb_new_ptr(); + tcg_gen_ld_i32(cpu_index, tcg_env, + -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index)); + /* pass an immediate != 0 so that it doesn't get optimized away */ + tcg_gen_muli_i32(cpu_index, cpu_index, 0xdeadbeef); + tcg_gen_ext_i32_ptr(cpu_index_as_ptr, cpu_index); + tcg_gen_movi_ptr(ptr, 0); + tcg_gen_add_ptr(ptr, ptr, cpu_index_as_ptr); tcg_gen_ld_i64(val, ptr, 0); /* pass an immediate != 0 so that it doesn't get optimized away */ tcg_gen_addi_i64(val, val, 0xdeadface); + tcg_gen_st_i64(val, ptr, 0); tcg_temp_free_ptr(ptr); tcg_temp_free_i64(val); + tcg_temp_free_ptr(cpu_index_as_ptr); + tcg_temp_free_i32(cpu_index); } static void gen_empty_mem_cb(TCGv_i64 addr, uint32_t info) @@ -289,12 +301,37 @@ static TCGOp *copy_const_ptr(TCGOp **begin_op, TCGOp *op, void *ptr) return op; } +static TCGOp *copy_ld_i32(TCGOp **begin_op, TCGOp *op) +{ + return copy_op(begin_op, op, INDEX_op_ld_i32); +} + +static TCGOp *copy_ext_i32_ptr(TCGOp **begin_op, TCGOp *op) +{ + if (UINTPTR_MAX == UINT32_MAX) { + op = copy_op(begin_op, op, INDEX_op_mov_i32); + } else { + op = copy_op(begin_op, op, INDEX_op_ext_i32_i64); + } + return op; +} + +static TCGOp 
*copy_add_ptr(TCGOp **begin_op, TCGOp *op) +{ + if (UINTPTR_MAX == UINT32_MAX) { + op = copy_op(begin_op, op, INDEX_op_add_i32); + } else { + op = copy_op(begin_op, op, INDEX_op_add_i64); + } + return op; +} + static TCGOp *copy_ld_i64(TCGOp **begin_op, TCGOp *op) { if (TCG_TARGET_REG_BITS == 32) { /* 2x ld_i32 */ - op = copy_op(begin_op, op, INDEX_op_ld_i32); - op = copy_op(begin_op, op, INDEX_op_ld_i32); + op = copy_ld_i32(begin_op, op); + op = copy_ld_i32(begin_op, op); } else { /* ld_i64 */ op = copy_op(begin_op, op, INDEX_op_ld_i64); @@ -330,6 +367,13 @@ static TCGOp *copy_add_i64(TCGOp **begin_op, TCGOp *op, uint64_t v) return op; } +static TCGOp *copy_mul_i32(TCGOp **begin_op, TCGOp *op, uint32_t v) +{ + op = copy_op(begin_op, op, INDEX_op_mul_i32); + op->args[2] = tcgv_i32_arg(tcg_constant_i32(v)); + return op; +} + static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op) { if (UINTPTR_MAX == UINT32_MAX) { @@ -395,18 +439,17 @@ static TCGOp *append_inline_cb(const struct qemu_plugin_dyn_cb *cb, TCGOp *begin_op, TCGOp *op, int *unused) { - /* const_ptr */ - op = copy_const_ptr(&begin_op, op, cb->userp); - - /* ld_i64 */ + char *ptr = cb->userp; + size_t elem_size = 0; + size_t offset = 0; + op = copy_ld_i32(&begin_op, op); + op = copy_mul_i32(&begin_op, op, elem_size); + op = copy_ext_i32_ptr(&begin_op, op); + op = copy_const_ptr(&begin_op, op, ptr + offset); + op = copy_add_ptr(&begin_op, op); op = copy_ld_i64(&begin_op, op); - - /* add_i64 */ op = copy_add_i64(&begin_op, op, cb->inline_insn.imm); - - /* st_i64 */ op = copy_st_i64(&begin_op, op); - return op; } diff --git a/plugins/api.c b/plugins/api.c index 15edad6769b..dedcbdfd30d 100644 --- a/plugins/api.c +++ b/plugins/api.c @@ -106,7 +106,8 @@ void qemu_plugin_register_vcpu_tb_exec_inline(struct qemu_plugin_tb *tb, void *ptr, uint64_t imm) { if (!tb->mem_only) { - plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr, imm); + plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], + 0, op, ptr, 
imm); } } diff --git a/plugins/core.c b/plugins/core.c index fd8604bcb79..863c2e64217 100644 --- a/plugins/core.c +++ b/plugins/core.c @@ -320,7 +320,8 @@ static struct qemu_plugin_dyn_cb *plugin_get_dyn_cb(GArray **arr) void plugin_register_inline_op(GArray **arr, enum qemu_plugin_mem_rw rw, - enum qemu_plugin_op op, void *ptr, + enum qemu_plugin_op op, + void *ptr, uint64_t imm) { struct qemu_plugin_dyn_cb *dyn_cb; @@ -476,9 +477,12 @@ void qemu_plugin_flush_cb(void) plugin_cb__simple(QEMU_PLUGIN_EV_FLUSH); } -void exec_inline_op(struct qemu_plugin_dyn_cb *cb) +void exec_inline_op(struct qemu_plugin_dyn_cb *cb, int cpu_index) { - uint64_t *val = cb->userp; + char *ptr = cb->userp; + size_t elem_size = 0; + size_t offset = 0; + uint64_t *val = (uint64_t *)(ptr + offset + cpu_index * elem_size); switch (cb->inline_insn.op) { case QEMU_PLUGIN_INLINE_ADD_U64: @@ -511,7 +515,7 @@ void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, vaddr, cb->userp); break; case PLUGIN_CB_INLINE: - exec_inline_op(cb); + exec_inline_op(cb, cpu->cpu_index); break; default: g_assert_not_reached();
Instead of working on a fixed memory location, allow addressing it based on cpu_index, an element size and a given offset. Resulting address: ptr + offset + cpu_index * element_size. With this, we can target a member in a struct array from a base pointer. The current semantics are not modified: the element size and offset are both 0 for now, so an inline operation still always targets the same memory location. Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> --- plugins/plugin.h | 2 +- accel/tcg/plugin-gen.c | 65 +++++++++++++++++++++++++++++++++++------- plugins/api.c | 3 +- plugins/core.c | 12 +++++--- 4 files changed, 65 insertions(+), 17 deletions(-)