diff mbox series

[v3,07/17] plugins: implement inline operation relative to cpu_index

Message ID 20240206092423.3005995-8-pierrick.bouvier@linaro.org (mailing list archive)
State New, archived
Headers show
Series TCG Plugin inline operation enhancement | expand

Commit Message

Pierrick Bouvier Feb. 6, 2024, 9:24 a.m. UTC
Instead of working on a fixed memory location, allow to address it based
on cpu_index, an element size and a given offset.
Result address: ptr + offset + cpu_index * element_size.

With this, we can target a member in a struct array from a base pointer.

Current semantic is not modified, thus inline operation still targets
always the same memory location.

Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
---
 plugins/plugin.h       |  2 +-
 accel/tcg/plugin-gen.c | 65 +++++++++++++++++++++++++++++++++++-------
 plugins/api.c          |  3 +-
 plugins/core.c         | 12 +++++---
 4 files changed, 65 insertions(+), 17 deletions(-)

Comments

Richard Henderson Feb. 7, 2024, 3:42 a.m. UTC | #1
On 2/6/24 19:24, Pierrick Bouvier wrote:
> Instead of working on a fixed memory location, allow to address it based
> on cpu_index, an element size and a given offset.
> Result address: ptr + offset + cpu_index * element_size.
> 
> With this, we can target a member in a struct array from a base pointer.
> 
> Current semantic is not modified, thus inline operation still targets
> always the same memory location.
> 
> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> ---
>   plugins/plugin.h       |  2 +-
>   accel/tcg/plugin-gen.c | 65 +++++++++++++++++++++++++++++++++++-------
>   plugins/api.c          |  3 +-
>   plugins/core.c         | 12 +++++---
>   4 files changed, 65 insertions(+), 17 deletions(-)
> 
> diff --git a/plugins/plugin.h b/plugins/plugin.h
> index fd93a372803..77ed10689ca 100644
> --- a/plugins/plugin.h
> +++ b/plugins/plugin.h
> @@ -100,7 +100,7 @@ void plugin_register_vcpu_mem_cb(GArray **arr,
>                                    enum qemu_plugin_mem_rw rw,
>                                    void *udata);
>   
> -void exec_inline_op(struct qemu_plugin_dyn_cb *cb);
> +void exec_inline_op(struct qemu_plugin_dyn_cb *cb, int cpu_index);
>   
>   int plugin_num_vcpus(void);
>   
> diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
> index b37ce7683e6..68dee4c68d3 100644
> --- a/accel/tcg/plugin-gen.c
> +++ b/accel/tcg/plugin-gen.c
> @@ -132,16 +132,28 @@ static void gen_empty_udata_cb_no_rwg(void)
>    */
>   static void gen_empty_inline_cb(void)
>   {
> +    TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
> +    TCGv_ptr cpu_index_as_ptr = tcg_temp_ebb_new_ptr();
>       TCGv_i64 val = tcg_temp_ebb_new_i64();
>       TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
>   
> +    tcg_gen_ld_i32(cpu_index, tcg_env,
> +                   -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
> +    /* pass an immediate != 0 so that it doesn't get optimized away */
> +    tcg_gen_muli_i32(cpu_index, cpu_index, 0xdeadbeef);

You don't need a random immediate here.
You can just as easily use

     tcg_gen_mul_i32(cpu_index, cpu_index, cpu_index);

with a similar comment about the true size being inserted later.

Otherwise,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~
Pierrick Bouvier Feb. 7, 2024, 6:01 a.m. UTC | #2
On 2/7/24 07:42, Richard Henderson wrote:
> On 2/6/24 19:24, Pierrick Bouvier wrote:
>> Instead of working on a fixed memory location, allow to address it based
>> on cpu_index, an element size and a given offset.
>> Result address: ptr + offset + cpu_index * element_size.
>>
>> With this, we can target a member in a struct array from a base pointer.
>>
>> Current semantic is not modified, thus inline operation still targets
>> always the same memory location.
>>
>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>> ---
>>    plugins/plugin.h       |  2 +-
>>    accel/tcg/plugin-gen.c | 65 +++++++++++++++++++++++++++++++++++-------
>>    plugins/api.c          |  3 +-
>>    plugins/core.c         | 12 +++++---
>>    4 files changed, 65 insertions(+), 17 deletions(-)
>>
>> diff --git a/plugins/plugin.h b/plugins/plugin.h
>> index fd93a372803..77ed10689ca 100644
>> --- a/plugins/plugin.h
>> +++ b/plugins/plugin.h
>> @@ -100,7 +100,7 @@ void plugin_register_vcpu_mem_cb(GArray **arr,
>>                                     enum qemu_plugin_mem_rw rw,
>>                                     void *udata);
>>    
>> -void exec_inline_op(struct qemu_plugin_dyn_cb *cb);
>> +void exec_inline_op(struct qemu_plugin_dyn_cb *cb, int cpu_index);
>>    
>>    int plugin_num_vcpus(void);
>>    
>> diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
>> index b37ce7683e6..68dee4c68d3 100644
>> --- a/accel/tcg/plugin-gen.c
>> +++ b/accel/tcg/plugin-gen.c
>> @@ -132,16 +132,28 @@ static void gen_empty_udata_cb_no_rwg(void)
>>     */
>>    static void gen_empty_inline_cb(void)
>>    {
>> +    TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
>> +    TCGv_ptr cpu_index_as_ptr = tcg_temp_ebb_new_ptr();
>>        TCGv_i64 val = tcg_temp_ebb_new_i64();
>>        TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
>>    
>> +    tcg_gen_ld_i32(cpu_index, tcg_env,
>> +                   -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
>> +    /* pass an immediate != 0 so that it doesn't get optimized away */
>> +    tcg_gen_muli_i32(cpu_index, cpu_index, 0xdeadbeef);
> 
> You don't need a random immediate here.
> You can just as easily use
> 
>       tcg_gen_mul_i32(cpu_index, cpu_index, cpu_index);
> 
> with a similar comment about the true size being inserted later.
> 

Followed the tcg_gen_addi_i64 that was using this pattern in the same 
file. I'll change this to what you recommend.

> Otherwise,
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> 
> 
> r~
diff mbox series

Patch

diff --git a/plugins/plugin.h b/plugins/plugin.h
index fd93a372803..77ed10689ca 100644
--- a/plugins/plugin.h
+++ b/plugins/plugin.h
@@ -100,7 +100,7 @@  void plugin_register_vcpu_mem_cb(GArray **arr,
                                  enum qemu_plugin_mem_rw rw,
                                  void *udata);
 
-void exec_inline_op(struct qemu_plugin_dyn_cb *cb);
+void exec_inline_op(struct qemu_plugin_dyn_cb *cb, int cpu_index);
 
 int plugin_num_vcpus(void);
 
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index b37ce7683e6..68dee4c68d3 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -132,16 +132,28 @@  static void gen_empty_udata_cb_no_rwg(void)
  */
 static void gen_empty_inline_cb(void)
 {
+    TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
+    TCGv_ptr cpu_index_as_ptr = tcg_temp_ebb_new_ptr();
     TCGv_i64 val = tcg_temp_ebb_new_i64();
     TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
 
+    tcg_gen_ld_i32(cpu_index, tcg_env,
+                   -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
+    /* pass an immediate != 0 so that it doesn't get optimized away */
+    tcg_gen_muli_i32(cpu_index, cpu_index, 0xdeadbeef);
+    tcg_gen_ext_i32_ptr(cpu_index_as_ptr, cpu_index);
+
     tcg_gen_movi_ptr(ptr, 0);
+    tcg_gen_add_ptr(ptr, ptr, cpu_index_as_ptr);
     tcg_gen_ld_i64(val, ptr, 0);
     /* pass an immediate != 0 so that it doesn't get optimized away */
     tcg_gen_addi_i64(val, val, 0xdeadface);
+
     tcg_gen_st_i64(val, ptr, 0);
     tcg_temp_free_ptr(ptr);
     tcg_temp_free_i64(val);
+    tcg_temp_free_ptr(cpu_index_as_ptr);
+    tcg_temp_free_i32(cpu_index);
 }
 
 static void gen_empty_mem_cb(TCGv_i64 addr, uint32_t info)
@@ -289,12 +301,37 @@  static TCGOp *copy_const_ptr(TCGOp **begin_op, TCGOp *op, void *ptr)
     return op;
 }
 
+static TCGOp *copy_ld_i32(TCGOp **begin_op, TCGOp *op)
+{
+    return copy_op(begin_op, op, INDEX_op_ld_i32);
+}
+
+static TCGOp *copy_ext_i32_ptr(TCGOp **begin_op, TCGOp *op)
+{
+    if (UINTPTR_MAX == UINT32_MAX) {
+        op = copy_op(begin_op, op, INDEX_op_mov_i32);
+    } else {
+        op = copy_op(begin_op, op, INDEX_op_ext_i32_i64);
+    }
+    return op;
+}
+
+static TCGOp *copy_add_ptr(TCGOp **begin_op, TCGOp *op)
+{
+    if (UINTPTR_MAX == UINT32_MAX) {
+        op = copy_op(begin_op, op, INDEX_op_add_i32);
+    } else {
+        op = copy_op(begin_op, op, INDEX_op_add_i64);
+    }
+    return op;
+}
+
 static TCGOp *copy_ld_i64(TCGOp **begin_op, TCGOp *op)
 {
     if (TCG_TARGET_REG_BITS == 32) {
         /* 2x ld_i32 */
-        op = copy_op(begin_op, op, INDEX_op_ld_i32);
-        op = copy_op(begin_op, op, INDEX_op_ld_i32);
+        op = copy_ld_i32(begin_op, op);
+        op = copy_ld_i32(begin_op, op);
     } else {
         /* ld_i64 */
         op = copy_op(begin_op, op, INDEX_op_ld_i64);
@@ -330,6 +367,13 @@  static TCGOp *copy_add_i64(TCGOp **begin_op, TCGOp *op, uint64_t v)
     return op;
 }
 
+static TCGOp *copy_mul_i32(TCGOp **begin_op, TCGOp *op, uint32_t v)
+{
+    op = copy_op(begin_op, op, INDEX_op_mul_i32);
+    op->args[2] = tcgv_i32_arg(tcg_constant_i32(v));
+    return op;
+}
+
 static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op)
 {
     if (UINTPTR_MAX == UINT32_MAX) {
@@ -395,18 +439,17 @@  static TCGOp *append_inline_cb(const struct qemu_plugin_dyn_cb *cb,
                                TCGOp *begin_op, TCGOp *op,
                                int *unused)
 {
-    /* const_ptr */
-    op = copy_const_ptr(&begin_op, op, cb->userp);
-
-    /* ld_i64 */
+    char *ptr = cb->userp;
+    size_t elem_size = 0;
+    size_t offset = 0;
+    op = copy_ld_i32(&begin_op, op);
+    op = copy_mul_i32(&begin_op, op, elem_size);
+    op = copy_ext_i32_ptr(&begin_op, op);
+    op = copy_const_ptr(&begin_op, op, ptr + offset);
+    op = copy_add_ptr(&begin_op, op);
     op = copy_ld_i64(&begin_op, op);
-
-    /* add_i64 */
     op = copy_add_i64(&begin_op, op, cb->inline_insn.imm);
-
-    /* st_i64 */
     op = copy_st_i64(&begin_op, op);
-
     return op;
 }
 
diff --git a/plugins/api.c b/plugins/api.c
index 15edad6769b..dedcbdfd30d 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -106,7 +106,8 @@  void qemu_plugin_register_vcpu_tb_exec_inline(struct qemu_plugin_tb *tb,
                                               void *ptr, uint64_t imm)
 {
     if (!tb->mem_only) {
-        plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr, imm);
+        plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE],
+                                  0, op, ptr, imm);
     }
 }
 
diff --git a/plugins/core.c b/plugins/core.c
index fd8604bcb79..863c2e64217 100644
--- a/plugins/core.c
+++ b/plugins/core.c
@@ -320,7 +320,8 @@  static struct qemu_plugin_dyn_cb *plugin_get_dyn_cb(GArray **arr)
 
 void plugin_register_inline_op(GArray **arr,
                                enum qemu_plugin_mem_rw rw,
-                               enum qemu_plugin_op op, void *ptr,
+                               enum qemu_plugin_op op,
+                               void *ptr,
                                uint64_t imm)
 {
     struct qemu_plugin_dyn_cb *dyn_cb;
@@ -476,9 +477,12 @@  void qemu_plugin_flush_cb(void)
     plugin_cb__simple(QEMU_PLUGIN_EV_FLUSH);
 }
 
-void exec_inline_op(struct qemu_plugin_dyn_cb *cb)
+void exec_inline_op(struct qemu_plugin_dyn_cb *cb, int cpu_index)
 {
-    uint64_t *val = cb->userp;
+    char *ptr = cb->userp;
+    size_t elem_size = 0;
+    size_t offset = 0;
+    uint64_t *val = (uint64_t *)(ptr + offset + cpu_index * elem_size);
 
     switch (cb->inline_insn.op) {
     case QEMU_PLUGIN_INLINE_ADD_U64:
@@ -511,7 +515,7 @@  void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
                            vaddr, cb->userp);
             break;
         case PLUGIN_CB_INLINE:
-            exec_inline_op(cb);
+            exec_inline_op(cb, cpu->cpu_index);
             break;
         default:
             g_assert_not_reached();