diff mbox series

plugins: optimize cpu_index code generation

Message ID 20241126190203.3094635-1-pierrick.bouvier@linaro.org (mailing list archive)
State New
Headers show
Series plugins: optimize cpu_index code generation | expand

Commit Message

Pierrick Bouvier Nov. 26, 2024, 7:02 p.m. UTC
When running with a single vcpu, we can return a constant instead of a
load when accessing cpu_index.
A side effect is that all tcg operations using it are optimized, most
notably scoreboard access.
When running a simple loop in user-mode, the speedup is around 20%.

Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
---
 accel/tcg/plugin-gen.c |  7 +++++++
 plugins/core.c         | 13 +++++++++++++
 2 files changed, 20 insertions(+)

Comments

Pierrick Bouvier Nov. 26, 2024, 7:02 p.m. UTC | #1
On 11/26/24 11:02, Pierrick Bouvier wrote:
> When running with a single vcpu, we can return a constant instead of a
> load when accessing cpu_index.
> A side effect is that all tcg operations using it are optimized, most
> notably scoreboard access.
> When running a simple loop in user-mode, the speedup is around 20%.
> 
> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> ---
>   accel/tcg/plugin-gen.c |  7 +++++++
>   plugins/core.c         | 13 +++++++++++++
>   2 files changed, 20 insertions(+)
> 
> diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
> index 0f47bfbb489..2eabeecbdcf 100644
> --- a/accel/tcg/plugin-gen.c
> +++ b/accel/tcg/plugin-gen.c
> @@ -102,6 +102,13 @@ static void gen_disable_mem_helper(void)
>   
>   static TCGv_i32 gen_cpu_index(void)
>   {
> +    /*
> +     * Optimize when we run with a single vcpu. All values using cpu_index,
> +     * including scoreboard index, will be optimized out.
> +     */
> +    if (qemu_plugin_num_vcpus() == 1) {
> +        return tcg_constant_i32(0);
> +    }
>       TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
>       tcg_gen_ld_i32(cpu_index, tcg_env,
>                      -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
> diff --git a/plugins/core.c b/plugins/core.c
> index bb105e8e688..8e32ca5ee08 100644
> --- a/plugins/core.c
> +++ b/plugins/core.c
> @@ -266,6 +266,19 @@ static void qemu_plugin_vcpu_init__async(CPUState *cpu, run_on_cpu_data unused)
>   
>       assert(cpu->cpu_index != UNASSIGNED_CPU_INDEX);
>       qemu_rec_mutex_lock(&plugin.lock);
> +
> +    /*
> +     * We want to flush tb when a second cpu appear.
> +     * When generating plugin code, we optimize cpu_index for num_vcpus == 1.
> +     */
> +    if (plugin.num_vcpus == 1) {
> +        qemu_rec_mutex_unlock(&plugin.lock);
> +        start_exclusive();
> +        qemu_rec_mutex_lock(&plugin.lock);
> +        tb_flush(cpu);
> +        end_exclusive();
> +    }
> +
>       plugin.num_vcpus = MAX(plugin.num_vcpus, cpu->cpu_index + 1);
>       plugin_cpu_update__locked(&cpu->cpu_index, NULL, NULL);
>       success = g_hash_table_insert(plugin.cpu_ht, &cpu->cpu_index,

This patch is for 10.0.
diff mbox series

Patch

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index 0f47bfbb489..2eabeecbdcf 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -102,6 +102,13 @@  static void gen_disable_mem_helper(void)
 
 static TCGv_i32 gen_cpu_index(void)
 {
+    /*
+     * Optimize when we run with a single vcpu. All values using cpu_index,
+     * including scoreboard index, will be optimized out.
+     */
+    if (qemu_plugin_num_vcpus() == 1) {
+        return tcg_constant_i32(0);
+    }
     TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
     tcg_gen_ld_i32(cpu_index, tcg_env,
                    -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
diff --git a/plugins/core.c b/plugins/core.c
index bb105e8e688..8e32ca5ee08 100644
--- a/plugins/core.c
+++ b/plugins/core.c
@@ -266,6 +266,19 @@  static void qemu_plugin_vcpu_init__async(CPUState *cpu, run_on_cpu_data unused)
 
     assert(cpu->cpu_index != UNASSIGNED_CPU_INDEX);
     qemu_rec_mutex_lock(&plugin.lock);
+
+    /*
+     * We want to flush tb when a second cpu appear.
+     * When generating plugin code, we optimize cpu_index for num_vcpus == 1.
+     */
+    if (plugin.num_vcpus == 1) {
+        qemu_rec_mutex_unlock(&plugin.lock);
+        start_exclusive();
+        qemu_rec_mutex_lock(&plugin.lock);
+        tb_flush(cpu);
+        end_exclusive();
+    }
+
     plugin.num_vcpus = MAX(plugin.num_vcpus, cpu->cpu_index + 1);
     plugin_cpu_update__locked(&cpu->cpu_index, NULL, NULL);
     success = g_hash_table_insert(plugin.cpu_ht, &cpu->cpu_index,