diff mbox series

[9/9] contrib/plugins: add ips plugin example for cost modeling

Message ID 20240612153508.1532940-10-alex.bennee@linaro.org (mailing list archive)
State New
Headers show
Series maintainer updates (gdbstub, plugins, time control) | expand

Commit Message

Alex Bennée June 12, 2024, 3:35 p.m. UTC
From: Pierrick Bouvier <pierrick.bouvier@linaro.org>

This plugin uses the new time control interface to make decisions
about the state of time during the emulation. The algorithm is
currently very simple. The user specifies an ips rate which applies
per core. If the core runs ahead of its allocated execution time the
plugin sleeps for a bit to let real time catch up. Either way time is
updated for the emulation as a function of total executed instructions
with some adjustments for cores that idle.

Examples
--------

Slow down execution of /bin/true:
$ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
$ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
real 4.000s

Boot a Linux kernel simulating a 250MHz cpu:
$ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
check time until kernel panic on serial0

Tested in system mode by booting a full debian system, and using:
$ sysbench cpu run
Performance decreases linearly with the given number of ips.

Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Message-Id: <20240530220610.1245424-7-pierrick.bouvier@linaro.org>
---
 contrib/plugins/ips.c    | 164 +++++++++++++++++++++++++++++++++++++++
 contrib/plugins/Makefile |   1 +
 2 files changed, 165 insertions(+)
 create mode 100644 contrib/plugins/ips.c

Comments

Dr. David Alan Gilbert June 12, 2024, 9:02 p.m. UTC | #1
* Alex Bennée (alex.bennee@linaro.org) wrote:
> From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> 
> This plugin uses the new time control interface to make decisions
> about the state of time during the emulation. The algorithm is
> currently very simple. The user specifies an ips rate which applies
> per core. If the core runs ahead of its allocated execution time the
> plugin sleeps for a bit to let real time catch up. Either way time is
> updated for the emulation as a function of total executed instructions
> with some adjustments for cores that idle.

A few random thoughts:
  a) Are there any definitions of what a plugin that controls time
     should do with a live migration?
  b) The sleep in migration/dirtyrate.c points out g_usleep might
     sleep for longer, so reads the actual wall clock time to
     figure out a new 'now'.
  c) A fun thing to do with this would be to follow an external simulation
     or 2nd qemu, trying to keep the two from running too far past
     each other.

Dave

> Examples
> --------
> 
> Slow down execution of /bin/true:
> $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
> $ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
> real 4.000s
> 
> Boot a Linux kernel simulating a 250MHz cpu:
> $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
> check time until kernel panic on serial0
> 
> Tested in system mode by booting a full debian system, and using:
> $ sysbench cpu run
> Performance decrease linearly with the given number of ips.
> 
> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> Message-Id: <20240530220610.1245424-7-pierrick.bouvier@linaro.org>
> ---
>  contrib/plugins/ips.c    | 164 +++++++++++++++++++++++++++++++++++++++
>  contrib/plugins/Makefile |   1 +
>  2 files changed, 165 insertions(+)
>  create mode 100644 contrib/plugins/ips.c
> 
> diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
> new file mode 100644
> index 0000000000..db77729264
> --- /dev/null
> +++ b/contrib/plugins/ips.c
> @@ -0,0 +1,164 @@
> +/*
> + * ips rate limiting plugin.
> + *
> + * This plugin can be used to restrict the execution of a system to a
> + * particular number of Instructions Per Second (ips). This controls
> + * time as seen by the guest so while wall-clock time may be longer
> + * from the guests point of view time will pass at the normal rate.
> + *
> + * This uses the new plugin API which allows the plugin to control
> + * system time.
> + *
> + * Copyright (c) 2023 Linaro Ltd
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include <stdio.h>
> +#include <glib.h>
> +#include <qemu-plugin.h>
> +
> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
> +
> +/* how many times do we update time per sec */
> +#define NUM_TIME_UPDATE_PER_SEC 10
> +#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000)
> +
> +static GMutex global_state_lock;
> +
> +static uint64_t max_insn_per_second = 1000 * 1000 * 1000; /* ips per core, per second */
> +static uint64_t max_insn_per_quantum; /* trap every N instructions */
> +static int64_t virtual_time_ns; /* last set virtual time */
> +
> +static const void *time_handle;
> +
> +typedef struct {
> +    uint64_t total_insn;
> +    uint64_t quantum_insn; /* insn in last quantum */
> +    int64_t last_quantum_time; /* time when last quantum started */
> +} vCPUTime;
> +
> +struct qemu_plugin_scoreboard *vcpus;
> +
> +/* return epoch time in ns */
> +static int64_t now_ns(void)
> +{
> +    return g_get_real_time() * 1000;
> +}
> +
> +static uint64_t num_insn_during(int64_t elapsed_ns)
> +{
> +    double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC;
> +    return num_secs * (double) max_insn_per_second;
> +}
> +
> +static int64_t time_for_insn(uint64_t num_insn)
> +{
> +    double num_secs = (double) num_insn / (double) max_insn_per_second;
> +    return num_secs * (double) NSEC_IN_ONE_SEC;
> +}
> +
> +static void update_system_time(vCPUTime *vcpu)
> +{
> +    int64_t elapsed_ns = now_ns() - vcpu->last_quantum_time;
> +    uint64_t max_insn = num_insn_during(elapsed_ns);
> +
> +    if (vcpu->quantum_insn >= max_insn) {
> +        /* this vcpu ran faster than expected, so it has to sleep */
> +        uint64_t insn_advance = vcpu->quantum_insn - max_insn;
> +        uint64_t time_advance_ns = time_for_insn(insn_advance);
> +        int64_t sleep_us = time_advance_ns / 1000;
> +        g_usleep(sleep_us);
> +    }
> +
> +    vcpu->total_insn += vcpu->quantum_insn;
> +    vcpu->quantum_insn = 0;
> +    vcpu->last_quantum_time = now_ns();
> +
> +    /* based on total number of instructions, what should be the new time? */
> +    int64_t new_virtual_time = time_for_insn(vcpu->total_insn);
> +
> +    g_mutex_lock(&global_state_lock);
> +
> +    /* Time only moves forward. Another vcpu might have updated it already. */
> +    if (new_virtual_time > virtual_time_ns) {
> +        qemu_plugin_update_ns(time_handle, new_virtual_time);
> +        virtual_time_ns = new_virtual_time;
> +    }
> +
> +    g_mutex_unlock(&global_state_lock);
> +}
> +
> +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
> +{
> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> +    vcpu->total_insn = 0;
> +    vcpu->quantum_insn = 0;
> +    vcpu->last_quantum_time = now_ns();
> +}
> +
> +static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index)
> +{
> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> +    update_system_time(vcpu);
> +}
> +
> +static void every_quantum_insn(unsigned int cpu_index, void *udata)
> +{
> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> +    g_assert(vcpu->quantum_insn >= max_insn_per_quantum);
> +    update_system_time(vcpu);
> +}
> +
> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
> +{
> +    size_t n_insns = qemu_plugin_tb_n_insns(tb);
> +    qemu_plugin_u64 quantum_insn =
> +        qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, quantum_insn);
> +    /* count (and eventually trap) once per tb */
> +    qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
> +        tb, QEMU_PLUGIN_INLINE_ADD_U64, quantum_insn, n_insns);
> +    qemu_plugin_register_vcpu_tb_exec_cond_cb(
> +        tb, every_quantum_insn,
> +        QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
> +        quantum_insn, max_insn_per_quantum, NULL);
> +}
> +
> +static void plugin_exit(qemu_plugin_id_t id, void *udata)
> +{
> +    qemu_plugin_scoreboard_free(vcpus);
> +}
> +
> +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
> +                                           const qemu_info_t *info, int argc,
> +                                           char **argv)
> +{
> +    for (int i = 0; i < argc; i++) {
> +        char *opt = argv[i];
> +        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
> +        if (g_strcmp0(tokens[0], "ips") == 0) {
> +            max_insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10);
> +            if (!max_insn_per_second && errno) {
> +                fprintf(stderr, "%s: couldn't parse %s (%s)\n",
> +                        __func__, tokens[1], g_strerror(errno));
> +                return -1;
> +            }
> +        } else {
> +            fprintf(stderr, "option parsing failed: %s\n", opt);
> +            return -1;
> +        }
> +    }
> +
> +    vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime));
> +    max_insn_per_quantum = max_insn_per_second / NUM_TIME_UPDATE_PER_SEC;
> +
> +    time_handle = qemu_plugin_request_time_control();
> +    g_assert(time_handle);
> +
> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
> +    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
> +    qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit);
> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
> +
> +    return 0;
> +}
> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
> index 0b64d2c1e3..449ead1130 100644
> --- a/contrib/plugins/Makefile
> +++ b/contrib/plugins/Makefile
> @@ -27,6 +27,7 @@ endif
>  NAMES += hwprofile
>  NAMES += cache
>  NAMES += drcov
> +NAMES += ips
>  
>  ifeq ($(CONFIG_WIN32),y)
>  SO_SUFFIX := .dll
> -- 
> 2.39.2
>
Philippe Mathieu-Daudé June 13, 2024, 8:54 a.m. UTC | #2
On 12/6/24 17:35, Alex Bennée wrote:
> From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> 
> This plugin uses the new time control interface to make decisions
> about the state of time during the emulation. The algorithm is
> currently very simple. The user specifies an ips rate which applies

... IPS rate (Instructions Per Second) which ...

> per core. If the core runs ahead of its allocated execution time the
> plugin sleeps for a bit to let real time catch up. Either way time is
> updated for the emulation as a function of total executed instructions
> with some adjustments for cores that idle.
> 
> Examples
> --------
> 
> Slow down execution of /bin/true:
> $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
> $ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
> real 4.000s
> 
> Boot a Linux kernel simulating a 250MHz cpu:
> $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
> check time until kernel panic on serial0
> 
> Tested in system mode by booting a full debian system, and using:
> $ sysbench cpu run
> Performance decrease linearly with the given number of ips.
> 
> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> Message-Id: <20240530220610.1245424-7-pierrick.bouvier@linaro.org>
> ---
>   contrib/plugins/ips.c    | 164 +++++++++++++++++++++++++++++++++++++++
>   contrib/plugins/Makefile |   1 +
>   2 files changed, 165 insertions(+)
>   create mode 100644 contrib/plugins/ips.c
> 
> diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
> new file mode 100644
> index 0000000000..db77729264
> --- /dev/null
> +++ b/contrib/plugins/ips.c
> @@ -0,0 +1,164 @@
> +/*
> + * ips rate limiting plugin.

The plugin names are really too packed to my taste (each time I look for
one I have to open most source files to figure out the correct one); so
please ease my life by using a more descriptive header at least:

      Instructions Per Second (IPS) rate limiting plugin.

Thanks.

> + * This plugin can be used to restrict the execution of a system to a
> + * particular number of Instructions Per Second (ips). This controls
> + * time as seen by the guest so while wall-clock time may be longer
> + * from the guests point of view time will pass at the normal rate.
> + *
> + * This uses the new plugin API which allows the plugin to control
> + * system time.
> + *
> + * Copyright (c) 2023 Linaro Ltd
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
Pierrick Bouvier June 14, 2024, 5:39 p.m. UTC | #3
On 6/13/24 01:54, Philippe Mathieu-Daudé wrote:
> On 12/6/24 17:35, Alex Bennée wrote:
>> From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>
>> This plugin uses the new time control interface to make decisions
>> about the state of time during the emulation. The algorithm is
>> currently very simple. The user specifies an ips rate which applies
> 
> ... IPS rate (Instructions Per Second) which ...
> 
>> per core. If the core runs ahead of its allocated execution time the
>> plugin sleeps for a bit to let real time catch up. Either way time is
>> updated for the emulation as a function of total executed instructions
>> with some adjustments for cores that idle.
>>
>> Examples
>> --------
>>
>> Slow down execution of /bin/true:
>> $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
>> $ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
>> real 4.000s
>>
>> Boot a Linux kernel simulating a 250MHz cpu:
>> $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
>> check time until kernel panic on serial0
>>
>> Tested in system mode by booting a full debian system, and using:
>> $ sysbench cpu run
>> Performance decrease linearly with the given number of ips.
>>
>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>> Message-Id: <20240530220610.1245424-7-pierrick.bouvier@linaro.org>
>> ---
>>    contrib/plugins/ips.c    | 164 +++++++++++++++++++++++++++++++++++++++
>>    contrib/plugins/Makefile |   1 +
>>    2 files changed, 165 insertions(+)
>>    create mode 100644 contrib/plugins/ips.c
>>
>> diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
>> new file mode 100644
>> index 0000000000..db77729264
>> --- /dev/null
>> +++ b/contrib/plugins/ips.c
>> @@ -0,0 +1,164 @@
>> +/*
>> + * ips rate limiting plugin.
> 
> The plugin names are really to packed to my taste (each time I look for
> one I have to open most source files to figure out the correct one); so
> please ease my life by using a more descriptive header at least:
> 
>        Instructions Per Second (IPS) rate limiting plugin.
> 
> Thanks.
> 

I agree most of the plugin names are pretty cryptic, and they are 
lacking a common "help" system, to describe what they do, and which 
options are available for them. It's definitely something we could add 
in the future.

Regarding what you reported, I'm totally ok with the change.

However, since this is a new series, I'm not sure if I or Alex should change 
it. If it's ok for you to modify this Alex, it could be simpler than 
waiting for me to push a new patch with just this.

Let me know how you deal with this usually, and I'll do what is needed.

Thanks,
Pierrick

>> + * This plugin can be used to restrict the execution of a system to a
>> + * particular number of Instructions Per Second (ips). This controls
>> + * time as seen by the guest so while wall-clock time may be longer
>> + * from the guests point of view time will pass at the normal rate.
>> + *
>> + * This uses the new plugin API which allows the plugin to control
>> + * system time.
>> + *
>> + * Copyright (c) 2023 Linaro Ltd
>> + *
>> + * SPDX-License-Identifier: GPL-2.0-or-later
>> + */
>
Pierrick Bouvier June 14, 2024, 5:42 p.m. UTC | #4
Hi Dave,

On 6/12/24 14:02, Dr. David Alan Gilbert wrote:
> * Alex Bennée (alex.bennee@linaro.org) wrote:
>> From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>
>> This plugin uses the new time control interface to make decisions
>> about the state of time during the emulation. The algorithm is
>> currently very simple. The user specifies an ips rate which applies
>> per core. If the core runs ahead of its allocated execution time the
>> plugin sleeps for a bit to let real time catch up. Either way time is
>> updated for the emulation as a function of total executed instructions
>> with some adjustments for cores that idle.
> 
> A few random thoughts:
>    a) Are there any definitions of what a plugin that controls time
>       should do with a live migration?

It's not something that was considered as part of this work.

>    b) The sleep in migration/dirtyrate.c points out g_usleep might
>       sleep for longer, so reads the actual wall clock time to
>       figure out a new 'now'.

The current API mentions time starts at 0 from qemu startup. Maybe we 
could consider changing this behavior in the future to retrieve time 
from an existing migrated machine.

>    c) A fun thing to do with this would be to follow an external simulation
>       or 2nd qemu, trying to keep the two from running too far past
>       each other.
> 

Basically, to slow the first one, waiting for the replicated one to 
catch up?

> Dave >
>> Examples
>> --------
>>
>> Slow down execution of /bin/true:
>> $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
>> $ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
>> real 4.000s
>>
>> Boot a Linux kernel simulating a 250MHz cpu:
>> $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
>> check time until kernel panic on serial0
>>
>> Tested in system mode by booting a full debian system, and using:
>> $ sysbench cpu run
>> Performance decrease linearly with the given number of ips.
>>
>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>> Message-Id: <20240530220610.1245424-7-pierrick.bouvier@linaro.org>
>> ---
>>   contrib/plugins/ips.c    | 164 +++++++++++++++++++++++++++++++++++++++
>>   contrib/plugins/Makefile |   1 +
>>   2 files changed, 165 insertions(+)
>>   create mode 100644 contrib/plugins/ips.c
>>
>> diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
>> new file mode 100644
>> index 0000000000..db77729264
>> --- /dev/null
>> +++ b/contrib/plugins/ips.c
>> @@ -0,0 +1,164 @@
>> +/*
>> + * ips rate limiting plugin.
>> + *
>> + * This plugin can be used to restrict the execution of a system to a
>> + * particular number of Instructions Per Second (ips). This controls
>> + * time as seen by the guest so while wall-clock time may be longer
>> + * from the guests point of view time will pass at the normal rate.
>> + *
>> + * This uses the new plugin API which allows the plugin to control
>> + * system time.
>> + *
>> + * Copyright (c) 2023 Linaro Ltd
>> + *
>> + * SPDX-License-Identifier: GPL-2.0-or-later
>> + */
>> +
>> +#include <stdio.h>
>> +#include <glib.h>
>> +#include <qemu-plugin.h>
>> +
>> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>> +
>> +/* how many times do we update time per sec */
>> +#define NUM_TIME_UPDATE_PER_SEC 10
>> +#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000)
>> +
>> +static GMutex global_state_lock;
>> +
>> +static uint64_t max_insn_per_second = 1000 * 1000 * 1000; /* ips per core, per second */
>> +static uint64_t max_insn_per_quantum; /* trap every N instructions */
>> +static int64_t virtual_time_ns; /* last set virtual time */
>> +
>> +static const void *time_handle;
>> +
>> +typedef struct {
>> +    uint64_t total_insn;
>> +    uint64_t quantum_insn; /* insn in last quantum */
>> +    int64_t last_quantum_time; /* time when last quantum started */
>> +} vCPUTime;
>> +
>> +struct qemu_plugin_scoreboard *vcpus;
>> +
>> +/* return epoch time in ns */
>> +static int64_t now_ns(void)
>> +{
>> +    return g_get_real_time() * 1000;
>> +}
>> +
>> +static uint64_t num_insn_during(int64_t elapsed_ns)
>> +{
>> +    double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC;
>> +    return num_secs * (double) max_insn_per_second;
>> +}
>> +
>> +static int64_t time_for_insn(uint64_t num_insn)
>> +{
>> +    double num_secs = (double) num_insn / (double) max_insn_per_second;
>> +    return num_secs * (double) NSEC_IN_ONE_SEC;
>> +}
>> +
>> +static void update_system_time(vCPUTime *vcpu)
>> +{
>> +    int64_t elapsed_ns = now_ns() - vcpu->last_quantum_time;
>> +    uint64_t max_insn = num_insn_during(elapsed_ns);
>> +
>> +    if (vcpu->quantum_insn >= max_insn) {
>> +        /* this vcpu ran faster than expected, so it has to sleep */
>> +        uint64_t insn_advance = vcpu->quantum_insn - max_insn;
>> +        uint64_t time_advance_ns = time_for_insn(insn_advance);
>> +        int64_t sleep_us = time_advance_ns / 1000;
>> +        g_usleep(sleep_us);
>> +    }
>> +
>> +    vcpu->total_insn += vcpu->quantum_insn;
>> +    vcpu->quantum_insn = 0;
>> +    vcpu->last_quantum_time = now_ns();
>> +
>> +    /* based on total number of instructions, what should be the new time? */
>> +    int64_t new_virtual_time = time_for_insn(vcpu->total_insn);
>> +
>> +    g_mutex_lock(&global_state_lock);
>> +
>> +    /* Time only moves forward. Another vcpu might have updated it already. */
>> +    if (new_virtual_time > virtual_time_ns) {
>> +        qemu_plugin_update_ns(time_handle, new_virtual_time);
>> +        virtual_time_ns = new_virtual_time;
>> +    }
>> +
>> +    g_mutex_unlock(&global_state_lock);
>> +}
>> +
>> +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
>> +{
>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>> +    vcpu->total_insn = 0;
>> +    vcpu->quantum_insn = 0;
>> +    vcpu->last_quantum_time = now_ns();
>> +}
>> +
>> +static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index)
>> +{
>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>> +    update_system_time(vcpu);
>> +}
>> +
>> +static void every_quantum_insn(unsigned int cpu_index, void *udata)
>> +{
>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>> +    g_assert(vcpu->quantum_insn >= max_insn_per_quantum);
>> +    update_system_time(vcpu);
>> +}
>> +
>> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>> +{
>> +    size_t n_insns = qemu_plugin_tb_n_insns(tb);
>> +    qemu_plugin_u64 quantum_insn =
>> +        qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, quantum_insn);
>> +    /* count (and eventually trap) once per tb */
>> +    qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
>> +        tb, QEMU_PLUGIN_INLINE_ADD_U64, quantum_insn, n_insns);
>> +    qemu_plugin_register_vcpu_tb_exec_cond_cb(
>> +        tb, every_quantum_insn,
>> +        QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
>> +        quantum_insn, max_insn_per_quantum, NULL);
>> +}
>> +
>> +static void plugin_exit(qemu_plugin_id_t id, void *udata)
>> +{
>> +    qemu_plugin_scoreboard_free(vcpus);
>> +}
>> +
>> +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
>> +                                           const qemu_info_t *info, int argc,
>> +                                           char **argv)
>> +{
>> +    for (int i = 0; i < argc; i++) {
>> +        char *opt = argv[i];
>> +        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
>> +        if (g_strcmp0(tokens[0], "ips") == 0) {
>> +            max_insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10);
>> +            if (!max_insn_per_second && errno) {
>> +                fprintf(stderr, "%s: couldn't parse %s (%s)\n",
>> +                        __func__, tokens[1], g_strerror(errno));
>> +                return -1;
>> +            }
>> +        } else {
>> +            fprintf(stderr, "option parsing failed: %s\n", opt);
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime));
>> +    max_insn_per_quantum = max_insn_per_second / NUM_TIME_UPDATE_PER_SEC;
>> +
>> +    time_handle = qemu_plugin_request_time_control();
>> +    g_assert(time_handle);
>> +
>> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
>> +    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
>> +    qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit);
>> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
>> +
>> +    return 0;
>> +}
>> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
>> index 0b64d2c1e3..449ead1130 100644
>> --- a/contrib/plugins/Makefile
>> +++ b/contrib/plugins/Makefile
>> @@ -27,6 +27,7 @@ endif
>>   NAMES += hwprofile
>>   NAMES += cache
>>   NAMES += drcov
>> +NAMES += ips
>>   
>>   ifeq ($(CONFIG_WIN32),y)
>>   SO_SUFFIX := .dll
>> -- 
>> 2.39.2
>>
Dr. David Alan Gilbert June 14, 2024, 10 p.m. UTC | #5
* Pierrick Bouvier (pierrick.bouvier@linaro.org) wrote:
> Hi Dave,
> 
> On 6/12/24 14:02, Dr. David Alan Gilbert wrote:
> > * Alex Bennée (alex.bennee@linaro.org) wrote:
> > > From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> > > 
> > > This plugin uses the new time control interface to make decisions
> > > about the state of time during the emulation. The algorithm is
> > > currently very simple. The user specifies an ips rate which applies
> > > per core. If the core runs ahead of its allocated execution time the
> > > plugin sleeps for a bit to let real time catch up. Either way time is
> > > updated for the emulation as a function of total executed instructions
> > > with some adjustments for cores that idle.
> > 
> > A few random thoughts:
> >    a) Are there any definitions of what a plugin that controls time
> >       should do with a live migration?
> 
> It's not something that was considered as part of this work.

That's OK, the only thing is we need to stop anyone from hitting problems
when they don't realise it's not been addressed.
One way might be to add a migration blocker; see include/migration/blocker.h
then you might print something like 'Migration not available due to plugin ....'

> >    b) The sleep in migration/dirtyrate.c points out g_usleep might
> >       sleep for longer, so reads the actual wall clock time to
> >       figure out a new 'now'.
> 
> The current API mentions time starts at 0 from qemu startup. Maybe we could
> consider in the future to change this behavior to retrieve time from an
> existing migrated machine.

Ah, I meant for (b) to be independent of (a) - not related to migration; just
down to the fact you used g_usleep in the plugin and a g_usleep might sleep
for a different amount of time than you asked.

> >    c) A fun thing to do with this would be to follow an external simulation
> >       or 2nd qemu, trying to keep the two from running too far past
> >       each other.
> > 
> 
> Basically, to slow the first one, waiting for the replicated one to catch
> up?

Yes, something like that.

Dave

> > Dave >
> > > Examples
> > > --------
> > > 
> > > Slow down execution of /bin/true:
> > > $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
> > > $ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
> > > real 4.000s
> > > 
> > > Boot a Linux kernel simulating a 250MHz cpu:
> > > $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
> > > check time until kernel panic on serial0
> > > 
> > > Tested in system mode by booting a full debian system, and using:
> > > $ sysbench cpu run
> > > Performance decrease linearly with the given number of ips.
> > > 
> > > Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> > > Message-Id: <20240530220610.1245424-7-pierrick.bouvier@linaro.org>
> > > ---
> > >   contrib/plugins/ips.c    | 164 +++++++++++++++++++++++++++++++++++++++
> > >   contrib/plugins/Makefile |   1 +
> > >   2 files changed, 165 insertions(+)
> > >   create mode 100644 contrib/plugins/ips.c
> > > 
> > > diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
> > > new file mode 100644
> > > index 0000000000..db77729264
> > > --- /dev/null
> > > +++ b/contrib/plugins/ips.c
> > > @@ -0,0 +1,164 @@
> > > +/*
> > > + * ips rate limiting plugin.
> > > + *
> > > + * This plugin can be used to restrict the execution of a system to a
> > > + * particular number of Instructions Per Second (ips). This controls
> > > + * time as seen by the guest so while wall-clock time may be longer
> > > + * from the guests point of view time will pass at the normal rate.
> > > + *
> > > + * This uses the new plugin API which allows the plugin to control
> > > + * system time.
> > > + *
> > > + * Copyright (c) 2023 Linaro Ltd
> > > + *
> > > + * SPDX-License-Identifier: GPL-2.0-or-later
> > > + */
> > > +
> > > +#include <stdio.h>
> > > +#include <glib.h>
> > > +#include <qemu-plugin.h>
> > > +
> > > +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
> > > +
> > > +/* how many times do we update time per sec */
> > > +#define NUM_TIME_UPDATE_PER_SEC 10
> > > +#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000)
> > > +
> > > +static GMutex global_state_lock;
> > > +
> > > +static uint64_t max_insn_per_second = 1000 * 1000 * 1000; /* ips per core, per second */
> > > +static uint64_t max_insn_per_quantum; /* trap every N instructions */
> > > +static int64_t virtual_time_ns; /* last set virtual time */
> > > +
> > > +static const void *time_handle;
> > > +
> > > +typedef struct {
> > > +    uint64_t total_insn;
> > > +    uint64_t quantum_insn; /* insn in last quantum */
> > > +    int64_t last_quantum_time; /* time when last quantum started */
> > > +} vCPUTime;
> > > +
> > > +struct qemu_plugin_scoreboard *vcpus;
> > > +
> > > +/* return epoch time in ns */
> > > +static int64_t now_ns(void)
> > > +{
> > > +    return g_get_real_time() * 1000;
> > > +}
> > > +
> > > +static uint64_t num_insn_during(int64_t elapsed_ns)
> > > +{
> > > +    double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC;
> > > +    return num_secs * (double) max_insn_per_second;
> > > +}
> > > +
> > > +static int64_t time_for_insn(uint64_t num_insn)
> > > +{
> > > +    double num_secs = (double) num_insn / (double) max_insn_per_second;
> > > +    return num_secs * (double) NSEC_IN_ONE_SEC;
> > > +}
> > > +
> > > +static void update_system_time(vCPUTime *vcpu)
> > > +{
> > > +    int64_t elapsed_ns = now_ns() - vcpu->last_quantum_time;
> > > +    uint64_t max_insn = num_insn_during(elapsed_ns);
> > > +
> > > +    if (vcpu->quantum_insn >= max_insn) {
> > > +        /* this vcpu ran faster than expected, so it has to sleep */
> > > +        uint64_t insn_advance = vcpu->quantum_insn - max_insn;
> > > +        uint64_t time_advance_ns = time_for_insn(insn_advance);
> > > +        int64_t sleep_us = time_advance_ns / 1000;
> > > +        g_usleep(sleep_us);
> > > +    }
> > > +
> > > +    vcpu->total_insn += vcpu->quantum_insn;
> > > +    vcpu->quantum_insn = 0;
> > > +    vcpu->last_quantum_time = now_ns();
> > > +
> > > +    /* based on total number of instructions, what should be the new time? */
> > > +    int64_t new_virtual_time = time_for_insn(vcpu->total_insn);
> > > +
> > > +    g_mutex_lock(&global_state_lock);
> > > +
> > > +    /* Time only moves forward. Another vcpu might have updated it already. */
> > > +    if (new_virtual_time > virtual_time_ns) {
> > > +        qemu_plugin_update_ns(time_handle, new_virtual_time);
> > > +        virtual_time_ns = new_virtual_time;
> > > +    }
> > > +
> > > +    g_mutex_unlock(&global_state_lock);
> > > +}
> > > +
> > > +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
> > > +{
> > > +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> > > +    vcpu->total_insn = 0;
> > > +    vcpu->quantum_insn = 0;
> > > +    vcpu->last_quantum_time = now_ns();
> > > +}
> > > +
> > > +static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index)
> > > +{
> > > +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> > > +    update_system_time(vcpu);
> > > +}
> > > +
> > > +static void every_quantum_insn(unsigned int cpu_index, void *udata)
> > > +{
> > > +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> > > +    g_assert(vcpu->quantum_insn >= max_insn_per_quantum);
> > > +    update_system_time(vcpu);
> > > +}
> > > +
> > > +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
> > > +{
> > > +    size_t n_insns = qemu_plugin_tb_n_insns(tb);
> > > +    qemu_plugin_u64 quantum_insn =
> > > +        qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, quantum_insn);
> > > +    /* count (and eventually trap) once per tb */
> > > +    qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
> > > +        tb, QEMU_PLUGIN_INLINE_ADD_U64, quantum_insn, n_insns);
> > > +    qemu_plugin_register_vcpu_tb_exec_cond_cb(
> > > +        tb, every_quantum_insn,
> > > +        QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
> > > +        quantum_insn, max_insn_per_quantum, NULL);
> > > +}
> > > +
> > > +static void plugin_exit(qemu_plugin_id_t id, void *udata)
> > > +{
> > > +    qemu_plugin_scoreboard_free(vcpus);
> > > +}
> > > +
> > > +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
> > > +                                           const qemu_info_t *info, int argc,
> > > +                                           char **argv)
> > > +{
> > > +    for (int i = 0; i < argc; i++) {
> > > +        char *opt = argv[i];
> > > +        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
> > > +        if (g_strcmp0(tokens[0], "ips") == 0) {
> > > +            max_insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10);
> > > +            if (!max_insn_per_second && errno) {
> > > +                fprintf(stderr, "%s: couldn't parse %s (%s)\n",
> > > +                        __func__, tokens[1], g_strerror(errno));
> > > +                return -1;
> > > +            }
> > > +        } else {
> > > +            fprintf(stderr, "option parsing failed: %s\n", opt);
> > > +            return -1;
> > > +        }
> > > +    }
> > > +
> > > +    vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime));
> > > +    max_insn_per_quantum = max_insn_per_second / NUM_TIME_UPDATE_PER_SEC;
> > > +
> > > +    time_handle = qemu_plugin_request_time_control();
> > > +    g_assert(time_handle);
> > > +
> > > +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
> > > +    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
> > > +    qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit);
> > > +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
> > > +
> > > +    return 0;
> > > +}
> > > diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
> > > index 0b64d2c1e3..449ead1130 100644
> > > --- a/contrib/plugins/Makefile
> > > +++ b/contrib/plugins/Makefile
> > > @@ -27,6 +27,7 @@ endif
> > >   NAMES += hwprofile
> > >   NAMES += cache
> > >   NAMES += drcov
> > > +NAMES += ips
> > >   ifeq ($(CONFIG_WIN32),y)
> > >   SO_SUFFIX := .dll
> > > -- 
> > > 2.39.2
> > >
Alex Bennée June 16, 2024, 6:43 p.m. UTC | #6
Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:

> On 6/13/24 01:54, Philippe Mathieu-Daudé wrote:
>> On 12/6/24 17:35, Alex Bennée wrote:
>>> From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>>
>>> This plugin uses the new time control interface to make decisions
>>> about the state of time during the emulation. The algorithm is
>>> currently very simple. The user specifies an ips rate which applies
>> ... IPS rate (Instructions Per Second) which ...
>> 
>>> per core. If the core runs ahead of its allocated execution time the
>>> plugin sleeps for a bit to let real time catch up. Either way time is
>>> updated for the emulation as a function of total executed instructions
>>> with some adjustments for cores that idle.
>>>
>>> Examples
>>> --------
>>>
>>> Slow down execution of /bin/true:
>>> $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
>>> $ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
>>> real 4.000s
>>>
>>> Boot a Linux kernel simulating a 250MHz cpu:
>>> $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
>>> check time until kernel panic on serial0
>>>
>>> Tested in system mode by booting a full debian system, and using:
>>> $ sysbench cpu run
>>> Performance decrease linearly with the given number of ips.
>>>
>>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>> Message-Id: <20240530220610.1245424-7-pierrick.bouvier@linaro.org>
>>> ---
>>>    contrib/plugins/ips.c    | 164 +++++++++++++++++++++++++++++++++++++++
>>>    contrib/plugins/Makefile |   1 +
>>>    2 files changed, 165 insertions(+)
>>>    create mode 100644 contrib/plugins/ips.c
>>>
>>> diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
>>> new file mode 100644
>>> index 0000000000..db77729264
>>> --- /dev/null
>>> +++ b/contrib/plugins/ips.c
>>> @@ -0,0 +1,164 @@
>>> +/*
>>> + * ips rate limiting plugin.
>> The plugin names are really to packed to my taste (each time I look
>> for
>> one I have to open most source files to figure out the correct one); so
>> please ease my life by using a more descriptive header at least:
>>        Instructions Per Second (IPS) rate limiting plugin.
>> Thanks.
>> 
>
> I agree most of the plugin names are pretty cryptic, and they are
> lacking a common "help" system, to describe what they do, and which
> options are available for them. It's definitely something we could add
> in the future.
>
> Regarding what you reported, I'm totally ok with the change.
>
> However, since this is a new series, I'm not if I or Alex should
> change it. If it's ok for you to modify this Alex, it could be simpler
> than waiting for me to push a new patch with just this.

Its my tree so I'll fix it up. I'll ask you if I want a respin ;-)
Pierrick Bouvier June 17, 2024, 7:11 p.m. UTC | #7
On 6/16/24 11:43, Alex Bennée wrote:
> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
> 
>> On 6/13/24 01:54, Philippe Mathieu-Daudé wrote:
>>> On 12/6/24 17:35, Alex Bennée wrote:
>>>> From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>>>
>>>> This plugin uses the new time control interface to make decisions
>>>> about the state of time during the emulation. The algorithm is
>>>> currently very simple. The user specifies an ips rate which applies
>>> ... IPS rate (Instructions Per Second) which ...
>>>
>>>> per core. If the core runs ahead of its allocated execution time the
>>>> plugin sleeps for a bit to let real time catch up. Either way time is
>>>> updated for the emulation as a function of total executed instructions
>>>> with some adjustments for cores that idle.
>>>>
>>>> Examples
>>>> --------
>>>>
>>>> Slow down execution of /bin/true:
>>>> $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
>>>> $ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
>>>> real 4.000s
>>>>
>>>> Boot a Linux kernel simulating a 250MHz cpu:
>>>> $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
>>>> check time until kernel panic on serial0
>>>>
>>>> Tested in system mode by booting a full debian system, and using:
>>>> $ sysbench cpu run
>>>> Performance decrease linearly with the given number of ips.
>>>>
>>>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>>> Message-Id: <20240530220610.1245424-7-pierrick.bouvier@linaro.org>
>>>> ---
>>>>     contrib/plugins/ips.c    | 164 +++++++++++++++++++++++++++++++++++++++
>>>>     contrib/plugins/Makefile |   1 +
>>>>     2 files changed, 165 insertions(+)
>>>>     create mode 100644 contrib/plugins/ips.c
>>>>
>>>> diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
>>>> new file mode 100644
>>>> index 0000000000..db77729264
>>>> --- /dev/null
>>>> +++ b/contrib/plugins/ips.c
>>>> @@ -0,0 +1,164 @@
>>>> +/*
>>>> + * ips rate limiting plugin.
>>> The plugin names are really to packed to my taste (each time I look
>>> for
>>> one I have to open most source files to figure out the correct one); so
>>> please ease my life by using a more descriptive header at least:
>>>         Instructions Per Second (IPS) rate limiting plugin.
>>> Thanks.
>>>
>>
>> I agree most of the plugin names are pretty cryptic, and they are
>> lacking a common "help" system, to describe what they do, and which
>> options are available for them. It's definitely something we could add
>> in the future.
>>
>> Regarding what you reported, I'm totally ok with the change.
>>
>> However, since this is a new series, I'm not if I or Alex should
>> change it. If it's ok for you to modify this Alex, it could be simpler
>> than waiting for me to push a new patch with just this.
> 
> Its my tree so I'll fix it up. I'll ask you if I want a respin ;-)
> 

Thanks Alex.
Pierrick Bouvier June 17, 2024, 7:19 p.m. UTC | #8
On 6/14/24 15:00, Dr. David Alan Gilbert wrote:
> * Pierrick Bouvier (pierrick.bouvier@linaro.org) wrote:
>> Hi Dave,
>>
>> On 6/12/24 14:02, Dr. David Alan Gilbert wrote:
>>> * Alex Bennée (alex.bennee@linaro.org) wrote:
>>>> From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>>>
>>>> This plugin uses the new time control interface to make decisions
>>>> about the state of time during the emulation. The algorithm is
>>>> currently very simple. The user specifies an ips rate which applies
>>>> per core. If the core runs ahead of its allocated execution time the
>>>> plugin sleeps for a bit to let real time catch up. Either way time is
>>>> updated for the emulation as a function of total executed instructions
>>>> with some adjustments for cores that idle.
>>>
>>> A few random thoughts:
>>>     a) Are there any definitions of what a plugin that controls time
>>>        should do with a live migration?
>>
>> It's not something that was considered as part of this work.
> 
> That's OK, the only thing is we need to stop anyone from hitting problems
> when they don't realise it's not been addressed.
> One way might be to add a migration blocker; see include/migration/blocker.h
> then you might print something like 'Migration not available due to plugin ....'
> 

So basically, we could make a call to migrate_add_blocker(), when 
someone requests time_control through the plugin API?

IMHO, it's something that should be part of the plugin API (if any plugin 
calls qemu_plugin_request_time_control()), instead of the plugin code 
itself. This way, any plugin getting time control automatically blocks 
any potential migration.

>>>     b) The sleep in migration/dirtyrate.c points out g_usleep might
>>>        sleep for longer, so reads the actual wall clock time to
>>>        figure out a new 'now'.
>>
>> The current API mentions time starts at 0 from qemu startup. Maybe we could
>> consider in the future to change this behavior to retrieve time from an
>> existing migrated machine.
> 
> Ah, I meant for (b) to be independent of (a) - not related to migration; just
> down to the fact you used g_usleep in the plugin and a g_usleep might sleep
> for a different amount of time than you asked.
> 

We know that, and the plugin is not meant to be "cycle accurate" in 
general; we just set an upper bound for the number of instructions we can 
execute in a given amount of time (1/10 second for now).

We compute the new time based on how many instructions effectively ran 
on the most used cpu, so even if we slept a bit more than expected, it's 
correct.

>>>     c) A fun thing to do with this would be to follow an external simulation
>>>        or 2nd qemu, trying to keep the two from running too far past
>>>        each other.
>>>
>>
>> Basically, to slow the first one, waiting for the replicated one to catch
>> up?
> 
> Yes, something like that.
> 
> Dave
> 
>>> Dave >
>>>> Examples
>>>> --------
>>>>
>>>> Slow down execution of /bin/true:
>>>> $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
>>>> $ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
>>>> real 4.000s
>>>>
>>>> Boot a Linux kernel simulating a 250MHz cpu:
>>>> $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
>>>> check time until kernel panic on serial0
>>>>
>>>> Tested in system mode by booting a full debian system, and using:
>>>> $ sysbench cpu run
>>>> Performance decrease linearly with the given number of ips.
>>>>
>>>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>>> Message-Id: <20240530220610.1245424-7-pierrick.bouvier@linaro.org>
>>>> ---
>>>>    contrib/plugins/ips.c    | 164 +++++++++++++++++++++++++++++++++++++++
>>>>    contrib/plugins/Makefile |   1 +
>>>>    2 files changed, 165 insertions(+)
>>>>    create mode 100644 contrib/plugins/ips.c
>>>>
>>>> diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
>>>> new file mode 100644
>>>> index 0000000000..db77729264
>>>> --- /dev/null
>>>> +++ b/contrib/plugins/ips.c
>>>> @@ -0,0 +1,164 @@
>>>> +/*
>>>> + * ips rate limiting plugin.
>>>> + *
>>>> + * This plugin can be used to restrict the execution of a system to a
>>>> + * particular number of Instructions Per Second (ips). This controls
>>>> + * time as seen by the guest so while wall-clock time may be longer
>>>> + * from the guests point of view time will pass at the normal rate.
>>>> + *
>>>> + * This uses the new plugin API which allows the plugin to control
>>>> + * system time.
>>>> + *
>>>> + * Copyright (c) 2023 Linaro Ltd
>>>> + *
>>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>>> + */
>>>> +
>>>> +#include <stdio.h>
>>>> +#include <glib.h>
>>>> +#include <qemu-plugin.h>
>>>> +
>>>> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>>>> +
>>>> +/* how many times do we update time per sec */
>>>> +#define NUM_TIME_UPDATE_PER_SEC 10
>>>> +#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000)
>>>> +
>>>> +static GMutex global_state_lock;
>>>> +
>>>> +static uint64_t max_insn_per_second = 1000 * 1000 * 1000; /* ips per core, per second */
>>>> +static uint64_t max_insn_per_quantum; /* trap every N instructions */
>>>> +static int64_t virtual_time_ns; /* last set virtual time */
>>>> +
>>>> +static const void *time_handle;
>>>> +
>>>> +typedef struct {
>>>> +    uint64_t total_insn;
>>>> +    uint64_t quantum_insn; /* insn in last quantum */
>>>> +    int64_t last_quantum_time; /* time when last quantum started */
>>>> +} vCPUTime;
>>>> +
>>>> +struct qemu_plugin_scoreboard *vcpus;
>>>> +
>>>> +/* return epoch time in ns */
>>>> +static int64_t now_ns(void)
>>>> +{
>>>> +    return g_get_real_time() * 1000;
>>>> +}
>>>> +
>>>> +static uint64_t num_insn_during(int64_t elapsed_ns)
>>>> +{
>>>> +    double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC;
>>>> +    return num_secs * (double) max_insn_per_second;
>>>> +}
>>>> +
>>>> +static int64_t time_for_insn(uint64_t num_insn)
>>>> +{
>>>> +    double num_secs = (double) num_insn / (double) max_insn_per_second;
>>>> +    return num_secs * (double) NSEC_IN_ONE_SEC;
>>>> +}
>>>> +
>>>> +static void update_system_time(vCPUTime *vcpu)
>>>> +{
>>>> +    int64_t elapsed_ns = now_ns() - vcpu->last_quantum_time;
>>>> +    uint64_t max_insn = num_insn_during(elapsed_ns);
>>>> +
>>>> +    if (vcpu->quantum_insn >= max_insn) {
>>>> +        /* this vcpu ran faster than expected, so it has to sleep */
>>>> +        uint64_t insn_advance = vcpu->quantum_insn - max_insn;
>>>> +        uint64_t time_advance_ns = time_for_insn(insn_advance);
>>>> +        int64_t sleep_us = time_advance_ns / 1000;
>>>> +        g_usleep(sleep_us);
>>>> +    }
>>>> +
>>>> +    vcpu->total_insn += vcpu->quantum_insn;
>>>> +    vcpu->quantum_insn = 0;
>>>> +    vcpu->last_quantum_time = now_ns();
>>>> +
>>>> +    /* based on total number of instructions, what should be the new time? */
>>>> +    int64_t new_virtual_time = time_for_insn(vcpu->total_insn);
>>>> +
>>>> +    g_mutex_lock(&global_state_lock);
>>>> +
>>>> +    /* Time only moves forward. Another vcpu might have updated it already. */
>>>> +    if (new_virtual_time > virtual_time_ns) {
>>>> +        qemu_plugin_update_ns(time_handle, new_virtual_time);
>>>> +        virtual_time_ns = new_virtual_time;
>>>> +    }
>>>> +
>>>> +    g_mutex_unlock(&global_state_lock);
>>>> +}
>>>> +
>>>> +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
>>>> +{
>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>> +    vcpu->total_insn = 0;
>>>> +    vcpu->quantum_insn = 0;
>>>> +    vcpu->last_quantum_time = now_ns();
>>>> +}
>>>> +
>>>> +static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index)
>>>> +{
>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>> +    update_system_time(vcpu);
>>>> +}
>>>> +
>>>> +static void every_quantum_insn(unsigned int cpu_index, void *udata)
>>>> +{
>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>> +    g_assert(vcpu->quantum_insn >= max_insn_per_quantum);
>>>> +    update_system_time(vcpu);
>>>> +}
>>>> +
>>>> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>>>> +{
>>>> +    size_t n_insns = qemu_plugin_tb_n_insns(tb);
>>>> +    qemu_plugin_u64 quantum_insn =
>>>> +        qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, quantum_insn);
>>>> +    /* count (and eventually trap) once per tb */
>>>> +    qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
>>>> +        tb, QEMU_PLUGIN_INLINE_ADD_U64, quantum_insn, n_insns);
>>>> +    qemu_plugin_register_vcpu_tb_exec_cond_cb(
>>>> +        tb, every_quantum_insn,
>>>> +        QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
>>>> +        quantum_insn, max_insn_per_quantum, NULL);
>>>> +}
>>>> +
>>>> +static void plugin_exit(qemu_plugin_id_t id, void *udata)
>>>> +{
>>>> +    qemu_plugin_scoreboard_free(vcpus);
>>>> +}
>>>> +
>>>> +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
>>>> +                                           const qemu_info_t *info, int argc,
>>>> +                                           char **argv)
>>>> +{
>>>> +    for (int i = 0; i < argc; i++) {
>>>> +        char *opt = argv[i];
>>>> +        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
>>>> +        if (g_strcmp0(tokens[0], "ips") == 0) {
>>>> +            max_insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10);
>>>> +            if (!max_insn_per_second && errno) {
>>>> +                fprintf(stderr, "%s: couldn't parse %s (%s)\n",
>>>> +                        __func__, tokens[1], g_strerror(errno));
>>>> +                return -1;
>>>> +            }
>>>> +        } else {
>>>> +            fprintf(stderr, "option parsing failed: %s\n", opt);
>>>> +            return -1;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime));
>>>> +    max_insn_per_quantum = max_insn_per_second / NUM_TIME_UPDATE_PER_SEC;
>>>> +
>>>> +    time_handle = qemu_plugin_request_time_control();
>>>> +    g_assert(time_handle);
>>>> +
>>>> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
>>>> +    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
>>>> +    qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit);
>>>> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
>>>> index 0b64d2c1e3..449ead1130 100644
>>>> --- a/contrib/plugins/Makefile
>>>> +++ b/contrib/plugins/Makefile
>>>> @@ -27,6 +27,7 @@ endif
>>>>    NAMES += hwprofile
>>>>    NAMES += cache
>>>>    NAMES += drcov
>>>> +NAMES += ips
>>>>    ifeq ($(CONFIG_WIN32),y)
>>>>    SO_SUFFIX := .dll
>>>> -- 
>>>> 2.39.2
>>>>
Dr. David Alan Gilbert June 17, 2024, 8:56 p.m. UTC | #9
* Pierrick Bouvier (pierrick.bouvier@linaro.org) wrote:
> On 6/14/24 15:00, Dr. David Alan Gilbert wrote:
> > * Pierrick Bouvier (pierrick.bouvier@linaro.org) wrote:
> > > Hi Dave,
> > > 
> > > On 6/12/24 14:02, Dr. David Alan Gilbert wrote:
> > > > * Alex Bennée (alex.bennee@linaro.org) wrote:
> > > > > From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> > > > > 
> > > > > This plugin uses the new time control interface to make decisions
> > > > > about the state of time during the emulation. The algorithm is
> > > > > currently very simple. The user specifies an ips rate which applies
> > > > > per core. If the core runs ahead of its allocated execution time the
> > > > > plugin sleeps for a bit to let real time catch up. Either way time is
> > > > > updated for the emulation as a function of total executed instructions
> > > > > with some adjustments for cores that idle.
> > > > 
> > > > A few random thoughts:
> > > >     a) Are there any definitions of what a plugin that controls time
> > > >        should do with a live migration?
> > > 
> > > It's not something that was considered as part of this work.
> > 
> > That's OK, the only thing is we need to stop anyone from hitting problems
> > when they don't realise it's not been addressed.
> > One way might be to add a migration blocker; see include/migration/blocker.h
> > then you might print something like 'Migration not available due to plugin ....'
> > 
> 
> So basically, we could make a call to migrate_add_blocker(), when someone
> request time_control through plugin API?
> 
> IMHO, it's something that should be part of plugin API (if any plugin calls
> qemu_plugin_request_time_control()), instead of the plugin code itself. This
> way, any plugin getting time control automatically blocks any potential
> migration.

Note my question asked for 'any definitions of what a plugin ..' - so
you could define it that way; another option is to think that in the future
you may allow it, with the plugin somehow interacting with migration so as
not to change time at certain migration phases.

> > > >     b) The sleep in migration/dirtyrate.c points out g_usleep might
> > > >        sleep for longer, so reads the actual wall clock time to
> > > >        figure out a new 'now'.
> > > 
> > > The current API mentions time starts at 0 from qemu startup. Maybe we could
> > > consider in the future to change this behavior to retrieve time from an
> > > existing migrated machine.
> > 
> > Ah, I meant for (b) to be independent of (a) - not related to migration; just
> > down to the fact you used g_usleep in the plugin and a g_usleep might sleep
> > for a different amount of time than you asked.
> > 
> 
> We know that, and the plugin is not meant to be "cycle accurate" in general,
> we just set a upper bound for number of instructions we can execute in a
> given amount of time (1/10 second for now).
> 
> We compute the new time based on how many instructions effectively ran on
> the most used cpu, so even if we slept a bit more than expected, it's
> correct.

Ah OK.

Dave

> > > >     c) A fun thing to do with this would be to follow an external simulation
> > > >        or 2nd qemu, trying to keep the two from running too far past
> > > >        each other.
> > > > 
> > > 
> > > Basically, to slow the first one, waiting for the replicated one to catch
> > > up?
> > 
> > Yes, something like that.
> > 
> > Dave
> > 
> > > > Dave >
> > > > > Examples
> > > > > --------
> > > > > 
> > > > > Slow down execution of /bin/true:
> > > > > $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
> > > > > $ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
> > > > > real 4.000s
> > > > > 
> > > > > Boot a Linux kernel simulating a 250MHz cpu:
> > > > > $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
> > > > > check time until kernel panic on serial0
> > > > > 
> > > > > Tested in system mode by booting a full debian system, and using:
> > > > > $ sysbench cpu run
> > > > > Performance decrease linearly with the given number of ips.
> > > > > 
> > > > > Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> > > > > Message-Id: <20240530220610.1245424-7-pierrick.bouvier@linaro.org>
> > > > > ---
> > > > >    contrib/plugins/ips.c    | 164 +++++++++++++++++++++++++++++++++++++++
> > > > >    contrib/plugins/Makefile |   1 +
> > > > >    2 files changed, 165 insertions(+)
> > > > >    create mode 100644 contrib/plugins/ips.c
> > > > > 
> > > > > diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
> > > > > new file mode 100644
> > > > > index 0000000000..db77729264
> > > > > --- /dev/null
> > > > > +++ b/contrib/plugins/ips.c
> > > > > @@ -0,0 +1,164 @@
> > > > > +/*
> > > > > + * ips rate limiting plugin.
> > > > > + *
> > > > > + * This plugin can be used to restrict the execution of a system to a
> > > > > + * particular number of Instructions Per Second (ips). This controls
> > > > > + * time as seen by the guest so while wall-clock time may be longer
> > > > > + * from the guests point of view time will pass at the normal rate.
> > > > > + *
> > > > > + * This uses the new plugin API which allows the plugin to control
> > > > > + * system time.
> > > > > + *
> > > > > + * Copyright (c) 2023 Linaro Ltd
> > > > > + *
> > > > > + * SPDX-License-Identifier: GPL-2.0-or-later
> > > > > + */
> > > > > +
> > > > > +#include <stdio.h>
> > > > > +#include <glib.h>
> > > > > +#include <qemu-plugin.h>
> > > > > +
> > > > > +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
> > > > > +
> > > > > +/* how many times do we update time per sec */
> > > > > +#define NUM_TIME_UPDATE_PER_SEC 10
> > > > > +#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000)
> > > > > +
> > > > > +static GMutex global_state_lock;
> > > > > +
> > > > > +static uint64_t max_insn_per_second = 1000 * 1000 * 1000; /* ips per core, per second */
> > > > > +static uint64_t max_insn_per_quantum; /* trap every N instructions */
> > > > > +static int64_t virtual_time_ns; /* last set virtual time */
> > > > > +
> > > > > +static const void *time_handle;
> > > > > +
> > > > > +typedef struct {
> > > > > +    uint64_t total_insn;
> > > > > +    uint64_t quantum_insn; /* insn in last quantum */
> > > > > +    int64_t last_quantum_time; /* time when last quantum started */
> > > > > +} vCPUTime;
> > > > > +
> > > > > +struct qemu_plugin_scoreboard *vcpus;
> > > > > +
> > > > > +/* return epoch time in ns */
> > > > > +static int64_t now_ns(void)
> > > > > +{
> > > > > +    return g_get_real_time() * 1000;
> > > > > +}
> > > > > +
> > > > > +static uint64_t num_insn_during(int64_t elapsed_ns)
> > > > > +{
> > > > > +    double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC;
> > > > > +    return num_secs * (double) max_insn_per_second;
> > > > > +}
> > > > > +
> > > > > +static int64_t time_for_insn(uint64_t num_insn)
> > > > > +{
> > > > > +    double num_secs = (double) num_insn / (double) max_insn_per_second;
> > > > > +    return num_secs * (double) NSEC_IN_ONE_SEC;
> > > > > +}
> > > > > +
> > > > > +static void update_system_time(vCPUTime *vcpu)
> > > > > +{
> > > > > +    int64_t elapsed_ns = now_ns() - vcpu->last_quantum_time;
> > > > > +    uint64_t max_insn = num_insn_during(elapsed_ns);
> > > > > +
> > > > > +    if (vcpu->quantum_insn >= max_insn) {
> > > > > +        /* this vcpu ran faster than expected, so it has to sleep */
> > > > > +        uint64_t insn_advance = vcpu->quantum_insn - max_insn;
> > > > > +        uint64_t time_advance_ns = time_for_insn(insn_advance);
> > > > > +        int64_t sleep_us = time_advance_ns / 1000;
> > > > > +        g_usleep(sleep_us);
> > > > > +    }
> > > > > +
> > > > > +    vcpu->total_insn += vcpu->quantum_insn;
> > > > > +    vcpu->quantum_insn = 0;
> > > > > +    vcpu->last_quantum_time = now_ns();
> > > > > +
> > > > > +    /* based on total number of instructions, what should be the new time? */
> > > > > +    int64_t new_virtual_time = time_for_insn(vcpu->total_insn);
> > > > > +
> > > > > +    g_mutex_lock(&global_state_lock);
> > > > > +
> > > > > +    /* Time only moves forward. Another vcpu might have updated it already. */
> > > > > +    if (new_virtual_time > virtual_time_ns) {
> > > > > +        qemu_plugin_update_ns(time_handle, new_virtual_time);
> > > > > +        virtual_time_ns = new_virtual_time;
> > > > > +    }
> > > > > +
> > > > > +    g_mutex_unlock(&global_state_lock);
> > > > > +}
> > > > > +
> > > > > +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
> > > > > +{
> > > > > +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> > > > > +    vcpu->total_insn = 0;
> > > > > +    vcpu->quantum_insn = 0;
> > > > > +    vcpu->last_quantum_time = now_ns();
> > > > > +}
> > > > > +
> > > > > +static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index)
> > > > > +{
> > > > > +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> > > > > +    update_system_time(vcpu);
> > > > > +}
> > > > > +
> > > > > +static void every_quantum_insn(unsigned int cpu_index, void *udata)
> > > > > +{
> > > > > +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> > > > > +    g_assert(vcpu->quantum_insn >= max_insn_per_quantum);
> > > > > +    update_system_time(vcpu);
> > > > > +}
> > > > > +
> > > > > +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
> > > > > +{
> > > > > +    size_t n_insns = qemu_plugin_tb_n_insns(tb);
> > > > > +    qemu_plugin_u64 quantum_insn =
> > > > > +        qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, quantum_insn);
> > > > > +    /* count (and eventually trap) once per tb */
> > > > > +    qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
> > > > > +        tb, QEMU_PLUGIN_INLINE_ADD_U64, quantum_insn, n_insns);
> > > > > +    qemu_plugin_register_vcpu_tb_exec_cond_cb(
> > > > > +        tb, every_quantum_insn,
> > > > > +        QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
> > > > > +        quantum_insn, max_insn_per_quantum, NULL);
> > > > > +}
> > > > > +
> > > > > +static void plugin_exit(qemu_plugin_id_t id, void *udata)
> > > > > +{
> > > > > +    qemu_plugin_scoreboard_free(vcpus);
> > > > > +}
> > > > > +
> > > > > +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
> > > > > +                                           const qemu_info_t *info, int argc,
> > > > > +                                           char **argv)
> > > > > +{
> > > > > +    for (int i = 0; i < argc; i++) {
> > > > > +        char *opt = argv[i];
> > > > > +        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
> > > > > +        if (g_strcmp0(tokens[0], "ips") == 0) {
> > > > > +            max_insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10);
> > > > > +            if (!max_insn_per_second && errno) {
> > > > > +                fprintf(stderr, "%s: couldn't parse %s (%s)\n",
> > > > > +                        __func__, tokens[1], g_strerror(errno));
> > > > > +                return -1;
> > > > > +            }
> > > > > +        } else {
> > > > > +            fprintf(stderr, "option parsing failed: %s\n", opt);
> > > > > +            return -1;
> > > > > +        }
> > > > > +    }
> > > > > +
> > > > > +    vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime));
> > > > > +    max_insn_per_quantum = max_insn_per_second / NUM_TIME_UPDATE_PER_SEC;
> > > > > +
> > > > > +    time_handle = qemu_plugin_request_time_control();
> > > > > +    g_assert(time_handle);
> > > > > +
> > > > > +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
> > > > > +    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
> > > > > +    qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit);
> > > > > +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
> > > > > +
> > > > > +    return 0;
> > > > > +}
> > > > > diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
> > > > > index 0b64d2c1e3..449ead1130 100644
> > > > > --- a/contrib/plugins/Makefile
> > > > > +++ b/contrib/plugins/Makefile
> > > > > @@ -27,6 +27,7 @@ endif
> > > > >    NAMES += hwprofile
> > > > >    NAMES += cache
> > > > >    NAMES += drcov
> > > > > +NAMES += ips
> > > > >    ifeq ($(CONFIG_WIN32),y)
> > > > >    SO_SUFFIX := .dll
> > > > > -- 
> > > > > 2.39.2
> > > > >
Pierrick Bouvier June 17, 2024, 10:29 p.m. UTC | #10
On 6/17/24 13:56, Dr. David Alan Gilbert wrote:
> * Pierrick Bouvier (pierrick.bouvier@linaro.org) wrote:
>> On 6/14/24 15:00, Dr. David Alan Gilbert wrote:
>>> * Pierrick Bouvier (pierrick.bouvier@linaro.org) wrote:
>>>> Hi Dave,
>>>>
>>>> On 6/12/24 14:02, Dr. David Alan Gilbert wrote:
>>>>> * Alex Bennée (alex.bennee@linaro.org) wrote:
>>>>>> From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>>>>>
>>>>>> This plugin uses the new time control interface to make decisions
>>>>>> about the state of time during the emulation. The algorithm is
>>>>>> currently very simple. The user specifies an ips rate which applies
>>>>>> per core. If the core runs ahead of its allocated execution time the
>>>>>> plugin sleeps for a bit to let real time catch up. Either way time is
>>>>>> updated for the emulation as a function of total executed instructions
>>>>>> with some adjustments for cores that idle.
>>>>>
>>>>> A few random thoughts:
>>>>>      a) Are there any definitions of what a plugin that controls time
>>>>>         should do with a live migration?
>>>>
>>>> It's not something that was considered as part of this work.
>>>
>>> That's OK, the only thing is we need to stop anyone from hitting problems
>>> when they don't realise it's not been addressed.
>>> One way might be to add a migration blocker; see include/migration/blocker.h
>>> then you might print something like 'Migration not available due to plugin ....'
>>>
>>
>> So basically, we could make a call to migrate_add_blocker(), when someone
>> request time_control through plugin API?
>>
>> IMHO, it's something that should be part of plugin API (if any plugin calls
>> qemu_plugin_request_time_control()), instead of the plugin code itself. This
>> way, any plugin getting time control automatically blocks any potential
>> migration.
> 
> Note my question asked for a 'any definitions of what a plugin ..' - so
> you could define it that way, another one is to think that in the future
> you may allow it and the plugin somehow interacts with migration not to
> change time at certain migration phases.
> 

I would be in favor of forbidding usage for now in this context. I'm not 
sure why people would play with migration and plugins generally at this 
time (there might be experiments or use cases I'm not aware of), so a 
simple barrier preventing that seems ok.

This plugin is part of an experiment where we implement a qemu feature 
(icount=auto in this case) by using plugins. If it turns out to be 
successful and this plugin becomes popular, we can always lift the 
limitation later.

@Alex, would you like to add this now (icount=auto is still not removed 
from qemu), or wait for integration, and add this as another patch?

>>>>>      b) The sleep in migration/dirtyrate.c points out g_usleep might
>>>>>         sleep for longer, so reads the actual wall clock time to
>>>>>         figure out a new 'now'.
>>>>
>>>> The current API mentions time starts at 0 from qemu startup. Maybe we could
>>>> consider in the future to change this behavior to retrieve time from an
>>>> existing migrated machine.
>>>
>>> Ah, I meant for (b) to be independent of (a) - not related to migration; just
>>> down to the fact you used g_usleep in the plugin and a g_usleep might sleep
>>> for a different amount of time than you asked.
>>>
>>
>> We know that, and the plugin is not meant to be "cycle accurate" in general,
>> we just set a upper bound for number of instructions we can execute in a
>> given amount of time (1/10 second for now).
>>
>> We compute the new time based on how many instructions effectively ran on
>> the most used cpu, so even if we slept a bit more than expected, it's
>> correct.
> 
> Ah OK.
> 
> Dave
> 
>>>>>      c) A fun thing to do with this would be to follow an external simulation
>>>>>         or 2nd qemu, trying to keep the two from running too far past
>>>>>         each other.
>>>>>
>>>>
>>>> Basically, to slow the first one, waiting for the replicated one to catch
>>>> up?
>>>
>>> Yes, something like that.
>>>
>>> Dave
>>>
>>>>> Dave >
>>>>>> Examples
>>>>>> --------
>>>>>>
>>>>>> Slow down execution of /bin/true:
>>>>>> $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
>>>>>> $ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
>>>>>> real 4.000s
>>>>>>
>>>>>> Boot a Linux kernel simulating a 250MHz cpu:
>>>>>> $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
>>>>>> check time until kernel panic on serial0
>>>>>>
>>>>>> Tested in system mode by booting a full debian system, and using:
>>>>>> $ sysbench cpu run
>>>>>> Performance decrease linearly with the given number of ips.
>>>>>>
>>>>>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>>>>> Message-Id: <20240530220610.1245424-7-pierrick.bouvier@linaro.org>
>>>>>> ---
>>>>>>     contrib/plugins/ips.c    | 164 +++++++++++++++++++++++++++++++++++++++
>>>>>>     contrib/plugins/Makefile |   1 +
>>>>>>     2 files changed, 165 insertions(+)
>>>>>>     create mode 100644 contrib/plugins/ips.c
>>>>>>
>>>>>> diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
>>>>>> new file mode 100644
>>>>>> index 0000000000..db77729264
>>>>>> --- /dev/null
>>>>>> +++ b/contrib/plugins/ips.c
>>>>>> @@ -0,0 +1,164 @@
>>>>>> +/*
>>>>>> + * ips rate limiting plugin.
>>>>>> + *
>>>>>> + * This plugin can be used to restrict the execution of a system to a
>>>>>> + * particular number of Instructions Per Second (ips). This controls
>>>>>> + * time as seen by the guest so while wall-clock time may be longer
>>>>>> + * from the guests point of view time will pass at the normal rate.
>>>>>> + *
>>>>>> + * This uses the new plugin API which allows the plugin to control
>>>>>> + * system time.
>>>>>> + *
>>>>>> + * Copyright (c) 2023 Linaro Ltd
>>>>>> + *
>>>>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>>>>> + */
>>>>>> +
>>>>>> +#include <stdio.h>
>>>>>> +#include <glib.h>
>>>>>> +#include <qemu-plugin.h>
>>>>>> +
>>>>>> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>>>>>> +
>>>>>> +/* how many times do we update time per sec */
>>>>>> +#define NUM_TIME_UPDATE_PER_SEC 10
>>>>>> +#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000)
>>>>>> +
>>>>>> +static GMutex global_state_lock;
>>>>>> +
>>>>>> +static uint64_t max_insn_per_second = 1000 * 1000 * 1000; /* ips per core, per second */
>>>>>> +static uint64_t max_insn_per_quantum; /* trap every N instructions */
>>>>>> +static int64_t virtual_time_ns; /* last set virtual time */
>>>>>> +
>>>>>> +static const void *time_handle;
>>>>>> +
>>>>>> +typedef struct {
>>>>>> +    uint64_t total_insn;
>>>>>> +    uint64_t quantum_insn; /* insn in last quantum */
>>>>>> +    int64_t last_quantum_time; /* time when last quantum started */
>>>>>> +} vCPUTime;
>>>>>> +
>>>>>> +struct qemu_plugin_scoreboard *vcpus;
>>>>>> +
>>>>>> +/* return epoch time in ns */
>>>>>> +static int64_t now_ns(void)
>>>>>> +{
>>>>>> +    return g_get_real_time() * 1000;
>>>>>> +}
>>>>>> +
>>>>>> +static uint64_t num_insn_during(int64_t elapsed_ns)
>>>>>> +{
>>>>>> +    double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC;
>>>>>> +    return num_secs * (double) max_insn_per_second;
>>>>>> +}
>>>>>> +
>>>>>> +static int64_t time_for_insn(uint64_t num_insn)
>>>>>> +{
>>>>>> +    double num_secs = (double) num_insn / (double) max_insn_per_second;
>>>>>> +    return num_secs * (double) NSEC_IN_ONE_SEC;
>>>>>> +}
>>>>>> +
>>>>>> +static void update_system_time(vCPUTime *vcpu)
>>>>>> +{
>>>>>> +    int64_t elapsed_ns = now_ns() - vcpu->last_quantum_time;
>>>>>> +    uint64_t max_insn = num_insn_during(elapsed_ns);
>>>>>> +
>>>>>> +    if (vcpu->quantum_insn >= max_insn) {
>>>>>> +        /* this vcpu ran faster than expected, so it has to sleep */
>>>>>> +        uint64_t insn_advance = vcpu->quantum_insn - max_insn;
>>>>>> +        uint64_t time_advance_ns = time_for_insn(insn_advance);
>>>>>> +        int64_t sleep_us = time_advance_ns / 1000;
>>>>>> +        g_usleep(sleep_us);
>>>>>> +    }
>>>>>> +
>>>>>> +    vcpu->total_insn += vcpu->quantum_insn;
>>>>>> +    vcpu->quantum_insn = 0;
>>>>>> +    vcpu->last_quantum_time = now_ns();
>>>>>> +
>>>>>> +    /* based on total number of instructions, what should be the new time? */
>>>>>> +    int64_t new_virtual_time = time_for_insn(vcpu->total_insn);
>>>>>> +
>>>>>> +    g_mutex_lock(&global_state_lock);
>>>>>> +
>>>>>> +    /* Time only moves forward. Another vcpu might have updated it already. */
>>>>>> +    if (new_virtual_time > virtual_time_ns) {
>>>>>> +        qemu_plugin_update_ns(time_handle, new_virtual_time);
>>>>>> +        virtual_time_ns = new_virtual_time;
>>>>>> +    }
>>>>>> +
>>>>>> +    g_mutex_unlock(&global_state_lock);
>>>>>> +}
>>>>>> +
>>>>>> +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
>>>>>> +{
>>>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>> +    vcpu->total_insn = 0;
>>>>>> +    vcpu->quantum_insn = 0;
>>>>>> +    vcpu->last_quantum_time = now_ns();
>>>>>> +}
>>>>>> +
>>>>>> +static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index)
>>>>>> +{
>>>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>> +    update_system_time(vcpu);
>>>>>> +}
>>>>>> +
>>>>>> +static void every_quantum_insn(unsigned int cpu_index, void *udata)
>>>>>> +{
>>>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>> +    g_assert(vcpu->quantum_insn >= max_insn_per_quantum);
>>>>>> +    update_system_time(vcpu);
>>>>>> +}
>>>>>> +
>>>>>> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>>>>>> +{
>>>>>> +    size_t n_insns = qemu_plugin_tb_n_insns(tb);
>>>>>> +    qemu_plugin_u64 quantum_insn =
>>>>>> +        qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, quantum_insn);
>>>>>> +    /* count (and eventually trap) once per tb */
>>>>>> +    qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
>>>>>> +        tb, QEMU_PLUGIN_INLINE_ADD_U64, quantum_insn, n_insns);
>>>>>> +    qemu_plugin_register_vcpu_tb_exec_cond_cb(
>>>>>> +        tb, every_quantum_insn,
>>>>>> +        QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
>>>>>> +        quantum_insn, max_insn_per_quantum, NULL);
>>>>>> +}
>>>>>> +
>>>>>> +static void plugin_exit(qemu_plugin_id_t id, void *udata)
>>>>>> +{
>>>>>> +    qemu_plugin_scoreboard_free(vcpus);
>>>>>> +}
>>>>>> +
>>>>>> +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
>>>>>> +                                           const qemu_info_t *info, int argc,
>>>>>> +                                           char **argv)
>>>>>> +{
>>>>>> +    for (int i = 0; i < argc; i++) {
>>>>>> +        char *opt = argv[i];
>>>>>> +        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
>>>>>> +        if (g_strcmp0(tokens[0], "ips") == 0) {
>>>>>> +            max_insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10);
>>>>>> +            if (!max_insn_per_second && errno) {
>>>>>> +                fprintf(stderr, "%s: couldn't parse %s (%s)\n",
>>>>>> +                        __func__, tokens[1], g_strerror(errno));
>>>>>> +                return -1;
>>>>>> +            }
>>>>>> +        } else {
>>>>>> +            fprintf(stderr, "option parsing failed: %s\n", opt);
>>>>>> +            return -1;
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>> +    vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime));
>>>>>> +    max_insn_per_quantum = max_insn_per_second / NUM_TIME_UPDATE_PER_SEC;
>>>>>> +
>>>>>> +    time_handle = qemu_plugin_request_time_control();
>>>>>> +    g_assert(time_handle);
>>>>>> +
>>>>>> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
>>>>>> +    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
>>>>>> +    qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit);
>>>>>> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
>>>>>> index 0b64d2c1e3..449ead1130 100644
>>>>>> --- a/contrib/plugins/Makefile
>>>>>> +++ b/contrib/plugins/Makefile
>>>>>> @@ -27,6 +27,7 @@ endif
>>>>>>     NAMES += hwprofile
>>>>>>     NAMES += cache
>>>>>>     NAMES += drcov
>>>>>> +NAMES += ips
>>>>>>     ifeq ($(CONFIG_WIN32),y)
>>>>>>     SO_SUFFIX := .dll
>>>>>> -- 
>>>>>> 2.39.2
>>>>>>
Dr. David Alan Gilbert June 17, 2024, 10:45 p.m. UTC | #11
* Pierrick Bouvier (pierrick.bouvier@linaro.org) wrote:
> On 6/17/24 13:56, Dr. David Alan Gilbert wrote:
> > * Pierrick Bouvier (pierrick.bouvier@linaro.org) wrote:
> > > On 6/14/24 15:00, Dr. David Alan Gilbert wrote:
> > > > * Pierrick Bouvier (pierrick.bouvier@linaro.org) wrote:
> > > > > Hi Dave,
> > > > > 
> > > > > On 6/12/24 14:02, Dr. David Alan Gilbert wrote:
> > > > > > * Alex Bennée (alex.bennee@linaro.org) wrote:
> > > > > > > From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> > > > > > > 
> > > > > > > This plugin uses the new time control interface to make decisions
> > > > > > > about the state of time during the emulation. The algorithm is
> > > > > > > currently very simple. The user specifies an ips rate which applies
> > > > > > > per core. If the core runs ahead of its allocated execution time the
> > > > > > > plugin sleeps for a bit to let real time catch up. Either way time is
> > > > > > > updated for the emulation as a function of total executed instructions
> > > > > > > with some adjustments for cores that idle.
> > > > > > 
> > > > > > A few random thoughts:
> > > > > >      a) Are there any definitions of what a plugin that controls time
> > > > > >         should do with a live migration?
> > > > > 
> > > > > It's not something that was considered as part of this work.
> > > > 
> > > > That's OK, the only thing is we need to stop anyone from hitting problems
> > > > when they don't realise it's not been addressed.
> > > > One way might be to add a migration blocker; see include/migration/blocker.h
> > > > then you might print something like 'Migration not available due to plugin ....'
> > > > 
> > > 
> > > So basically, we could make a call to migrate_add_blocker(), when someone
> > > request time_control through plugin API?
> > > 
> > > IMHO, it's something that should be part of plugin API (if any plugin calls
> > > qemu_plugin_request_time_control()), instead of the plugin code itself. This
> > > way, any plugin getting time control automatically blocks any potential
> > > migration.
> > 
> > Note my question asked for a 'any definitions of what a plugin ..' - so
> > you could define it that way, another one is to think that in the future
> > you may allow it and the plugin somehow interacts with migration not to
> > change time at certain migration phases.
> > 
> 
> I would be in favor to forbid usage for now in this context. I'm not sure
> why people would play with migration and plugins generally at this time
> (there might be experiments or use cases I'm not aware of), so a simple
> barrier preventing that seems ok.
> 
> This plugin is part of an experiment where we implement a qemu feature
> (icount=auto in this case) by using plugins. If it turns into a successful
> usage and this plugin becomes popular, we can always lift the limitation
> later.

Sounds reasonable to me.

Dave

> @Alex, would you like to add this now (icount=auto is still not removed from
> qemu), or wait for integration, and add this as another patch?
> 
> > > > > >      b) The sleep in migration/dirtyrate.c points out g_usleep might
> > > > > >         sleep for longer, so reads the actual wall clock time to
> > > > > >         figure out a new 'now'.
> > > > > 
> > > > > The current API mentions time starts at 0 from qemu startup. Maybe we could
> > > > > consider in the future to change this behavior to retrieve time from an
> > > > > existing migrated machine.
> > > > 
> > > > Ah, I meant for (b) to be independent of (a) - not related to migration; just
> > > > down to the fact you used g_usleep in the plugin and a g_usleep might sleep
> > > > for a different amount of time than you asked.
> > > > 
> > > 
> > > We know that, and the plugin is not meant to be "cycle accurate" in general,
> > > we just set a upper bound for number of instructions we can execute in a
> > > given amount of time (1/10 second for now).
> > > 
> > > We compute the new time based on how many instructions effectively ran on
> > > the most used cpu, so even if we slept a bit more than expected, it's
> > > correct.
> > 
> > Ah OK.
> > 
> > Dave
> > 
> > > > > >      c) A fun thing to do with this would be to follow an external simulation
> > > > > >         or 2nd qemu, trying to keep the two from running too far past
> > > > > >         each other.
> > > > > > 
> > > > > 
> > > > > Basically, to slow the first one, waiting for the replicated one to catch
> > > > > up?
> > > > 
> > > > Yes, something like that.
> > > > 
> > > > Dave
> > > > 
> > > > > > Dave >
> > > > > > > Examples
> > > > > > > --------
> > > > > > > 
> > > > > > > Slow down execution of /bin/true:
> > > > > > > $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
> > > > > > > $ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
> > > > > > > real 4.000s
> > > > > > > 
> > > > > > > Boot a Linux kernel simulating a 250MHz cpu:
> > > > > > > $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
> > > > > > > check time until kernel panic on serial0
> > > > > > > 
> > > > > > > Tested in system mode by booting a full debian system, and using:
> > > > > > > $ sysbench cpu run
> > > > > > > Performance decrease linearly with the given number of ips.
> > > > > > > 
> > > > > > > Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> > > > > > > Message-Id: <20240530220610.1245424-7-pierrick.bouvier@linaro.org>
> > > > > > > ---
> > > > > > >     contrib/plugins/ips.c    | 164 +++++++++++++++++++++++++++++++++++++++
> > > > > > >     contrib/plugins/Makefile |   1 +
> > > > > > >     2 files changed, 165 insertions(+)
> > > > > > >     create mode 100644 contrib/plugins/ips.c
> > > > > > > 
> > > > > > > diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
> > > > > > > new file mode 100644
> > > > > > > index 0000000000..db77729264
> > > > > > > --- /dev/null
> > > > > > > +++ b/contrib/plugins/ips.c
> > > > > > > @@ -0,0 +1,164 @@
> > > > > > > +/*
> > > > > > > + * ips rate limiting plugin.
> > > > > > > + *
> > > > > > > + * This plugin can be used to restrict the execution of a system to a
> > > > > > > + * particular number of Instructions Per Second (ips). This controls
> > > > > > > + * time as seen by the guest so while wall-clock time may be longer
> > > > > > > + * from the guests point of view time will pass at the normal rate.
> > > > > > > + *
> > > > > > > + * This uses the new plugin API which allows the plugin to control
> > > > > > > + * system time.
> > > > > > > + *
> > > > > > > + * Copyright (c) 2023 Linaro Ltd
> > > > > > > + *
> > > > > > > + * SPDX-License-Identifier: GPL-2.0-or-later
> > > > > > > + */
> > > > > > > +
> > > > > > > +#include <stdio.h>
> > > > > > > +#include <glib.h>
> > > > > > > +#include <qemu-plugin.h>
> > > > > > > +
> > > > > > > +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
> > > > > > > +
> > > > > > > +/* how many times do we update time per sec */
> > > > > > > +#define NUM_TIME_UPDATE_PER_SEC 10
> > > > > > > +#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000)
> > > > > > > +
> > > > > > > +static GMutex global_state_lock;
> > > > > > > +
> > > > > > > +static uint64_t max_insn_per_second = 1000 * 1000 * 1000; /* ips per core, per second */
> > > > > > > +static uint64_t max_insn_per_quantum; /* trap every N instructions */
> > > > > > > +static int64_t virtual_time_ns; /* last set virtual time */
> > > > > > > +
> > > > > > > +static const void *time_handle;
> > > > > > > +
> > > > > > > +typedef struct {
> > > > > > > +    uint64_t total_insn;
> > > > > > > +    uint64_t quantum_insn; /* insn in last quantum */
> > > > > > > +    int64_t last_quantum_time; /* time when last quantum started */
> > > > > > > +} vCPUTime;
> > > > > > > +
> > > > > > > +struct qemu_plugin_scoreboard *vcpus;
> > > > > > > +
> > > > > > > +/* return epoch time in ns */
> > > > > > > +static int64_t now_ns(void)
> > > > > > > +{
> > > > > > > +    return g_get_real_time() * 1000;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static uint64_t num_insn_during(int64_t elapsed_ns)
> > > > > > > +{
> > > > > > > +    double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC;
> > > > > > > +    return num_secs * (double) max_insn_per_second;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static int64_t time_for_insn(uint64_t num_insn)
> > > > > > > +{
> > > > > > > +    double num_secs = (double) num_insn / (double) max_insn_per_second;
> > > > > > > +    return num_secs * (double) NSEC_IN_ONE_SEC;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void update_system_time(vCPUTime *vcpu)
> > > > > > > +{
> > > > > > > +    int64_t elapsed_ns = now_ns() - vcpu->last_quantum_time;
> > > > > > > +    uint64_t max_insn = num_insn_during(elapsed_ns);
> > > > > > > +
> > > > > > > +    if (vcpu->quantum_insn >= max_insn) {
> > > > > > > +        /* this vcpu ran faster than expected, so it has to sleep */
> > > > > > > +        uint64_t insn_advance = vcpu->quantum_insn - max_insn;
> > > > > > > +        uint64_t time_advance_ns = time_for_insn(insn_advance);
> > > > > > > +        int64_t sleep_us = time_advance_ns / 1000;
> > > > > > > +        g_usleep(sleep_us);
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    vcpu->total_insn += vcpu->quantum_insn;
> > > > > > > +    vcpu->quantum_insn = 0;
> > > > > > > +    vcpu->last_quantum_time = now_ns();
> > > > > > > +
> > > > > > > +    /* based on total number of instructions, what should be the new time? */
> > > > > > > +    int64_t new_virtual_time = time_for_insn(vcpu->total_insn);
> > > > > > > +
> > > > > > > +    g_mutex_lock(&global_state_lock);
> > > > > > > +
> > > > > > > +    /* Time only moves forward. Another vcpu might have updated it already. */
> > > > > > > +    if (new_virtual_time > virtual_time_ns) {
> > > > > > > +        qemu_plugin_update_ns(time_handle, new_virtual_time);
> > > > > > > +        virtual_time_ns = new_virtual_time;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    g_mutex_unlock(&global_state_lock);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
> > > > > > > +{
> > > > > > > +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> > > > > > > +    vcpu->total_insn = 0;
> > > > > > > +    vcpu->quantum_insn = 0;
> > > > > > > +    vcpu->last_quantum_time = now_ns();
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index)
> > > > > > > +{
> > > > > > > +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> > > > > > > +    update_system_time(vcpu);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void every_quantum_insn(unsigned int cpu_index, void *udata)
> > > > > > > +{
> > > > > > > +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> > > > > > > +    g_assert(vcpu->quantum_insn >= max_insn_per_quantum);
> > > > > > > +    update_system_time(vcpu);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
> > > > > > > +{
> > > > > > > +    size_t n_insns = qemu_plugin_tb_n_insns(tb);
> > > > > > > +    qemu_plugin_u64 quantum_insn =
> > > > > > > +        qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, quantum_insn);
> > > > > > > +    /* count (and eventually trap) once per tb */
> > > > > > > +    qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
> > > > > > > +        tb, QEMU_PLUGIN_INLINE_ADD_U64, quantum_insn, n_insns);
> > > > > > > +    qemu_plugin_register_vcpu_tb_exec_cond_cb(
> > > > > > > +        tb, every_quantum_insn,
> > > > > > > +        QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
> > > > > > > +        quantum_insn, max_insn_per_quantum, NULL);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void plugin_exit(qemu_plugin_id_t id, void *udata)
> > > > > > > +{
> > > > > > > +    qemu_plugin_scoreboard_free(vcpus);
> > > > > > > +}
> > > > > > > +
> > > > > > > +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
> > > > > > > +                                           const qemu_info_t *info, int argc,
> > > > > > > +                                           char **argv)
> > > > > > > +{
> > > > > > > +    for (int i = 0; i < argc; i++) {
> > > > > > > +        char *opt = argv[i];
> > > > > > > +        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
> > > > > > > +        if (g_strcmp0(tokens[0], "ips") == 0) {
> > > > > > > +            max_insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10);
> > > > > > > +            if (!max_insn_per_second && errno) {
> > > > > > > +                fprintf(stderr, "%s: couldn't parse %s (%s)\n",
> > > > > > > +                        __func__, tokens[1], g_strerror(errno));
> > > > > > > +                return -1;
> > > > > > > +            }
> > > > > > > +        } else {
> > > > > > > +            fprintf(stderr, "option parsing failed: %s\n", opt);
> > > > > > > +            return -1;
> > > > > > > +        }
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime));
> > > > > > > +    max_insn_per_quantum = max_insn_per_second / NUM_TIME_UPDATE_PER_SEC;
> > > > > > > +
> > > > > > > +    time_handle = qemu_plugin_request_time_control();
> > > > > > > +    g_assert(time_handle);
> > > > > > > +
> > > > > > > +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
> > > > > > > +    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
> > > > > > > +    qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit);
> > > > > > > +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
> > > > > > > +
> > > > > > > +    return 0;
> > > > > > > +}
> > > > > > > diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
> > > > > > > index 0b64d2c1e3..449ead1130 100644
> > > > > > > --- a/contrib/plugins/Makefile
> > > > > > > +++ b/contrib/plugins/Makefile
> > > > > > > @@ -27,6 +27,7 @@ endif
> > > > > > >     NAMES += hwprofile
> > > > > > >     NAMES += cache
> > > > > > >     NAMES += drcov
> > > > > > > +NAMES += ips
> > > > > > >     ifeq ($(CONFIG_WIN32),y)
> > > > > > >     SO_SUFFIX := .dll
> > > > > > > -- 
> > > > > > > 2.39.2
> > > > > > >
Alex Bennée June 18, 2024, 9:53 a.m. UTC | #12
Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:

> On 6/17/24 13:56, Dr. David Alan Gilbert wrote:
>> * Pierrick Bouvier (pierrick.bouvier@linaro.org) wrote:
>>> On 6/14/24 15:00, Dr. David Alan Gilbert wrote:
>>>> * Pierrick Bouvier (pierrick.bouvier@linaro.org) wrote:
>>>>> Hi Dave,
>>>>>
>>>>> On 6/12/24 14:02, Dr. David Alan Gilbert wrote:
>>>>>> * Alex Bennée (alex.bennee@linaro.org) wrote:
>>>>>>> From: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>>>>>>
>>>>>>> This plugin uses the new time control interface to make decisions
>>>>>>> about the state of time during the emulation. The algorithm is
>>>>>>> currently very simple. The user specifies an ips rate which applies
>>>>>>> per core. If the core runs ahead of its allocated execution time the
>>>>>>> plugin sleeps for a bit to let real time catch up. Either way time is
>>>>>>> updated for the emulation as a function of total executed instructions
>>>>>>> with some adjustments for cores that idle.
>>>>>>
>>>>>> A few random thoughts:
>>>>>>      a) Are there any definitions of what a plugin that controls time
>>>>>>         should do with a live migration?
>>>>>
>>>>> It's not something that was considered as part of this work.
>>>>
>>>> That's OK, the only thing is we need to stop anyone from hitting problems
>>>> when they don't realise it's not been addressed.
>>>> One way might be to add a migration blocker; see include/migration/blocker.h
>>>> then you might print something like 'Migration not available due to plugin ....'
>>>>
>>>
>>> So basically, we could make a call to migrate_add_blocker(), when someone
>>> request time_control through plugin API?
>>>
>>> IMHO, it's something that should be part of plugin API (if any plugin calls
>>> qemu_plugin_request_time_control()), instead of the plugin code itself. This
>>> way, any plugin getting time control automatically blocks any potential
>>> migration.
>> Note my question asked for a 'any definitions of what a plugin ..' -
>> so
>> you could define it that way, another one is to think that in the future
>> you may allow it and the plugin somehow interacts with migration not to
>> change time at certain migration phases.
>> 
>
> I would be in favor to forbid usage for now in this context. I'm not
> sure why people would play with migration and plugins generally at
> this time (there might be experiments or use cases I'm not aware of),
> so a simple barrier preventing that seems ok.
>
> This plugin is part of an experiment where we implement a qemu feature
> (icount=auto in this case) by using plugins. If it turns into a
> successful usage and this plugin becomes popular, we can always lift
> the limitation later.
>
> @Alex, would you like to add this now (icount=auto is still not
> removed from qemu), or wait for integration, and add this as another
> patch?

I think we follow the deprecation process so once integrated we post a
deprecation notice in:

  https://qemu.readthedocs.io/en/master/about/deprecated.html

and then remove it after a couple of releases.
diff mbox series

Patch

diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
new file mode 100644
index 0000000000..db77729264
--- /dev/null
+++ b/contrib/plugins/ips.c
@@ -0,0 +1,164 @@ 
+/*
+ * ips rate limiting plugin.
+ *
+ * This plugin can be used to restrict the execution of a system to a
+ * particular number of Instructions Per Second (ips). This controls
+ * time as seen by the guest: while wall-clock time may be longer,
+ * from the guest's point of view time will pass at the normal rate.
+ *
+ * This uses the new plugin API which allows the plugin to control
+ * system time.
+ *
+ * Copyright (c) 2023 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdio.h>
+#include <glib.h>
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+/* how many times do we update time per sec */
+#define NUM_TIME_UPDATE_PER_SEC 10
+#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000)
+
+static GMutex global_state_lock;
+
+static uint64_t max_insn_per_second = 1000 * 1000 * 1000; /* ips per core, per second */
+static uint64_t max_insn_per_quantum; /* trap every N instructions */
+static int64_t virtual_time_ns; /* last set virtual time */
+
+static const void *time_handle;
+
+typedef struct {
+    uint64_t total_insn;
+    uint64_t quantum_insn; /* insn in last quantum */
+    int64_t last_quantum_time; /* time when last quantum started */
+} vCPUTime;
+
+struct qemu_plugin_scoreboard *vcpus;
+
+/* return epoch time in ns */
+static int64_t now_ns(void)
+{
+    return g_get_real_time() * 1000;
+}
+
+static uint64_t num_insn_during(int64_t elapsed_ns)
+{
+    double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC;
+    return num_secs * (double) max_insn_per_second;
+}
+
+static int64_t time_for_insn(uint64_t num_insn)
+{
+    double num_secs = (double) num_insn / (double) max_insn_per_second;
+    return num_secs * (double) NSEC_IN_ONE_SEC;
+}
+
+static void update_system_time(vCPUTime *vcpu)
+{
+    int64_t elapsed_ns = now_ns() - vcpu->last_quantum_time;
+    uint64_t max_insn = num_insn_during(elapsed_ns);
+
+    if (vcpu->quantum_insn >= max_insn) {
+        /* this vcpu ran faster than expected, so it has to sleep */
+        uint64_t insn_advance = vcpu->quantum_insn - max_insn;
+        uint64_t time_advance_ns = time_for_insn(insn_advance);
+        int64_t sleep_us = time_advance_ns / 1000;
+        g_usleep(sleep_us);
+    }
+
+    vcpu->total_insn += vcpu->quantum_insn;
+    vcpu->quantum_insn = 0;
+    vcpu->last_quantum_time = now_ns();
+
+    /* based on total number of instructions, what should be the new time? */
+    int64_t new_virtual_time = time_for_insn(vcpu->total_insn);
+
+    g_mutex_lock(&global_state_lock);
+
+    /* Time only moves forward. Another vcpu might have updated it already. */
+    if (new_virtual_time > virtual_time_ns) {
+        qemu_plugin_update_ns(time_handle, new_virtual_time);
+        virtual_time_ns = new_virtual_time;
+    }
+
+    g_mutex_unlock(&global_state_lock);
+}
+
+static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
+{
+    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
+    vcpu->total_insn = 0;
+    vcpu->quantum_insn = 0;
+    vcpu->last_quantum_time = now_ns();
+}
+
+static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index)
+{
+    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
+    update_system_time(vcpu);
+}
+
+static void every_quantum_insn(unsigned int cpu_index, void *udata)
+{
+    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
+    g_assert(vcpu->quantum_insn >= max_insn_per_quantum);
+    update_system_time(vcpu);
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+    size_t n_insns = qemu_plugin_tb_n_insns(tb);
+    qemu_plugin_u64 quantum_insn =
+        qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, quantum_insn);
+    /* count (and potentially trap) once per tb */
+    qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
+        tb, QEMU_PLUGIN_INLINE_ADD_U64, quantum_insn, n_insns);
+    qemu_plugin_register_vcpu_tb_exec_cond_cb(
+        tb, every_quantum_insn,
+        QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
+        quantum_insn, max_insn_per_quantum, NULL);
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *udata)
+{
+    qemu_plugin_scoreboard_free(vcpus);
+}
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+                                           const qemu_info_t *info, int argc,
+                                           char **argv)
+{
+    for (int i = 0; i < argc; i++) {
+        char *opt = argv[i];
+        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
+        if (g_strcmp0(tokens[0], "ips") == 0) {
+            max_insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10);
+            if (!max_insn_per_second && errno) {
+                fprintf(stderr, "%s: couldn't parse %s (%s)\n",
+                        __func__, tokens[1], g_strerror(errno));
+                return -1;
+            }
+        } else {
+            fprintf(stderr, "option parsing failed: %s\n", opt);
+            return -1;
+        }
+    }
+
+    vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime));
+    max_insn_per_quantum = max_insn_per_second / NUM_TIME_UPDATE_PER_SEC;
+
+    time_handle = qemu_plugin_request_time_control();
+    g_assert(time_handle);
+
+    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
+    qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit);
+    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+
+    return 0;
+}
diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
index 0b64d2c1e3..449ead1130 100644
--- a/contrib/plugins/Makefile
+++ b/contrib/plugins/Makefile
@@ -27,6 +27,7 @@  endif
 NAMES += hwprofile
 NAMES += cache
 NAMES += drcov
+NAMES += ips
 
 ifeq ($(CONFIG_WIN32),y)
 SO_SUFFIX := .dll