diff mbox series

contrib/plugins: add execlog to log instruction execution and memory access

Message ID 20210614090116.816833-1-erdnaxe@crans.org (mailing list archive)
State New, archived
Headers show
Series contrib/plugins: add execlog to log instruction execution and memory access | expand

Commit Message

Alexandre IOOSS June 14, 2021, 9:01 a.m. UTC
Log instruction execution and memory access to a file.
This plugin can be used for reverse engineering or for side-channel analysis
using QEMU.

Signed-off-by: Alexandre Iooss <erdnaxe@crans.org>
---
 MAINTAINERS               |   1 +
 contrib/plugins/Makefile  |   1 +
 contrib/plugins/execlog.c | 112 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 114 insertions(+)
 create mode 100644 contrib/plugins/execlog.c

Comments

Mahmoud Abumandour June 14, 2021, 5:04 p.m. UTC | #1
On 14/06/2021 11:01, Alexandre Iooss wrote:
> Log instruction execution and memory access to a file.
> This plugin can be used for reverse engineering or for side-channel analysis
> using QEMU.
>
> Signed-off-by: Alexandre Iooss <erdnaxe@crans.org>
> ---
>   MAINTAINERS               |   1 +
>   contrib/plugins/Makefile  |   1 +
>   contrib/plugins/execlog.c | 112 ++++++++++++++++++++++++++++++++++++++
>   3 files changed, 114 insertions(+)
>   create mode 100644 contrib/plugins/execlog.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 7d9cd29042..65942d5802 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -2974,6 +2974,7 @@ F: include/tcg/
>   
>   TCG Plugins
>   M: Alex Bennée <alex.bennee@linaro.org>
> +R: Alexandre Iooss <erdnaxe@crans.org>
>   S: Maintained
>   F: docs/devel/tcg-plugins.rst
>   F: plugins/
> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
> index b9d7935e5e..51093acd17 100644
> --- a/contrib/plugins/Makefile
> +++ b/contrib/plugins/Makefile
> @@ -13,6 +13,7 @@ include $(BUILD_DIR)/config-host.mak
>   VPATH += $(SRC_PATH)/contrib/plugins
>   
>   NAMES :=
> +NAMES += execlog
>   NAMES += hotblocks
>   NAMES += hotpages
>   NAMES += howvec
> diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c
> new file mode 100644
> index 0000000000..80716e8eed
> --- /dev/null
> +++ b/contrib/plugins/execlog.c
> @@ -0,0 +1,112 @@
> +/*
> + * Copyright (C) 2021, Alexandre Iooss <erdnaxe@crans.org>
> + *
> + * Log instruction execution and memory access to a file.
> + * You may pass the output filename as argument.
> + *
> + * License: GNU GPL, version 2 or later.
> + *   See the COPYING file in the top-level directory.
> + */
> +#include <glib.h>
> +#include <inttypes.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +
> +#include <qemu-plugin.h>
> +
> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
> +
> +/* Execution trace buffer */
> +FILE *output;

Is there a reason for using a regular FILE * instead of using 
`qemu_plugin_outs()`?

I don't see a benefit and since there's an API for logging I guess it's 
better to use it instead

> +
> +/**
> + * Log memory read or write
> + */
> +static void vcpu_mem(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
> +                     uint64_t vaddr, void *udata)
> +{
> +    struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
> +    if (!hwaddr) {
> +        return;
> +    }

So you just reject all memory accesses in user mode? I think that it 
equally useful

to log only the virtual address of a memory access in user-mode 
emulation? However, we better

wait for Alex's opinion on all this since he had remarks about 
introducing a new ad-hoc

tracing format...

> +
> +    /* Add data to execution log */
> +    const char *name = qemu_plugin_hwaddr_device_name(hwaddr);
> +    uint64_t addr = qemu_plugin_hwaddr_phys_addr(hwaddr);
> +    if (qemu_plugin_mem_is_store(info)) {
> +        fprintf(output, "mem: %s store at 0x%08lx\n", name, addr);
> +    } else {
> +        fprintf(output, "mem: %s load at 0x%08lx\n", name, addr);
> +    }
> +}
> +
> +/**
> + * Log instruction execution
> + */
> +static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
> +{
> +    char *insn_disas = (char *)udata;
> +
> +    /* Add data to execution log */
> +    fprintf(output, "insn: %s\n", insn_disas);
> +}
> +
> +/**
> + * On translation block new translation
> + *
> + * QEMU convert code by translation block (TB). By hooking here we can then hook
> + * a callback on each instruction and memory access.
> + */
> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
> +{
> +    size_t n = qemu_plugin_tb_n_insns(tb);
> +    for (size_t i = 0; i < n; i++) {
> +        /* insn is shared between translations in QEMU, copy needed data here */
> +        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
> +        char *insn_disas = qemu_plugin_insn_disas(insn);
> +
> +        /* Register callback on memory read or write */
> +        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem,
> +                                         QEMU_PLUGIN_CB_NO_REGS,
> +                                         QEMU_PLUGIN_MEM_RW, NULL);
> +
> +        /* Register callback on instruction */
> +        qemu_plugin_register_vcpu_insn_exec_cb(
> +            insn, vcpu_insn_exec, QEMU_PLUGIN_CB_R_REGS, insn_disas);
> +    }
> +}
> +
> +/**
> + * On plugin exit, close output file
> + */
> +static void plugin_exit(qemu_plugin_id_t id, void *p)
> +{
> +    fclose(output);
> +}
> +
> +/**
> + * Install the plugin
> + */
> +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
> +                                           const qemu_info_t *info, int argc,
> +                                           char **argv)
> +{
> +    /* Parse arguments to get output name and open for writing */
> +    char *filename = "execution.log";
> +    if (argc > 0) {
> +        filename = argv[0];
> +    }
> +    output = fopen(filename, "w");
> +    if (output == NULL) {
> +        qemu_plugin_outs("Cannot open output file for writing.\n");

Here, I think that it's more logical to output error messages to stderr 
since

logging can be redirected to a file so QEMU will error but the error message

would be written to the log file.

> +        return -1;
> +    }
> +
> +    /* Register translation block and exit callbacks */
> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
> +
> +    return 0;
> +}


Thanks,

Mahmoud
Alex Bennée June 15, 2021, 8:22 a.m. UTC | #2
Mahmoud Mandour <ma.mandourr@gmail.com> writes:

> On 14/06/2021 11:01, Alexandre Iooss wrote:
>> Log instruction execution and memory access to a file.
>> This plugin can be used for reverse engineering or for side-channel analysis
>> using QEMU.
>>
>> Signed-off-by: Alexandre Iooss <erdnaxe@crans.org>
>> ---
>>   MAINTAINERS               |   1 +
>>   contrib/plugins/Makefile  |   1 +
>>   contrib/plugins/execlog.c | 112 ++++++++++++++++++++++++++++++++++++++
>>   3 files changed, 114 insertions(+)
>>   create mode 100644 contrib/plugins/execlog.c
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index 7d9cd29042..65942d5802 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -2974,6 +2974,7 @@ F: include/tcg/
>>     TCG Plugins
>>   M: Alex Bennée <alex.bennee@linaro.org>
>> +R: Alexandre Iooss <erdnaxe@crans.org>
>>   S: Maintained
>>   F: docs/devel/tcg-plugins.rst
>>   F: plugins/
>> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
>> index b9d7935e5e..51093acd17 100644
>> --- a/contrib/plugins/Makefile
>> +++ b/contrib/plugins/Makefile
>> @@ -13,6 +13,7 @@ include $(BUILD_DIR)/config-host.mak
>>   VPATH += $(SRC_PATH)/contrib/plugins
>>     NAMES :=
>> +NAMES += execlog
>>   NAMES += hotblocks
>>   NAMES += hotpages
>>   NAMES += howvec
>> diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c
>> new file mode 100644
>> index 0000000000..80716e8eed
>> --- /dev/null
>> +++ b/contrib/plugins/execlog.c
>> @@ -0,0 +1,112 @@
>> +/*
>> + * Copyright (C) 2021, Alexandre Iooss <erdnaxe@crans.org>
>> + *
>> + * Log instruction execution and memory access to a file.
>> + * You may pass the output filename as argument.
>> + *
>> + * License: GNU GPL, version 2 or later.
>> + *   See the COPYING file in the top-level directory.
>> + */
>> +#include <glib.h>
>> +#include <inttypes.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <string.h>
>> +#include <unistd.h>
>> +
>> +#include <qemu-plugin.h>
>> +
>> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>> +
>> +/* Execution trace buffer */
>> +FILE *output;
>
> Is there a reason for using a regular FILE * instead of using
> `qemu_plugin_outs()`?
>
> I don't see a benefit and since there's an API for logging I guess
> it's better to use it instead

I would prefer using qemu_plugin_outs unless there is a reason to have
multiple streams.

>
>> +
>> +/**
>> + * Log memory read or write
>> + */
>> +static void vcpu_mem(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
>> +                     uint64_t vaddr, void *udata)
>> +{
>> +    struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
>> +    if (!hwaddr) {
>> +        return;
>> +    }
>
> So you just reject all memory accesses in user mode? I think that it
> equally useful
>
> to log only the virtual address of a memory access in user-mode
> emulation? However, we better
>
> wait for Alex's opinion on all this since he had remarks about
> introducing a new ad-hoc
>
> tracing format...

It would be nice to integrate with the QEMU tracing support but
following Stefan's comments I think it will be fairly involved to work
out a way to do it. In the meantime using plain text logging will have
to do.

>> +
>> +    /* Add data to execution log */
>> +    const char *name = qemu_plugin_hwaddr_device_name(hwaddr);
>> +    uint64_t addr = qemu_plugin_hwaddr_phys_addr(hwaddr);
>> +    if (qemu_plugin_mem_is_store(info)) {
>> +        fprintf(output, "mem: %s store at 0x%08lx\n", name, addr);
>> +    } else {
>> +        fprintf(output, "mem: %s load at 0x%08lx\n", name, addr);
>> +    }

I would consider a comma separated output to make the file easily
digestible by other tools. Maybe:

  memory, 0x%08lx, load|store

>> +}
>> +
>> +/**
>> + * Log instruction execution
>> + */
>> +static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
>> +{
>> +    char *insn_disas = (char *)udata;
>> +
>> +    /* Add data to execution log */
>> +    fprintf(output, "insn: %s\n", insn_disas);

  insn, 0x%08lx, disas

Currently however on a multi-threaded execution you will potentially
loose any synchronisation between the insn_exec and any memory operation
associated with it. If you really just care about what's tickling
hardware you could just drop the insn_exec callback and pass the
instruction information via udata to the vcpu_mem callback. You could
then dump everything in one line:

  0xPC, ld [x1], x2, 0xADDR, load|store

you wouldn't dump other instructions leading up to that though.

>> +}
>> +
>> +/**
>> + * On translation block new translation
>> + *
>> + * QEMU convert code by translation block (TB). By hooking here we can then hook
>> + * a callback on each instruction and memory access.
>> + */
>> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>> +{
>> +    size_t n = qemu_plugin_tb_n_insns(tb);
>> +    for (size_t i = 0; i < n; i++) {
>> +        /* insn is shared between translations in QEMU, copy needed data here */
>> +        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb,
>> i);

If you had some knowledge of the guest architecture you could attempt a
partial decode of the op and work out if it was a load/store and then
conditionally register the mem or exec callback.

>> +        char *insn_disas = qemu_plugin_insn_disas(insn);
>> +
>> +        /* Register callback on memory read or write */
>> +        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem,
>> +                                         QEMU_PLUGIN_CB_NO_REGS,
>> +                                         QEMU_PLUGIN_MEM_RW, NULL);
>> +
>> +        /* Register callback on instruction */
>> +        qemu_plugin_register_vcpu_insn_exec_cb(
>> +            insn, vcpu_insn_exec, QEMU_PLUGIN_CB_R_REGS, insn_disas);
>> +    }
>> +}
>> +
>> +/**
>> + * On plugin exit, close output file
>> + */
>> +static void plugin_exit(qemu_plugin_id_t id, void *p)
>> +{
>> +    fclose(output);
>> +}
>> +
>> +/**
>> + * Install the plugin
>> + */
>> +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
>> +                                           const qemu_info_t *info, int argc,
>> +                                           char **argv)
>> +{
>> +    /* Parse arguments to get output name and open for writing */
>> +    char *filename = "execution.log";
>> +    if (argc > 0) {
>> +        filename = argv[0];
>> +    }
>> +    output = fopen(filename, "w");
>> +    if (output == NULL) {
>> +        qemu_plugin_outs("Cannot open output file for writing.\n");
>
> Here, I think that it's more logical to output error messages to
> stderr since
>
> logging can be redirected to a file so QEMU will error but the error message
>
> would be written to the log file.

Yes please, report errors directly to stderr as the execution is going
to trigger and exit at this point and the user needs direct feeback.

>
>> +        return -1;
>> +    }
>> +
>> +    /* Register translation block and exit callbacks */
>> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
>> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
>> +
>> +    return 0;
>> +}
>
>
> Thanks,
>
> Mahmoud
>
>
>
Alexandre IOOSS June 15, 2021, 4:47 p.m. UTC | #3
On 6/15/21 10:22 AM, Alex Bennée wrote:
> Mahmoud Mandour <ma.mandourr@gmail.com> writes:
> 
>> On 14/06/2021 11:01, Alexandre Iooss wrote:
>>> Log instruction execution and memory access to a file.
>>> This plugin can be used for reverse engineering or for side-channel analysis
>>> using QEMU.
>>>
>>> Signed-off-by: Alexandre Iooss <erdnaxe@crans.org>
>>> ---
>>>    MAINTAINERS               |   1 +
>>>    contrib/plugins/Makefile  |   1 +
>>>    contrib/plugins/execlog.c | 112 ++++++++++++++++++++++++++++++++++++++
>>>    3 files changed, 114 insertions(+)
>>>    create mode 100644 contrib/plugins/execlog.c
>>>
>>> diff --git a/MAINTAINERS b/MAINTAINERS
>>> index 7d9cd29042..65942d5802 100644
>>> --- a/MAINTAINERS
>>> +++ b/MAINTAINERS
>>> @@ -2974,6 +2974,7 @@ F: include/tcg/
>>>      TCG Plugins
>>>    M: Alex Bennée <alex.bennee@linaro.org>
>>> +R: Alexandre Iooss <erdnaxe@crans.org>
>>>    S: Maintained
>>>    F: docs/devel/tcg-plugins.rst
>>>    F: plugins/
>>> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
>>> index b9d7935e5e..51093acd17 100644
>>> --- a/contrib/plugins/Makefile
>>> +++ b/contrib/plugins/Makefile
>>> @@ -13,6 +13,7 @@ include $(BUILD_DIR)/config-host.mak
>>>    VPATH += $(SRC_PATH)/contrib/plugins
>>>      NAMES :=
>>> +NAMES += execlog
>>>    NAMES += hotblocks
>>>    NAMES += hotpages
>>>    NAMES += howvec
>>> diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c
>>> new file mode 100644
>>> index 0000000000..80716e8eed
>>> --- /dev/null
>>> +++ b/contrib/plugins/execlog.c
>>> @@ -0,0 +1,112 @@
>>> +/*
>>> + * Copyright (C) 2021, Alexandre Iooss <erdnaxe@crans.org>
>>> + *
>>> + * Log instruction execution and memory access to a file.
>>> + * You may pass the output filename as argument.
>>> + *
>>> + * License: GNU GPL, version 2 or later.
>>> + *   See the COPYING file in the top-level directory.
>>> + */
>>> +#include <glib.h>
>>> +#include <inttypes.h>
>>> +#include <stdio.h>
>>> +#include <stdlib.h>
>>> +#include <string.h>
>>> +#include <unistd.h>
>>> +
>>> +#include <qemu-plugin.h>
>>> +
>>> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>>> +
>>> +/* Execution trace buffer */
>>> +FILE *output;
>>
>> Is there a reason for using a regular FILE * instead of using
>> `qemu_plugin_outs()`?
>>
>> I don't see a benefit and since there's an API for logging I guess
>> it's better to use it instead
> 
> I would prefer using qemu_plugin_outs unless there is a reason to have
> multiple streams.

I was not aware of `-D` option in QEMU that does exactly what I need. So 
I am going to use qemu_plugin_outs for next version of this patch.

>>> +
>>> +/**
>>> + * Log memory read or write
>>> + */
>>> +static void vcpu_mem(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
>>> +                     uint64_t vaddr, void *udata)
>>> +{
>>> +    struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
>>> +    if (!hwaddr) {
>>> +        return;
>>> +    }
>>
>> So you just reject all memory accesses in user mode? I think that it
>> equally useful
>>
>> to log only the virtual address of a memory access in user-mode
>> emulation? However, we better
>>
>> wait for Alex's opinion on all this since he had remarks about
>> introducing a new ad-hoc
>>
>> tracing format...
> 
> It would be nice to integrate with the QEMU tracing support but
> following Stefan's comments I think it will be fairly involved to work
> out a way to do it. In the meantime using plain text logging will have
> to do.

I will change this to log the vaddr in user mode without device name.

>>> +
>>> +    /* Add data to execution log */
>>> +    const char *name = qemu_plugin_hwaddr_device_name(hwaddr);
>>> +    uint64_t addr = qemu_plugin_hwaddr_phys_addr(hwaddr);
>>> +    if (qemu_plugin_mem_is_store(info)) {
>>> +        fprintf(output, "mem: %s store at 0x%08lx\n", name, addr);
>>> +    } else {
>>> +        fprintf(output, "mem: %s load at 0x%08lx\n", name, addr);
>>> +    }
> 
> I would consider a comma separated output to make the file easily
> digestible by other tools. Maybe:
> 
>    memory, 0x%08lx, load|store
> 

Agree, I will change this.

>>> +}
>>> +
>>> +/**
>>> + * Log instruction execution
>>> + */
>>> +static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
>>> +{
>>> +    char *insn_disas = (char *)udata;
>>> +
>>> +    /* Add data to execution log */
>>> +    fprintf(output, "insn: %s\n", insn_disas);
> 
>    insn, 0x%08lx, disas
> 
> Currently however on a multi-threaded execution you will potentially
> loose any synchronisation between the insn_exec and any memory operation
> associated with it. If you really just care about what's tickling
> hardware you could just drop the insn_exec callback and pass the
> instruction information via udata to the vcpu_mem callback. You could
> then dump everything in one line:
> 
>    0xPC, ld [x1], x2, 0xADDR, load|store
> 
> you wouldn't dump other instructions leading up to that though.

You are correct, this is indeed an issue and it's currently making my 
life really hard when I try to apply a side-channel model on the memory 
interactions.
I prefer to log all instructions, so I need to use vcpu_mem_cb when it's 
a memory instruction, or vcpu_insn_exec_cb if it's not.

>>> +}
>>> +
>>> +/**
>>> + * On translation block new translation
>>> + *
>>> + * QEMU convert code by translation block (TB). By hooking here we can then hook
>>> + * a callback on each instruction and memory access.
>>> + */
>>> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>>> +{
>>> +    size_t n = qemu_plugin_tb_n_insns(tb);
>>> +    for (size_t i = 0; i < n; i++) {
>>> +        /* insn is shared between translations in QEMU, copy needed data here */
>>> +        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb,
>>> i);
> 
> If you had some knowledge of the guest architecture you could attempt a
> partial decode of the op and work out if it was a load/store and then
> conditionally register the mem or exec callback.

Agree. The TCG API does not offer a way to know from a insn if it's 
something that is going to trigger a vcpu_mem_cb, so I will add some 
code to decode this information from the opcode.
When the architecture is unknown, I might default to exec callback.

>>> +        char *insn_disas = qemu_plugin_insn_disas(insn);
>>> +
>>> +        /* Register callback on memory read or write */
>>> +        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem,
>>> +                                         QEMU_PLUGIN_CB_NO_REGS,
>>> +                                         QEMU_PLUGIN_MEM_RW, NULL);
>>> +
>>> +        /* Register callback on instruction */
>>> +        qemu_plugin_register_vcpu_insn_exec_cb(
>>> +            insn, vcpu_insn_exec, QEMU_PLUGIN_CB_R_REGS, insn_disas);
>>> +    }
>>> +}
>>> +
>>> +/**
>>> + * On plugin exit, close output file
>>> + */
>>> +static void plugin_exit(qemu_plugin_id_t id, void *p)
>>> +{
>>> +    fclose(output);
>>> +}
>>> +
>>> +/**
>>> + * Install the plugin
>>> + */
>>> +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
>>> +                                           const qemu_info_t *info, int argc,
>>> +                                           char **argv)
>>> +{
>>> +    /* Parse arguments to get output name and open for writing */
>>> +    char *filename = "execution.log";
>>> +    if (argc > 0) {
>>> +        filename = argv[0];
>>> +    }
>>> +    output = fopen(filename, "w");
>>> +    if (output == NULL) {
>>> +        qemu_plugin_outs("Cannot open output file for writing.\n");
>>
>> Here, I think that it's more logical to output error messages to
>> stderr since
>>
>> logging can be redirected to a file so QEMU will error but the error message
>>
>> would be written to the log file.
> 
> Yes please, report errors directly to stderr as the execution is going
> to trigger and exit at this point and the user needs direct feeback.
> 

I am going to remove this section of code as I don't need to open a file 
anymore.

>>
>>> +        return -1;
>>> +    }
>>> +
>>> +    /* Register translation block and exit callbacks */
>>> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
>>> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
>>> +
>>> +    return 0;
>>> +}
>>
>>
>> Thanks,
>>
>> Mahmoud
>>
>>
>>
> 

Thanks,
-- Alexandre
Alexandre IOOSS June 16, 2021, 3:15 p.m. UTC | #4
On 6/15/21 6:47 PM, Alexandre IOOSS wrote:
> On 6/15/21 10:22 AM, Alex Bennée wrote:
>> Mahmoud Mandour <ma.mandourr@gmail.com> writes:
>>> On 14/06/2021 11:01, Alexandre Iooss wrote:
>>>> +}
>>>> +
>>>> +/**
>>>> + * Log instruction execution
>>>> + */
>>>> +static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
>>>> +{
>>>> +    char *insn_disas = (char *)udata;
>>>> +
>>>> +    /* Add data to execution log */
>>>> +    fprintf(output, "insn: %s\n", insn_disas);
>>
>>    insn, 0x%08lx, disas
>>
>> Currently however on a multi-threaded execution you will potentially
>> loose any synchronisation between the insn_exec and any memory operation
>> associated with it. If you really just care about what's tickling
>> hardware you could just drop the insn_exec callback and pass the
>> instruction information via udata to the vcpu_mem callback. You could
>> then dump everything in one line:
>>
>>    0xPC, ld [x1], x2, 0xADDR, load|store
>>
>> you wouldn't dump other instructions leading up to that though.
> 
> You are correct, this is indeed an issue and it's currently making my 
> life really hard when I try to apply a side-channel model on the memory 
> interactions.
> I prefer to log all instructions, so I need to use vcpu_mem_cb when it's 
> a memory instruction, or vcpu_insn_exec_cb if it's not.

If I always set vcpu_mem_cb and vcpu_insn_exec_cb, then an user can do a 
bit of postprocessing of the data to merge lines that correspond to 
memory interactions. Example of output (Cortex-M0 in Thumb mode):

```
# vaddr, opcode, disassembly, [load/store, memory addr, device]
0xa14, 0xf87f42b4, "cmp r4, r6"
0xa16, 0xd206, "bhs #0xa26"
0xa18, 0xfff94803, "ldr r0, [pc, #0xc]"
0xa18, 0xfff94803, "ldr r0, [pc, #0xc]", load, 0x00010a28, RAM
0xa1a, 0xf989f000, "bl #0xd30"
0xd30, 0xfff9b510, "push {r4, lr}"
0xd30, 0xfff9b510, "push {r4, lr}", store, 0x20003ee0, RAM
0xd30, 0xfff9b510, "push {r4, lr}", store, 0x20003ee4, RAM
0xd32, 0xf9893014, "adds r0, #0x14"
0xd34, 0xf9c8f000, "bl #0x10c8"
0x10c8, 0xfff96c43, "ldr r3, [r0, #0x44]"
0x10c8, 0xfff96c43, "ldr r3, [r0, #0x44]", load, 0x200000e4, RAM
```

If we don't want to call `vcpu_insn_exec_cb` when `vcpu_mem_cb` is 
triggered, then I would have either to:

1. Implement load/store instructions matchers, similar to what is done 
in `howvec.c` plugin.
2. Implement instructions mnemonic matchers (using the output of 
qemu_plugin_insn_disas).
3. Use Capstone and disassemble a second time each instructions.

What is your opinion on these solutions? Maybe for a first version we 
can keep it simple?

Thanks,
-- Alexandre
Alex Bennée June 17, 2021, 9:55 a.m. UTC | #5
Alexandre IOOSS <erdnaxe@crans.org> writes:

> [[PGP Signed Part:Undecided]]
> On 6/15/21 6:47 PM, Alexandre IOOSS wrote:
>> On 6/15/21 10:22 AM, Alex Bennée wrote:
>>> Mahmoud Mandour <ma.mandourr@gmail.com> writes:
>>>> On 14/06/2021 11:01, Alexandre Iooss wrote:
>>>>> +}
>>>>> +
>>>>> +/**
>>>>> + * Log instruction execution
>>>>> + */
>>>>> +static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
>>>>> +{
>>>>> +    char *insn_disas = (char *)udata;
>>>>> +
>>>>> +    /* Add data to execution log */
>>>>> +    fprintf(output, "insn: %s\n", insn_disas);
>>>
>>>    insn, 0x%08lx, disas
>>>
>>> Currently however on a multi-threaded execution you will potentially
>>> loose any synchronisation between the insn_exec and any memory operation
>>> associated with it. If you really just care about what's tickling
>>> hardware you could just drop the insn_exec callback and pass the
>>> instruction information via udata to the vcpu_mem callback. You could
>>> then dump everything in one line:
>>>
>>>    0xPC, ld [x1], x2, 0xADDR, load|store
>>>
>>> you wouldn't dump other instructions leading up to that though.
>> You are correct, this is indeed an issue and it's currently making
>> my life really hard when I try to apply a side-channel model on the
>> memory interactions.
>> I prefer to log all instructions, so I need to use vcpu_mem_cb when
>> it's a memory instruction, or vcpu_insn_exec_cb if it's not.
>
> If I always set vcpu_mem_cb and vcpu_insn_exec_cb, then an user can do
> a bit of postprocessing of the data to merge lines that correspond to
> memory interactions. Example of output (Cortex-M0 in Thumb mode):
>
> ```
> # vaddr, opcode, disassembly, [load/store, memory addr, device]
> 0xa14, 0xf87f42b4, "cmp r4, r6"
> 0xa16, 0xd206, "bhs #0xa26"
> 0xa18, 0xfff94803, "ldr r0, [pc, #0xc]"
> 0xa18, 0xfff94803, "ldr r0, [pc, #0xc]", load, 0x00010a28, RAM
> 0xa1a, 0xf989f000, "bl #0xd30"
> 0xd30, 0xfff9b510, "push {r4, lr}"
> 0xd30, 0xfff9b510, "push {r4, lr}", store, 0x20003ee0, RAM
> 0xd30, 0xfff9b510, "push {r4, lr}", store, 0x20003ee4, RAM
> 0xd32, 0xf9893014, "adds r0, #0x14"
> 0xd34, 0xf9c8f000, "bl #0x10c8"
> 0x10c8, 0xfff96c43, "ldr r3, [r0, #0x44]"
> 0x10c8, 0xfff96c43, "ldr r3, [r0, #0x44]", load, 0x200000e4, RAM
> ```

To be honest the post-processing is probably as workable solution as
anything else.

>
> If we don't want to call `vcpu_insn_exec_cb` when `vcpu_mem_cb` is
> triggered, then I would have either to:
>
> 1. Implement load/store instructions matchers, similar to what is done
> in `howvec.c` plugin.

This is probably fairly easy for most RISCs with sane encodings.

> 2. Implement instructions mnemonic matchers (using the output of
> qemu_plugin_insn_disas).

I wouldn't rely on textual formatting and reverse parsing the data.

> 3. Use Capstone and disassemble a second time each instructions.

The disassembly memory belongs to the plugin until it frees it so there
is no reason why you can't cache the data. If you share the same
callback data between the insn and mem callbacks you could just cache
the data in the insn callback and only print it out in the memory
callback. If you run another insn callback without having run a memory
callback then you know the last insn was a non-memory affecting one and
you print it out then before caching the current data. For a single
thread this would still look coherent and will be coherent per core if
you take care to cache in a per-cpu aware structure.

> What is your opinion on these solutions? Maybe for a first version we
> can keep it simple?

There is no problem with keeping it simple - and it certainly shouldn't
hold up merging. It can always be improved upon later ;-)

>
> Thanks,
> -- Alexandre
>
> [[End of PGP Signed Part]]
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 7d9cd29042..65942d5802 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2974,6 +2974,7 @@  F: include/tcg/
 
 TCG Plugins
 M: Alex Bennée <alex.bennee@linaro.org>
+R: Alexandre Iooss <erdnaxe@crans.org>
 S: Maintained
 F: docs/devel/tcg-plugins.rst
 F: plugins/
diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
index b9d7935e5e..51093acd17 100644
--- a/contrib/plugins/Makefile
+++ b/contrib/plugins/Makefile
@@ -13,6 +13,7 @@  include $(BUILD_DIR)/config-host.mak
 VPATH += $(SRC_PATH)/contrib/plugins
 
 NAMES :=
+NAMES += execlog
 NAMES += hotblocks
 NAMES += hotpages
 NAMES += howvec
diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c
new file mode 100644
index 0000000000..80716e8eed
--- /dev/null
+++ b/contrib/plugins/execlog.c
@@ -0,0 +1,112 @@ 
+/*
+ * Copyright (C) 2021, Alexandre Iooss <erdnaxe@crans.org>
+ *
+ * Log instruction execution and memory access to a file.
+ * You may pass the output filename as argument.
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#include <glib.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+/* Execution trace buffer */
+FILE *output;
+
+/**
+ * Log memory read or write
+ */
+static void vcpu_mem(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
+                     uint64_t vaddr, void *udata)
+{
+    struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
+    if (!hwaddr) {
+        return;
+    }
+
+    /* Add data to execution log */
+    const char *name = qemu_plugin_hwaddr_device_name(hwaddr);
+    uint64_t addr = qemu_plugin_hwaddr_phys_addr(hwaddr);
+    if (qemu_plugin_mem_is_store(info)) {
+        fprintf(output, "mem: %s store at 0x%08lx\n", name, addr);
+    } else {
+        fprintf(output, "mem: %s load at 0x%08lx\n", name, addr);
+    }
+}
+
+/**
+ * Log instruction execution
+ */
+static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
+{
+    char *insn_disas = (char *)udata;
+
+    /* Add data to execution log */
+    fprintf(output, "insn: %s\n", insn_disas);
+}
+
+/**
+ * On translation block new translation
+ *
+ * QEMU convert code by translation block (TB). By hooking here we can then hook
+ * a callback on each instruction and memory access.
+ */
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+    size_t n = qemu_plugin_tb_n_insns(tb);
+    for (size_t i = 0; i < n; i++) {
+        /* insn is shared between translations in QEMU, copy needed data here */
+        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+        char *insn_disas = qemu_plugin_insn_disas(insn);
+
+        /* Register callback on memory read or write */
+        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem,
+                                         QEMU_PLUGIN_CB_NO_REGS,
+                                         QEMU_PLUGIN_MEM_RW, NULL);
+
+        /* Register callback on instruction */
+        qemu_plugin_register_vcpu_insn_exec_cb(
+            insn, vcpu_insn_exec, QEMU_PLUGIN_CB_R_REGS, insn_disas);
+    }
+}
+
+/**
+ * On plugin exit, close output file
+ */
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+    fclose(output);
+}
+
+/**
+ * Install the plugin
+ */
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+                                           const qemu_info_t *info, int argc,
+                                           char **argv)
+{
+    /* Parse arguments to get output name and open for writing */
+    char *filename = "execution.log";
+    if (argc > 0) {
+        filename = argv[0];
+    }
+    output = fopen(filename, "w");
+    if (output == NULL) {
+        qemu_plugin_outs("Cannot open output file for writing.\n");
+        return -1;
+    }
+
+    /* Register translation block and exit callbacks */
+    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+
+    return 0;
+}