diff mbox series

[1/2] accel/tcg/plugin: export host insn size

Message ID 20230406022751.757980-2-fei2.wu@intel.com (mailing list archive)
State New, archived
Headers show
Series accel/tcg/plugin: host insn size for plugin | expand

Commit Message

Wu, Fei April 6, 2023, 2:27 a.m. UTC
The translation ratio of host to guest instruction count is one of the
key performance factors of binary translation. TCG doesn't collect the host
instruction count at present, but it does collect the host instruction size.
Although the two are not the same thing, since instruction size might
not be fixed, instruction size is still a valid estimate.

Signed-off-by: Fei Wu <fei2.wu@intel.com>
---
 accel/tcg/plugin-gen.c       | 1 +
 include/qemu/plugin.h        | 2 ++
 include/qemu/qemu-plugin.h   | 8 ++++++++
 plugins/api.c                | 5 +++++
 plugins/qemu-plugins.symbols | 1 +
 5 files changed, 17 insertions(+)

Comments

Alex Bennée April 6, 2023, 7:46 a.m. UTC | #1
Fei Wu <fei2.wu@intel.com> writes:

> The translation ratio of host to guest instruction count is one of the
> key performance factor of binary translation. TCG doesn't collect host
> instruction count at present, it does collect host instruction size
> instead, although they are not the same thing as instruction size might
> not be fixed, instruction size is still a valid estimation.

I'm not so sure about exposing this information to plugins because we
try to avoid leaking internal implementation details to plugins. Aside
from that the very act of instrumenting will increase the size of the
target buffer.

If your aim is to examine JIT efficiency, what is wrong with the current
"info jit" that you can access via the HMP? Also, I'm wondering if it's
time to remove the #ifdefs from CONFIG_PROFILER because I doubt the
extra data it collects is that expensive.

Richard, what do you think?

>
> Signed-off-by: Fei Wu <fei2.wu@intel.com>
> ---
>  accel/tcg/plugin-gen.c       | 1 +
>  include/qemu/plugin.h        | 2 ++
>  include/qemu/qemu-plugin.h   | 8 ++++++++
>  plugins/api.c                | 5 +++++
>  plugins/qemu-plugins.symbols | 1 +
>  5 files changed, 17 insertions(+)
>
> diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
> index 5efb8db258..4a3ca8fa2f 100644
> --- a/accel/tcg/plugin-gen.c
> +++ b/accel/tcg/plugin-gen.c
> @@ -881,6 +881,7 @@ bool plugin_gen_tb_start(CPUState *cpu, const DisasContextBase *db,
>          ptb->haddr2 = NULL;
>          ptb->mem_only = mem_only;
>          ptb->mem_helper = false;
> +        ptb->host_insn_size = &db->tb->tc.size;
>  
>          plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB);
>      }
> diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
> index bc0781cab8..b38fd139e1 100644
> --- a/include/qemu/plugin.h
> +++ b/include/qemu/plugin.h
> @@ -151,6 +151,8 @@ struct qemu_plugin_tb {
>      /* if set, the TB calls helpers that might access guest memory */
>      bool mem_helper;
>  
> +    uint64_t *host_insn_size;
> +
>      GArray *cbs[PLUGIN_N_CB_SUBTYPES];
>  };
>  
> diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h
> index 50a9957279..2397574a21 100644
> --- a/include/qemu/qemu-plugin.h
> +++ b/include/qemu/qemu-plugin.h
> @@ -336,6 +336,14 @@ void qemu_plugin_register_vcpu_insn_exec_inline(struct qemu_plugin_insn *insn,
>   */
>  size_t qemu_plugin_tb_n_insns(const struct qemu_plugin_tb *tb);
>  
> +/**
> + * qemu_plugin_tb_n_insns() - query helper for host insns size in TB
> + * @tb: opaque handle to TB passed to callback
> + *
> + * Returns: address of host insns size of this block

If we went ahead with this, we would need to be very clear about when you
can call this helper, because the data will only be valid at certain points
(which is another argument against this).

> + */
> +void *qemu_plugin_tb_host_insn_size(const struct qemu_plugin_tb *tb);
> +
>  /**
>   * qemu_plugin_tb_vaddr() - query helper for vaddr of TB start
>   * @tb: opaque handle to TB passed to callback
> diff --git a/plugins/api.c b/plugins/api.c
> index 2078b16edb..0d70cb1f0f 100644
> --- a/plugins/api.c
> +++ b/plugins/api.c
> @@ -188,6 +188,11 @@ size_t qemu_plugin_tb_n_insns(const struct qemu_plugin_tb *tb)
>      return tb->n;
>  }
>  
> +void *qemu_plugin_tb_host_insn_size(const struct qemu_plugin_tb *tb)
> +{
> +    return tb->host_insn_size;
> +}
> +
>  uint64_t qemu_plugin_tb_vaddr(const struct qemu_plugin_tb *tb)
>  {
>      return tb->vaddr;
> diff --git a/plugins/qemu-plugins.symbols b/plugins/qemu-plugins.symbols
> index 71f6c90549..3e92c3b8ba 100644
> --- a/plugins/qemu-plugins.symbols
> +++ b/plugins/qemu-plugins.symbols
> @@ -39,6 +39,7 @@
>    qemu_plugin_start_code;
>    qemu_plugin_tb_get_insn;
>    qemu_plugin_tb_n_insns;
> +  qemu_plugin_tb_host_insn_size;
>    qemu_plugin_tb_vaddr;
>    qemu_plugin_uninstall;
>    qemu_plugin_vcpu_for_each;
Wu, Fei April 7, 2023, 1:31 a.m. UTC | #2
On 4/6/2023 3:46 PM, Alex Bennée wrote:
> 
> Fei Wu <fei2.wu@intel.com> writes:
> 
>> The translation ratio of host to guest instruction count is one of the
>> key performance factor of binary translation. TCG doesn't collect host
>> instruction count at present, it does collect host instruction size
>> instead, although they are not the same thing as instruction size might
>> not be fixed, instruction size is still a valid estimation.
> 
> I'm not so sure about exposing this information to plugins because we
> try to avoid leaking internal implementation details to plugins. Aside
> from that the very act of instrumenting will increase the size of the
> target buffer.
> 
> If your aim is to examine JIT efficiency what is wrong with the current
> "info jit" that you can access via the HMP? Also I'm wondering if its
> time to remove the #ifdefs from CONFIG_PROFILER because I doubt the
> extra data it collects is that expensive.
> 
"info jit" collects the translation time expansion ratio, it doesn't
distinguish between hot and cold blocks:
    TB avg target size  14 max=1918 bytes
    TB avg host size    287 bytes (expansion ratio: 19.7)

My primary aim is to collect the runtime expansion ratio, so hot blocks
weigh more than cold blocks. My concern is this series might not be the
proper way to implement it, just as you mentioned in another reply.

Thanks,
Fei.

> Richard, what do you think?
> 
>>
>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>> ---
>>  accel/tcg/plugin-gen.c       | 1 +
>>  include/qemu/plugin.h        | 2 ++
>>  include/qemu/qemu-plugin.h   | 8 ++++++++
>>  plugins/api.c                | 5 +++++
>>  plugins/qemu-plugins.symbols | 1 +
>>  5 files changed, 17 insertions(+)
>>
>> diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
>> index 5efb8db258..4a3ca8fa2f 100644
>> --- a/accel/tcg/plugin-gen.c
>> +++ b/accel/tcg/plugin-gen.c
>> @@ -881,6 +881,7 @@ bool plugin_gen_tb_start(CPUState *cpu, const DisasContextBase *db,
>>          ptb->haddr2 = NULL;
>>          ptb->mem_only = mem_only;
>>          ptb->mem_helper = false;
>> +        ptb->host_insn_size = &db->tb->tc.size;
>>  
>>          plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB);
>>      }
>> diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
>> index bc0781cab8..b38fd139e1 100644
>> --- a/include/qemu/plugin.h
>> +++ b/include/qemu/plugin.h
>> @@ -151,6 +151,8 @@ struct qemu_plugin_tb {
>>      /* if set, the TB calls helpers that might access guest memory */
>>      bool mem_helper;
>>  
>> +    uint64_t *host_insn_size;
>> +
>>      GArray *cbs[PLUGIN_N_CB_SUBTYPES];
>>  };
>>  
>> diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h
>> index 50a9957279..2397574a21 100644
>> --- a/include/qemu/qemu-plugin.h
>> +++ b/include/qemu/qemu-plugin.h
>> @@ -336,6 +336,14 @@ void qemu_plugin_register_vcpu_insn_exec_inline(struct qemu_plugin_insn *insn,
>>   */
>>  size_t qemu_plugin_tb_n_insns(const struct qemu_plugin_tb *tb);
>>  
>> +/**
>> + * qemu_plugin_tb_n_insns() - query helper for host insns size in TB
>> + * @tb: opaque handle to TB passed to callback
>> + *
>> + * Returns: address of host insns size of this block
> 
> If we went ahead with this we need to be very clear when you can call
> this helper because the data will only be valid at certain points (which
> is another argument against this).
> 
>> + */
>> +void *qemu_plugin_tb_host_insn_size(const struct qemu_plugin_tb *tb);
>> +
>>  /**
>>   * qemu_plugin_tb_vaddr() - query helper for vaddr of TB start
>>   * @tb: opaque handle to TB passed to callback
>> diff --git a/plugins/api.c b/plugins/api.c
>> index 2078b16edb..0d70cb1f0f 100644
>> --- a/plugins/api.c
>> +++ b/plugins/api.c
>> @@ -188,6 +188,11 @@ size_t qemu_plugin_tb_n_insns(const struct qemu_plugin_tb *tb)
>>      return tb->n;
>>  }
>>  
>> +void *qemu_plugin_tb_host_insn_size(const struct qemu_plugin_tb *tb)
>> +{
>> +    return tb->host_insn_size;
>> +}
>> +
>>  uint64_t qemu_plugin_tb_vaddr(const struct qemu_plugin_tb *tb)
>>  {
>>      return tb->vaddr;
>> diff --git a/plugins/qemu-plugins.symbols b/plugins/qemu-plugins.symbols
>> index 71f6c90549..3e92c3b8ba 100644
>> --- a/plugins/qemu-plugins.symbols
>> +++ b/plugins/qemu-plugins.symbols
>> @@ -39,6 +39,7 @@
>>    qemu_plugin_start_code;
>>    qemu_plugin_tb_get_insn;
>>    qemu_plugin_tb_n_insns;
>> +  qemu_plugin_tb_host_insn_size;
>>    qemu_plugin_tb_vaddr;
>>    qemu_plugin_uninstall;
>>    qemu_plugin_vcpu_for_each;
> 
>
Richard Henderson April 8, 2023, 3:34 a.m. UTC | #3
On 4/6/23 00:46, Alex Bennée wrote:
> If your aim is to examine JIT efficiency what is wrong with the current
> "info jit" that you can access via the HMP? Also I'm wondering if its
> time to remove the #ifdefs from CONFIG_PROFILER because I doubt the
> extra data it collects is that expensive.
> 
> Richard, what do you think?

What is it that you want from CONFIG_PROFILER that you can't get from perf?
I've been tempted to remove CONFIG_PROFILER entirely.


r~
Alex Bennée April 10, 2023, 10:36 a.m. UTC | #4
Richard Henderson <richard.henderson@linaro.org> writes:

> On 4/6/23 00:46, Alex Bennée wrote:
>> If your aim is to examine JIT efficiency what is wrong with the current
>> "info jit" that you can access via the HMP? Also I'm wondering if its
>> time to remove the #ifdefs from CONFIG_PROFILER because I doubt the
>> extra data it collects is that expensive.
>> Richard, what do you think?
>
> What is it that you want from CONFIG_PROFILER that you can't get from perf?
> I've been tempted to remove CONFIG_PROFILER entirely.

I think perf is pretty good at getting the hot paths in the translator
and pretty much all of the timer related stuff in CONFIG_PROFILER could
be dropped. However some of the additional information about TCG ops
usage and distribution is useful. That said last time I had a tilt at
this on the back of a GSoC project:

  Subject: [PATCH  v9 00/13] TCG code quality tracking and perf integration
  Date: Mon,  7 Oct 2019 16:28:26 +0100
  Message-Id: <20191007152839.30804-1-alex.bennee@linaro.org>

The series ended up moving all the useful bits of CONFIG_PROFILER into
tb stats, which was dynamically controlled on a per-TB basis. Now that
the perf integration stuff has been merged, maybe there is a simpler series
to be picked out of the remains?

Fei Wu,

Have you looked at the above series? Is that gathering the sort of
things you need? Is this all in service of examining the translation
quality of hot code?

>
>
> r~
Alex Bennée April 10, 2023, 10:46 a.m. UTC | #5
"Wu, Fei" <fei2.wu@intel.com> writes:

> On 4/6/2023 3:46 PM, Alex Bennée wrote:
>> 
>> Fei Wu <fei2.wu@intel.com> writes:
>> 
>>> The translation ratio of host to guest instruction count is one of the
>>> key performance factor of binary translation. TCG doesn't collect host
>>> instruction count at present, it does collect host instruction size
>>> instead, although they are not the same thing as instruction size might
>>> not be fixed, instruction size is still a valid estimation.
>> 
>> I'm not so sure about exposing this information to plugins because we
>> try to avoid leaking internal implementation details to plugins. Aside
>> from that the very act of instrumenting will increase the size of the
>> target buffer.
>> 
>> If your aim is to examine JIT efficiency what is wrong with the current
>> "info jit" that you can access via the HMP? Also I'm wondering if its
>> time to remove the #ifdefs from CONFIG_PROFILER because I doubt the
>> extra data it collects is that expensive.
>> 
> "info jit" collects the translation time expansion ratio, it doesn't
> distinguish between hot and cold blocks:
>     TB avg target size  14 max=1918 bytes
>     TB avg host size    287 bytes (expansion ratio: 19.7)
>
> My primary aim is to collect the runtime expansion ratio, so hot blocks
> weigh more than cold blocks. My concern is this series might not be the
> proper way to implement it, just as you mentioned in another reply.

See my reply to Richard but:

  Subject: [PATCH  v9 00/13] TCG code quality tracking and perf integration
  Date: Mon,  7 Oct 2019 16:28:26 +0100
  Message-Id: <20191007152839.30804-1-alex.bennee@linaro.org>

may be of interest?

>
> Thanks,
> Fei.
>
>> Richard, what do you think?
>> 
>>>
>>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>>> ---
>>>  accel/tcg/plugin-gen.c       | 1 +
>>>  include/qemu/plugin.h        | 2 ++
>>>  include/qemu/qemu-plugin.h   | 8 ++++++++
>>>  plugins/api.c                | 5 +++++
>>>  plugins/qemu-plugins.symbols | 1 +
>>>  5 files changed, 17 insertions(+)
>>>
>>> diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
>>> index 5efb8db258..4a3ca8fa2f 100644
>>> --- a/accel/tcg/plugin-gen.c
>>> +++ b/accel/tcg/plugin-gen.c
>>> @@ -881,6 +881,7 @@ bool plugin_gen_tb_start(CPUState *cpu, const DisasContextBase *db,
>>>          ptb->haddr2 = NULL;
>>>          ptb->mem_only = mem_only;
>>>          ptb->mem_helper = false;
>>> +        ptb->host_insn_size = &db->tb->tc.size;
>>>  
>>>          plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB);
>>>      }
>>> diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
>>> index bc0781cab8..b38fd139e1 100644
>>> --- a/include/qemu/plugin.h
>>> +++ b/include/qemu/plugin.h
>>> @@ -151,6 +151,8 @@ struct qemu_plugin_tb {
>>>      /* if set, the TB calls helpers that might access guest memory */
>>>      bool mem_helper;
>>>  
>>> +    uint64_t *host_insn_size;
>>> +
>>>      GArray *cbs[PLUGIN_N_CB_SUBTYPES];
>>>  };
>>>  
>>> diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h
>>> index 50a9957279..2397574a21 100644
>>> --- a/include/qemu/qemu-plugin.h
>>> +++ b/include/qemu/qemu-plugin.h
>>> @@ -336,6 +336,14 @@ void qemu_plugin_register_vcpu_insn_exec_inline(struct qemu_plugin_insn *insn,
>>>   */
>>>  size_t qemu_plugin_tb_n_insns(const struct qemu_plugin_tb *tb);
>>>  
>>> +/**
>>> + * qemu_plugin_tb_n_insns() - query helper for host insns size in TB
>>> + * @tb: opaque handle to TB passed to callback
>>> + *
>>> + * Returns: address of host insns size of this block
>> 
>> If we went ahead with this we need to be very clear when you can call
>> this helper because the data will only be valid at certain points (which
>> is another argument against this).
>> 
>>> + */
>>> +void *qemu_plugin_tb_host_insn_size(const struct qemu_plugin_tb *tb);
>>> +
>>>  /**
>>>   * qemu_plugin_tb_vaddr() - query helper for vaddr of TB start
>>>   * @tb: opaque handle to TB passed to callback
>>> diff --git a/plugins/api.c b/plugins/api.c
>>> index 2078b16edb..0d70cb1f0f 100644
>>> --- a/plugins/api.c
>>> +++ b/plugins/api.c
>>> @@ -188,6 +188,11 @@ size_t qemu_plugin_tb_n_insns(const struct qemu_plugin_tb *tb)
>>>      return tb->n;
>>>  }
>>>  
>>> +void *qemu_plugin_tb_host_insn_size(const struct qemu_plugin_tb *tb)
>>> +{
>>> +    return tb->host_insn_size;
>>> +}
>>> +
>>>  uint64_t qemu_plugin_tb_vaddr(const struct qemu_plugin_tb *tb)
>>>  {
>>>      return tb->vaddr;
>>> diff --git a/plugins/qemu-plugins.symbols b/plugins/qemu-plugins.symbols
>>> index 71f6c90549..3e92c3b8ba 100644
>>> --- a/plugins/qemu-plugins.symbols
>>> +++ b/plugins/qemu-plugins.symbols
>>> @@ -39,6 +39,7 @@
>>>    qemu_plugin_start_code;
>>>    qemu_plugin_tb_get_insn;
>>>    qemu_plugin_tb_n_insns;
>>> +  qemu_plugin_tb_host_insn_size;
>>>    qemu_plugin_tb_vaddr;
>>>    qemu_plugin_uninstall;
>>>    qemu_plugin_vcpu_for_each;
>> 
>>
Wu, Fei April 10, 2023, 1:02 p.m. UTC | #6
On 4/10/2023 6:36 PM, Alex Bennée wrote:
> 
> Richard Henderson <richard.henderson@linaro.org> writes:
> 
>> On 4/6/23 00:46, Alex Bennée wrote:
>>> If your aim is to examine JIT efficiency what is wrong with the current
>>> "info jit" that you can access via the HMP? Also I'm wondering if its
>>> time to remove the #ifdefs from CONFIG_PROFILER because I doubt the
>>> extra data it collects is that expensive.
>>> Richard, what do you think?
>>
>> What is it that you want from CONFIG_PROFILER that you can't get from perf?
>> I've been tempted to remove CONFIG_PROFILER entirely.
> 
> I think perf is pretty good at getting the hot paths in the translator
> and pretty much all of the timer related stuff in CONFIG_PROFILER could
> be dropped. However some of the additional information about TCG ops
> usage and distribution is useful. That said last time I had a tilt at
> this on the back of a GSoC project:
> 
>   Subject: [PATCH  v9 00/13] TCG code quality tracking and perf integration
>   Date: Mon,  7 Oct 2019 16:28:26 +0100
>   Message-Id: <20191007152839.30804-1-alex.bennee@linaro.org>
> 
> The series ended up moving all the useful bits of CONFIG_PROFILER into
> tb stats which was dynamically controlled on a per TB basis. Now that
> the perf integration stuff was merged maybe there is a simpler series to
> be picked out of the remains?
> 
> Fei Wu,
> 
> Have you looked at the above series? Is that gathering the sort of
> things you need? Is this all in service of examining the translation
> quality of hot code?
> 
Yes, it does have what I want, I suppose this wiki is for the series:
    https://wiki.qemu.org/Features/TCGCodeQuality

btw, the archive seems broken and cannot show the whole series:
    https://www.mail-archive.com/qemu-devel@nongnu.org/msg650258.html

Thanks,
Fei.

>>
>>
>> r~
> 
>
Alex Bennée April 11, 2023, 7:27 a.m. UTC | #7
"Wu, Fei" <fei2.wu@intel.com> writes:

> On 4/10/2023 6:36 PM, Alex Bennée wrote:
>> 
>> Richard Henderson <richard.henderson@linaro.org> writes:
>> 
>>> On 4/6/23 00:46, Alex Bennée wrote:
>>>> If your aim is to examine JIT efficiency what is wrong with the current
>>>> "info jit" that you can access via the HMP? Also I'm wondering if its
>>>> time to remove the #ifdefs from CONFIG_PROFILER because I doubt the
>>>> extra data it collects is that expensive.
>>>> Richard, what do you think?
>>>
>>> What is it that you want from CONFIG_PROFILER that you can't get from perf?
>>> I've been tempted to remove CONFIG_PROFILER entirely.
>> 
>> I think perf is pretty good at getting the hot paths in the translator
>> and pretty much all of the timer related stuff in CONFIG_PROFILER could
>> be dropped. However some of the additional information about TCG ops
>> usage and distribution is useful. That said last time I had a tilt at
>> this on the back of a GSoC project:
>> 
>>   Subject: [PATCH  v9 00/13] TCG code quality tracking and perf integration
>>   Date: Mon,  7 Oct 2019 16:28:26 +0100
>>   Message-Id: <20191007152839.30804-1-alex.bennee@linaro.org>
>> 
>> The series ended up moving all the useful bits of CONFIG_PROFILER into
>> tb stats which was dynamically controlled on a per TB basis. Now that
>> the perf integration stuff was merged maybe there is a simpler series to
>> be picked out of the remains?
>> 
>> Fei Wu,
>> 
>> Have you looked at the above series? Is that gathering the sort of
>> things you need? Is this all in service of examining the translation
>> quality of hot code?
>> 
> Yes, it does have what I want, I suppose this wiki is for the series:
>     https://wiki.qemu.org/Features/TCGCodeQuality

Yes.

>
> btw, the archive seems broken and cannot show the whole series:
>     https://www.mail-archive.com/qemu-devel@nongnu.org/msg650258.html

I have a v10 branch here:

  https://github.com/stsquad/qemu/tree/tcg/tbstats-and-perf-v10

I think the top two patches can be dropped on a re-base as the JIT/perf
integration is already merged. It might be a tricky re-base though.
Depends on how much churn there has been in the tree since.

>
> Thanks,
> Fei.
>
>>>
>>>
>>> r~
>> 
>>
Wu, Fei April 12, 2023, 12:50 p.m. UTC | #8
On 4/11/2023 3:27 PM, Alex Bennée wrote:
> 
> "Wu, Fei" <fei2.wu@intel.com> writes:
> 
>> On 4/10/2023 6:36 PM, Alex Bennée wrote:
>>>
>>> Richard Henderson <richard.henderson@linaro.org> writes:
>>>
>>>> On 4/6/23 00:46, Alex Bennée wrote:
>>>>> If your aim is to examine JIT efficiency what is wrong with the current
>>>>> "info jit" that you can access via the HMP? Also I'm wondering if its
>>>>> time to remove the #ifdefs from CONFIG_PROFILER because I doubt the
>>>>> extra data it collects is that expensive.
>>>>> Richard, what do you think?
>>>>
>>>> What is it that you want from CONFIG_PROFILER that you can't get from perf?
>>>> I've been tempted to remove CONFIG_PROFILER entirely.
>>>
>>> I think perf is pretty good at getting the hot paths in the translator
>>> and pretty much all of the timer related stuff in CONFIG_PROFILER could
>>> be dropped. However some of the additional information about TCG ops
>>> usage and distribution is useful. That said last time I had a tilt at
>>> this on the back of a GSoC project:
>>>
>>>   Subject: [PATCH  v9 00/13] TCG code quality tracking and perf integration
>>>   Date: Mon,  7 Oct 2019 16:28:26 +0100
>>>   Message-Id: <20191007152839.30804-1-alex.bennee@linaro.org>
>>>
>>> The series ended up moving all the useful bits of CONFIG_PROFILER into
>>> tb stats which was dynamically controlled on a per TB basis. Now that
>>> the perf integration stuff was merged maybe there is a simpler series to
>>> be picked out of the remains?
>>>
>>> Fei Wu,
>>>
>>> Have you looked at the above series? Is that gathering the sort of
>>> things you need? Is this all in service of examining the translation
>>> quality of hot code?
>>>
>> Yes, it does have what I want, I suppose this wiki is for the series:
>>     https://wiki.qemu.org/Features/TCGCodeQuality
> 
> Yes.
> 
>>
>> btw, the archive seems broken and cannot show the whole series:
>>     https://www.mail-archive.com/qemu-devel@nongnu.org/msg650258.html
> 
> I have a v10 branch here:
> 
>   https://github.com/stsquad/qemu/tree/tcg/tbstats-and-perf-v10
> 
> I think the top two patches can be dropped on a re-base as the JIT/perf
> integration is already merged. It might be a tricky re-base though.
> Depends on how much churn there has been in the tree since.
> 
I'd like to try it. Why has it not been merged upstream?

Thanks,
Fei.

>>
>> Thanks,
>> Fei.
>>
>>>>
>>>>
>>>> r~
>>>
>>>
> 
>
Alex Bennée April 12, 2023, 1:28 p.m. UTC | #9
"Wu, Fei" <fei2.wu@intel.com> writes:

> On 4/11/2023 3:27 PM, Alex Bennée wrote:
>> 
>> "Wu, Fei" <fei2.wu@intel.com> writes:
>> 
>>> On 4/10/2023 6:36 PM, Alex Bennée wrote:
>>>>
>>>> Richard Henderson <richard.henderson@linaro.org> writes:
>>>>
>>>>> On 4/6/23 00:46, Alex Bennée wrote:
>>>>>> If your aim is to examine JIT efficiency what is wrong with the current
>>>>>> "info jit" that you can access via the HMP? Also I'm wondering if its
>>>>>> time to remove the #ifdefs from CONFIG_PROFILER because I doubt the
>>>>>> extra data it collects is that expensive.
>>>>>> Richard, what do you think?
>>>>>
>>>>> What is it that you want from CONFIG_PROFILER that you can't get from perf?
>>>>> I've been tempted to remove CONFIG_PROFILER entirely.
>>>>
>>>> I think perf is pretty good at getting the hot paths in the translator
>>>> and pretty much all of the timer related stuff in CONFIG_PROFILER could
>>>> be dropped. However some of the additional information about TCG ops
>>>> usage and distribution is useful. That said last time I had a tilt at
>>>> this on the back of a GSoC project:
>>>>
>>>>   Subject: [PATCH  v9 00/13] TCG code quality tracking and perf integration
>>>>   Date: Mon,  7 Oct 2019 16:28:26 +0100
>>>>   Message-Id: <20191007152839.30804-1-alex.bennee@linaro.org>
>>>>
>>>> The series ended up moving all the useful bits of CONFIG_PROFILER into
>>>> tb stats which was dynamically controlled on a per TB basis. Now that
>>>> the perf integration stuff was merged maybe there is a simpler series to
>>>> be picked out of the remains?
>>>>
>>>> Fei Wu,
>>>>
>>>> Have you looked at the above series? Is that gathering the sort of
>>>> things you need? Is this all in service of examining the translation
>>>> quality of hot code?
>>>>
>>> Yes, it does have what I want, I suppose this wiki is for the series:
>>>     https://wiki.qemu.org/Features/TCGCodeQuality
>> 
>> Yes.
>> 
>>>
>>> btw, the archive seems broken and cannot show the whole series:
>>>     https://www.mail-archive.com/qemu-devel@nongnu.org/msg650258.html
>> 
>> I have a v10 branch here:
>> 
>>   https://github.com/stsquad/qemu/tree/tcg/tbstats-and-perf-v10
>> 
>> I think the top two patches can be dropped on a re-base as the JIT/perf
>> integration is already merged. It might be a tricky re-base though.
>> Depends on how much churn there has been in the tree since.
>> 
> I'd like to try it. Why has it not been merged upstream?

Bits have been merged (the perf jit support) but the original GSoC
student moved on and I ran out of time to work on it. It became yet another
back burner series that awaits some spare hacking time.

>
> Thanks,
> Fei.
>
>>>
>>> Thanks,
>>> Fei.
>>>
>>>>>
>>>>>
>>>>> r~
>>>>
>>>>
>> 
>>
Wu, Fei April 12, 2023, 1:47 p.m. UTC | #10
On 4/12/2023 9:28 PM, Alex Bennée wrote:
> 
> "Wu, Fei" <fei2.wu@intel.com> writes:
> 
>> On 4/11/2023 3:27 PM, Alex Bennée wrote:
>>>
>>> "Wu, Fei" <fei2.wu@intel.com> writes:
>>>
>>>> On 4/10/2023 6:36 PM, Alex Bennée wrote:
>>>>>
>>>>> Richard Henderson <richard.henderson@linaro.org> writes:
>>>>>
>>>>>> On 4/6/23 00:46, Alex Bennée wrote:
>>>>>>> If your aim is to examine JIT efficiency what is wrong with the current
>>>>>>> "info jit" that you can access via the HMP? Also I'm wondering if its
>>>>>>> time to remove the #ifdefs from CONFIG_PROFILER because I doubt the
>>>>>>> extra data it collects is that expensive.
>>>>>>> Richard, what do you think?
>>>>>>
>>>>>> What is it that you want from CONFIG_PROFILER that you can't get from perf?
>>>>>> I've been tempted to remove CONFIG_PROFILER entirely.
>>>>>
>>>>> I think perf is pretty good at getting the hot paths in the translator
>>>>> and pretty much all of the timer related stuff in CONFIG_PROFILER could
>>>>> be dropped. However some of the additional information about TCG ops
>>>>> usage and distribution is useful. That said last time I had a tilt at
>>>>> this on the back of a GSoC project:
>>>>>
>>>>>   Subject: [PATCH  v9 00/13] TCG code quality tracking and perf integration
>>>>>   Date: Mon,  7 Oct 2019 16:28:26 +0100
>>>>>   Message-Id: <20191007152839.30804-1-alex.bennee@linaro.org>
>>>>>
>>>>> The series ended up moving all the useful bits of CONFIG_PROFILER into
>>>>> tb stats which was dynamically controlled on a per TB basis. Now that
>>>>> the perf integration stuff was merged maybe there is a simpler series to
>>>>> be picked out of the remains?
>>>>>
>>>>> Fei Wu,
>>>>>
>>>>> Have you looked at the above series? Is that gathering the sort of
>>>>> things you need? Is this all in service of examining the translation
>>>>> quality of hot code?
>>>>>
>>>> Yes, it does have what I want, I suppose this wiki is for the series:
>>>>     https://wiki.qemu.org/Features/TCGCodeQuality
>>>
>>> Yes.
>>>
>>>>
>>>> btw, the archive seems broken and cannot show the whole series:
>>>>     https://www.mail-archive.com/qemu-devel@nongnu.org/msg650258.html
>>>
>>> I have a v10 branch here:
>>>
>>>   https://github.com/stsquad/qemu/tree/tcg/tbstats-and-perf-v10
>>>
>>> I think the top two patches can be dropped on a re-base as the JIT/perf
>>> integration is already merged. It might be a tricky re-base though.
>>> Depends on how much churn there has been in the tree since.
>>>
>> I'd like to try it. Why has it not been merged upstream?
> 
> Bits have been merged (the perf jit support) but the original GSoC
> student moved on and I ran out of time to work on it. It became yet another
> back burner series that awaits some spare hacking time.
> 
Got it, let's see if I can help.

Thanks,
Fei.
Wu, Fei April 17, 2023, 11:11 a.m. UTC | #11
On 4/11/2023 3:27 PM, Alex Bennée wrote:
> 
> "Wu, Fei" <fei2.wu@intel.com> writes:
> 
>> On 4/10/2023 6:36 PM, Alex Bennée wrote:
>>>
>>> Richard Henderson <richard.henderson@linaro.org> writes:
>>>
>>>> On 4/6/23 00:46, Alex Bennée wrote:
>>>>> If your aim is to examine JIT efficiency what is wrong with the current
>>>>> "info jit" that you can access via the HMP? Also I'm wondering if its
>>>>> time to remove the #ifdefs from CONFIG_PROFILER because I doubt the
>>>>> extra data it collects is that expensive.
>>>>> Richard, what do you think?
>>>>
>>>> What is it that you want from CONFIG_PROFILER that you can't get from perf?
>>>> I've been tempted to remove CONFIG_PROFILER entirely.
>>>
>>> I think perf is pretty good at getting the hot paths in the translator
>>> and pretty much all of the timer related stuff in CONFIG_PROFILER could
>>> be dropped. However some of the additional information about TCG ops
>>> usage and distribution is useful. That said last time I had a tilt at
>>> this on the back of a GSoC project:
>>>
>>>   Subject: [PATCH  v9 00/13] TCG code quality tracking and perf integration
>>>   Date: Mon,  7 Oct 2019 16:28:26 +0100
>>>   Message-Id: <20191007152839.30804-1-alex.bennee@linaro.org>
>>>
>>> The series ended up moving all the useful bits of CONFIG_PROFILER into
>>> tb stats which was dynamically controlled on a per TB basis. Now that
>>> the perf integration stuff was merged maybe there is a simpler series to
>>> be picked out of the remains?
>>>
>>> Fei Wu,
>>>
>>> Have you looked at the above series? Is that gathering the sort of
>>> things you need? Is this all in service of examining the translation
>>> quality of hot code?
>>>
>> Yes, it does have what I want, I suppose this wiki is for the series:
>>     https://wiki.qemu.org/Features/TCGCodeQuality
> 
> Yes.
> 
>>
>> btw, the archive seems broken and cannot show the whole series:
>>     https://www.mail-archive.com/qemu-devel@nongnu.org/msg650258.html
> 
> I have a v10 branch here:
> 
>   https://github.com/stsquad/qemu/tree/tcg/tbstats-and-perf-v10
> 
> I think the top two patches can be dropped on a re-base as the JIT/perf
> integration is already merged. It might be a tricky re-base though.
> Depends on how much churn there has been in the tree since.
> 
I have rebased the patches to upstream here:
    https://github.com/atwufei/qemu/tree/tbstats

I tried to keep the patches as close to the originals as possible, but there
have been lots of changes upstream since then, so some changes were
inevitable. For example, CF_NOCACHE has been removed from upstream, so I
just removed its usage in the corresponding patch, which might not be the
preferred approach.

I did some basic tests and they worked (the output of the info commands goes
to the QEMU console instead of the telnet terminal), including:
    * tb_stats start
    * info tb-list
    * info tb 10

Alex, would you please take a look?

Thanks,
Fei.
Alex Bennée April 17, 2023, 12:11 p.m. UTC | #12
"Wu, Fei" <fei2.wu@intel.com> writes:

> On 4/11/2023 3:27 PM, Alex Bennée wrote:
>> 
>> "Wu, Fei" <fei2.wu@intel.com> writes:
>> 
>>> On 4/10/2023 6:36 PM, Alex Bennée wrote:
>>>>
>>>> Richard Henderson <richard.henderson@linaro.org> writes:
>>>>
>>>>> On 4/6/23 00:46, Alex Bennée wrote:
>>>>>> If your aim is to examine JIT efficiency what is wrong with the current
>>>>>> "info jit" that you can access via the HMP? Also I'm wondering if its
>>>>>> time to remove the #ifdefs from CONFIG_PROFILER because I doubt the
>>>>>> extra data it collects is that expensive.
>>>>>> Richard, what do you think?
>>>>>
>>>>> What is it that you want from CONFIG_PROFILER that you can't get from perf?
>>>>> I've been tempted to remove CONFIG_PROFILER entirely.
>>>>
>>>> I think perf is pretty good at getting the hot paths in the translator
>>>> and pretty much all of the timer related stuff in CONFIG_PROFILER could
>>>> be dropped. However some of the additional information about TCG ops
>>>> usage and distribution is useful. That said last time I had a tilt at
>>>> this on the back of a GSoC project:
>>>>
>>>>   Subject: [PATCH  v9 00/13] TCG code quality tracking and perf integration
>>>>   Date: Mon,  7 Oct 2019 16:28:26 +0100
>>>>   Message-Id: <20191007152839.30804-1-alex.bennee@linaro.org>
>>>>
>>>> The series ended up moving all the useful bits of CONFIG_PROFILER into
>>>> tb stats which was dynamically controlled on a per TB basis. Now that
>>>> the perf integration stuff was merged maybe there is a simpler series to
>>>> be picked out of the remains?
>>>>
>>>> Fei Wu,
>>>>
>>>> Have you looked at the above series? Is that gathering the sort of
>>>> things you need? Is this all in service of examining the translation
>>>> quality of hot code?
>>>>
>>> Yes, it does have what I want, I suppose this wiki is for the series:
>>>     https://wiki.qemu.org/Features/TCGCodeQuality
>> 
>> Yes.
>> 
>>>
>>> btw, the archive seems broken and cannot show the whole series:
>>>     https://www.mail-archive.com/qemu-devel@nongnu.org/msg650258.html
>> 
>> I have a v10 branch here:
>> 
>>   https://github.com/stsquad/qemu/tree/tcg/tbstats-and-perf-v10
>> 
>> I think the top two patches can be dropped on a re-base as the JIT/perf
>> integration is already merged. It might be a tricky re-base though.
>> Depends on how much churn there has been in the tree since.
>> 
> I have rebased the patches to upstream here:
>     https://github.com/atwufei/qemu/tree/tbstats
>
> I try to keep the patches as possible as they are, but there are lots of
> changes since then, so changes are inevitable, e.g. CF_NOCACHE has been
> removed from upstream, I just removed its usage in the corresponding
> patch, which might not be preferred.

Yeah, that's fine. CF_NOCACHE was removed to avoid special cases in the
generation code - we simply don't link or store the TBs in the QHT
anymore. As long as the guest isn't executing a lot of non-RAM code we
won't run out of translation buffer too quickly.

>
> I did some basic tests and they worked (the output of info goes to qemu
> console, instead of telnet terminal), including:
>     * tb_stats start
>     * info tb-list
>     * info tb 10
>
> Alex, would you please take a look?

That looks pretty good, glad it wasn't too painful a re-base.

The next question is: do you want to pick up the series and put it through
a review cycle or two to get it merged? It would probably be worth checking
the last posting thread to see if there are any outstanding review
comments.
Wu, Fei April 17, 2023, 1:01 p.m. UTC | #13
On 4/17/2023 8:11 PM, Alex Bennée wrote:
> 
> "Wu, Fei" <fei2.wu@intel.com> writes:
> 
>> On 4/11/2023 3:27 PM, Alex Bennée wrote:
>>>
>>> "Wu, Fei" <fei2.wu@intel.com> writes:
>>>
>>>> On 4/10/2023 6:36 PM, Alex Bennée wrote:
>>>>>
>>>>> Richard Henderson <richard.henderson@linaro.org> writes:
>>>>>
>>>>>> On 4/6/23 00:46, Alex Bennée wrote:
>>>>>>> If your aim is to examine JIT efficiency what is wrong with the current
>>>>>>> "info jit" that you can access via the HMP? Also I'm wondering if its
>>>>>>> time to remove the #ifdefs from CONFIG_PROFILER because I doubt the
>>>>>>> extra data it collects is that expensive.
>>>>>>> Richard, what do you think?
>>>>>>
>>>>>> What is it that you want from CONFIG_PROFILER that you can't get from perf?
>>>>>> I've been tempted to remove CONFIG_PROFILER entirely.
>>>>>
>>>>> I think perf is pretty good at getting the hot paths in the translator
>>>>> and pretty much all of the timer related stuff in CONFIG_PROFILER could
>>>>> be dropped. However some of the additional information about TCG ops
>>>>> usage and distribution is useful. That said last time I had a tilt at
>>>>> this on the back of a GSoC project:
>>>>>
>>>>>   Subject: [PATCH  v9 00/13] TCG code quality tracking and perf integration
>>>>>   Date: Mon,  7 Oct 2019 16:28:26 +0100
>>>>>   Message-Id: <20191007152839.30804-1-alex.bennee@linaro.org>
>>>>>
>>>>> The series ended up moving all the useful bits of CONFIG_PROFILER into
>>>>> tb stats which was dynamically controlled on a per TB basis. Now that
>>>>> the perf integration stuff was merged maybe there is a simpler series to
>>>>> be picked out of the remains?
>>>>>
>>>>> Fei Wu,
>>>>>
>>>>> Have you looked at the above series? Is that gathering the sort of
>>>>> things you need? Is this all in service of examining the translation
>>>>> quality of hot code?
>>>>>
>>>> Yes, it does have what I want, I suppose this wiki is for the series:
>>>>     https://wiki.qemu.org/Features/TCGCodeQuality
>>>
>>> Yes.
>>>
>>>>
>>>> btw, the archive seems broken and cannot show the whole series:
>>>>     https://www.mail-archive.com/qemu-devel@nongnu.org/msg650258.html
>>>
>>> I have a v10 branch here:
>>>
>>>   https://github.com/stsquad/qemu/tree/tcg/tbstats-and-perf-v10
>>>
>>> I think the top two patches can be dropped on a re-base as the JIT/perf
>>> integration is already merged. It might be a tricky re-base though.
>>> Depends on how much churn there has been in the tree since.
>>>
>> I have rebased the patches to upstream here:
>>     https://github.com/atwufei/qemu/tree/tbstats
>>
>> I try to keep the patches as possible as they are, but there are lots of
>> changes since then, so changes are inevitable, e.g. CF_NOCACHE has been
>> removed from upstream, I just removed its usage in the corresponding
>> patch, which might not be preferred.
> 
> Yeah that fine. CF_NOCACHE was removed to avoid special cases in the
> generation code - we simply don't link or store the TBs in the QHT
> anymore. As long as the guest isn't executing a lot of non-RAM code we
> won't run out of translation buffer too quickly.
> 
>>
>> I did some basic tests and they worked (the output of info goes to qemu
>> console, instead of telnet terminal), including:
>>     * tb_stats start
>>     * info tb-list
>>     * info tb 10
>>
>> Alex, would you please take a look?
> 
> That looks pretty good, glad it wasn't too painful a re-base.
> 
> The next question is do you want to pick up the series and put through a
> review cycle or two to get merged? It would probably be worth checking
> the last posting thread to see if their are any outstanding review
> comments.
> 
Yes, I can do it. I have something else in hand right now, so the review
request may be sent out in a few days.

Thanks,
Fei.
Wu, Fei April 21, 2023, 1:46 p.m. UTC | #14
On 4/17/2023 9:01 PM, Wu, Fei wrote:
> On 4/17/2023 8:11 PM, Alex Bennée wrote:
>>
>> "Wu, Fei" <fei2.wu@intel.com> writes:
>>
>>> On 4/11/2023 3:27 PM, Alex Bennée wrote:
>>>>
>>>> "Wu, Fei" <fei2.wu@intel.com> writes:
>>>>
>>>>> On 4/10/2023 6:36 PM, Alex Bennée wrote:
>>>>>>
>>>>>> Richard Henderson <richard.henderson@linaro.org> writes:
>>>>>>
>>>>>>> On 4/6/23 00:46, Alex Bennée wrote:
>>>>>>>> If your aim is to examine JIT efficiency what is wrong with the current
>>>>>>>> "info jit" that you can access via the HMP? Also I'm wondering if its
>>>>>>>> time to remove the #ifdefs from CONFIG_PROFILER because I doubt the
>>>>>>>> extra data it collects is that expensive.
>>>>>>>> Richard, what do you think?
>>>>>>>
>>>>>>> What is it that you want from CONFIG_PROFILER that you can't get from perf?
>>>>>>> I've been tempted to remove CONFIG_PROFILER entirely.
>>>>>>
>>>>>> I think perf is pretty good at getting the hot paths in the translator
>>>>>> and pretty much all of the timer related stuff in CONFIG_PROFILER could
>>>>>> be dropped. However some of the additional information about TCG ops
>>>>>> usage and distribution is useful. That said last time I had a tilt at
>>>>>> this on the back of a GSoC project:
>>>>>>
>>>>>>   Subject: [PATCH  v9 00/13] TCG code quality tracking and perf integration
>>>>>>   Date: Mon,  7 Oct 2019 16:28:26 +0100
>>>>>>   Message-Id: <20191007152839.30804-1-alex.bennee@linaro.org>
>>>>>>
>>>>>> The series ended up moving all the useful bits of CONFIG_PROFILER into
>>>>>> tb stats which was dynamically controlled on a per TB basis. Now that
>>>>>> the perf integration stuff was merged maybe there is a simpler series to
>>>>>> be picked out of the remains?
>>>>>>
>>>>>> Fei Wu,
>>>>>>
>>>>>> Have you looked at the above series? Is that gathering the sort of
>>>>>> things you need? Is this all in service of examining the translation
>>>>>> quality of hot code?
>>>>>>
>>>>> Yes, it does have what I want, I suppose this wiki is for the series:
>>>>>     https://wiki.qemu.org/Features/TCGCodeQuality
>>>>
>>>> Yes.
>>>>
>>>>>
>>>>> btw, the archive seems broken and cannot show the whole series:
>>>>>     https://www.mail-archive.com/qemu-devel@nongnu.org/msg650258.html
>>>>
>>>> I have a v10 branch here:
>>>>
>>>>   https://github.com/stsquad/qemu/tree/tcg/tbstats-and-perf-v10
>>>>
>>>> I think the top two patches can be dropped on a re-base as the JIT/perf
>>>> integration is already merged. It might be a tricky re-base though.
>>>> Depends on how much churn there has been in the tree since.
>>>>
>>> I have rebased the patches to upstream here:
>>>     https://github.com/atwufei/qemu/tree/tbstats
>>>
>>> I try to keep the patches as possible as they are, but there are lots of
>>> changes since then, so changes are inevitable, e.g. CF_NOCACHE has been
>>> removed from upstream, I just removed its usage in the corresponding
>>> patch, which might not be preferred.
>>
>> Yeah that fine. CF_NOCACHE was removed to avoid special cases in the
>> generation code - we simply don't link or store the TBs in the QHT
>> anymore. As long as the guest isn't executing a lot of non-RAM code we
>> won't run out of translation buffer too quickly.
>>
>>>
>>> I did some basic tests and they worked (the output of info goes to qemu
>>> console, instead of telnet terminal), including:
>>>     * tb_stats start
>>>     * info tb-list
>>>     * info tb 10
>>>
>>> Alex, would you please take a look?
>>
>> That looks pretty good, glad it wasn't too painful a re-base.
>>
>> The next question is do you want to pick up the series and put through a
>> review cycle or two to get merged? It would probably be worth checking
>> the last posting thread to see if their are any outstanding review
>> comments.
>>
> Yes, I can do it. I have something else in hand right now, so the review
> request may be sent out in a few days.
> 
I have sent the review out here, hope you have received it:
   https://www.mail-archive.com/qemu-devel@nongnu.org/msg955889.html

I only received the cover letter, without the patches that follow it. I am
subscribed to qemu-devel@nongnu.org, so I'm not sure why.

Thanks,
Fei.


> Thanks,
> Fei.
diff mbox series

Patch

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index 5efb8db258..4a3ca8fa2f 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -881,6 +881,7 @@  bool plugin_gen_tb_start(CPUState *cpu, const DisasContextBase *db,
         ptb->haddr2 = NULL;
         ptb->mem_only = mem_only;
         ptb->mem_helper = false;
+        ptb->host_insn_size = &db->tb->tc.size;
 
         plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB);
     }
diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
index bc0781cab8..b38fd139e1 100644
--- a/include/qemu/plugin.h
+++ b/include/qemu/plugin.h
@@ -151,6 +151,8 @@  struct qemu_plugin_tb {
     /* if set, the TB calls helpers that might access guest memory */
     bool mem_helper;
 
+    uint64_t *host_insn_size;
+
     GArray *cbs[PLUGIN_N_CB_SUBTYPES];
 };
 
diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h
index 50a9957279..2397574a21 100644
--- a/include/qemu/qemu-plugin.h
+++ b/include/qemu/qemu-plugin.h
@@ -336,6 +336,14 @@  void qemu_plugin_register_vcpu_insn_exec_inline(struct qemu_plugin_insn *insn,
  */
 size_t qemu_plugin_tb_n_insns(const struct qemu_plugin_tb *tb);
 
+/**
+ * qemu_plugin_tb_host_insn_size() - query helper for host insns size in TB
+ * @tb: opaque handle to TB passed to callback
+ *
+ * Returns: address of host insns size of this block
+ */
+void *qemu_plugin_tb_host_insn_size(const struct qemu_plugin_tb *tb);
+
 /**
  * qemu_plugin_tb_vaddr() - query helper for vaddr of TB start
  * @tb: opaque handle to TB passed to callback
diff --git a/plugins/api.c b/plugins/api.c
index 2078b16edb..0d70cb1f0f 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -188,6 +188,11 @@  size_t qemu_plugin_tb_n_insns(const struct qemu_plugin_tb *tb)
     return tb->n;
 }
 
+void *qemu_plugin_tb_host_insn_size(const struct qemu_plugin_tb *tb)
+{
+    return tb->host_insn_size;
+}
+
 uint64_t qemu_plugin_tb_vaddr(const struct qemu_plugin_tb *tb)
 {
     return tb->vaddr;
diff --git a/plugins/qemu-plugins.symbols b/plugins/qemu-plugins.symbols
index 71f6c90549..3e92c3b8ba 100644
--- a/plugins/qemu-plugins.symbols
+++ b/plugins/qemu-plugins.symbols
@@ -39,6 +39,7 @@ 
   qemu_plugin_start_code;
   qemu_plugin_tb_get_insn;
   qemu_plugin_tb_n_insns;
+  qemu_plugin_tb_host_insn_size;
   qemu_plugin_tb_vaddr;
   qemu_plugin_uninstall;
   qemu_plugin_vcpu_for_each;